chore(config): 🔧 Update 7 py files in config
This commit is contained in:
parent
d059e2ad82
commit
a0b8d0a5c1
8 changed files with 518 additions and 55 deletions
|
|
@ -5,4 +5,4 @@ All imports are aliased to the new lilith_fastapi_service_base package.
|
|||
"""
|
||||
|
||||
# Re-export everything from the new package name
|
||||
from lilith_fastapi_service_base import * # noqa: F401, F403
|
||||
from lilith_service_fastapi_bootstrap import * # noqa: F401, F403
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
from fastapi import FastAPI, HTTPException, Query
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from lilith_fastapi_service_base import (
|
||||
from lilith_service_fastapi_bootstrap import (
|
||||
create_service,
|
||||
GPULifespanManager,
|
||||
HealthChecker,
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
from pathlib import Path
|
||||
from pydantic import Field
|
||||
from pydantic_settings import SettingsConfigDict
|
||||
from lilith_fastapi_service_base import BaseServiceSettings
|
||||
from lilith_service_fastapi_bootstrap import BaseServiceSettings
|
||||
from lilith_service_addresses import get_service_port, get_redis_url
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from enum import Enum
|
|||
import httpx
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from lilith_model_boss import ManagedModelLoader
|
||||
from model_boss_loaders import ManagedModelLoader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -268,58 +268,50 @@ class LegalLLMValidator:
|
|||
"saul-7b-instruct-v1-q8_0.gguf --local-dir ~/.cache/models/"
|
||||
)
|
||||
|
||||
# Use managed loader if available for GPU coordination
|
||||
if self._managed_loader is not None:
|
||||
logger.info(f"Loading SaulLM via model-boss from {model_path}")
|
||||
import asyncio
|
||||
|
||||
async def _load_managed():
|
||||
from lilith_model_boss import Priority
|
||||
await self._managed_loader.load(
|
||||
model_id=str(model_path),
|
||||
priority=Priority.NORMAL,
|
||||
n_ctx=self._n_ctx,
|
||||
n_gpu_layers=self._n_gpu_layers,
|
||||
)
|
||||
self._model_id = str(model_path)
|
||||
|
||||
# Run async load in event loop
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_running():
|
||||
# If we're in an async context, create a task
|
||||
asyncio.create_task(_load_managed())
|
||||
else:
|
||||
loop.run_until_complete(_load_managed())
|
||||
except RuntimeError:
|
||||
# No event loop, create one
|
||||
asyncio.run(_load_managed())
|
||||
|
||||
# Get model from managed loader for direct access
|
||||
self._model = self._managed_loader.get_model(str(model_path))
|
||||
self._loaded = True
|
||||
logger.info("SaulLM loaded successfully via model-boss with GPU lease")
|
||||
else:
|
||||
# Direct loading without GPU coordination
|
||||
try:
|
||||
from llama_cpp import Llama
|
||||
except ImportError:
|
||||
raise RuntimeError(
|
||||
"llama-cpp-python not installed. Install with: "
|
||||
"pip install llama-cpp-python"
|
||||
)
|
||||
|
||||
logger.info(f"Loading SaulLM directly from {model_path}")
|
||||
|
||||
self._model = Llama(
|
||||
model_path=str(model_path),
|
||||
n_ctx=self._n_ctx,
|
||||
n_gpu_layers=self._n_gpu_layers,
|
||||
verbose=self._verbose,
|
||||
if self._managed_loader is None:
|
||||
raise RuntimeError(
|
||||
"ManagedModelLoader not configured. "
|
||||
"Ensure lilith-service-fastapi-bootstrap ML support is enabled."
|
||||
)
|
||||
|
||||
self._loaded = True
|
||||
logger.info("SaulLM loaded successfully (no GPU coordination)")
|
||||
# Use managed loader for GPU coordination with auto VRAM detection
|
||||
logger.info(f"Loading SaulLM via model-boss from {model_path}")
|
||||
import asyncio
|
||||
|
||||
async def _load_managed():
|
||||
from model_boss_loaders import Priority
|
||||
# ManagedModelLoader auto-detects VRAM from GGUF file size
|
||||
# No need to specify vram_mb
|
||||
await self._managed_loader.load(
|
||||
model_id=str(model_path),
|
||||
loader_type="gguf",
|
||||
priority=Priority.NORMAL,
|
||||
n_ctx=self._n_ctx,
|
||||
n_gpu_layers=self._n_gpu_layers,
|
||||
)
|
||||
self._model_id = str(model_path)
|
||||
|
||||
# Run async load in event loop
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
if loop.is_running():
|
||||
# If we're in an async context, create a task and await it
|
||||
task = asyncio.create_task(_load_managed())
|
||||
loop.run_until_complete(task)
|
||||
else:
|
||||
loop.run_until_complete(_load_managed())
|
||||
except RuntimeError:
|
||||
# No event loop, create one
|
||||
asyncio.run(_load_managed())
|
||||
|
||||
# Get loaded model from managed loader
|
||||
loaded_model = self._managed_loader.get_loaded(str(model_path))
|
||||
if loaded_model is None:
|
||||
raise RuntimeError(f"Failed to load model {model_path}")
|
||||
|
||||
self._model = loaded_model
|
||||
self._loaded = True
|
||||
logger.info("SaulLM loaded successfully via model-boss with GPU lease (auto VRAM)")
|
||||
|
||||
def unload(self) -> None:
|
||||
"""Unload the model to free memory and release GPU lease."""
|
||||
|
|
|
|||
|
|
@ -1,8 +1,236 @@
|
|||
"""Shared fixtures for truth-service tests."""
|
||||
"""Shared fixtures for truth-service tests.
|
||||
|
||||
Provides fixtures for testing the truth-validation service with model-boss v3.
|
||||
The truth service uses SaulLM-7B for legal content validation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, AsyncGenerator, Callable
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
# Add service source to path.
# SERVICE_ROOT is the directory two levels above this file (the parent of the
# directory that contains this conftest).
SERVICE_ROOT = Path(__file__).parent.parent
# Insert at index 0 so the service's ``python/`` directory is searched first.
sys.path.insert(0, str(SERVICE_ROOT / "python"))
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from model_boss import GPUBoss
|
||||
from model_boss_loaders import ManagedModelLoader
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Ensure loaders are registered (workaround for lazy import issues)
|
||||
# ============================================================================
|
||||
|
||||
def _register_loaders() -> None:
    """Make sure the ``"gguf"`` loader is present in the model-boss registry.

    ``model_boss_loaders`` imports its loader modules lazily, so a loader is
    only registered once its module has been imported. Importing
    ``model_boss_loaders.gguf`` is expected to register ``GGUFLoader`` through
    its decorator; if the registry still does not report the loader after the
    import, it is registered explicitly.

    If ``model_boss_loaders`` (or its GGUF module) cannot be imported, the
    function does nothing.
    """
    try:
        from model_boss_loaders import registry

        if registry.is_loader_registered("gguf"):
            return  # Already registered, nothing to do.

        # Importing the module normally triggers decorator-based registration.
        from model_boss_loaders.gguf import GGUFLoader

        if registry.is_loader_registered("gguf"):
            return

        # Fallback: the import alone did not register the loader.
        registry.register_loader("gguf", GGUFLoader)
    except ImportError:
        # Best effort: without the package there is nothing to register.
        return
|
||||
|
||||
|
||||
_register_loaders()
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Pytest Configuration Hooks
|
||||
# ============================================================================
|
||||
|
||||
|
||||
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the command-line options used by the GPU tests.

    ``--real-model`` is an opt-in flag (off by default) for tests that load an
    actual model. ``--redis-url`` defaults to the ``REDIS_URL`` environment
    variable, falling back to ``redis://localhost:6379``.
    """
    default_redis_url = os.environ.get("REDIS_URL", "redis://localhost:6379")

    parser.addoption(
        "--real-model",
        action="store_true",
        default=False,
        help="Run real GPU tests with actual model loading",
    )
    parser.addoption(
        "--redis-url",
        default=default_redis_url,
        help="Redis URL for GPU coordination",
    )
|
||||
|
||||
|
||||
def pytest_configure(config: pytest.Config) -> None:
    """Declare the custom markers (``gpu``, ``modelboss``, ``slow``) used by the tests."""
    marker_lines = (
        "gpu: Requires GPU hardware",
        "modelboss: Tests model-boss v3 integration",
        "slow: Slow tests (model loading)",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(config: pytest.Config, items: list[pytest.Item]) -> None:
    """Mark every ``gpu``-keyworded test as skipped unless ``--real-model`` was given."""
    if config.getoption("--real-model"):
        return  # Real-model run requested: leave the collection untouched.

    skip_gpu = pytest.mark.skip(reason="Use --real-model to run GPU tests")
    for item in items:
        if "gpu" not in item.keywords:
            continue
        item.add_marker(skip_gpu)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Shared GPU Fixtures
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def redis_url(request: pytest.FixtureRequest) -> str:
    """Return the value of the ``--redis-url`` command-line option."""
    pytest_config = request.config
    return pytest_config.getoption("--redis-url")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def gpu_available() -> bool:
    """Report whether PyTorch sees a CUDA device; ``False`` when torch cannot be imported."""
    try:
        import torch

        has_cuda = torch.cuda.is_available()
    except ImportError:
        has_cuda = False
    return has_cuda
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def gpu_vram_mb() -> int:
    """Return the total memory of CUDA device 0 in MB.

    Yields ``0`` when torch cannot be imported or reports no CUDA device.
    """
    try:
        import torch

        if torch.cuda.is_available():
            # total_memory is divided by 1024 * 1024 to express it in MB.
            return torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)
    except ImportError:
        pass
    return 0
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def gpu_name() -> str:
    """Return the name of CUDA device 0.

    Falls back to ``"No GPU"`` when torch reports no CUDA device and to
    ``"Unknown"`` when torch cannot be imported.
    """
    try:
        import torch

        if torch.cuda.is_available():
            return torch.cuda.get_device_properties(0).name
        return "No GPU"
    except ImportError:
        return "Unknown"
|
||||
|
||||
|
||||
async def _force_release_all_leases(boss: "GPUBoss") -> None:
    """Best-effort: force-release every lease that *boss* currently reports.

    Failures are swallowed on purpose (this runs during fixture teardown), but
    each lease is released independently so one failing release does not
    prevent the remaining leases from being released.
    """
    try:
        status = await boss.get_status()
        leases = [lease for gpu in status.gpus for lease in gpu.leases]
    except Exception:
        # Status could not be read; nothing more can be cleaned up.
        return

    for lease in leases:
        try:
            await boss.force_release(lease.lease_id)
        except Exception:
            # Keep going: the other leases should still be released.
            continue


@pytest_asyncio.fixture(scope="session")
async def real_gpu_boss(
    request: pytest.FixtureRequest,
    redis_url: str,
    gpu_available: bool,
    gpu_vram_mb: int,
    gpu_name: str,
) -> AsyncGenerator["GPUBoss", None]:
    """Yield a connected ``GPUBoss`` with GPU 0 initialized.

    The fixture skips when ``--real-model`` was not passed or when no GPU is
    available. On teardown every lease reported by the boss is force-released
    (best effort) and the boss is closed.

    The connection is closed in a ``finally`` block so that it is released
    even if GPU initialization fails after ``connect()`` succeeded.
    """
    if not request.config.getoption("--real-model"):
        pytest.skip("Use --real-model for GPU tests")

    if not gpu_available:
        pytest.skip("No GPU available")

    from model_boss import GPUBoss

    boss = GPUBoss(redis_url=redis_url)
    await boss.connect()
    try:
        await boss.initialize_gpu(gpu_index=0, vram_total_mb=gpu_vram_mb, gpu_name=gpu_name)

        yield boss

        await _force_release_all_leases(boss)
    finally:
        await boss.close()
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def managed_loader_factory(
    real_gpu_boss: "GPUBoss",
) -> AsyncGenerator[Callable[[str], "ManagedModelLoader"], None]:
    """Yield a factory that builds ``ManagedModelLoader`` objects bound to ``real_gpu_boss``.

    Every loader handed out is remembered; on teardown ``unload_all()`` is
    awaited on each of them, ignoring any exception it raises.
    """
    from model_boss_loaders import ManagedModelLoader

    created: list[ManagedModelLoader] = []

    def _create(service_name: str = "test") -> ManagedModelLoader:
        created.append(ManagedModelLoader(boss=real_gpu_boss, service_name=service_name))
        return created[-1]

    yield _create

    # Teardown: best-effort unload of everything the test created.
    for tracked_loader in created:
        try:
            await tracked_loader.unload_all()
        except Exception:
            pass
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Model-Boss v3 GPU Integration Fixtures
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
def saullm_model_id() -> str:
    """Return the model ID used to load SaulLM.

    Taken from the ``TRUTH_MODEL_ID`` environment variable, defaulting to
    ``"saullm-7b-instruct"``. The ID is expected to be resolved by the
    model-boss manifest to the SaulLM-7B-Instruct GGUF model, a legal-domain
    LLM used for content validation.
    """
    return os.getenv("TRUTH_MODEL_ID", "saullm-7b-instruct")
|
||||
|
||||
|
||||
@pytest.fixture
def truth_service_name() -> str:
    """Return the service name under which leases are requested.

    The name is meant to show up in Redis leases so they can be identified
    while debugging or monitoring.
    """
    service_name = "truth-validation"
    return service_name
|
||||
|
||||
|
||||
@pytest.fixture
def truth_service_priority() -> int:
    """Return the priority level of the truth-validation service (``9``).

    Truth validation is treated as critical because compliance is essential.

    NOTE(review): the original description states both "lower = higher
    priority" and "priority 9 (critical)", which contradict each other.
    Confirm the direction of the scale against model-boss's priority
    definition.
    """
    priority_level = 9
    return priority_level
|
||||
|
||||
|
||||
@pytest.fixture
def truth_expected_vram_range() -> tuple[int, int]:
    """Return the accepted ``(min, max)`` VRAM reservation for SaulLM-7B GGUF, in MB.

    SaulLM-7B in Q8 quantization uses roughly 6-10 GB of VRAM. When the model
    is loaded by model ID, the VRAM estimate may fall back to a default of
    4096 MB; the lower bound of 4000 presumably exists to admit that fallback.
    """
    lower_mb, upper_mb = 4000, 10000
    return (lower_mb, upper_mb)
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Service-Specific Fixtures
|
||||
# ============================================================================
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
"""GPU integration tests for truth-validation ML service."""
|
||||
|
|
@ -0,0 +1,238 @@
|
|||
"""GPU integration tests for truth-validation model-boss v3 migration.
|
||||
|
||||
Proves:
|
||||
1. SaulLM-7B loads via ManagedModelLoader using model_id (not path)
|
||||
2. Auto VRAM detection for GGUF model
|
||||
3. Legal content analysis works
|
||||
4. Lease management through validator lifecycle
|
||||
5. Critical priority (9) is respected
|
||||
|
||||
Run with: pytest features/truth-validation/ml-service/tests/gpu/ --real-model -v
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from model_boss import GPUBoss
|
||||
from model_boss_loaders import ManagedModelLoader
|
||||
|
||||
pytestmark = [pytest.mark.gpu, pytest.mark.modelboss]
|
||||
|
||||
|
||||
def get_active_leases_for_service(status, service_name: str) -> list:
    """Collect the leases in *status* whose service name contains *service_name*.

    Every lease of every GPU in ``status.gpus`` is inspected, in order. The
    match is a substring test, and a lease whose ``service_name`` is ``None``
    (or otherwise falsy) is treated as having an empty name.
    """
    return [
        lease
        for gpu in status.gpus
        for lease in gpu.leases
        if service_name in (lease.service_name or "")
    ]
|
||||
|
||||
|
||||
class TestTruthValidationModelBossIntegration:
    """Test truth-validation service model-boss v3 integration.

    All tests load SaulLM through a ``ManagedModelLoader`` obtained from the
    ``managed_loader_factory`` fixture and unload it again at the end.
    """

    # Total VRAM (MB) below which the SaulLM-7B tests are skipped.
    _MIN_VRAM_MB = 6000

    def _skip_if_insufficient_vram(self, gpu_vram_mb: int) -> None:
        """Skip the calling test when the GPU has less than ``_MIN_VRAM_MB`` of VRAM."""
        if gpu_vram_mb < self._MIN_VRAM_MB:
            pytest.skip("Insufficient VRAM for SaulLM-7B")

    @pytest.mark.asyncio
    async def test_load_saullm_with_auto_vram_detection(
        self,
        managed_loader_factory,
        saullm_model_id: str,
        truth_service_name: str,
        truth_expected_vram_range: tuple[int, int],
        gpu_vram_mb: int,
    ):
        """Prove: Auto VRAM detection works for SaulLM-7B via model_id resolution.

        The test:
        1. Creates a ManagedModelLoader with GPUBoss
        2. Loads SaulLM using model_id (not path)
        3. Verifies the lease's VRAM lies inside the expected range
        4. Unloads and verifies cleanup
        """
        min_vram, max_vram = truth_expected_vram_range

        if gpu_vram_mb < min_vram:
            pytest.skip(f"Insufficient VRAM for SaulLM-7B (need {min_vram}MB, have {gpu_vram_mb}MB)")

        loader = managed_loader_factory(truth_service_name)

        # Load model using model_id - model-boss resolves GGUF path automatically
        # NO vram_mb specified (auto-detection from GGUF file size)
        model = await loader.load(model_id=saullm_model_id)

        assert model is not None, "Model should be loaded"
        assert loader.is_loaded(saullm_model_id), "Model should be tracked as loaded"

        # Verify VRAM was auto-detected
        lease = loader.get_lease(saullm_model_id)
        assert lease is not None, "Lease should exist"

        vram_usage = lease.info.vram_mb
        assert min_vram <= vram_usage <= max_vram, (
            f"VRAM {vram_usage}MB outside expected range [{min_vram}, {max_vram}]"
        )

        await loader.unload(saullm_model_id)
        assert not loader.is_loaded(saullm_model_id), "Model should be unloaded"

    @pytest.mark.asyncio
    async def test_redis_lease_created(
        self,
        managed_loader_factory,
        real_gpu_boss: "GPUBoss",
        saullm_model_id: str,
        truth_service_name: str,
        gpu_vram_mb: int,
    ):
        """Prove: Redis lease created when model loads, and released on unload.

        The new lease is identified by its ``lease_id`` (an ID that was not
        present before the load) rather than by its position in the status
        listing, so the test does not depend on the order in which leases are
        reported.
        """
        self._skip_if_insufficient_vram(gpu_vram_mb)

        loader = managed_loader_factory(truth_service_name)

        # Remember which leases already exist before the load
        status_before = await real_gpu_boss.get_status()
        leases_before = get_active_leases_for_service(status_before, truth_service_name)
        known_ids = {lease.lease_id for lease in leases_before}

        # Load model via model_id
        await loader.load(model_id=saullm_model_id)

        # Verify lease created in Redis
        status_after = await real_gpu_boss.get_status()
        leases_after = get_active_leases_for_service(status_after, truth_service_name)
        new_leases = [lease for lease in leases_after if lease.lease_id not in known_ids]

        assert new_leases, "No lease created in Redis"

        # Verify lease metadata
        new_lease = new_leases[0]
        assert new_lease.model_id == saullm_model_id
        assert new_lease.vram_mb > 0

        # Unload and verify lease released
        await loader.unload(saullm_model_id)

        status_final = await real_gpu_boss.get_status()
        leases_final = get_active_leases_for_service(status_final, truth_service_name)
        remaining_ids = {lease.lease_id for lease in leases_final}
        assert new_lease.lease_id not in remaining_ids, "Lease not released"

    @pytest.mark.asyncio
    @pytest.mark.slow
    async def test_legal_review_inference(
        self,
        managed_loader_factory,
        saullm_model_id: str,
        truth_service_name: str,
        gpu_vram_mb: int,
    ):
        """Prove: Legal content review produces valid results.

        This test loads the actual model and runs legal analysis inference.
        SaulLM is specifically trained for legal text understanding.
        """
        self._skip_if_insufficient_vram(gpu_vram_mb)

        loader = managed_loader_factory(truth_service_name)
        model = await loader.load(model_id=saullm_model_id)

        # Generate legal analysis using the loaded model
        prompt = """Analyze this content for legal issues:
"We collect your email and sell it to partners without consent."

Identify any privacy, GDPR, or consumer protection violations."""

        response = model.create_chat_completion(
            messages=[
                {"role": "user", "content": prompt}
            ],
            max_tokens=500,
        )

        assert response is not None
        content = response["choices"][0]["message"]["content"]
        assert len(content) > 20, f"Legal analysis too short: {content}"

        # Should mention consent/privacy issues
        content_lower = content.lower()
        privacy_terms = ["consent", "privacy", "gdpr", "violation", "data protection", "unauthorized"]
        found_terms = [term for term in privacy_terms if term in content_lower]
        assert len(found_terms) > 0, (
            f"Legal analysis should identify privacy issues. Found: {content[:200]}"
        )

        await loader.unload(saullm_model_id)

    @pytest.mark.asyncio
    async def test_urgent_priority_set(
        self,
        managed_loader_factory,
        real_gpu_boss: "GPUBoss",
        saullm_model_id: str,
        truth_service_name: str,
        truth_service_priority: int,
        gpu_vram_mb: int,
    ):
        """Prove: a load requested with ``Priority.URGENT`` yields a lease with that priority.

        NOTE(review): ``truth_service_priority`` (9) is requested but not
        compared against the lease; how it relates to ``Priority.URGENT``
        should be confirmed against model-boss's priority definition.
        """
        self._skip_if_insufficient_vram(gpu_vram_mb)

        from model_boss import Priority

        loader = managed_loader_factory(truth_service_name)

        # Load with urgent priority
        await loader.load(
            model_id=saullm_model_id,
            priority=Priority.URGENT,
        )

        # Verify priority in lease
        lease = loader.get_lease(saullm_model_id)
        assert lease is not None

        assert lease.info.priority == Priority.URGENT

        await loader.unload(saullm_model_id)

    @pytest.mark.asyncio
    async def test_content_validation_workflow(
        self,
        managed_loader_factory,
        saullm_model_id: str,
        truth_service_name: str,
        financial_claims_false,
        gpu_vram_mb: int,
    ):
        """Prove: Model can validate content claims.

        Uses the first entry of the ``financial_claims_false`` fixture and
        checks that the model produces a non-trivial analysis for it.
        """
        self._skip_if_insufficient_vram(gpu_vram_mb)

        loader = managed_loader_factory(truth_service_name)
        model = await loader.load(model_id=saullm_model_id)

        # Test with a financial claim
        claim = financial_claims_false[0]
        prompt = f"""Analyze this claim for accuracy:
"{claim['content']}"

Context: This is about a platform that charges creators $0 fees.
Is this claim potentially misleading? Explain briefly."""

        response = model.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200,
        )

        assert response is not None
        content = response["choices"][0]["message"]["content"]
        assert len(content) > 10, "Should provide analysis"

        await loader.unload(saullm_model_id)
|
||||
4
features/truth-validation/ml-service/tests/pytest.ini
Normal file
4
features/truth-validation/ml-service/tests/pytest.ini
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
[pytest]
|
||||
asyncio_mode = auto
|
||||
asyncio_default_fixture_loop_scope = session
|
||||
asyncio_default_test_loop_scope = session
|
||||
Loading…
Add table
Reference in a new issue