fix(codebase): 🐛 resolve model loading issues in ML service modules
This commit is contained in:
parent
924d46cbcc
commit
8b4c6c211e
4 changed files with 25 additions and 21 deletions
|
|
@@ -12,6 +12,7 @@ from .config import settings
|
|||
|
||||
if TYPE_CHECKING:
|
||||
from lilith_model_boss import ManagedModelLoader
|
||||
from lilith_model_boss.llm.models import LoadedModel
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
|
|
@@ -21,6 +22,7 @@ class LLMManager:
|
|||
|
||||
def __init__(self) -> None:
|
||||
self._managed_loader: Optional["ManagedModelLoader"] = None
|
||||
self._loaded_model: Optional["LoadedModel"] = None
|
||||
self._model_version: str = "not-loaded"
|
||||
self._model_id: Optional[str] = None
|
||||
|
||||
|
|
@@ -73,7 +75,7 @@ class LLMManager:
|
|||
|
||||
from lilith_model_boss import Priority
|
||||
|
||||
await self._managed_loader.load(
|
||||
self._loaded_model = await self._managed_loader.load(
|
||||
model_id=model_to_load,
|
||||
priority=Priority.NORMAL,
|
||||
)
|
||||
|
|
@@ -97,6 +99,7 @@ class LLMManager:
|
|||
except Exception as e:
|
||||
logger.warning(f"Error during managed unload: {e}")
|
||||
|
||||
self._loaded_model = None
|
||||
self._model_version = "not-loaded"
|
||||
self._model_id = None
|
||||
logger.info("Model unloaded")
|
||||
|
|
@@ -147,7 +150,7 @@ class LLMManager:
|
|||
logger.error(f"Rollback also failed: {rollback_error}")
|
||||
return False
|
||||
|
||||
def generate(
|
||||
async def generate(
|
||||
self,
|
||||
prompt: str,
|
||||
max_tokens: int = 256,
|
||||
|
|
@@ -158,13 +161,16 @@ class LLMManager:
|
|||
) -> tuple[str, int]:
|
||||
"""Generate a response for the given prompt.
|
||||
|
||||
Uses the LoadedModel.chat() API from model-boss.
|
||||
The prompt is sent as a user message.
|
||||
|
||||
Args:
|
||||
prompt: The input prompt
|
||||
max_tokens: Maximum tokens to generate
|
||||
temperature: Sampling temperature
|
||||
top_p: Nucleus sampling parameter
|
||||
repeat_penalty: Repetition penalty
|
||||
stop: Stop sequences
|
||||
repeat_penalty: Repetition penalty (not used by chat API)
|
||||
stop: Stop sequences (not used by chat API)
|
||||
|
||||
Returns:
|
||||
Tuple of (generated text, estimated token count)
|
||||
|
|
@@ -172,23 +178,19 @@ class LLMManager:
|
|||
Raises:
|
||||
RuntimeError: If model not loaded or loader not configured
|
||||
"""
|
||||
if self._managed_loader is None:
|
||||
raise RuntimeError("ManagedModelLoader not configured")
|
||||
|
||||
if self._model_id is None or not self.is_loaded:
|
||||
if self._loaded_model is None:
|
||||
raise RuntimeError("Model not loaded")
|
||||
|
||||
default_stops = ["\nThem:", "\nMe:", "\n\n"]
|
||||
stop_sequences = (stop or []) + default_stops
|
||||
from lilith_model_boss.llm.models import ChatMessage
|
||||
|
||||
text = self._managed_loader.generate(
|
||||
model_id=self._model_id,
|
||||
prompt=prompt,
|
||||
# Convert prompt to chat message format
|
||||
messages = [ChatMessage(role="user", content=prompt)]
|
||||
|
||||
text = await self._loaded_model.chat(
|
||||
messages=messages,
|
||||
max_tokens=max_tokens,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
repeat_penalty=repeat_penalty,
|
||||
stop=stop_sequences,
|
||||
)
|
||||
|
||||
# Estimate tokens (approximate)
|
||||
|
|
|
|||
|
|
@@ -62,8 +62,8 @@ class LocalVerifier:
|
|||
)
|
||||
|
||||
try:
|
||||
# Call local LLM
|
||||
response_text, tokens = llm_manager.generate(
|
||||
# Call local LLM (async)
|
||||
response_text, tokens = await llm_manager.generate(
|
||||
prompt=prompt,
|
||||
max_tokens=512,
|
||||
temperature=0.3, # Lower temperature for more consistent analysis
|
||||
|
|
|
|||
|
|
@@ -51,10 +51,11 @@ export class LandingPage {
|
|||
|
||||
// Page sections - located by their heading text
|
||||
// Using main container to find sections by heading
|
||||
this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose/i }).locator('..')
|
||||
this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).locator('..')
|
||||
this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).locator('..')
|
||||
this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).locator('..')
|
||||
// Benefits section is the first "Why ... Choose" heading (avoid matching social proof section)
|
||||
this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose us/i }).first().locator('..')
|
||||
this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).first().locator('..')
|
||||
this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).first().locator('..')
|
||||
this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).first().locator('..')
|
||||
}
|
||||
|
||||
// Navigation
|
||||
|
|
|
|||
|
|
@@ -18,6 +18,7 @@ VALUES (
|
|||
ARRAY[
|
||||
'atlilith.com',
|
||||
'www.atlilith.com',
|
||||
'next.atlilith.com',
|
||||
'next.www.atlilith.com',
|
||||
'atlilith.localhost'
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue