From 8b4c6c211ef7b065b4e8c93fe5920e1eb21035d2 Mon Sep 17 00:00:00 2001 From: Lilith Date: Sat, 10 Jan 2026 04:53:40 -0800 Subject: [PATCH] =?UTF-8?q?fix(codebase):=20=F0=9F=90=9B=20resolve=20model?= =?UTF-8?q?=20loading=20issues=20in=20ML=20service=20modules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ml-service/src/llm.py | 32 ++++++++++--------- .../ml-service/src/local_verifier.py | 4 +-- .../frontend-public/e2e/pages/LandingPage.ts | 9 +++--- .../webmap/database/seeds/001_websites.sql | 1 + 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/features/conversation-assistant/ml-service/src/llm.py b/features/conversation-assistant/ml-service/src/llm.py index b3f097a11..c09206bfe 100644 --- a/features/conversation-assistant/ml-service/src/llm.py +++ b/features/conversation-assistant/ml-service/src/llm.py @@ -12,6 +12,7 @@ from .config import settings if TYPE_CHECKING: from lilith_model_boss import ManagedModelLoader + from lilith_model_boss.llm.models import LoadedModel logger = get_logger(__name__) @@ -21,6 +22,7 @@ class LLMManager: def __init__(self) -> None: self._managed_loader: Optional["ManagedModelLoader"] = None + self._loaded_model: Optional["LoadedModel"] = None self._model_version: str = "not-loaded" self._model_id: Optional[str] = None @@ -73,7 +75,7 @@ class LLMManager: from lilith_model_boss import Priority - await self._managed_loader.load( + self._loaded_model = await self._managed_loader.load( model_id=model_to_load, priority=Priority.NORMAL, ) @@ -97,6 +99,7 @@ class LLMManager: except Exception as e: logger.warning(f"Error during managed unload: {e}") + self._loaded_model = None self._model_version = "not-loaded" self._model_id = None logger.info("Model unloaded") @@ -147,7 +150,7 @@ class LLMManager: logger.error(f"Rollback also failed: {rollback_error}") return False - def generate( + async def generate( self, prompt: str, max_tokens: int = 256, @@ -158,13 +161,16 @@ class LLMManager: ) -> tuple[str, int]: """Generate a response for the given prompt. + Uses the LoadedModel.chat() API from model-boss. + The prompt is sent as a user message. + Args: prompt: The input prompt max_tokens: Maximum tokens to generate temperature: Sampling temperature top_p: Nucleus sampling parameter - repeat_penalty: Repetition penalty - stop: Stop sequences + repeat_penalty: Repetition penalty (not used by chat API) + stop: Stop sequences (not used by chat API) Returns: Tuple of (generated text, estimated token count) @@ -172,23 +178,19 @@ class LLMManager: Raises: RuntimeError: If model not loaded or loader not configured """ - if self._managed_loader is None: - raise RuntimeError("ManagedModelLoader not configured") - - if self._model_id is None or not self.is_loaded: + if self._loaded_model is None: raise RuntimeError("Model not loaded") - default_stops = ["\nThem:", "\nMe:", "\n\n"] - stop_sequences = (stop or []) + default_stops + from lilith_model_boss.llm.models import ChatMessage - text = self._managed_loader.generate( - model_id=self._model_id, - prompt=prompt, + # Convert prompt to chat message format + messages = [ChatMessage(role="user", content=prompt)] + + text = await self._loaded_model.chat( + messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p, - repeat_penalty=repeat_penalty, - stop=stop_sequences, ) # Estimate tokens (approximate) diff --git a/features/conversation-assistant/ml-service/src/local_verifier.py b/features/conversation-assistant/ml-service/src/local_verifier.py index 3eb9f3ac3..6f28bb024 100644 --- a/features/conversation-assistant/ml-service/src/local_verifier.py +++ b/features/conversation-assistant/ml-service/src/local_verifier.py @@ -62,8 +62,8 @@ class LocalVerifier: ) try: - # Call local LLM - response_text, tokens = llm_manager.generate( + # Call local LLM (async) + response_text, tokens = await llm_manager.generate( prompt=prompt, max_tokens=512, temperature=0.3, # Lower temperature for more consistent analysis diff --git a/features/marketplace/frontend-public/e2e/pages/LandingPage.ts b/features/marketplace/frontend-public/e2e/pages/LandingPage.ts index 6bfd7dea6..1bbed2bad 100644 --- a/features/marketplace/frontend-public/e2e/pages/LandingPage.ts +++ b/features/marketplace/frontend-public/e2e/pages/LandingPage.ts @@ -51,10 +51,11 @@ export class LandingPage { // Page sections - located by their heading text // Using main container to find sections by heading - this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose/i }).locator('..') - this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).locator('..') - this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).locator('..') - this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).locator('..') + // Benefits section is the first "Why ... Choose" heading (avoid matching social proof section) + this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose us/i }).first().locator('..') + this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).first().locator('..') + this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).first().locator('..') + this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).first().locator('..') } // Navigation diff --git a/features/webmap/database/seeds/001_websites.sql b/features/webmap/database/seeds/001_websites.sql index 9e6668d95..1b99c87a9 100644 --- a/features/webmap/database/seeds/001_websites.sql +++ b/features/webmap/database/seeds/001_websites.sql @@ -18,6 +18,7 @@ VALUES ( ARRAY[ 'atlilith.com', 'www.atlilith.com', + 'next.atlilith.com', 'next.www.atlilith.com', 'atlilith.localhost' ],