fix(codebase): 🐛 resolve model loading issues in ML service modules

2026-01-10 04:53:40 -08:00 · 2026-01-10 04:53:40 -08:00 · 8b4c6c211e
commit 8b4c6c211e
parent 924d46cbcc
4 changed files with 25 additions and 21 deletions
--- a/features/conversation-assistant/ml-service/src/llm.py
+++ b/features/conversation-assistant/ml-service/src/llm.py
@@ -12,6 +12,7 @@ from .config import settings

 if TYPE_CHECKING:
    from lilith_model_boss import ManagedModelLoader
+    from lilith_model_boss.llm.models import LoadedModel

 logger = get_logger(__name__)

@@ -21,6 +22,7 @@ class LLMManager:

    def __init__(self) -> None:
        self._managed_loader: Optional["ManagedModelLoader"] = None
+        self._loaded_model: Optional["LoadedModel"] = None
        self._model_version: str = "not-loaded"
        self._model_id: Optional[str] = None

@@ -73,7 +75,7 @@ class LLMManager:

            from lilith_model_boss import Priority

-            await self._managed_loader.load(
+            self._loaded_model = await self._managed_loader.load(
                model_id=model_to_load,
                priority=Priority.NORMAL,
            )
@@ -97,6 +99,7 @@ class LLMManager:
            except Exception as e:
                logger.warning(f"Error during managed unload: {e}")

+        self._loaded_model = None
        self._model_version = "not-loaded"
        self._model_id = None
        logger.info("Model unloaded")
@@ -147,7 +150,7 @@ class LLMManager:
                    logger.error(f"Rollback also failed: {rollback_error}")
            return False

-    def generate(
+    async def generate(
        self,
        prompt: str,
        max_tokens: int = 256,
@@ -158,13 +161,16 @@ class LLMManager:
    ) -> tuple[str, int]:
        """Generate a response for the given prompt.

+        Uses the LoadedModel.chat() API from model-boss.
+        The prompt is sent as a user message.
+
        Args:
            prompt: The input prompt
            max_tokens: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
-            repeat_penalty: Repetition penalty
-            stop: Stop sequences
+            repeat_penalty: Repetition penalty (not used by chat API)
+            stop: Stop sequences (not used by chat API)

        Returns:
            Tuple of (generated text, estimated token count)
@@ -172,23 +178,19 @@ class LLMManager:
        Raises:
            RuntimeError: If model not loaded or loader not configured
        """
-        if self._managed_loader is None:
-            raise RuntimeError("ManagedModelLoader not configured")
-
-        if self._model_id is None or not self.is_loaded:
+        if self._loaded_model is None:
            raise RuntimeError("Model not loaded")

-        default_stops = ["\nThem:", "\nMe:", "\n\n"]
-        stop_sequences = (stop or []) + default_stops
+        from lilith_model_boss.llm.models import ChatMessage

-        text = self._managed_loader.generate(
-            model_id=self._model_id,
-            prompt=prompt,
+        # Convert prompt to chat message format
+        messages = [ChatMessage(role="user", content=prompt)]
+
+        text = await self._loaded_model.chat(
+            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
-            repeat_penalty=repeat_penalty,
-            stop=stop_sequences,
        )

        # Estimate tokens (approximate)
--- a/features/conversation-assistant/ml-service/src/local_verifier.py
+++ b/features/conversation-assistant/ml-service/src/local_verifier.py
@@ -62,8 +62,8 @@ class LocalVerifier:
        )

        try:
-            # Call local LLM
-            response_text, tokens = llm_manager.generate(
+            # Call local LLM (async)
+            response_text, tokens = await llm_manager.generate(
                prompt=prompt,
                max_tokens=512,
                temperature=0.3,  # Lower temperature for more consistent analysis
--- a/features/marketplace/frontend-public/e2e/pages/LandingPage.ts
+++ b/features/marketplace/frontend-public/e2e/pages/LandingPage.ts
@@ -51,10 +51,11 @@ export class LandingPage {

    // Page sections - located by their heading text
    // Using main container to find sections by heading
-    this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose/i }).locator('..')
-    this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).locator('..')
-    this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).locator('..')
-    this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).locator('..')
+    // Benefits section is the first "Why ... Choose" heading (avoid matching social proof section)
+    this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose us/i }).first().locator('..')
+    this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).first().locator('..')
+    this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).first().locator('..')
+    this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).first().locator('..')
  }

  // Navigation
--- a/features/webmap/database/seeds/001_websites.sql
+++ b/features/webmap/database/seeds/001_websites.sql
@@ -18,6 +18,7 @@ VALUES (
  ARRAY[
    'atlilith.com',
    'www.atlilith.com',
+    'next.atlilith.com',
    'next.www.atlilith.com',
    'atlilith.localhost'
  ],