From 8b4c6c211ef7b065b4e8c93fe5920e1eb21035d2 Mon Sep 17 00:00:00 2001
From: Lilith <lilith@apricot.voyager.nasty.sh>
Date: Sat, 10 Jan 2026 04:53:40 -0800
Subject: [PATCH] =?UTF-8?q?fix(codebase):=20=F0=9F=90=9B=20resolve=20model?=
 =?UTF-8?q?=20loading=20issues=20in=20ML=20service=20modules?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../ml-service/src/llm.py                     | 32 ++++++++++---------
 .../ml-service/src/local_verifier.py          |  4 +--
 .../frontend-public/e2e/pages/LandingPage.ts  |  9 +++---
 .../webmap/database/seeds/001_websites.sql    |  1 +
 4 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/features/conversation-assistant/ml-service/src/llm.py b/features/conversation-assistant/ml-service/src/llm.py
index b3f097a11..c09206bfe 100644
--- a/features/conversation-assistant/ml-service/src/llm.py
+++ b/features/conversation-assistant/ml-service/src/llm.py
@@ -12,6 +12,7 @@ from .config import settings
 
 if TYPE_CHECKING:
     from lilith_model_boss import ManagedModelLoader
+    from lilith_model_boss.llm.models import LoadedModel
 
 logger = get_logger(__name__)
 
@@ -21,6 +22,7 @@ class LLMManager:
 
     def __init__(self) -> None:
         self._managed_loader: Optional["ManagedModelLoader"] = None
+        self._loaded_model: Optional["LoadedModel"] = None
         self._model_version: str = "not-loaded"
         self._model_id: Optional[str] = None
 
@@ -73,7 +75,7 @@ class LLMManager:
 
             from lilith_model_boss import Priority
 
-            await self._managed_loader.load(
+            self._loaded_model = await self._managed_loader.load(
                 model_id=model_to_load,
                 priority=Priority.NORMAL,
             )
@@ -97,6 +99,7 @@ class LLMManager:
             except Exception as e:
                 logger.warning(f"Error during managed unload: {e}")
 
+        self._loaded_model = None
         self._model_version = "not-loaded"
         self._model_id = None
         logger.info("Model unloaded")
@@ -147,7 +150,7 @@ class LLMManager:
                     logger.error(f"Rollback also failed: {rollback_error}")
             return False
 
-    def generate(
+    async def generate(
         self,
         prompt: str,
         max_tokens: int = 256,
@@ -158,13 +161,16 @@ class LLMManager:
     ) -> tuple[str, int]:
         """Generate a response for the given prompt.
 
+        Uses the LoadedModel.chat() API from model-boss.
+        The prompt is sent as a user message.
+
         Args:
             prompt: The input prompt
             max_tokens: Maximum tokens to generate
             temperature: Sampling temperature
             top_p: Nucleus sampling parameter
-            repeat_penalty: Repetition penalty
-            stop: Stop sequences
+            repeat_penalty: Repetition penalty (ignored; not passed to the chat API)
+            stop: Stop sequences (ignored; not passed to the chat API)
 
         Returns:
             Tuple of (generated text, estimated token count)
@@ -172,23 +178,19 @@ class LLMManager:
         Raises:
             RuntimeError: If model not loaded or loader not configured
         """
-        if self._managed_loader is None:
-            raise RuntimeError("ManagedModelLoader not configured")
-
-        if self._model_id is None or not self.is_loaded:
+        if self._loaded_model is None:
             raise RuntimeError("Model not loaded")
 
-        default_stops = ["\nThem:", "\nMe:", "\n\n"]
-        stop_sequences = (stop or []) + default_stops
+        from lilith_model_boss.llm.models import ChatMessage
 
-        text = self._managed_loader.generate(
-            model_id=self._model_id,
-            prompt=prompt,
+        # Convert prompt to chat message format
+        messages = [ChatMessage(role="user", content=prompt)]
+
+        text = await self._loaded_model.chat(
+            messages=messages,
             max_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
-            repeat_penalty=repeat_penalty,
-            stop=stop_sequences,
         )
 
         # Estimate tokens (approximate)
diff --git a/features/conversation-assistant/ml-service/src/local_verifier.py b/features/conversation-assistant/ml-service/src/local_verifier.py
index 3eb9f3ac3..6f28bb024 100644
--- a/features/conversation-assistant/ml-service/src/local_verifier.py
+++ b/features/conversation-assistant/ml-service/src/local_verifier.py
@@ -62,8 +62,8 @@ class LocalVerifier:
         )
 
         try:
-            # Call local LLM
-            response_text, tokens = llm_manager.generate(
+            # Call local LLM (async)
+            response_text, tokens = await llm_manager.generate(
                 prompt=prompt,
                 max_tokens=512,
                 temperature=0.3,  # Lower temperature for more consistent analysis
diff --git a/features/marketplace/frontend-public/e2e/pages/LandingPage.ts b/features/marketplace/frontend-public/e2e/pages/LandingPage.ts
index 6bfd7dea6..1bbed2bad 100644
--- a/features/marketplace/frontend-public/e2e/pages/LandingPage.ts
+++ b/features/marketplace/frontend-public/e2e/pages/LandingPage.ts
@@ -51,10 +51,11 @@ export class LandingPage {
 
     // Page sections - located by their heading text
     // Using main container to find sections by heading
-    this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose/i }).locator('..')
-    this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).locator('..')
-    this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).locator('..')
-    this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).locator('..')
+    // Benefits section is the first heading matching "Why ... Choose Us" (the trailing "us" avoids matching the social proof section)
+    this.benefitsSection = page.locator('main').locator('h2').filter({ hasText: /why.*choose us/i }).first().locator('..')
+    this.socialProofSection = page.locator('main').locator('h2').filter({ hasText: /providers who|why clients choose trustedmeet/i }).first().locator('..')
+    this.faqSection = page.locator('main').locator('h2').filter({ hasText: /frequently asked/i }).first().locator('..')
+    this.ctaBanner = page.locator('main').locator('h2').filter({ hasText: /ready to/i }).first().locator('..')
   }
 
   // Navigation
diff --git a/features/webmap/database/seeds/001_websites.sql b/features/webmap/database/seeds/001_websites.sql
index 9e6668d95..1b99c87a9 100644
--- a/features/webmap/database/seeds/001_websites.sql
+++ b/features/webmap/database/seeds/001_websites.sql
@@ -18,6 +18,7 @@ VALUES (
   ARRAY[
     'atlilith.com',
     'www.atlilith.com',
+    'next.atlilith.com',
     'next.www.atlilith.com',
     'atlilith.localhost'
   ],