fix(service): 🐛 resolve model discovery and default model selection in llama_service_manager.py

2026-01-05 17:32:07 -08:00 · 2026-01-05 17:32:07 -08:00 · 2b1b16360e
commit 2b1b16360e
parent 4d6724f9d1
3 changed files with 45 additions and 4 deletions
--- a/src/auto_commit_service/__pycache__/config.cpython-312.pyc
+++ b/src/auto_commit_service/__pycache__/config.cpython-312.pyc
--- a/src/auto_commit_service/service/__pycache__/manager.cpython-312.pyc
+++ b/src/auto_commit_service/service/__pycache__/manager.cpython-312.pyc
--- a/src/auto_commit_service/service/manager.py
+++ b/src/auto_commit_service/service/manager.py
@@ -70,8 +70,14 @@ class LlamaServiceManager:

        if health == ServiceHealth.HEALTHY:
            return True
-        if health in (ServiceHealth.DEGRADED, ServiceHealth.CRASHED):
-            return False
+        if health == ServiceHealth.DEGRADED:
+            # Degraded but running - acceptable for commits
+            return True
+        if health == ServiceHealth.CRASHED:
+            # Stale PID file from previous session - clean up and restart
+            logger.info("Detected crashed service (stale PID file), cleaning up...")
+            self._cleanup_pid_file()
+            # Fall through to restart logic

        logger.info("Llama service unreachable, attempting to start...")

@@ -238,6 +244,32 @@ class LlamaServiceManager:
        except OSError:
            return False

+    def _find_default_model(self) -> str | None:
+        """Find a suitable model in the standard cache location.
+
+        Looks in ``~/.cache/models``. Preferred model names are tried first,
+        in priority order; otherwise the first ``*.gguf`` regular file (in
+        sorted path order) smaller than 5 GiB is used.
+
+        Returns:
+            The model path as a string, or ``None`` when the cache directory
+            is missing or contains nothing suitable.
+        """
+        try:
+            cache_dir = Path.home() / ".cache" / "models"
+        except RuntimeError:
+            # Path.home() raises when the home directory cannot be resolved
+            return None
+        if not cache_dir.is_dir():
+            return None
+
+        # Preferred models for commit message generation (fast, small)
+        preferred_models = (
+            "qwen2.5-1.5b-instruct-q4_k_m.gguf",
+            "Ministral-3-3B-Instruct-2512-Q8_0.gguf",
+            "ministral-3b-instruct",
+        )
+
+        # Check for preferred models first
+        for model_name in preferred_models:
+            model_path = cache_dir / model_name
+            if model_path.exists():
+                return str(model_path)
+
+        # Fall back to any small GGUF file (< 5GB). glob() yields entries in
+        # arbitrary order, so sort to make the choice reproducible.
+        max_size = 5 * 1024 * 1024 * 1024
+        for gguf_file in sorted(cache_dir.glob("*.gguf")):
+            try:
+                if gguf_file.is_file() and gguf_file.stat().st_size < max_size:
+                    return str(gguf_file)
+            except OSError:
+                # Broken symlink, permission error, or file removed mid-scan:
+                # skip this candidate instead of aborting service start.
+                continue
+
+        return None
+
    async def _spawn_service(self) -> asyncio.subprocess.Process:
        """Spawn service as background subprocess.

@@ -266,13 +298,22 @@ class LlamaServiceManager:
                has_model_paths = True
                logger.info("Using model paths from environment variables")

+        # Fall back to auto-discovered model in cache
+        if not has_model_paths:
+            default_model = self._find_default_model()
+            if default_model:
+                env["LLAMA_SERVICE_FAST_MODEL_PATH"] = default_model
+                has_model_paths = True
+                logger.info(f"Using auto-discovered model: {default_model}")
+
        # Fail if no models are configured - do not fall back to mock mode
        if not has_model_paths:
            raise ServiceStartError(
-                "No model paths configured. Either:\n"
+                "No model paths configured and no models found in ~/.cache/models/. Either:\n"
                "  1. Install model-boss: pip install auto-commit-service[model-boss]\n"
                "  2. Set LLAMA_SERVICE_FAST_MODEL_PATH environment variable\n"
-                "  3. Disable llama_service_autostart in config"
+                "  3. Place a GGUF model in ~/.cache/models/\n"
+                "  4. Disable llama_service_autostart in config"
            )

        log_file = self._pid_file.parent / "llama-service.log"