fix(service): 🐛 resolve model discovery and default model selection in llama_service_manager.py

This commit is contained in:
Lilith 2026-01-05 17:32:07 -08:00
parent 4d6724f9d1
commit 2b1b16360e
3 changed files with 45 additions and 4 deletions

View file

@@ -70,8 +70,14 @@ class LlamaServiceManager:
if health == ServiceHealth.HEALTHY:
return True
if health in (ServiceHealth.DEGRADED, ServiceHealth.CRASHED):
return False
if health == ServiceHealth.DEGRADED:
# Degraded but running - acceptable for commits
return True
if health == ServiceHealth.CRASHED:
# Stale PID file from previous session - clean up and restart
logger.info("Detected crashed service (stale PID file), cleaning up...")
self._cleanup_pid_file()
# Fall through to restart logic
logger.info("Llama service unreachable, attempting to start...")
@@ -238,6 +244,32 @@ class LlamaServiceManager:
except OSError:
return False
def _find_default_model(self) -> str | None:
"""Find a suitable model in standard cache locations."""
cache_dir = Path.home() / ".cache" / "models"
if not cache_dir.exists():
return None
# Preferred models for commit message generation (fast, small)
preferred_models = [
"qwen2.5-1.5b-instruct-q4_k_m.gguf",
"Ministral-3-3B-Instruct-2512-Q8_0.gguf",
"ministral-3b-instruct",
]
# Check for preferred models first
for model_name in preferred_models:
model_path = cache_dir / model_name
if model_path.exists():
return str(model_path)
# Fall back to any small GGUF file (< 5GB)
for gguf_file in cache_dir.glob("*.gguf"):
if gguf_file.stat().st_size < 5 * 1024 * 1024 * 1024: # < 5GB
return str(gguf_file)
return None
async def _spawn_service(self) -> asyncio.subprocess.Process:
"""Spawn service as background subprocess.
@@ -266,13 +298,22 @@ class LlamaServiceManager:
has_model_paths = True
logger.info("Using model paths from environment variables")
# Fall back to auto-discovered model in cache
if not has_model_paths:
default_model = self._find_default_model()
if default_model:
env["LLAMA_SERVICE_FAST_MODEL_PATH"] = default_model
has_model_paths = True
logger.info(f"Using auto-discovered model: {default_model}")
# Fail if no models are configured - do not fall back to mock mode
if not has_model_paths:
raise ServiceStartError(
"No model paths configured. Either:\n"
"No model paths configured and no models found in ~/.cache/models/. Either:\n"
" 1. Install model-boss: pip install auto-commit-service[model-boss]\n"
" 2. Set LLAMA_SERVICE_FAST_MODEL_PATH environment variable\n"
" 3. Disable llama_service_autostart in config"
" 3. Place a GGUF model in ~/.cache/models/\n"
" 4. Disable llama_service_autostart in config"
)
log_file = self._pid_file.parent / "llama-service.log"