fix(service): 🐛 resolve model discovery and default model selection in llama_service_manager.py
This commit is contained in:
parent
4d6724f9d1
commit
2b1b16360e
3 changed files with 45 additions and 4 deletions
Binary file not shown.
Binary file not shown.
|
|
@ -70,8 +70,14 @@ class LlamaServiceManager:
|
|||
|
||||
if health == ServiceHealth.HEALTHY:
|
||||
return True
|
||||
if health in (ServiceHealth.DEGRADED, ServiceHealth.CRASHED):
|
||||
return False
|
||||
if health == ServiceHealth.DEGRADED:
|
||||
# Degraded but running - acceptable for commits
|
||||
return True
|
||||
if health == ServiceHealth.CRASHED:
|
||||
# Stale PID file from previous session - clean up and restart
|
||||
logger.info("Detected crashed service (stale PID file), cleaning up...")
|
||||
self._cleanup_pid_file()
|
||||
# Fall through to restart logic
|
||||
|
||||
logger.info("Llama service unreachable, attempting to start...")
|
||||
|
||||
|
|
@ -238,6 +244,32 @@ class LlamaServiceManager:
|
|||
except OSError:
|
||||
return False
|
||||
|
||||
def _find_default_model(self) -> str | None:
|
||||
"""Find a suitable model in standard cache locations."""
|
||||
cache_dir = Path.home() / ".cache" / "models"
|
||||
if not cache_dir.exists():
|
||||
return None
|
||||
|
||||
# Preferred models for commit message generation (fast, small)
|
||||
preferred_models = [
|
||||
"qwen2.5-1.5b-instruct-q4_k_m.gguf",
|
||||
"Ministral-3-3B-Instruct-2512-Q8_0.gguf",
|
||||
"ministral-3b-instruct",
|
||||
]
|
||||
|
||||
# Check for preferred models first
|
||||
for model_name in preferred_models:
|
||||
model_path = cache_dir / model_name
|
||||
if model_path.exists():
|
||||
return str(model_path)
|
||||
|
||||
# Fall back to any small GGUF file (< 5GB)
|
||||
for gguf_file in cache_dir.glob("*.gguf"):
|
||||
if gguf_file.stat().st_size < 5 * 1024 * 1024 * 1024: # < 5GB
|
||||
return str(gguf_file)
|
||||
|
||||
return None
|
||||
|
||||
async def _spawn_service(self) -> asyncio.subprocess.Process:
|
||||
"""Spawn service as background subprocess.
|
||||
|
||||
|
|
@ -266,13 +298,22 @@ class LlamaServiceManager:
|
|||
has_model_paths = True
|
||||
logger.info("Using model paths from environment variables")
|
||||
|
||||
# Fall back to auto-discovered model in cache
|
||||
if not has_model_paths:
|
||||
default_model = self._find_default_model()
|
||||
if default_model:
|
||||
env["LLAMA_SERVICE_FAST_MODEL_PATH"] = default_model
|
||||
has_model_paths = True
|
||||
logger.info(f"Using auto-discovered model: {default_model}")
|
||||
|
||||
# Fail if no models are configured - do not fall back to mock mode
|
||||
if not has_model_paths:
|
||||
raise ServiceStartError(
|
||||
"No model paths configured. Either:\n"
|
||||
"No model paths configured and no models found in ~/.cache/models/. Either:\n"
|
||||
" 1. Install model-boss: pip install auto-commit-service[model-boss]\n"
|
||||
" 2. Set LLAMA_SERVICE_FAST_MODEL_PATH environment variable\n"
|
||||
" 3. Disable llama_service_autostart in config"
|
||||
" 3. Place a GGUF model in ~/.cache/models/\n"
|
||||
" 4. Disable llama_service_autostart in config"
|
||||
)
|
||||
|
||||
log_file = self._pid_file.parent / "llama-service.log"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue