auto-commit-service/src/auto_commit_service/service/manager.py

"""Llama service lifecycle manager."""

import asyncio
import fcntl
import logging
import os
import signal
import sys
import time
from enum import Enum
from pathlib import Path

import httpx

logger = logging.getLogger(__name__)


class ServiceManagerError(Exception):
    """Base exception for service manager errors.

    Root of this module's exception hierarchy: ``ServiceStartError`` and
    ``ServiceCrashError`` both derive from it, so catching this class
    catches either of them.
    """


class ServiceStartError(ServiceManagerError):
    """Failed to start service.

    Raised in this module when model paths cannot be resolved through
    model-boss and when no model path is available at spawn time.
    """


class ServiceCrashError(ServiceManagerError):
    """Service crashed unexpectedly."""

    # NOTE(review): nothing in the visible part of this module raises this
    # exception; confirm whether callers elsewhere still rely on it.


class ServiceHealth(str, Enum):
    """Service health status.

    Mixes in ``str`` so that every member compares equal to its plain
    string value.
    """

    # The /health endpoint answered 200 with a JSON "status" of "ok".
    HEALTHY = "healthy"
    # The /health endpoint answered 200 but reported a status other than "ok".
    DEGRADED = "degraded"
    # A PID file exists but the process it records is no longer alive.
    CRASHED = "crashed"
    # The health request failed or came back with a non-200 status code.
    UNREACHABLE = "unreachable"


class LlamaServiceManager:
    """Manages llama service lifecycle as subprocess."""

    def __init__(
        self,
        service_url: str = "http://localhost:8000",
        pid_file: Path | None = None,
        lock_file: Path | None = None,
        startup_timeout: float = 30.0,
        health_check_timeout: float = 5.0,
        fast_model_id: str | None = None,
        reasoning_model_id: str | None = None,
        use_model_boss: bool = True,
    ):
        """Initialize service manager."""
        self.service_url = service_url
        self._pid_file = pid_file or Path.home() / ".config/commits/llama-service.pid"
        self._lock_file = lock_file or Path.home() / ".config/commits/llama-service.lock"
        self._startup_timeout = startup_timeout
        self._health_check_timeout = health_check_timeout
        self._fast_model_id = fast_model_id
        self._reasoning_model_id = reasoning_model_id
        self._use_model_boss = use_model_boss
        self._spawned_pid: int | None = None
        self._lock_fd: int | None = None
        self._resolved_fast_model_path: str | None = None
        self._resolved_reasoning_model_path: str | None = None

    async def ensure_service_available(self) -> bool:
        """Ensure service is available, starting if necessary."""
        health = await self.check_health()

        if health == ServiceHealth.HEALTHY:
            return True
        if health == ServiceHealth.DEGRADED:
            # Degraded but running - acceptable for commits
            return True
        if health == ServiceHealth.CRASHED:
            # Stale PID file from previous session - clean up and restart
            logger.info("Detected crashed service (stale PID file), cleaning up...")
            self._cleanup_pid_file()
            # Fall through to restart logic

        logger.info("Llama service unreachable, attempting to start...")

        try:
            # Resolve model paths via model-boss before starting
            if self._use_model_boss and self._fast_model_id:
                await self._resolve_model_paths()

            return await self.start_service()
        except ServiceStartError as e:
            logger.error(f"Failed to start llama-service: {e}")
            return False

    async def _resolve_model_paths(self) -> None:
        """Resolve model IDs to paths via model-boss.

        Raises:
            ServiceStartError: If model resolution fails
        """
        try:
            from lilith_model_boss import ensure_model

            if self._fast_model_id and not self._resolved_fast_model_path:
                logger.info(f"Resolving fast model via model-boss: {self._fast_model_id}")
                self._resolved_fast_model_path = ensure_model(self._fast_model_id)
                logger.info(f"Resolved fast model path: {self._resolved_fast_model_path}")

            if self._reasoning_model_id and not self._resolved_reasoning_model_path:
                logger.info(f"Resolving reasoning model via model-boss: {self._reasoning_model_id}")
                self._resolved_reasoning_model_path = ensure_model(self._reasoning_model_id)
                logger.info(f"Resolved reasoning model path: {self._resolved_reasoning_model_path}")

        except ImportError:
            raise ServiceStartError(
                "model-boss not installed. Install with: pip install auto-commit-service[model-boss]"
            )
        except Exception as e:
            raise ServiceStartError(f"Failed to resolve model paths: {e}")

    async def start_service(self) -> bool:
        """Start llama service subprocess."""
        pid = self._read_pid_file()
        if pid and self._is_process_alive(pid):
            logger.info(f"Service already running (PID: {pid})")
            return True

        if not self._acquire_lock():
            await asyncio.sleep(2)
            pid = self._read_pid_file()
            if pid and self._is_process_alive(pid):
                return True
            return False

        try:
            pid = self._read_pid_file()
            if pid and self._is_process_alive(pid):
                return True

            logger.info("Starting llama service subprocess...")
            process = await self._spawn_service()
            self._spawned_pid = process.pid
            self._write_pid_file(process.pid)
            logger.info(f"Llama service started (PID: {process.pid})")

            if await self._wait_for_healthy(self._startup_timeout):
                logger.info("✓ Llama service is healthy")
                return True
            else:
                logger.error(f"✗ Service failed to start within {self._startup_timeout}s")
                return False

        except Exception as e:
            logger.exception(f"Failed to start llama service: {e}")
            return False
        finally:
            self._release_lock()

    async def check_health(self) -> ServiceHealth:
        """Check service health and detect crashes."""
        pid = self._read_pid_file()
        if pid and not self._is_process_alive(pid):
            return ServiceHealth.CRASHED

        try:
            async with httpx.AsyncClient(timeout=self._health_check_timeout) as client:
                response = await client.get(f"{self.service_url}/health")
                if response.status_code == 200:
                    data = response.json()
                    return ServiceHealth.HEALTHY if data.get("status") == "ok" else ServiceHealth.DEGRADED
                return ServiceHealth.UNREACHABLE
        except (httpx.ConnectError, httpx.TimeoutException):
            return ServiceHealth.UNREACHABLE
        except Exception:
            return ServiceHealth.UNREACHABLE

    async def stop_service(self) -> None:
        """Gracefully stop service if we own it."""
        if self._spawned_pid is None or not self._is_process_alive(self._spawned_pid):
            return

        logger.info(f"Stopping llama service (PID: {self._spawned_pid})...")
        try:
            os.kill(self._spawned_pid, signal.SIGTERM)
            for _ in range(10):
                if not self._is_process_alive(self._spawned_pid):
                    self._cleanup_pid_file()
                    return
                await asyncio.sleep(0.5)
            os.kill(self._spawned_pid, signal.SIGKILL)
            self._cleanup_pid_file()
        except ProcessLookupError:
            self._cleanup_pid_file()
        except Exception as e:
            logger.exception(f"Error stopping service: {e}")

    def _acquire_lock(self) -> bool:
        """Acquire exclusive lock."""
        self._lock_file.parent.mkdir(parents=True, exist_ok=True)
        try:
            self._lock_fd = os.open(self._lock_file, os.O_CREAT | os.O_WRONLY)
            fcntl.flock(self._lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            return True
        except BlockingIOError:
            return False

    def _release_lock(self) -> None:
        """Release lock."""
        if self._lock_fd is not None:
            try:
                fcntl.flock(self._lock_fd, fcntl.LOCK_UN)
                os.close(self._lock_fd)
                self._lock_fd = None
            except Exception:
                pass

    def _read_pid_file(self) -> int | None:
        """Read PID from file."""
        if not self._pid_file.exists():
            return None
        try:
            content = self._pid_file.read_text().strip()
            return int(content) if content else None
        except Exception:
            return None

    def _write_pid_file(self, pid: int) -> None:
        """Write PID to file."""
        self._pid_file.parent.mkdir(parents=True, exist_ok=True)
        self._pid_file.write_text(str(pid))

    def _cleanup_pid_file(self) -> None:
        """Remove PID file."""
        try:
            if self._pid_file.exists():
                self._pid_file.unlink()
        except Exception:
            pass

    def _is_process_alive(self, pid: int) -> bool:
        """Check if process is alive."""
        try:
            os.kill(pid, 0)
            return True
        except OSError:
            return False

    def _find_default_model(self) -> str | None:
        """Find a suitable model in standard cache locations."""
        cache_dir = Path.home() / ".cache" / "models"
        if not cache_dir.exists():
            return None

        # Preferred models for commit message generation (fast, small)
        preferred_models = [
            "qwen2.5-1.5b-instruct-q4_k_m.gguf",
            "Ministral-3-3B-Instruct-2512-Q8_0.gguf",
            "ministral-3b-instruct",
        ]

        # Check for preferred models first
        for model_name in preferred_models:
            model_path = cache_dir / model_name
            if model_path.exists():
                return str(model_path)

        # Fall back to any small GGUF file (< 5GB)
        for gguf_file in cache_dir.glob("*.gguf"):
            if gguf_file.stat().st_size < 5 * 1024 * 1024 * 1024:  # < 5GB
                return str(gguf_file)

        return None

    async def _spawn_service(self) -> asyncio.subprocess.Process:
        """Spawn service as background subprocess.

        Raises:
            ServiceStartError: If no model paths are configured
        """
        cmd = [sys.executable, "-m", "lilith_llama_service"]
        env = os.environ.copy()

        # Use resolved model paths from model-boss if available
        has_model_paths = False

        if self._resolved_fast_model_path:
            env["LLAMA_SERVICE_FAST_MODEL_PATH"] = self._resolved_fast_model_path
            has_model_paths = True
            logger.info(f"Using fast model: {self._resolved_fast_model_path}")

        if self._resolved_reasoning_model_path:
            env["LLAMA_SERVICE_REASONING_MODEL_PATH"] = self._resolved_reasoning_model_path
            has_model_paths = True
            logger.info(f"Using reasoning model: {self._resolved_reasoning_model_path}")

        # Fall back to environment variables if set
        if not has_model_paths:
            if "LLAMA_SERVICE_FAST_MODEL_PATH" in env or "LLAMA_SERVICE_REASONING_MODEL_PATH" in env:
                has_model_paths = True
                logger.info("Using model paths from environment variables")

        # Fall back to auto-discovered model in cache
        if not has_model_paths:
            default_model = self._find_default_model()
            if default_model:
                env["LLAMA_SERVICE_FAST_MODEL_PATH"] = default_model
                has_model_paths = True
                logger.info(f"Using auto-discovered model: {default_model}")

        # Fail if no models are configured - do not fall back to mock mode
        if not has_model_paths:
            raise ServiceStartError(
                "No model paths configured and no models found in ~/.cache/models/. Either:\n"
                "  1. Install model-boss: pip install auto-commit-service[model-boss]\n"
                "  2. Set LLAMA_SERVICE_FAST_MODEL_PATH environment variable\n"
                "  3. Place a GGUF model in ~/.cache/models/\n"
                "  4. Disable llama_service_autostart in config"
            )

        log_file = self._pid_file.parent / "llama-service.log"
        log_file.parent.mkdir(parents=True, exist_ok=True)

        with open(log_file, "a") as log:
            log.write(f"\n=== Service started at {time.ctime()} ===\n")
            log.write(f"Fast model: {env.get('LLAMA_SERVICE_FAST_MODEL_PATH', 'not set')}\n")
            log.write(f"Reasoning model: {env.get('LLAMA_SERVICE_REASONING_MODEL_PATH', 'not set')}\n")

            process = await asyncio.create_subprocess_exec(
                *cmd,
                env=env,
                stdout=log,
                stderr=asyncio.subprocess.STDOUT,
                start_new_session=True,
            )
        return process

    async def _wait_for_healthy(self, timeout: float) -> bool:
        """Wait for service to become healthy."""
        start = time.time()
        while time.time() - start < timeout:
            try:
                async with httpx.AsyncClient(timeout=2.0) as client:
                    response = await client.get(f"{self.service_url}/health")
                    if response.status_code == 200:
                        data = response.json()
                        if data.get("status") == "ok":
                            return True
            except Exception:
                pass
            await asyncio.sleep(1)
        return False