Compare commits

...

10 commits

Author SHA1 Message Date
autocommit
797bcf9973 deps-upgrade(deps): ⬆️ Update runtime and build dependencies in pyproject.toml to latest versions
Some checks failed
Publish / publish (push) Failing after 0s
Publish to PyPI / Build and Publish (push) Failing after 50s
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-04-12 00:22:21 -07:00
Claude Code
84a6046386 deps-upgrade(python): ⬆️ Update Python dependencies to latest stable versions in pyproject.toml
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-03-25 05:48:19 -07:00
Lilith
adaa2ec58d chore(bootstrap): 🔧 Update FastAPI bootstrap config and Docker autostart handlers
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-15 10:51:45 -08:00
Lilith
b9ca94a0eb deps-add(config-or-deps-if-more): Update project dependencies in pyproject.toml (version bumps, new additions, or pinning)
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-15 10:51:45 -08:00
Lilith
c259fe6cbe chore(lilith_service_fastapi): 🔧 Update FastAPI lifespan configuration in lifespan.py
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-15 10:32:47 -08:00
Lilith
22e4984af8 chore(lilith_service_fasta): 🔧 adjust cleanup timing in lifespan.py for optimized resource lifecycle management
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-15 10:19:27 -08:00
Lilith
1fde135419 chore(fastapi-lifespan): 🔧 implemented improved lifecycle hooks for graceful startup and shutdown in FastAPI applications
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-15 10:00:19 -08:00
Lilith
56f82816be chore(config): 🔧 Update dependency versions in pyproject.toml (upgrades, pinning, or version adjustments)
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-15 10:00:19 -08:00
Lilith
f053ba727c chore: migrate to DRY reusable workflow 2026-01-21 12:50:18 -08:00
Lilith
3588165488 ci: add Forgejo Actions workflow for PyPI publishing 2026-01-21 12:36:40 -08:00
5 changed files with 279 additions and 13 deletions

View file

@ -0,0 +1,7 @@
# Calls reusable workflow from lilith/workflows
name: Publish
on: [push, workflow_dispatch]
jobs:
publish:
uses: lilith/workflows/.forgejo/workflows/publish-pypi.yml@main
secrets: inherit

View file

@ -4,11 +4,11 @@ build-backend = "hatchling.build"
[project]
name = "lilith-service-fastapi-bootstrap"
version = "4.0.0"
version = "4.2.0"
description = "Base utilities for FastAPI microservices with optional ML support"
requires-python = ">=3.10"
license = "MIT"
authors = [{ name = "Lilith", email = "dev@atlilith.com" }]
authors = [{ name = "Lilith", email = "quinn@ftw.codes" }]
keywords = ["fastapi", "microservice", "bootstrap", "ml", "ai"]
classifiers = [
"Development Status :: 5 - Production/Stable",
@ -32,7 +32,7 @@ dependencies = [
redis = ["redis>=5.0.0"]
database = ["sqlalchemy>=2.0.0", "asyncpg>=0.29.0"]
ml = [
"lilith-model-boss>=3.0.0",
"lilith-model-boss>=4.0.0",
"lilith-model-boss-loaders>=1.0.0",
]
service-addresses = ["lilith-service-addresses>=1.0.0"] # For dependency startup (v2.0+)

View file

@ -96,6 +96,9 @@ from .exceptions import (
ConnectionError,
)
# Docker auto-start (fail-fast dependency management)
from .docker_autostart import ensure_container_running, ContainerStartError
# Content Generation
from .content_generation import (
ContentGenerationSettings,
@ -210,4 +213,7 @@ __all__ = [
"ServiceConfig",
"ServiceInfo",
"create_service_discovery_client",
# Docker auto-start
"ensure_container_running",
"ContainerStartError",
]

View file

@ -0,0 +1,187 @@
"""Docker container auto-start utility for fail-fast service dependencies.
Services that depend on Docker containers (Redis, PostgreSQL, etc.) use this
to ensure containers are running at boot. If a container can't be started,
the service fails immediately no graceful degradation.
Usage:
from lilith_service_fastapi_bootstrap.docker_autostart import ensure_container_running
# Ensure model-boss-redis is up before connecting
await ensure_container_running("model-boss-redis")
# Ensure with custom timeout
await ensure_container_running("lilith-analytics-redis", timeout=30)
"""
import asyncio
import logging
import shutil
import subprocess
logger = logging.getLogger(__name__)
DOCKER_BIN: str | None = shutil.which("docker")
class ContainerStartError(RuntimeError):
"""Raised when a required Docker container cannot be started."""
def _run_docker(*args: str, timeout: int = 15) -> subprocess.CompletedProcess[str]:
"""Run a docker CLI command synchronously.
Raises:
ContainerStartError: If docker binary is not found.
"""
if DOCKER_BIN is None:
raise ContainerStartError(
"Docker CLI not found on PATH. "
"Cannot auto-start container dependencies. "
"Install Docker or start containers manually."
)
return subprocess.run(
[DOCKER_BIN, *args],
capture_output=True,
text=True,
timeout=timeout,
)
def _is_container_running(container_name: str) -> bool:
"""Check if a Docker container is currently running."""
result = _run_docker(
"inspect",
"--format",
"{{.State.Running}}",
container_name,
)
return result.returncode == 0 and result.stdout.strip() == "true"
def _container_exists(container_name: str) -> bool:
"""Check if a Docker container exists (running or stopped)."""
result = _run_docker(
"inspect",
"--format",
"{{.State.Status}}",
container_name,
)
return result.returncode == 0
def _start_container(container_name: str) -> None:
"""Start an existing Docker container.
Raises:
ContainerStartError: If the container fails to start.
"""
logger.info(f"Starting Docker container: {container_name}")
result = _run_docker("start", container_name)
if result.returncode != 0:
raise ContainerStartError(
f"Failed to start container '{container_name}': {result.stderr.strip()}"
)
async def _wait_for_healthy(
container_name: str,
timeout: float,
poll_interval: float = 0.5,
) -> None:
"""Wait for a container to be running and responding.
Checks both Docker 'running' state and healthcheck status (if configured).
Raises:
ContainerStartError: If container doesn't become healthy within timeout.
"""
elapsed = 0.0
while elapsed < timeout:
if not _is_container_running(container_name):
await asyncio.sleep(poll_interval)
elapsed += poll_interval
continue
# Check healthcheck status if the container has one
result = _run_docker(
"inspect",
"--format",
"{{if .State.Health}}{{.State.Health.Status}}{{else}}none{{end}}",
container_name,
)
health_status = result.stdout.strip()
if health_status == "none":
# No healthcheck configured — running state is sufficient
return
elif health_status == "healthy":
return
elif health_status == "unhealthy":
raise ContainerStartError(
f"Container '{container_name}' is unhealthy. "
f"Check: docker logs {container_name}"
)
# Still starting — wait and poll again
await asyncio.sleep(poll_interval)
elapsed += poll_interval
raise ContainerStartError(
f"Container '{container_name}' did not become healthy within {timeout}s. "
f"Check: docker logs {container_name}"
)
async def ensure_container_running(
container_name: str,
timeout: float = 15.0,
) -> None:
"""Ensure a Docker container is running and healthy.
If the container exists but is stopped, starts it.
If the container doesn't exist, raises an error (it must be defined in docker-compose).
If the container can't reach healthy state within timeout, raises an error.
This is a fail-fast utility services should NOT catch ContainerStartError
and degrade gracefully. If a required dependency can't start, the service
must fail to start.
Args:
container_name: Docker container name (e.g., "model-boss-redis").
timeout: Max seconds to wait for healthy state.
Raises:
ContainerStartError: If container cannot be started or doesn't become healthy.
"""
# Already running — fast path
if _is_container_running(container_name):
logger.debug(f"Container already running: {container_name}")
return
# Container exists but stopped — start it
if _container_exists(container_name):
_start_container(container_name)
await _wait_for_healthy(container_name, timeout)
logger.info(f"Container started and healthy: {container_name}")
return
# Container doesn't exist — try docker-compose up for it
logger.info(f"Container '{container_name}' not found, attempting docker compose up")
compose_result = _run_docker(
"compose",
"up",
"-d",
container_name,
timeout=30,
)
if compose_result.returncode != 0:
raise ContainerStartError(
f"Container '{container_name}' does not exist and docker compose up failed: "
f"{compose_result.stderr.strip()}. "
f"Ensure the container is defined in docker-compose.yml."
)
await _wait_for_healthy(container_name, timeout)
logger.info(f"Container created and healthy: {container_name}")

View file

@ -286,11 +286,18 @@ class GPULifespanManager(LifespanManager):
def __init__(
self,
redis_url: str = "redis://localhost:6379",
auto_start_services: bool = False,
redis_container_name: str = "model-boss-redis",
) -> None:
"""Initialize GPU-aware lifespan manager.
Args:
redis_url: Redis URL for GPUBoss coordination.
auto_start_services: Whether GPUBoss should auto-start Redis/services.
Defaults to False expects Redis to be running externally
(via docker-compose, systemd, or dev cluster).
redis_container_name: Docker container name for the model-boss Redis
instance. Auto-started if not running.
Note:
In v4.0+, GPU auto-detection and watchdog are enabled by default.
@ -298,6 +305,8 @@ class GPULifespanManager(LifespanManager):
"""
super().__init__()
self._redis_url = redis_url
self._auto_start_services = auto_start_services
self._redis_container_name = redis_container_name
# Will be initialized on startup
self._boss: GPUBoss | None = None
@ -346,24 +355,38 @@ class GPULifespanManager(LifespanManager):
return self._hf_loader
async def _init_gpu_boss(self) -> None:
"""Initialize GPUBoss and managed loaders."""
"""Initialize GPUBoss and managed loaders.
Fail-fast: auto-starts model-boss-redis if not running, then connects.
Raises on failure services MUST NOT start without GPU coordination.
"""
try:
from model_boss import GPUBoss
from model_boss_loaders import ManagedModelLoader
except ImportError:
logger.warning(
"lilith-model-boss not installed. GPU coordination disabled. "
except ImportError as e:
raise RuntimeError(
"lilith-model-boss not installed. GPU coordination required. "
"Install with: pip install lilith-model-boss[loaders]"
)
return
) from e
# Auto-start model-boss-redis container
from .docker_autostart import ensure_container_running
await ensure_container_running(self._redis_container_name)
logger.info("Initializing GPUBoss for GPU lease coordination")
# GPUBoss v3.0+ uses keyword arguments directly
self._boss = GPUBoss(redis_url=self._redis_url)
# GPUBoss v3.0+ uses keyword arguments directly — fail hard on connection error
self._boss = GPUBoss(
redis_url=self._redis_url,
auto_start_services=self._auto_start_services,
)
await self._boss.connect()
# Watchdog is now automatic in v3.0+ - no manual start needed
# Auto-detect and register GPUs if not already registered
gpu_count = await self._boss.get_gpu_count()
if gpu_count == 0:
await self._register_gpus()
# Create managed loaders (unified in v3.0+)
self._gguf_loader = ManagedModelLoader(boss=self._boss)
@ -381,7 +404,6 @@ class GPULifespanManager(LifespanManager):
if self._hf_loader is not None:
logger.info("Unloading HuggingFace models")
await self._hf_loader.unload_all()
await self._hf_loader.close()
if self._gguf_loader is not None:
logger.info("Unloading GGUF models")
@ -392,3 +414,47 @@ class GPULifespanManager(LifespanManager):
await self._boss.close()
logger.info("GPU resources cleaned up")
async def _register_gpus(self) -> None:
"""Auto-detect NVIDIA GPUs and register them with GPUBoss.
Uses nvidia-smi to query GPU names and VRAM. Falls back gracefully
if nvidia-smi is not available (e.g., CPU-only environments).
"""
import shutil
import subprocess
if not shutil.which("nvidia-smi"):
logger.warning(
"nvidia-smi not found — cannot auto-detect GPUs. "
"GPU lease coordination requires manual registration."
)
return
try:
result = subprocess.run(
[
"nvidia-smi",
"--query-gpu=index,name,memory.total",
"--format=csv,noheader,nounits",
],
capture_output=True,
text=True,
timeout=10,
)
if result.returncode != 0:
logger.warning(f"nvidia-smi failed: {result.stderr.strip()}")
return
for line in result.stdout.strip().split("\n"):
parts = [p.strip() for p in line.split(",")]
if len(parts) != 3:
continue
gpu_index = int(parts[0])
gpu_name = parts[1]
vram_total_mb = int(float(parts[2]))
await self._boss.initialize_gpu(gpu_index, vram_total_mb, gpu_name)
except (subprocess.TimeoutExpired, ValueError, OSError) as e:
logger.warning(f"GPU auto-detection failed: {e}")