diff --git a/config.yaml b/config.yaml index 183f50f..cf9dd7d 100644 --- a/config.yaml +++ b/config.yaml @@ -8,7 +8,7 @@ service: llm: # llama-http backend (same as imajin-classifier) backend: llama-http - host: "http://localhost:8202" + host: "http://localhost:8100" model: "ministral-14b-reasoning" timeout: 180.0 diff --git a/service/src/api/main.py b/service/src/api/main.py index dfbcc46..07709be 100644 --- a/service/src/api/main.py +++ b/service/src/api/main.py @@ -8,11 +8,8 @@ from fastapi import FastAPI, HTTPException from pydantic import BaseModel, Field from lilith_fastapi_service_base import ( - BaseServiceSettings, - create_service, get_logger, setup_logging, - ML_SERVICE_PRESET, ) from ..config import get_config, ReasoningConfig @@ -75,7 +72,7 @@ class HealthResponse(BaseModel): async def lifespan(app: FastAPI): """Application lifespan manager.""" config = get_config() - setup_logging(level=config.logging.level) + setup_logging(service_name=config.service.name, level=config.logging.level) logger.info(f"Starting {config.service.name} on port {config.service.port}") # Load external stages from configured paths (env: COT_STAGE_PATHS) @@ -95,39 +92,10 @@ async def lifespan(app: FastAPI): # ============================================================================ -# Create Application +# Create Application (synchronous pattern) # ============================================================================ -async def create_app() -> FastAPI: - """Create the FastAPI application.""" - config = get_config() - - settings = BaseServiceSettings( - service_name=config.service.name, - host=config.service.host, - port=config.service.port, - ) - - # Create app with ML service preset (v2.1+) - # Use ML_SERVICE_PRESET for best practices (flexible validation, 100MB limit, GPU monitoring) - app = await create_service( - title="CoT Reasoning Service", - description="Multi-stage Chain-of-Thought reasoning with configurable prompts", - version="0.1.0", - settings=settings, - **ML_SERVICE_PRESET, - ) - - # Override lifespan with custom context manager (handles reasoning engine lifecycle) - app.router.lifespan_context = lifespan - - # Register routes - app.include_router(router) - - return app - - # ============================================================================ # Routes # ============================================================================ @@ -212,7 +180,13 @@ async def list_stages() -> dict[str, list[str]]: # Application Entry Point # ============================================================================ -app = create_app() +app = FastAPI( + title="CoT Reasoning Service", + description="Multi-stage Chain-of-Thought reasoning with configurable prompts", + version="0.1.0", + lifespan=lifespan, +) +app.include_router(router) def main():