# cot-reasoning/config.yaml

# CoT Reasoning Service Configuration

# Service identity and network settings.
service:
  name: cot-reasoning
  # There is intentionally no `port` key in this file: the port is resolved
  # at runtime from lilith-service-addresses.
  # NOTE(review): presumably the listen/bind address (0.0.0.0 would mean all
  # IPv4 interfaces) — confirm against the server startup code.
  # Kept quoted so the value is always loaded as a string.
  host: "0.0.0.0"

# LLM backend selection and model settings.
llm:
  # model-boss backend for GPU-coordinated GGUF model loading (recommended)
  backend: model-boss
  # Quoted so the identifier (which contains a dotted version) stays a string.
  model_id: "qwen2.5-1.5b-instruct"
  # NOTE(review): the unit is not stated in this file — presumably seconds;
  # confirm against the code that reads this value.
  timeout: 180.0

  # Alternative backends (override with the COT_LLM__BACKEND env var):
  # - ollama: host=http://localhost:11434, model=ministral:14b
  # - llama-http: host=http://localhost:8100, model=ministral-14b-reasoning
  # NOTE(review): this file defines no `host` or `model` keys (only
  # `model_id`), so the exact key / env var names for those settings must be
  # checked in the settings loader before switching backends.

# Reasoning pipeline behaviour: default stages, result cache, JSON extraction.
reasoning:
  # Stages to execute when a request does not specify any.
  default_stages:
    - analyze

  # Cache settings
  cache:
    enabled: true
    # Entry time-to-live in seconds (3600 s = 1 hour).
    ttl_seconds: 3600
    # NOTE(review): presumably the maximum number of cached entries, not a
    # byte size — confirm against the cache implementation.
    max_size: 1000

  # JSON extraction settings
  json:
    retry_on_failure: true
    # NOTE(review): presumably only consulted when retry_on_failure is true,
    # and counted in addition to the first attempt — confirm in the caller.
    max_retries: 2

# Log verbosity and line layout.
logging:
  level: INFO
  # %-style format string with named fields: timestamp, logger name, level,
  # and message, separated by " - ".
  # NOTE(review): the field names match Python `logging` LogRecord attributes,
  # so this is presumably passed to logging.Formatter — confirm in the
  # service's logging setup.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"