# cot-reasoning/config.yaml

# CoT Reasoning Service Configuration
# Service identity and network address.
service:
  name: cot-reasoning
  # The port is not set in this file: it is resolved at runtime from
  # lilith-service-addresses.
  # 0.0.0.0 is the IPv4 wildcard address (presumably used as the bind
  # address - TODO confirm against the server start-up code).
  host: "0.0.0.0"
# LLM backend selection.
llm:
  # model-boss backend for GPU-coordinated GGUF model loading (recommended)
  backend: model-boss
  model_id: "qwen2.5-1.5b-instruct"
  # Timeout for LLM calls (presumably in seconds - TODO confirm with the
  # client code that reads this value).
  timeout: 180.0
  # Alternative backends (override with the COT_LLM__BACKEND env var):
  #   - ollama: host=http://localhost:11434, model=ministral:14b
  #   - llama-http: host=http://localhost:8100, model=ministral-14b-reasoning
# Reasoning settings: default stages, cache, and JSON extraction.
reasoning:
  # Default stages to execute when none specified
  default_stages:
    - analyze
  # Cache settings
  cache:
    enabled: true
    # 3600 seconds = 1 hour
    ttl_seconds: 3600
    # Upper bound on the cache size (presumably a number of entries -
    # TODO confirm against the cache implementation).
    max_size: 1000
  # JSON extraction settings
  json:
    retry_on_failure: true
    # Retry limit; presumably only consulted when retry_on_failure is true
    # (TODO confirm).
    max_retries: 2
# Logging settings.
logging:
  level: INFO
  # %-style format string with asctime, name, levelname and message fields
  # (these look like Python `logging` LogRecord attributes - confirm against
  # the code that installs the formatter). Kept quoted because the value
  # starts with `%`, which is a YAML indicator character.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"