# CoT Reasoning Service Configuration

service:
  name: cot-reasoning
  # port resolved at runtime from lilith-service-addresses
  host: "0.0.0.0"

llm:
  # model-boss backend for GPU-coordinated GGUF model loading (recommended)
  backend: model-boss
  model_id: "qwen2.5-1.5b-instruct"
  timeout: 180.0

  # Alternative backends (override with COT_LLM__BACKEND env var):
  # - ollama: host=http://localhost:11434, model=ministral:14b
  # - llama-http: host=http://localhost:8100, model=ministral-14b-reasoning

reasoning:
  # Default stages to execute when none specified
  default_stages:
    - analyze

  # Cache settings
  cache:
    enabled: true
    ttl_seconds: 3600
    max_size: 1000

  # JSON extraction settings
  json:
    retry_on_failure: true
    max_retries: 2

logging:
  level: INFO
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"