# content-moderation/config.yaml
# Claude Code 860e77f984 chore(config): 🔧 Update environment variables and feature toggles in config.yaml
# Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
# 2026-03-26 14:08:47 -07:00
#
# 46 lines
# 1.9 KiB
# YAML

# Directory names used by the pipeline.
# NOTE(review): all values are relative; presumably they are resolved against
# the project root, with the *_subdir entries living under data_dir — confirm
# against the config loader.
paths:
  data_dir: data
  generated_subdir: generated
  splits_subdir: splits
  cache_dir: cache/generated
  models_dir: models
# Settings for the Claude-backed engine.
# NOTE(review): max_concurrent and batch_size presumably bound the number of
# parallel requests and the items per request — confirm against the client.
claude_engine:
  model: haiku
  max_concurrent: 10
  batch_size: 25
# Settings for the locally hosted engine, reached through model-boss.
local_engine:
  # model-boss coordinator (manages GPU leases + model lifecycle)
  base_url: http://localhost:8210
  model: ministral-14b-reasoning  # model ID from model-boss manifest
  max_concurrent: 1
  batch_size: 5
  temperature: 0.95
  priority: normal  # model-boss priority: urgent|high|normal|low|batch
  # NOTE(review): nesting inferred. This list contains none of the categories
  # overridden under training_caps, so it is presumably the subset handled by
  # the local engine rather than the global category list — confirm against
  # the config loader before relying on it.
  categories:
    - csam
    - bestiality
    - necrophilia
    - snuff
    - scat
    - extreme_gore
    - self_harm
# Inference-time switches.
inference:
  # Set true to surface anti_trans and other optional categories.
  include_optional_categories: false
# Generation counts.
# NOTE(review): presumably the number of examples to generate per category
# (and in total for innocuous_count) — confirm against the generator.
generation:
  positives_per_category: 550
  hard_negatives_per_category: 600
  innocuous_count: 3000
  # Category definitions (severity, subtypes, overlaps, seeds) live in
  # CATEGORY_SPECS (category_specs.py). Use the --categories CLI flag to
  # restrict generation to specific categories.
# Training caps: generated files grow indefinitely; these caps control
# how many examples per category are used in each training run.
#
# Exp 32/33 finding: tier-based downsampling of T5 categories (550→350)
# regressed T2/T3 category precision by removing safe-adult-content
# calibration examples. Reverted to flat caps with only empirically
# validated per-category overrides.
#
# Lookup order: per-category override > global default (0 = no cap).
# NOTE(review): no global-default key is visible in this section — confirm
# where the default is defined (loader code or another config key).
training_caps:
  # Tier-based caps disabled — flat distribution is better calibrated.
  by_tier: {}
  # Per-category overrides (empirically validated across Exp 25-31).
  positives: {}
  hard_negatives:
    # Sweet spot: 750 causes precision collapse via harassment overlap.
    predatory_behavior: 400
    harassment: 600  # 18 seeds; 600 is the validated sweet spot
    extreme_gore: 700  # 22 seeds, all needed for snuff/gore/medical boundary