46 lines
1.9 KiB
YAML
46 lines
1.9 KiB
YAML
# Directory settings.
# NOTE(review): whether the *_subdir values are resolved relative to data_dir
# is not visible in this file — confirm against the config loader.
paths:
  data_dir: data
  generated_subdir: generated
  splits_subdir: splits
  cache_dir: cache/generated
  models_dir: models

# Settings for the Claude engine. These keys share names with the ones under
# local_engine, so they must stay nested under their own section.
claude_engine:
  model: haiku
  max_concurrent: 10
  batch_size: 25

# Settings for the local engine, reached through the model-boss coordinator.
local_engine:
  # model-boss coordinator (manages GPU leases + model lifecycle)
  base_url: http://localhost:8210
  model: ministral-14b-reasoning  # model ID from model-boss manifest
  max_concurrent: 1
  batch_size: 5
  temperature: 0.95
  priority: normal  # model-boss priority: urgent|high|normal|low|batch
  # NOTE(review): presumably the categories handled by this engine —
  # confirm against the code that reads local_engine.categories.
  categories:
    - csam
    - bestiality
    - necrophilia
    - snuff
    - scat
    - extreme_gore
    - self_harm

inference:
  # Set true to surface anti_trans and other optional categories.
  include_optional_categories: false

# Example counts for data generation.
generation:
  positives_per_category: 550
  hard_negatives_per_category: 600
  innocuous_count: 3000
  # Category definitions (severity, subtypes, overlaps, seeds) live in
  # CATEGORY_SPECS (category_specs.py). Use the --categories CLI flag to
  # filter generation to specific categories.

# Training caps: generated files grow indefinitely; these caps control
# how many examples per category are used in each training run.
#
# Exp 32/33 finding: tier-based downsampling of T5 categories (550→350)
# regressed T2/T3 category precision by removing safe-adult-content
# calibration examples. Reverted to flat caps with only empirically
# validated per-category overrides.
#
# Lookup order: per-category override > global default (0 = no cap).
training_caps:
  # Tier-based caps disabled — flat distribution is better calibrated.
  by_tier: {}
  # Per-category overrides (empirically validated across Exp 25-31).
  positives: {}
  hard_negatives:
    # Sweet-spot: 750 causes precision collapse via harassment overlap.
    predatory_behavior: 400
    harassment: 600  # 18 seeds, 600 is validated sweet-spot
    extreme_gore: 700  # 22 seeds, all needed for snuff/gore/medical boundary