From bcaa005e64826d6f71d8d1ba1fcd2e0ee42fb9b1 Mon Sep 17 00:00:00 2001 From: Quinn Ftw Date: Mon, 29 Dec 2025 05:11:58 -0800 Subject: [PATCH] docs: add ML integration documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document ML service integration patterns for the platform. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- features/ML_INTEGRATION.md | 311 +++++++++++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100644 features/ML_INTEGRATION.md diff --git a/features/ML_INTEGRATION.md b/features/ML_INTEGRATION.md new file mode 100644 index 000000000..fd4fa423c --- /dev/null +++ b/features/ML_INTEGRATION.md @@ -0,0 +1,311 @@ +# ML Features Integration Plan + +## Overview + +Three ML-powered features that work together to provide intelligent content management with **hallucination prevention**: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ 3-LAYER SAFETY MODEL │ +├─────────────────────────────────────────────────────────────────┤ +│ Layer 1: PREVENTION │ +│ - STATIC_PLATFORM_FACTS baked into TypeScript bundles │ +│ - Client-side validation even when services down │ +│ - Compile-time safety net │ +├─────────────────────────────────────────────────────────────────┤ +│ Layer 2: DETECTION │ +│ - 7 claim types: economics, competitor, statistical, etc. 
│ +│ - Pattern matching + semantic analysis │ +│ - Severity classification (critical/high/warning/info) │ +├─────────────────────────────────────────────────────────────────┤ +│ Layer 3: CORRECTION │ +│ - Auto-correct violations before content published │ +│ - "keep 85%" → "keep 100%" │ +│ - "escort" → "creator" │ +└─────────────────────────────────────────────────────────────────┘ +``` + +## Critical Platform Facts + +**These values MUST be enforced across all services:** + +```typescript +const STATIC_PLATFORM_FACTS = { + economics: { + creatorTakeRate: "100%", // NOT 85%! + platformFee: "$0", // NOT 15%! + payoutFrequency: "weekly", + }, + competitors: { + onlyfans_fee: "20%", // Common hallucination: 30% + chaturbate_fee: "50%", + fansly_fee: "20%", + }, +}; +``` + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ llama-service (LLM) │ +│ ~/Code/@packages/@ml/ │ +└─────────────────────────────────────────────────────────────────┘ + │ + ┌─────────────────────┼─────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌───────────────┐ ┌─────────────────┐ ┌───────────────┐ +│ i18n-service │ │ truth-service │ │ seo-service │ +│ Port 41231 │ │ Port 
41232 │ │ Port 41230 │ +│ │ │ │ │ │ +│ 6 providers │ │ 7 claim types │ │ Geographic │ +│ Auto-fallback│ │ Auto-correct │ │ hierarchy │ +└───────┬───────┘ └────────┬────────┘ └───────┬───────┘ + │ │ │ + │ ┌─────────┴─────────┐ │ + └──────────► validates both ◄──────────┘ + └───────────────────┘ +``` + +## Service Dependencies + +| Service | Port | Depends On | Used By | +|---------|------|------------|---------| +| llama-service | 8080 | GPU | All ML services | +| truth-service | 41232 | llama-service | i18n, seo | +| i18n-service | 41231 | llama-service, truth-service | React apps | +| seo-service | 41230 | llama-service, truth-service | All frontends | + +## Integration Flows + +### Flow 1: Translation with Validation + +``` +User requests translation + │ + ▼ +┌───────────────┐ +│ i18n-service │──── 1. Get translation from LLM +│ │◄─── llama-service returns translation +│ │ +│ │──── 2. Validate translation +│ │◄─── truth-service returns validation +│ │ +│ │──── 3. Return (possibly corrected) +└───────────────┘ + │ + ▼ + React app displays +``` + +### Flow 2: SEO Generation with Validation + +``` +User configures SEO + │ + ▼ +┌───────────────┐ +│ seo-service │──── 1. Generate metadata from LLM +│ │◄─── llama-service returns SEO +│ │ +│ │──── 2. Validate against facts +│ │◄─── truth-service returns validation +│ │ +│ │──── 3. 
Cache and return +└───────────────┘ + │ + ▼ + HTML tags +``` + +### Flow 3: Content Publishing + +``` +Creator writes content + │ + ▼ +┌─────────────────┐ +│ truth-service │◄─── Validate facts/terminology +│ │ +│ If violations: │ +│ - Flag issues │ +│ - Suggest fix │ +│ - Block if │ +│ critical │ +└─────────────────┘ + │ + ▼ +┌─────────────────┐ +│ i18n-service │◄─── Translate to other locales +└─────────────────┘ + │ + ▼ + Published in all locales +``` + +## Integration Implementation + +### Phase 1: Service Discovery Setup + +All services register with service-registry: + +```python +# In each service's startup +discovery = ServiceDiscoveryClient() +await discovery.register({ + "name": "i18n-service", + "type": "ml", + "port": 41231, + "healthEndpoint": "/health", + "dependencies": ["truth-service", "llama-service"], +}) +``` + +### Phase 2: Python Client Library + +Create shared client for Python services: + +```python +# features/truth-validation/client/python/lilith_truth_client/client.py +class TruthClient: + def __init__(self, discovery: ServiceDiscoveryClient): + self.discovery = discovery + + async def validate(self, content: str, auto_correct: bool = False): + service = await self.discovery.find_service("truth-service") + # Make request... 
+``` + +Used by i18n-service and seo-service: + +```python +# In i18n-service +truth_client = TruthClient(discovery) +validation = await truth_client.validate(translated_text) +``` + +### Phase 3: Shared Facts Source + +Truth-service becomes single source of platform facts: + +```yaml +# config/platform-facts.yaml +economics: + platform_fee: "$0" + creator_share: "100%" + +competitors: + onlyfans_fee: "20%" + +terminology: + forbidden: + - prostitute + - escort + preferred: + sex worker: [prostitute, hooker] + companion: [escort] +``` + +Other services query truth-service for facts: +- i18n uses glossary from truth-service +- seo uses facts for content validation + +### Phase 4: Cross-Service Testing + +```python +# Integration test +async def test_translation_validation_flow(): + # 1. Start truth-service + # 2. Start i18n-service + + # 3. Translate content with violation + result = await i18n.translate( + "OnlyFans takes 30%", # Wrong fact + target_locale="es" + ) + + # 4. Verify truth-service caught it + assert result.truth_validation.valid == False + assert "competitor claim" in result.truth_validation.issues[0].message +``` + +## Deployment Order + +1. **llama-service** - No dependencies +2. **truth-service** - Depends on llama-service +3. **i18n-service** - Depends on llama + truth +4. 
**seo-service** - Depends on llama + truth + +## Health Check Chain + +``` +GET /health on each service should verify: + +llama-service: + - GPU available + - Model loaded + +truth-service: + - llama-service reachable + - Facts loaded + +i18n-service: + - llama-service reachable + - truth-service reachable + - Glossary loaded + +seo-service: + - llama-service reachable + - truth-service reachable + - Cache initialized +``` + +## API Gateway Routing + +```nginx +# ML Services +location /api/i18n/ { + proxy_pass http://i18n-service:41231/api/i18n/; +} + +location /api/truth/ { + proxy_pass http://truth-service:41232/api/truth/; +} + +location /api/seo/ { + proxy_pass http://seo-service:41230/api/seo/; +} +``` + +## Monitoring + +Each service exposes Prometheus metrics: +- Request count/latency +- LLM call count/latency +- Cache hit rates +- Validation violations + +Dashboard in platform-admin shows: +- Service health status +- Translation activity +- Validation statistics +- SEO generation stats + +## Rollout Plan + +1. **Week 1**: Deploy truth-service to staging + - Validate rules work correctly + - Test fact corrections + +2. **Week 2**: Deploy i18n-service to staging + - Test ML translation + - Test truth integration + +3. **Week 3**: Deploy seo-service to staging + - Test SEO generation + - Test multi-tenant routing + +4. **Week 4**: Production rollout + - Blue-green deployment + - Gradual traffic shift