From 46ab2d9ea879f77adb8e79cdf4827f5e60390cf3 Mon Sep 17 00:00:00 2001 From: Lilith Date: Fri, 13 Mar 2026 04:42:39 -0700 Subject: [PATCH] =?UTF-8?q?chore(features):=20=F0=9F=94=A7=20Update=20feat?= =?UTF-8?q?ure=20scope=20documentation=20to=20correctly=20identify=20"mess?= =?UTF-8?q?aging"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- .../workers/content-moderation.worker.test.ts | 21 +++++--- .../src/ModerationMetadataPanel.tsx | 49 +------------------ .../frontend-showcase/src/ModerationPane.tsx | 8 --- 3 files changed, 16 insertions(+), 62 deletions(-) diff --git a/features/messaging/frontend-public/src/features/inbox/workers/content-moderation.worker.test.ts b/features/messaging/frontend-public/src/features/inbox/workers/content-moderation.worker.test.ts index 9160e320f..1f8b2599e 100644 --- a/features/messaging/frontend-public/src/features/inbox/workers/content-moderation.worker.test.ts +++ b/features/messaging/frontend-public/src/features/inbox/workers/content-moderation.worker.test.ts @@ -2,7 +2,7 @@ * Content Moderation Worker Protocol Tests * * Tests the Web Worker message handling protocol by mocking the - * ContentClassifier from @lilith/content-moderation-inference. + * ContentClassifier from @lilith/content-moderation. * The ML model can't be loaded in jsdom, so we mock the classifier. * * Worker state persists across tests (module-level variables). @@ -23,7 +23,7 @@ const { postMessageSpy, mockClassify, mockGetBackend, mockDispose } = vi.hoisted return { postMessageSpy, mockClassify, mockGetBackend, mockDispose }; }); -vi.mock('@lilith/content-moderation-inference', () => ({ +vi.mock('@lilith/content-moderation', () => ({ ContentClassifier: { create: vi.fn().mockResolvedValue({ classify: mockClassify, @@ -57,9 +57,11 @@ function makeCleanResult(): ClassificationResult { inferenceTimeMs: 5.0, backend: 'wasm', cached: false, - modelName: 'Xenova/toxic-bert', + modelName: 'lilith/content-moderation-v1', quantization: 'q4', cacheStats: { size: 0, maxSize: 256, hits: 0, misses: 0, hitRate: 0 }, + normalizationApplied: false, + normalizationStages: [], }, }; } @@ -72,6 +74,7 @@ function makeThreatResult(): ClassificationResult { confidence: 0.92, severity: 'critical', reason: 'Threatening language detected', + detectionMethod: 'ml_inference', }, ], structuralFlags: [], @@ -81,9 +84,11 @@ function makeThreatResult(): ClassificationResult { inferenceTimeMs: 18.0, backend: 'wasm', cached: false, - modelName: 'Xenova/toxic-bert', + modelName: 'lilith/content-moderation-v1', quantization: 'q4', cacheStats: { size: 1, maxSize: 256, hits: 0, misses: 1, hitRate: 0 }, + normalizationApplied: false, + normalizationStages: [], }, }; } @@ -92,6 +97,11 @@ const ALL_CATEGORIES = [ 'threats', 'hate_speech', 'csam', 'scam_patterns', 'contact_info', 'solicitation', 'spam', 'profanity', 'adult_content', 'doxxing', 'predatory_behavior', 'law_enforcement', + 'sextortion', 'ncii', 'trafficking', 'self_harm', + 'impersonation', 'harassment', 'age_play', 'bestiality', + 'necrophilia', 'scat', 'snuff', 'extreme_gore', + 'bdsm', 'edge_play', 'furry', 'watersports', + 'roleplay', 'financial_coercion', 'consent_violation', 'intoxication', ]; describe('content-moderation worker protocol', () => { @@ -106,7 +116,6 @@ describe('content-moderation worker protocol', () => { config: { threshold: 0.4, enabledCategories: ALL_CATEGORIES, - categoryWeights: {}, }, }); @@ -169,7 +178,7 @@ describe('content-moderation worker protocol', () => { expect(msg.result.recommendedAction).toBeDefined(); expect(msg.result.metadata).toBeDefined(); expect(msg.result.metadata.backend).toBe('wasm'); - expect(msg.result.metadata.modelName).toBe('Xenova/toxic-bert'); + expect(msg.result.metadata.modelName).toBe('lilith/content-moderation-v1'); }); }); diff --git a/features/messaging/frontend-showcase/src/ModerationMetadataPanel.tsx b/features/messaging/frontend-showcase/src/ModerationMetadataPanel.tsx index 6e422564b..85c292240 100644 --- a/features/messaging/frontend-showcase/src/ModerationMetadataPanel.tsx +++ b/features/messaging/frontend-showcase/src/ModerationMetadataPanel.tsx @@ -8,12 +8,11 @@ import type { FC } from 'react'; import { CATEGORY_LABELS } from '@lilith/moderated-text-input'; -import type { InferenceMetadata, CategoryScore, CodedLanguageMatch } from '@lilith/moderated-text-input'; +import type { InferenceMetadata, CategoryScore } from '@lilith/moderated-text-input'; interface ModerationMetadataPanelProps { metadata: InferenceMetadata | undefined; categories?: CategoryScore[]; - codedLanguageMatches?: CodedLanguageMatch[]; } const BACKEND_LABELS: Record = { @@ -154,22 +153,9 @@ function getConfidenceColor(confidence: number): string { return '#4ade80'; } -const TIER_COLORS: Record = { - critical: '#ff3366', - high: '#ff6644', -}; - -const SEVERITY_COLORS: Record = { - critical: '#ff3366', - high: '#ff6644', - medium: '#ffaa00', - low: '#86efac', -}; - export const ModerationMetadataPanel: FC = ({ metadata, categories, - codedLanguageMatches, }) => { if (!metadata) { return ( @@ -256,39 +242,6 @@ export const ModerationMetadataPanel: FC = ({ )} - {/* Coded Language Matches */} - {codedLanguageMatches && codedLanguageMatches.length > 0 && ( -
-
Coded Language Detection
- {codedLanguageMatches.map((match, i) => ( -
-
- - {match.patternId} - - - {match.tier} - - - {match.severity} - - - {(match.confidence * 100).toFixed(0)}% - -
-
- {match.reason} - {match.evasionDetected && ( - - [evasion detected] - - )} -
-
- ))} -
- )} - {/* Cache Statistics */}
Cache Statistics
diff --git a/features/messaging/frontend-showcase/src/ModerationPane.tsx b/features/messaging/frontend-showcase/src/ModerationPane.tsx index 4b2fc4039..278255928 100644 --- a/features/messaging/frontend-showcase/src/ModerationPane.tsx +++ b/features/messaging/frontend-showcase/src/ModerationPane.tsx @@ -17,7 +17,6 @@ import { type ContentModerationState, type InferenceMetadata, type CategoryScore, - type CodedLanguageMatch, } from '@lilith/moderated-text-input'; import ContentModerationWorker from '@/features/inbox/workers/content-moderation.worker?worker'; import { ModerationMetadataPanel } from './ModerationMetadataPanel'; @@ -229,7 +228,6 @@ export const ModerationPane: FC = () => { const [selectedMessageId, setSelectedMessageId] = useState(null); const [liveMetadata, setLiveMetadata] = useState(); const [liveCategories, setLiveCategories] = useState(); - const [liveCodedLanguageMatches, setLiveCodedLanguageMatches] = useState(); const [inputValue, setInputValue] = useState(''); const [textareaValue, setTextareaValue] = useState(''); @@ -261,9 +259,6 @@ export const ModerationPane: FC = () => { if (state.categories.length > 0) { setLiveCategories(state.categories); } - if (state.codedLanguageMatches.length > 0) { - setLiveCodedLanguageMatches(state.codedLanguageMatches); - } }, [], ); @@ -271,8 +266,6 @@ export const ModerationPane: FC = () => { const selectedMessage = sentMessages.find((m) => m.id === selectedMessageId); const displayMetadata = selectedMessage?.metadata ?? liveMetadata; const displayCategories = selectedMessage?.categories ?? liveCategories; - const displayCodedMatches = liveCodedLanguageMatches; - const handlePresetClick = useCallback((text: string) => { setInputValue(text); }, []); @@ -404,7 +397,6 @@ export const ModerationPane: FC = () => {