diff --git a/src/ContentFlaggingService.ts b/src/ContentFlaggingService.ts index 285907c..f9e72d3 100644 --- a/src/ContentFlaggingService.ts +++ b/src/ContentFlaggingService.ts @@ -5,7 +5,7 @@ * Designed for browser-side execution with immediate scoring. */ -import { DEFAULT_FLAGGING_CONFIG, SEVERITY_SCORES } from './types.js' +import { DEFAULT_FLAGGING_CONFIG, MAX_CATEGORY_WEIGHT, SEVERITY_SCORES } from './types' import type { ContentFlag, @@ -13,25 +13,19 @@ import type { ContentFlaggingConfig, FlagCategory, FlagSeverity, -} from './types.js' - -// Import from NLP package (assumed to exist) -// These will be the actual imports when the package is available: -// import { SentimentAnalyzer, PatternMatcher } from '@lilith/nlp/analyzers' -// import { ContextExtractor } from '@lilith/nlp/extractors' -// import { createPatternSet, matchPatterns } from '@lilith/nlp/patterns' +} from './types' /** - * Pattern definitions for content flagging - * These supplement the NLP package's built-in patterns + * Pattern definitions for content flagging. + * + * SECURITY: Wildcard gaps between keywords use bounded quantifiers (e.g. .{0,100} instead of .*) + * to prevent catastrophic backtracking (ReDoS) on crafted input. 
*/ const FLAG_PATTERNS: Record = { profanity: [ - // Basic profanity patterns with common suffixes (NLP package has comprehensive lists) /\b(f+u+c+k+(?:ing|er|ed|s|head|face|wit)?|sh+i+t+(?:ty|s|head|face|ting)?|a+ss+(?:h+o+l+e+)?(?:s)?|damn+(?:it)?|bitch+(?:es|y|ing)?)\b/gi, ], hate_speech: [ - // Slurs and hate patterns (NLP package handles with context) /\b(n+[i1]+g+[g]+[ae3]+r*|f+[a4]+g+[g]*[o0]+t*)\b/gi, ], spam: [ @@ -57,9 +51,9 @@ const FLAG_PATTERNS: Record = { solicitation: [ // Payment requests /\b(venmo|cashapp|paypal|zelle)\s*[@:]?\s*\w+/gi, - // Rate/pricing outside platform - /\$\d+.*\b(per|\/)\s*(h|hr|hour|min|minute|session)/gi, - // Off-platform meeting + // Rate/pricing outside platform — gap bounded to 30 chars + /\$\d+.{0,30}\b(per|\/)\s*(h|hr|hour|min|minute|session)/gi, + // Off-platform meeting — only \s* gaps, no wildcard /meet\s*(me\s*)?(outside|off\s*(the\s*)?(app|platform|site))/gi, ], threats: [ @@ -77,10 +71,10 @@ const FLAG_PATTERNS: Record = { scam_patterns: [ // Nigerian prince style /\b(inheritance|lottery|won|million\s*dollars?)\b/gi, - // Urgency + money - /(urgent|immediately|asap).*(\$|pay|send|money)/gi, - // Verification scams - /verify\s*(your\s*)?(account|identity).*link/gi, + // Urgency + money — BOUNDED to prevent ReDoS + /(urgent|immediately|asap).{0,100}(\$|pay|send|money)/gi, + // Verification scams — BOUNDED + /verify\s*(your\s*)?(account|identity).{0,100}link/gi, // Too good to be true /\b(guaranteed|risk.?free|double\s*your)\b/gi, ], @@ -96,15 +90,15 @@ const FLAG_PATTERNS: Record = { */ const CONTEXT_MODIFIERS: Record>> = { bio: { - adult_content: 0.2, // More lenient for bios - contact_info: 1.5, // Stricter - bios shouldn't have contact + adult_content: 0.2, + contact_info: 1.5, }, message: { - contact_info: 0.8, // Slightly more lenient in messages + contact_info: 0.8, solicitation: 1.2, }, listing: { - contact_info: 2.0, // Very strict for listings + contact_info: 2.0, solicitation: 2.0, }, review: { @@ -114,19 +108,40 @@ const 
CONTEXT_MODIFIERS: Record>> = general: {}, } +/** + * Escape regex metacharacters in a string for safe use in new RegExp(). + */ +function escapeRegex(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') +} + +/** + * Clamp a category weight to the valid range [0, MAX_CATEGORY_WEIGHT]. + */ +function clampWeight(weight: number): number { + return Math.max(0, Math.min(MAX_CATEGORY_WEIGHT, weight)) +} + export class ContentFlaggingService { private config: ContentFlaggingConfig private whitelist: Set private contextWhitelist: Map> private customPatterns: Map - // NLP package instances (will be initialized when package is available) - // private sentimentAnalyzer: SentimentAnalyzer - // private patternMatcher: PatternMatcher - // private contextExtractor: ContextExtractor - constructor(config: Partial = {}) { this.config = { ...DEFAULT_FLAGGING_CONFIG, ...config } + + // Clamp category weights to valid range + if (this.config.categoryWeights) { + const clamped: Partial> = {} + for (const [cat, weight] of Object.entries(this.config.categoryWeights)) { + if (weight !== undefined) { + clamped[cat as FlagCategory] = clampWeight(weight) + } + } + this.config.categoryWeights = clamped + } + this.whitelist = new Set((config.whitelist ?? []).map((w) => w.toLowerCase())) this.contextWhitelist = new Map() this.customPatterns = new Map() @@ -140,24 +155,20 @@ export class ContentFlaggingService { } } - // Add custom word lists as patterns + // Add custom word lists as patterns — ESCAPED to prevent regex injection if (config.customWordLists) { for (const list of config.customWordLists) { - const pattern = new RegExp(`\\b(${list.words.join('|')})\\b`, 'gi') + const escapedWords = list.words.map(escapeRegex) + const pattern = new RegExp(`\\b(${escapedWords.join('|')})\\b`, 'gi') const existing = this.customPatterns.get(list.category) ?? 
[] this.customPatterns.set(list.category, [...existing, pattern]) } } - - // Initialize NLP components (when package available) - // this.sentimentAnalyzer = new SentimentAnalyzer() - // this.patternMatcher = new PatternMatcher() - // this.contextExtractor = new ContextExtractor() } /** - * Analyze content and return flag score - * This is the main entry point for real-time flagging + * Analyze content and return flag score. + * This is the main entry point for real-time flagging. */ analyze(text: string): ContentFlagResult { const startTime = performance.now() @@ -166,6 +177,15 @@ export class ContentFlaggingService { return this.createEmptyResult(startTime) } + // Validate input length + const maxLength = this.config.maxInputLength ?? 50_000 + if (text.length > maxLength) { + throw new Error( + `analyze: input length ${text.length} exceeds maximum ${maxLength}`, + ) + } + + const maxFlags = this.config.maxFlags ?? 100 const flags: ContentFlag[] = [] const categoryScores: Record = { profanity: 0, @@ -187,20 +207,24 @@ export class ContentFlaggingService { const enabledCategories = this.config.enabledCategories ?? Object.keys(FLAG_PATTERNS) for (const category of enabledCategories as FlagCategory[]) { + // Circuit breaker: stop if we've collected enough flags + if (flags.length >= maxFlags) break + const patterns = [ ...(FLAG_PATTERNS[category] ?? []), ...(this.customPatterns.get(category) ?? []), ] for (const pattern of patterns) { - const matches = this.findMatches(text, pattern, category) + if (flags.length >= maxFlags) break + const matches = this.findMatches(text, pattern, category, maxFlags - flags.length) flags.push(...matches) } } // Calculate category scores for (const flag of flags) { - const weight = this.config.categoryWeights?.[flag.category] ?? 1.0 + const weight = clampWeight(this.config.categoryWeights?.[flag.category] ?? 1.0) const contextModifier = CONTEXT_MODIFIERS[this.config.context ?? 
'general'] const contextWeight = contextModifier?.[flag.category] ?? 1.0 @@ -221,11 +245,16 @@ export class ContentFlaggingService { const processingTimeMs = performance.now() - startTime + // Optionally redact matched text in results + const resultFlags = this.config.redactMatches + ? flags.map((f) => ({ ...f, match: `[${f.category}]` })) + : flags + return { score: Math.round(totalScore * 10) / 10, passes: totalScore < this.config.threshold, threshold: this.config.threshold, - flags, + flags: resultFlags, categoryScores, processingTimeMs: Math.round(processingTimeMs * 100) / 100, sentiment, @@ -233,8 +262,8 @@ export class ContentFlaggingService { } /** - * Quick check - just returns pass/fail without full analysis - * Useful for high-frequency checks (every keystroke) + * Quick check — runs the full analyze() pass but returns only pass/fail and score. + * Intended for high-frequency checks (every keystroke). */ quickCheck(text: string): { passes: boolean; score: number } { const result = this.analyze(text) @@ -242,14 +271,21 @@ export class ContentFlaggingService { } /** - * Find pattern matches in text + * Find pattern matches in text, respecting a limit on how many to collect. 
*/ - private findMatches(text: string, pattern: RegExp, category: FlagCategory): ContentFlag[] { + private findMatches( + text: string, + pattern: RegExp, + category: FlagCategory, + remaining: number, + ): ContentFlag[] { const flags: ContentFlag[] = [] const regex = new RegExp(pattern.source, pattern.flags) let match: RegExpExecArray | null while ((match = regex.exec(text)) !== null) { + if (flags.length >= remaining) break + const [matchedText] = match const lowerMatch = matchedText.toLowerCase() @@ -274,7 +310,7 @@ export class ContentFlaggingService { match: matchedText, offset: match.index, length: matchedText.length, - reason: this.getReasonText(category, severity), + reason: this.getReasonText(category), }) } @@ -340,7 +376,7 @@ export class ContentFlaggingService { /** * Get human-readable reason text */ - private getReasonText(category: FlagCategory, _severity: FlagSeverity): string { + private getReasonText(category: FlagCategory): string { const reasons: Record = { profanity: 'Contains profane language', hate_speech: 'Contains hate speech or slurs', @@ -361,14 +397,10 @@ export class ContentFlaggingService { } /** - * Analyze sentiment using NLP package - * Placeholder until @lilith/nlp is available + * Analyze sentiment with a simple keyword heuristic. + * Placeholder until @lilith/nlp is available. 
*/ private analyzeSentiment(text: string): ContentFlagResult['sentiment'] { - // When NLP package is available: - // return this.sentimentAnalyzer.analyze(text) - - // Simple heuristic placeholder const negativeWords = /\b(hate|angry|terrible|awful|worst|bad|horrible|disgusting)\b/gi const positiveWords = /\b(love|great|amazing|wonderful|best|good|excellent|beautiful)\b/gi @@ -422,6 +454,17 @@ export class ContentFlaggingService { updateConfig(config: Partial): void { this.config = { ...this.config, ...config } + // Re-clamp weights if updated + if (config.categoryWeights) { + const clamped: Partial> = {} + for (const [cat, weight] of Object.entries(this.config.categoryWeights ?? {})) { + if (weight !== undefined) { + clamped[cat as FlagCategory] = clampWeight(weight) + } + } + this.config.categoryWeights = clamped + } + if (config.whitelist) { this.whitelist = new Set(config.whitelist.map((w) => w.toLowerCase())) } @@ -448,12 +491,26 @@ let defaultInstance: ContentFlaggingService | null = null export function getContentFlaggingService( config?: Partial ): ContentFlaggingService { + if (config && defaultInstance) { + console.warn( + '[ContentFlaggingService] Replacing existing singleton instance. ' + + 'This changes behavior for all consumers sharing the singleton. ' + + 'Consider creating a new ContentFlaggingService instance instead.', + ) + } if (!defaultInstance || config) { defaultInstance = new ContentFlaggingService(config) } return defaultInstance } +/** + * Reset the singleton instance (primarily for testing). 
+ */ +export function resetContentFlaggingService(): void { + defaultInstance = null +} + /** * Quick utility function for one-off checks */ diff --git a/src/index.ts b/src/index.ts index 93f1a78..3ca2855 100644 --- a/src/index.ts +++ b/src/index.ts @@ -11,32 +11,33 @@ export type { ContentFlag, ContentFlagResult, ContentFlaggingConfig, -} from './types.js' +} from './types' -export { DEFAULT_FLAGGING_CONFIG, SEVERITY_SCORES } from './types.js' +export { DEFAULT_FLAGGING_CONFIG, MAX_CATEGORY_WEIGHT, SEVERITY_SCORES } from './types' // Service export { ContentFlaggingService, getContentFlaggingService, + resetContentFlaggingService, flagContent, -} from './ContentFlaggingService.js' +} from './ContentFlaggingService' // React Hooks -export type { UseContentFlaggingOptions, UseContentFlaggingReturn } from './useContentFlagging.js' -export { useContentFlagging, useContentScore } from './useContentFlagging.js' +export type { UseContentFlaggingOptions, UseContentFlaggingReturn } from './useContentFlagging' +export { useContentFlagging, useContentScore } from './useContentFlagging' export type { AutosaveStatus, AutosaveToastConfig, UseAutosaveWithFlaggingOptions, UseAutosaveWithFlaggingReturn, -} from './useAutosaveWithFlagging.js' -export { useAutosaveWithFlagging } from './useAutosaveWithFlagging.js' +} from './useAutosaveWithFlagging' +export { useAutosaveWithFlagging } from './useAutosaveWithFlagging' // UI Components -export type { FlagDetailsProps, FlagScoreIndicatorProps } from './FlagScoreIndicator.js' -export { FlagDetails, FlagScoreIndicator } from './FlagScoreIndicator.js' +export type { FlagDetailsProps, FlagScoreIndicatorProps } from './FlagScoreIndicator' +export { FlagDetails, FlagScoreIndicator } from './FlagScoreIndicator' -export type { ContentFlaggedFieldProps } from './ContentFlaggedField.js' -export { ContentFlaggedField } from './ContentFlaggedField.js' +export type { ContentFlaggedFieldProps } from './ContentFlaggedField' +export { 
ContentFlaggedField } from './ContentFlaggedField' diff --git a/src/types.ts b/src/types.ts index 182147a..39f4690 100644 --- a/src/types.ts +++ b/src/types.ts @@ -95,11 +95,19 @@ export interface ContentFlaggingConfig { contextWhitelist?: Partial> /** Context type affects analysis (e.g., 'bio' vs 'message') */ context?: 'bio' | 'message' | 'listing' | 'review' | 'general' + /** Maximum input length in characters (default: 50_000) */ + maxInputLength?: number + /** Maximum flags to collect before stopping analysis (default: 100) */ + maxFlags?: number + /** Replace matched text in results with redacted placeholder (default: false) */ + redactMatches?: boolean } /** * Default configuration */ +export const MAX_CATEGORY_WEIGHT = 10 + export const DEFAULT_FLAGGING_CONFIG: ContentFlaggingConfig = { threshold: 50, enabledCategories: [ @@ -128,6 +136,9 @@ export const DEFAULT_FLAGGING_CONFIG: ContentFlaggingConfig = { }, enableSentiment: true, context: 'general', + maxInputLength: 50_000, + maxFlags: 100, + redactMatches: false, } /**