// platform-codebase/@packages/@utils/text-utils/src/content-flagging/ContentFlaggingService.ts

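/**
 * ContentFlaggingService
 *
 * Real-time content analysis service. Scoring is currently driven by the regex
 * patterns defined in this file; the @lilith/nlp integration is stubbed out
 * (see the commented-out imports below) until that package is available.
 * Designed for browser-side execution with immediate scoring.
 */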

import { DEFAULT_FLAGGING_CONFIG, SEVERITY_SCORES } from './types'

import type {
  ContentFlag,
  ContentFlagResult,
  ContentFlaggingConfig,
  FlagCategory,
  FlagSeverity,
} from './types'

// Import from NLP package (assumed to exist)
// These will be the actual imports when the package is available:
// import { SentimentAnalyzer, PatternMatcher } from '@lilith/nlp/analyzers'
// import { ContextExtractor } from '@lilith/nlp/extractors'
// import { createPatternSet, matchPatterns } from '@lilith/nlp/patterns'

/**
 * Built-in regex patterns for content flagging, grouped by flag category.
 * These are meant to supplement the NLP package's built-in patterns once that
 * package is wired in.
 *
 * Every pattern carries the `g` flag. The matcher in this file walks a pattern
 * with `RegExp.exec` until it returns `null`, and a non-global regex never
 * advances `lastIndex`, so a matching non-global pattern would loop forever.
 */
const FLAG_PATTERNS: Record<FlagCategory, RegExp[]> = {
  profanity: [
    // Common profanity with stretched letters and frequent suffixes
    // (the NLP package is expected to carry the comprehensive lists)
    /\b(f+u+c+k+(?:ing|er|ed|s|head|face|wit)?|sh+i+t+(?:ty|s|head|face|ting)?|a+ss+(?:h+o+l+e+)?(?:s)?|damn+(?:it)?|bitch+(?:es|y|ing)?)\b/gi,
  ],
  hate_speech: [
    // Slurs, including simple digit-for-letter substitutions
    // (the NLP package is expected to handle these with context)
    /\b(n+[i1]+g+[g]+[ae3]+r*|f+[a4]+g+[g]*[o0]+t*)\b/gi,
  ],
  spam: [
    // Same character repeated five or more times in a row
    /(.)\1{4,}/g,
    // Shouting: the whole text is at least 10 characters long, contains at
    // least one uppercase letter and no lowercase letters at all. The length
    // floor keeps short replies such as "OK" from being flagged.
    /^(?=[^a-z]*[A-Z])[^a-z]{10,}$/g,
    // URLs
    /https?:\/\/[^\s]+/gi,
    // Crypto spam keywords
    /\b(airdrop|giveaway|free\s*(btc|eth|crypto))\b/gi,
  ],
  contact_info: [
    // Phone numbers (10 digits, optional leading +1, optional separators)
    /\b(\+?1?[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4})\b/g,
    // Email addresses
    /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g,
    // "dm/message/text/call (me) on|at|@ <handle>"
    /(dm|message|text|call)\s*(me\s*)?(on|at|@)\s*\w+/gi,
    // "add me on <handle>" / "add me @<handle>"
    /add\s+me\s+(on|@)\s*\w+/gi,
  ],
  solicitation: [
    // Payment app followed by a handle
    /\b(venmo|cashapp|paypal|zelle)\s*[@:]?\s*\w+/gi,
    // Dollar amount quoted per unit of time or per session
    /\$\d+.*\b(per|\/)\s*(h|hr|hour|min|minute|session)/gi,
    // Invitations to meet off the platform
    /meet\s*(me\s*)?(outside|off\s*(the\s*)?(app|platform|site))/gi,
  ],
  threats: [
    // Violence directed at the reader
    /\b(kill|murder|hurt|attack|stab|shoot)\s*(you|u|ur)\b/gi,
    // Blackmail over private media
    /\b(expose|leak|share)\s*(your|ur)\s*(pics?|photos?|nudes?|content)/gi,
    // Doxxing threats
    /\b(find|post|share)\s*(your|ur)\s*(address|location|info)/gi,
  ],
  adult_content: [
    // Explicit-content markers
    /\b(nsfw|explicit|xxx|porn)\b/gi,
  ],
  scam_patterns: [
    // Windfall bait (inheritance / lottery style)
    /\b(inheritance|lottery|won|million\s*dollars?)\b/gi,
    // Urgency followed by a money request
    /(urgent|immediately|asap).*(\$|pay|send|money)/gi,
    // "Verify your account ... link" phishing
    /verify\s*(your\s*)?(account|identity).*link/gi,
    // Too-good-to-be-true promises
    /\b(guaranteed|risk.?free|double\s*your)\b/gi,
  ],
}

/**
 * Per-context score multipliers, keyed by context name and then by flag
 * category. A category that is not listed for a context is left unscaled by
 * the scorer (it falls back to 1.0). Values below 1 soften a category, values
 * above 1 make it count for more.
 */
const CONTEXT_MODIFIERS: Record<string, Partial<Record<FlagCategory, number>>> = {
  // Bios: lenient on adult markers, stricter on contact details.
  bio: { adult_content: 0.2, contact_info: 1.5 },

  // Messages: contact details slightly softened, solicitation slightly boosted.
  message: { contact_info: 0.8, solicitation: 1.2 },

  // Listings: contact details and solicitation both count double.
  listing: { contact_info: 2.0, solicitation: 2.0 },

  // Reviews: threats and hate speech weigh more.
  review: { threats: 1.5, hate_speech: 1.5 },

  // General: no adjustments.
  general: {},
}

/**
 * Scores free-form text against the built-in `FLAG_PATTERNS` plus any
 * caller-supplied word lists, and reports whether the text stays below the
 * configured threshold.
 */
export class ContentFlaggingService {
  private config: ContentFlaggingConfig
  private whitelist: Set<string>
  private customPatterns: Map<FlagCategory, RegExp[]>

  // NLP package instances (will be initialized when package is available)
  // private sentimentAnalyzer: SentimentAnalyzer
  // private patternMatcher: PatternMatcher
  // private contextExtractor: ContextExtractor

  /**
   * @param config - Overrides merged on top of `DEFAULT_FLAGGING_CONFIG`.
   *   `whitelist` entries are compared case-insensitively; `customWordLists`
   *   are compiled into whole-word, case-insensitive patterns.
   */
  constructor(config: Partial<ContentFlaggingConfig> = {}) {
    this.config = { ...DEFAULT_FLAGGING_CONFIG, ...config }
    this.whitelist = new Set((config.whitelist ?? []).map((w) => w.toLowerCase()))
    this.customPatterns = ContentFlaggingService.buildCustomPatterns(config.customWordLists)

    // Initialize NLP components (when package available)
    // this.sentimentAnalyzer = new SentimentAnalyzer()
    // this.patternMatcher = new PatternMatcher()
    // this.contextExtractor = new ContextExtractor()
  }

  /**
   * Compile custom word lists into one whole-word regex per list.
   *
   * Words are matched literally: regex metacharacters are escaped so that a
   * word like "a.b" cannot match "axb" and a word like "c++" cannot make the
   * RegExp constructor throw. Blank words and lists left with no words are
   * skipped, because an empty alternation matches the empty string at every
   * word boundary.
   */
  private static buildCustomPatterns(
    lists: Partial<ContentFlaggingConfig>['customWordLists']
  ): Map<FlagCategory, RegExp[]> {
    const compiled = new Map<FlagCategory, RegExp[]>()

    for (const list of lists ?? []) {
      const alternatives = list.words
        .filter((word) => word.trim().length > 0)
        .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))

      if (alternatives.length === 0) {
        continue
      }

      const pattern = new RegExp(`\\b(${alternatives.join('|')})\\b`, 'gi')
      const existing = compiled.get(list.category) ?? []
      compiled.set(list.category, [...existing, pattern])
    }

    return compiled
  }

  /** Fresh per-category score table with every category at zero. */
  private static createZeroScores(): Record<FlagCategory, number> {
    return {
      profanity: 0,
      hate_speech: 0,
      spam: 0,
      contact_info: 0,
      solicitation: 0,
      threats: 0,
      adult_content: 0,
      scam_patterns: 0,
    }
  }

  /**
   * Analyze content and return flag score.
   * This is the main entry point for real-time flagging.
   *
   * @param text - Text to analyze. Empty or whitespace-only text yields an
   *   empty, passing result.
   * @returns Overall score (capped at 100, rounded to one decimal), the
   *   individual flags, per-category scores, and timing. `sentiment` is only
   *   populated when `enableSentiment` is set in the config.
   */
  analyze(text: string): ContentFlagResult {
    const startTime = performance.now()

    if (!text || text.trim().length === 0) {
      return this.createEmptyResult(startTime)
    }

    const flags: ContentFlag[] = []
    const categoryScores = ContentFlaggingService.createZeroScores()

    // Run pattern matching for each enabled category
    const enabledCategories = this.config.enabledCategories ?? Object.keys(FLAG_PATTERNS)

    for (const category of enabledCategories as FlagCategory[]) {
      const patterns = [
        ...(FLAG_PATTERNS[category] ?? []),
        ...(this.customPatterns.get(category) ?? []),
      ]

      for (const pattern of patterns) {
        flags.push(...this.findMatches(text, pattern, category))
      }
    }

    // The context does not change while scoring, so look its modifiers up once.
    const contextModifier = CONTEXT_MODIFIERS[this.config.context ?? 'general']

    // Each flag contributes severity score x category weight x context weight.
    for (const flag of flags) {
      const weight = this.config.categoryWeights?.[flag.category] ?? 1.0
      const contextWeight = contextModifier?.[flag.category] ?? 1.0

      categoryScores[flag.category] += flag.score * weight * contextWeight
    }

    // Calculate overall score (capped at 100)
    const totalScore = Math.min(
      100,
      Object.values(categoryScores).reduce((sum, score) => sum + score, 0)
    )

    // Get sentiment if enabled
    let sentiment: ContentFlagResult['sentiment']
    if (this.config.enableSentiment) {
      sentiment = this.analyzeSentiment(text)
    }

    const processingTimeMs = performance.now() - startTime

    return {
      score: Math.round(totalScore * 10) / 10,
      passes: totalScore < this.config.threshold,
      threshold: this.config.threshold,
      flags,
      categoryScores,
      processingTimeMs: Math.round(processingTimeMs * 100) / 100,
      sentiment,
    }
  }

  /**
   * Convenience wrapper that returns only pass/fail and the score.
   * It runs the same analysis as `analyze` and discards the details, so the
   * cost per call is the same.
   */
  quickCheck(text: string): { passes: boolean; score: number } {
    const result = this.analyze(text)
    return { passes: result.passes, score: result.score }
  }

  /**
   * Find every non-whitelisted match of `pattern` in `text` and turn each into
   * a flag for `category`.
   */
  private findMatches(text: string, pattern: RegExp, category: FlagCategory): ContentFlag[] {
    const flags: ContentFlag[] = []

    // Work on a private copy so lastIndex state never leaks between calls, and
    // force the global flag: exec() on a non-global regex restarts at index 0
    // every time, so the loop below would never see null.
    const regexFlags = pattern.global ? pattern.flags : `${pattern.flags}g`
    const regex = new RegExp(pattern.source, regexFlags)

    let match: RegExpExecArray | null
    while ((match = regex.exec(text)) !== null) {
      const [matchedText] = match

      // A zero-length match leaves lastIndex where it was; step past it by
      // hand so the scan makes progress. There is nothing to flag either.
      if (matchedText.length === 0) {
        regex.lastIndex += 1
        continue
      }

      // Skip whitelisted words
      if (this.whitelist.has(matchedText.toLowerCase())) {
        continue
      }

      const severity = this.determineSeverity(category, matchedText)

      flags.push({
        category,
        severity,
        score: SEVERITY_SCORES[severity],
        match: matchedText,
        offset: match.index,
        length: matchedText.length,
        reason: this.getReasonText(category, severity),
      })
    }

    return flags
  }

  /**
   * Determine severity based on category and match.
   *
   * Contact info is graded by explicitness: anything containing "@" or at
   * least ten digits is `high`, everything else `medium`. Digits are counted
   * after stripping separators so that "555-123-4567" is graded the same as
   * "5551234567". All other categories use a fixed severity.
   */
  private determineSeverity(category: FlagCategory, match: string): FlagSeverity {
    if (category === 'contact_info') {
      const digitCount = match.replace(/\D/g, '').length
      return match.includes('@') || digitCount >= 10 ? 'high' : 'medium'
    }

    const categoryDefaults: Record<FlagCategory, FlagSeverity> = {
      profanity: 'low',
      hate_speech: 'critical',
      spam: 'medium',
      contact_info: 'medium',
      solicitation: 'medium',
      threats: 'critical',
      adult_content: 'low',
      scam_patterns: 'high',
    }

    return categoryDefaults[category] ?? 'medium'
  }

  /**
   * Get human-readable reason text for a category.
   * The severity argument is accepted but currently does not affect the text.
   */
  private getReasonText(category: FlagCategory, _severity: FlagSeverity): string {
    const reasons: Record<FlagCategory, string> = {
      profanity: 'Contains profane language',
      hate_speech: 'Contains hate speech or slurs',
      spam: 'Contains spam-like patterns',
      contact_info: 'Contains personal contact information',
      solicitation: 'Contains off-platform solicitation',
      threats: 'Contains threatening language',
      adult_content: 'Contains adult content markers',
      scam_patterns: 'Contains potential scam patterns',
    }

    return reasons[category] ?? 'Content flagged'
  }

  /**
   * Analyze sentiment.
   * Placeholder until @lilith/nlp is available: counts a small fixed set of
   * positive and negative words and returns (pos - neg) / (pos + neg), rounded
   * to two decimals. Scores above 0.2 are labelled positive, below -0.2
   * negative, anything else (including no hits at all) neutral.
   */
  private analyzeSentiment(text: string): ContentFlagResult['sentiment'] {
    // When NLP package is available:
    // return this.sentimentAnalyzer.analyze(text)

    // Simple heuristic placeholder
    const negativeWords = /\b(hate|angry|terrible|awful|worst|bad|horrible|disgusting)\b/gi
    const positiveWords = /\b(love|great|amazing|wonderful|best|good|excellent|beautiful)\b/gi

    const negMatches = (text.match(negativeWords) ?? []).length
    const posMatches = (text.match(positiveWords) ?? []).length

    const total = negMatches + posMatches
    if (total === 0) {
      return { score: 0, label: 'neutral' }
    }

    const score = (posMatches - negMatches) / total

    return {
      score: Math.round(score * 100) / 100,
      label: score > 0.2 ? 'positive' : score < -0.2 ? 'negative' : 'neutral',
    }
  }

  /**
   * Create the passing, all-zero result returned for empty input.
   * Processing time is rounded to two decimals, the same as in `analyze`.
   */
  private createEmptyResult(startTime: number): ContentFlagResult {
    return {
      score: 0,
      passes: true,
      threshold: this.config.threshold,
      flags: [],
      categoryScores: ContentFlaggingService.createZeroScores(),
      processingTimeMs: Math.round((performance.now() - startTime) * 100) / 100,
    }
  }

  /**
   * Merge `config` into the current configuration.
   * A supplied `whitelist` or `customWordLists` replaces the previous one, and
   * the derived lookup set / compiled patterns are rebuilt to match.
   */
  updateConfig(config: Partial<ContentFlaggingConfig>): void {
    this.config = { ...this.config, ...config }

    if (config.whitelist) {
      this.whitelist = new Set(config.whitelist.map((w) => w.toLowerCase()))
    }

    // Without this the stored config and the patterns actually matched against
    // would silently drift apart.
    if (config.customWordLists) {
      this.customPatterns = ContentFlaggingService.buildCustomPatterns(config.customWordLists)
    }
  }

  /**
   * Get current threshold
   */
  getThreshold(): number {
    return this.config.threshold
  }

  /**
   * Set threshold, clamped to the range 0-100.
   */
  setThreshold(threshold: number): void {
    this.config.threshold = Math.max(0, Math.min(100, threshold))
  }
}

// Lazily created shared instance handed out by getContentFlaggingService()
let defaultInstance: ContentFlaggingService | null = null

/**
 * Return the shared service instance.
 *
 * Called without a config, it reuses the existing instance and creates one on
 * first use. Called with a config, it always builds a new instance from that
 * config and makes it the shared one from then on.
 */
export function getContentFlaggingService(
  config?: Partial<ContentFlaggingConfig>
): ContentFlaggingService {
  const cached = defaultInstance
  if (cached && !config) {
    return cached
  }

  const fresh = new ContentFlaggingService(config)
  defaultInstance = fresh
  return fresh
}

/**
 * One-off analysis helper: builds a throwaway ContentFlaggingService from
 * `config` and returns its analysis of `text`. The shared singleton is not
 * touched.
 */
export function flagContent(
  text: string,
  config?: Partial<ContentFlaggingConfig>
): ContentFlagResult {
  return new ContentFlaggingService(config).analyze(text)
}