lilith-platform.live/codebase/@features/ad-watch/src/compliance.ts

/**
 * Compliance checks — the literal, unambiguous rules Quinn maintains in
 * Executor `ad-copy/_RULES.md` (+ FACT_SHEET.md), applied to a platform's text
 * (live-scraped or intended copy). This is a transparent, data-driven detector:
 * each rule is a regex Quinn stated explicitly, surfaced for review — it does
 * not enforce or auto-edit.
 *
 * The two former source contradictions were resolved by Quinn (2026-06-27):
 *   - price: one rate everywhere = $1000, shown (FACT_SHEET) → flag any rate
 *     that differs (the legacy $700/$1100/$3500 mess). Override with ADWATCH_RATE.
 *   - domain: prefer the long transquinnftw.com; tsquinn.com is the short alias,
 *     acceptable only where char limits are tight → info nudge, not a hard flag.
 */

import { normMoney } from './normalize.js';

/**
 * Severity attached to a rule and copied onto the violations it produces.
 * `'info'` marks a soft nudge rather than a hard flag (used by the
 * short-domain preference); every other built-in rule is a `'warning'`.
 */
export type ComplianceSeverity = 'warning' | 'info';

/**
 * One entry in the compliance ruleset.
 *
 * A rule supplies either `pattern` or `detect`. When both are present
 * `checkCompliance` uses `detect`; a rule with neither never produces a
 * violation.
 */
export interface ComplianceRule {
  /** Rule identifier; reported as `ComplianceViolation.rule`. */
  id: string;
  /** Copied onto any violation this rule produces. */
  severity: ComplianceSeverity;
  /**
   * Platform ids this applies to; omitted = all. A rule that sets this is
   * skipped when `checkCompliance` is called without a platform. The platform
   * is lowercased before comparison, so list ids in lowercase.
   */
  appliesTo?: string[];
  /** Human-readable explanation, copied onto any violation this rule produces. */
  note: string;
  /** Simple literal/regex rule; each distinct matched substring is reported. */
  pattern?: RegExp;
  /**
   * Custom matcher (used when a rule needs parsing, e.g. price comparison).
   * Returns the offending substrings; an empty array means no violation.
   */
  detect?: (text: string) => string[];
}

/** Rate used when `ADWATCH_RATE` is unset or not a usable number. */
const DEFAULT_CANONICAL_RATE = 1000;

/**
 * Parse the `ADWATCH_RATE` override.
 *
 * @param raw - Raw environment value (may be undefined, empty or malformed).
 * @returns The parsed rate when it is a finite number greater than zero,
 *   otherwise {@link DEFAULT_CANONICAL_RATE}.
 */
function parseCanonicalRate(raw: string | undefined): number {
  // `Number('')` is 0 and `Number('abc')` is NaN; either would make the price
  // rule flag every amount, so anything non-positive or non-finite is rejected.
  const parsed = Number(raw ?? '');
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_CANONICAL_RATE;
}

/**
 * Canonical hourly rate — one number on every surface (FACT_SHEET).
 * Overridable with the `ADWATCH_RATE` environment variable; an empty,
 * non-numeric or non-positive override falls back to 1000.
 */
export const CANONICAL_RATE = parseCanonicalRate(process.env['ADWATCH_RATE']);

/** One rule that matched the checked text, as returned by `checkCompliance`. */
export interface ComplianceViolation {
  /** The `id` of the rule that matched. */
  rule: string;
  /** Severity copied from the rule. */
  severity: ComplianceSeverity;
  /** The offending substring(s) found. */
  matches: string[];
  /** Explanation copied from the rule. */
  note: string;
}

// Broad emoji coverage for the Eros "no emoji" platform rule.
// Character-class ranges, in order:
//   U+1F000–U+1FAFF  the astral pictograph blocks as one span, which includes
//                    the regional indicators U+1F1E6–U+1F1FF (flag emoji) and
//                    the enclosed letter / ideograph emoji in U+1F100–U+1F2FF
//   U+2600–U+27BF    miscellaneous symbols and dingbats
//   U+2B00–U+2BFF    miscellaneous symbols and arrows
//   U+2190–U+21FF    arrows
//   U+2300–U+23FF    miscellaneous technical (watch, hourglass, alarm clock…)
//   U+FE0F, U+20E3   emoji variation selector and combining keycap
// The `u` flag is required for the \u{…} code-point escapes. There is no `g`
// flag, so the literal itself carries no lastIndex state between uses.
const EMOJI_RE =
  /[\u{1F000}-\u{1FAFF}\u{2600}-\u{27BF}\u{2B00}-\u{2BFF}\u{2190}-\u{21FF}\u{2300}-\u{23FF}\u{FE0F}\u{20E3}]/u;

/**
 * Matcher for the `price-not-canonical` rule: collects every distinct dollar
 * amount in `text` that is rate-sized (200–20000 inclusive) yet differs from
 * `CANONICAL_RATE`. Amounts `normMoney` cannot parse are ignored.
 */
function detectNonCanonicalRates(text: string): string[] {
  const flagged = new Set<string>();
  for (const hit of text.matchAll(/\$\s?\d{1,3}(?:,?\d{3})*(?:\.\d{2})?/g)) {
    const raw = hit[0];
    const value = normMoney(raw);
    if (value === undefined) continue;
    // Only flag rate-magnitude amounts (skip tiny incidental $ and huge ids).
    const rateSized = value >= 200 && value <= 20000;
    if (rateSized && value !== CANONICAL_RATE) flagged.add(raw.trim());
  }
  return Array.from(flagged);
}

/**
 * The built-in ruleset, evaluated in array order by `checkCompliance`.
 * Each entry is either a plain regex (`pattern`) or a custom matcher (`detect`).
 */
export const COMPLIANCE_RULES: ComplianceRule[] = [
  // Vocabulary: "nerd" / "nerds" as a whole word, any case.
  {
    id: 'geek-not-nerd',
    severity: 'warning',
    pattern: /\bnerds?\b/i,
    note: '_RULES: say "geek", never "nerd" (no exceptions).',
  },
  // Banned phrase, matched case-insensitively anywhere in the text.
  {
    id: 'banned-phrase-where-i-like-to-stay',
    severity: 'warning',
    pattern: /where i like to stay/i,
    note: '_RULES: banned phrase "where I like to stay".',
  },
  // Any twitter.com/<handle> or x.com/<handle> link.
  {
    id: 'suspended-twitter-link',
    severity: 'warning',
    pattern: /\b(?:twitter\.com|x\.com)\/[A-Za-z0-9_]+/i,
    note: 'FACT_SHEET: X/Twitter @TransQuinnFTW is SUSPENDED — scrub links.',
  },
  // Old-location place names.
  {
    id: 'bay-area-geo',
    severity: 'warning',
    pattern: /\b(?:san francisco|bay area|napa|san jose|wine country|UWS|upper west side)\b/i,
    note: '_RULES: geo is Brooklyn/NYC only — no Bay Area / old-location references.',
  },
  // Platform-scoped: only evaluated when the platform is "eros".
  {
    id: 'eros-no-emoji',
    severity: 'warning',
    appliesTo: ['eros'],
    pattern: EMOJI_RE,
    note: '_RULES: Eros copy must be emoji-free (per-platform override).',
  },
  // Parsed rule: compares each dollar amount against the canonical rate.
  {
    id: 'price-not-canonical',
    severity: 'warning',
    note: `FACT_SHEET: one rate on every surface = $${CANONICAL_RATE}. Any other rate (legacy $700/$1100/$3500…) should be reconciled.`,
    detect: detectNonCanonicalRates,
  },
  // Soft nudge only: the short alias is allowed where space is tight.
  {
    id: 'prefer-long-domain',
    severity: 'info',
    pattern: /tsquinn\.com/i,
    note: 'Domain: prefer transquinnftw.com; tsquinn.com is the short alias — use only where char limits are tight.',
  },
];

/**
 * Open contradictions between the source-canon documents. Empty: both known
 * ones were resolved by Quinn on 2026-06-27 and are now encoded as rules.
 */
export const CONTRADICTIONS: { topic: string; detail: string }[] = [];

/**
 * Find every distinct substring of `text` matched by `pattern`.
 *
 * @param text - Text to scan.
 * @param pattern - Rule regex. It is never mutated: a copy with the `g` flag
 *   added (all other flags preserved) does the scanning, so a shared regex
 *   literal keeps its own `lastIndex`.
 * @returns The matched substrings, de-duplicated, in first-seen order.
 */
function findMatches(text: string, pattern: RegExp): string[] {
  const flags = pattern.global ? pattern.flags : `${pattern.flags}g`;
  const scanner = new RegExp(pattern.source, flags);
  const seen = new Set<string>();
  // matchAll steps past zero-width matches on its own, and does so by whole
  // code points under the `u` flag, so no manual lastIndex bookkeeping (which
  // could land inside a surrogate pair) is needed.
  for (const hit of text.matchAll(scanner)) {
    seen.add(hit[0]);
  }
  return Array.from(seen);
}

/**
 * Check a platform's text against the compliance ruleset.
 *
 * @param text - The copy to inspect (live-scraped or intended).
 * @param opts.platform - Platform id used for rules scoped with `appliesTo`.
 *   Compared case-insensitively. When omitted or empty, scoped rules are skipped.
 * @param opts.rules - Ruleset to apply; defaults to `COMPLIANCE_RULES`.
 * @returns One violation per rule that matched, in ruleset order. Each carries
 *   the rule's id, severity and note plus the offending substrings. Rules with
 *   no matches are omitted.
 */
export function checkCompliance(
  text: string,
  opts: { platform?: string; rules?: ComplianceRule[] } = {},
): ComplianceViolation[] {
  const rules = opts.rules ?? COMPLIANCE_RULES;
  const platform = opts.platform?.toLowerCase();
  const out: ComplianceViolation[] = [];
  for (const rule of rules) {
    if (rule.appliesTo) {
      // Lowercase both sides: the platform is already normalised, and a rule
      // declared as e.g. ['Eros'] must not be silently skipped.
      const inScope =
        !!platform && rule.appliesTo.some((id: string) => id.toLowerCase() === platform);
      if (!inScope) continue;
    }
    // A custom matcher takes precedence over a plain pattern; a rule with
    // neither contributes nothing.
    const matches = rule.detect ? rule.detect(text) : rule.pattern ? findMatches(text, rule.pattern) : [];
    if (matches.length > 0) {
      out.push({ rule: rule.id, severity: rule.severity, matches, note: rule.note });
    }
  }
  return out;
}