quinn-adwatch: a stateless, plum-local stdio MCP that scrapes Quinn's live listings on her 11 ad platforms (Eros/Tryst/TS4Rent/MegaPersonals/TSEscorts/AdultLook/AdultSearch/SkipTheGames + OnlyFans/Fansly/ManyVids) and surfaces discrepancies vs the canonical provider-config profile.

- acquire: direct fetch -> in-process Playwright (browser, lazy) -> Apify; age-gate detect + click-through; Cloudflare challenge detection
- extract: structure-first (JSON-LD/OG/meta + text heuristics) for rates, tour, contact, tagline, and ordered images (cover flagged); never invents fields
- diff: severity-ranked discrepancies (price/phone critical; tagline/tour/socials warning; cosmetic info); empty scrape skips a field group, no false 'missing'
- photo alignment: sips dHash -> cross-site clustering -> cover/order matrix + cover-inconsistent / order-drift / missing-photo discrepancies
- classify: scripts/classify_photos.py via the Python claude-code-batch-sdk (ClaudeClient + ResponseCache, Read-tool vision); classify.ts is a thin bridge

Black-independent by design (black + apricot expected to stay down): all deps are public npm (SDK StdioServerTransport, no @lilith/mcp-common), classify uses the on-disk Python SDK + local claude CLI, and ADWATCH_CANONICAL_FILE diffs against a local provider-config snapshot.

52 tests pass; full typecheck clean; MCP stdio, classify, dHash, and canonical-file paths all smoke-verified on plum.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
143 lines
5.2 KiB
TypeScript
143 lines
5.2 KiB
TypeScript
import { describe, expect, test } from 'bun:test';
|
|
import {
|
|
extractProfile,
|
|
ChallengeError,
|
|
htmlToText,
|
|
extractRates,
|
|
extractImages,
|
|
detectAgeGate,
|
|
} from '../src/extract.js';
|
|
|
|
/**
 * Scrape context handed to every `extractProfile` call in this file.
 * The image tests expect root-relative srcs to come back on the
 * `https://example.test` origin, i.e. the origin of `url` below.
 */
const ctx = {
  platform: 'tryst',
  url: 'https://example.test/p',
  via: 'direct' as const,
  fetchedAt: '2026-06-26T00:00:00.000Z',
};
|
|
|
|
/**
 * Fixture: a complete profile page exercised by the `extractProfile` suite.
 *
 * What the tests read from it:
 * - `og:title` / `og:description` / `og:image` meta tags and exactly one
 *   JSON-LD `Person` script;
 * - a rates section with Incall ($1,000 / $1,800) and Outcall ($1,200) rows;
 * - a gallery of three images (absolute `src`, root-relative `src`, and a
 *   `data-src` only image) plus a site logo that must NOT be reported;
 * - two "City, ST" tour stops, OnlyFans and Twitter links, and a phone number.
 *
 * The literal must contain only the markup itself: any stray non-HTML line
 * inside the backticks becomes page text that the extractor's text
 * heuristics would see.
 */
const PROFILE_HTML = `<!doctype html><html><head>
<title>Quinn — Manhattan Companion</title>
<meta property="og:title" content="Quinn — Don't fall in love!">
<meta property="og:description" content="Up-and-coming companion touring this year.">
<meta property="og:image" content="https://cdn.example.test/quinn/cover.jpg">
<script type="application/ld+json">
{"@type":"Person","name":"Quinn","description":"Up-and-coming companion touring this year."}
</script>
</head><body>
<img src="/img/logo.png" alt="site logo">
<section class="rates">
<h3>Incall</h3>
<p>1 hour — $1,000</p>
<p>2 hours — $1,800</p>
<h3>Outcall</h3>
<p>1 hour — $1,200</p>
</section>
<section class="gallery">
<img src="https://cdn.example.test/quinn/cover.jpg" alt="cover">
<img src="/quinn/photo-2.jpg" alt="second">
<img data-src="https://cdn.example.test/quinn/photo-3.webp">
</section>
<section class="tour">
<p>Denver, CO · Jul 1</p>
<p>Chicago, IL · Aug 10</p>
</section>
<a href="https://onlyfans.com/transquinnftw">OnlyFans</a>
<a href="https://twitter.com/transquinnftw">Twitter</a>
<p>Text me: (424) 466-3669</p>
</body></html>`;
|
|
|
|
/**
 * Fixture: a "Just a moment..." browser-verification interstitial.
 * The challenge-detection suite expects `extractProfile` to throw
 * `ChallengeError` for this page. Only markup belongs inside the literal.
 */
const CHALLENGE_HTML = `<!doctype html><html><head><title>Just a moment...</title></head>
<body><div class="cf-browser-verification">Checking your browser before accessing.</div></body></html>`;
|
|
|
|
/**
 * Fixture: an 18+ age-verification interstitial with a click-through button.
 * The age-gate suite expects `detectAgeGate` to return true for it and
 * `extractProfile` to emit a warning starting with "age-gate" rather than
 * throw. Only markup belongs inside the literal.
 */
const AGE_GATE_HTML = `<!doctype html><html><head><title>Adults Only</title></head>
<body><h1>Age Verification</h1><p>You must be 18 or older to enter.</p>
<button>I am 18 or older</button></body></html>`;
|
|
|
|
// Field-by-field checks against a single extraction of the full profile fixture.
describe('extractProfile', () => {
  // Extracted once in the describe body (outside any test) and shared by
  // every test below.
  const profile = extractProfile(PROFILE_HTML, ctx);

  test('pulls name from JSON-LD Person', () => {
    expect(profile.name).toBe('Quinn');
  });

  test('derives tagline from og:title minus name', () => {
    // Compared case-insensitively; the tagline may be absent, hence the `?.`.
    const tagline = profile.tagline?.toLowerCase();
    expect(tagline).toContain("don't fall in love");
  });

  test('extracts labelled incall/outcall rates', () => {
    const { rates } = profile;
    // The two 1-hour rows must carry their service label...
    expect(rates.find((rate) => rate.price === 1000 && rate.service === 'incall')).toBeDefined();
    expect(rates.find((rate) => rate.price === 1200 && rate.service === 'outcall')).toBeDefined();
    // ...while the 2-hour row is only required to be present.
    expect(rates.find((rate) => rate.price === 1800)).toBeDefined();
  });

  test('extracts City, ST tour stops', () => {
    const visited = profile.tour.map(({ city }) => city);
    for (const expectedCity of ['Denver', 'Chicago']) {
      expect(visited).toContain(expectedCity);
    }
  });

  test('extracts phone and socials', () => {
    const { phone, socials } = profile;
    expect(phone).toBeDefined();
    expect(socials['onlyfans']).toBe('transquinnftw');
    expect(socials['twitter']).toBe('transquinnftw');
  });

  test('records jsonLd count', () => {
    const { jsonLdCount } = profile.raw;
    expect(jsonLdCount).toBe(1);
  });

  test('extracts ordered images with og:image as thumbnail', () => {
    const { images } = profile;
    expect(images.length).toBeGreaterThanOrEqual(3);

    const cover = images.find((image) => image.thumbnail);
    expect(cover?.src).toBe('https://cdn.example.test/quinn/cover.jpg');
    expect(cover?.order).toBe(0);

    // order is preserved and contiguous: each image's `order` equals its index
    const reportedOrder = images.map((image) => image.order);
    const indexOrder = Array.from(images.keys());
    expect(reportedOrder).toEqual(indexOrder);
  });

  test('resolves relative image srcs and skips the logo', () => {
    const sources = profile.images.map(({ src }) => src);
    expect(sources).toContain('https://example.test/quinn/photo-2.jpg');
    expect(sources).toContain('https://cdn.example.test/quinn/photo-3.webp');

    const logoReported = sources.some((source) => source.includes('logo'));
    expect(logoReported).toBe(false);
  });
});
|
|
|
|
// Age-gate handling: the standalone detector, and extractProfile's non-fatal warning.
describe('age-gate detection', () => {
  test('detectAgeGate flags an 18+ interstitial', () => {
    // Positive case: the interstitial fixture with its page title.
    const gateFlagged = detectAgeGate(AGE_GATE_HTML, 'Adults Only');
    expect(gateFlagged).toBe(true);

    // Negative case: a normal profile page must not be flagged.
    const profileFlagged = detectAgeGate(PROFILE_HTML, 'Quinn');
    expect(profileFlagged).toBe(false);
  });

  test('extractProfile warns (does not throw) on an age gate', () => {
    // Reaching the assertion at all shows the call returned instead of throwing.
    const { warnings } = extractProfile(AGE_GATE_HTML, ctx);
    const hasAgeGateWarning = warnings.some((warning) => warning.startsWith('age-gate'));
    expect(hasAgeGateWarning).toBe(true);
  });
});
|
|
|
|
// Direct checks of extractImages on a minimal three-<img> snippet.
describe('extractImages', () => {
  test('dedupes by resolved URL and orders from zero', () => {
    // Two identical absolute srcs plus one root-relative src.
    const html = `<img src="https://x.test/a.jpg"><img src="https://x.test/a.jpg"><img src="/b.png">`;
    const imgs = extractImages(html, 'https://x.test/p');

    // The duplicate collapses, and the relative src resolves against the page URL.
    expect(imgs.map((i) => i.src)).toEqual(['https://x.test/a.jpg', 'https://x.test/b.png']);

    // The title promises zero-based ordering, so assert it explicitly.
    expect(imgs.map((i) => i.order)).toEqual([0, 1]);

    // The first image is flagged as the thumbnail. Optional chaining makes an
    // empty result fail as an assertion rather than a TypeError.
    expect(imgs[0]?.thumbnail).toBe(true);
  });
});
|
|
|
|
// A bot-challenge interstitial must surface as a typed error, not as an empty profile.
describe('challenge detection', () => {
  test('throws ChallengeError on a Cloudflare interstitial', () => {
    // Wrapped in a thunk so `expect` can invoke it and catch the throw.
    const parseChallengePage = () => extractProfile(CHALLENGE_HTML, ctx);
    expect(parseChallengePage).toThrow(ChallengeError);
  });
});
|
|
|
|
// Unit checks for the standalone text helpers exported by the extractor.
describe('helpers', () => {
  test('htmlToText strips scripts and tags', () => {
    const text = htmlToText('<div>Hi<script>evil()</script> <b>there</b></div>');

    // Visible text survives...
    for (const kept of ['Hi', 'there']) {
      expect(text).toContain(kept);
    }
    // ...while the script body does not.
    expect(text).not.toContain('evil');
  });

  test('extractRates ignores sub-$30 dollar mentions', () => {
    const prices = extractRates('Coffee is $5. 1 hour $1000.').map((rate) => rate.price);

    // The incidental "$5" must not be reported as a rate; the real one must.
    expect(prices.includes(5)).toBe(false);
    expect(prices.includes(1000)).toBe(true);
  });
});
|