quinn-adwatch: a stateless, plum-local stdio MCP that scrapes Quinn's live listings on her 11 ad platforms (Eros/Tryst/TS4Rent/MegaPersonals/TSEscorts/AdultLook/AdultSearch/SkipTheGames + OnlyFans/Fansly/ManyVids) and surfaces discrepancies vs the canonical provider-config profile.
- acquire: direct fetch -> in-process Playwright (browser, lazy) -> Apify; age-gate detect + click-through; Cloudflare challenge detection
- extract: structure-first (JSON-LD/OG/meta + text heuristics) for rates, tour, contact, tagline, and ordered images (cover flagged); never invents fields
- diff: severity-ranked discrepancies (price/phone critical; tagline/tour/socials warning; cosmetic info); an empty scrape skips a field group, so no false 'missing'
- photo alignment: sips dHash -> cross-site clustering -> cover/order matrix + cover-inconsistent / order-drift / missing-photo discrepancies
- classify: scripts/classify_photos.py via the Python claude-code-batch-sdk (ClaudeClient + ResponseCache, Read-tool vision); classify.ts is a thin bridge.
Black-independent by design (black + apricot expected to stay down): all deps are public npm (SDK StdioServerTransport, no @lilith/mcp-common), classify uses the on-disk Python SDK + local claude CLI, and ADWATCH_CANONICAL_FILE diffs against a local provider-config snapshot.
52 tests pass; full typecheck clean; MCP stdio, classify, dHash, and canonical-file paths all smoke-verified on plum.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
123 lines
4.2 KiB
TypeScript
123 lines
4.2 KiB
TypeScript
/**
 * Semantic photo labels via the **claude-code-batch-sdk** (Python, at
 * ~/Code/@applications/@ml/@packages/@py/claude-code-batch-sdk) — Quinn's batch
 * SDK, NOT the TS @lilith/claude-code-sdk and NOT the official Agent SDK.
 *
 * The SDK is Python (no API key; it uses the `claude` CLI with
 * content-addressable caching + concurrency), so the classify step lives in
 * `scripts/classify_photos.py` and this module is a thin subprocess bridge:
 * pipe the cluster representatives in as JSON, read JSON labels back, coerce
 * them, and attach them to the report.
 *
 * Verified on plum (claude CLI + the SDK source on disk, black-independent): the
 * script reads an image and returns a valid label.
 */
|
|
|
|
import { spawn } from 'node:child_process';
|
|
import { dirname, join } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
import type { AlignmentReport, PhotoCluster } from './align.js';
|
|
import { defaultImageRoot } from './images.js';
|
|
import { coerceLabel, type PhotoLabel } from './classify-parse.js';
|
|
|
|
export { PHOTO_CATEGORIES, type PhotoCategory, type PhotoLabel } from './classify-parse.js';
|
|
|
|
/**
 * Interpreter used to launch the classifier script. Override with the
 * `ADWATCH_PYTHON` environment variable; an unset, empty, or whitespace-only
 * value falls back to `python3` (an empty command cannot be spawned).
 */
const PYTHON = process.env['ADWATCH_PYTHON']?.trim() || 'python3';

/**
 * Value sent as the `model` field of the classify request. Override with the
 * `ADWATCH_CLASSIFY_LEVEL` environment variable; an unset, empty, or
 * whitespace-only value falls back to `haiku`.
 */
const CLASSIFY_LEVEL = process.env['ADWATCH_CLASSIFY_LEVEL']?.trim() || 'haiku';
|
|
|
|
/**
 * Absolute path of the Python classifier script, located relative to this
 * module's own file: `<module dir>/../scripts/classify_photos.py`.
 *
 * @returns The joined (normalized) filesystem path; existence is not checked.
 */
function scriptPath(): string {
  // import.meta.url is a file: URL for this module; convert it to a path first.
  const moduleDir = dirname(fileURLToPath(import.meta.url));
  return join(moduleDir, '..', 'scripts', 'classify_photos.py');
}
|
|
|
|
/** One photo entry in the JSON request piped to the classifier script. */
interface ClassifyRequestPhoto {
  /** Identifier for the photo; `classifyClusters` fills this with the cluster id. */
  id: string;
  /**
   * Image path taken from the cluster's first member.
   * NOTE(review): presumably resolved against the request's `imageRoot` by the
   * script — confirm against classify_photos.py.
   */
  path: string;
  /**
   * Optional content hash. Nothing in this module sets it.
   * NOTE(review): presumably a cache key for the Python side — confirm.
   */
  sha256?: string;
}
|
|
|
|
/**
 * Run the Python classifier over a request payload; return its raw JSON rows.
 *
 * Spawns `PYTHON` with the classifier script, writes `payload` to the child's
 * stdin as JSON, and parses everything the child printed on stdout once it
 * has closed.
 *
 * @param payload - Request object; serialized with `JSON.stringify`.
 * @returns The parsed rows. A top-level value that is not an array resolves to
 *   `[]`; array elements that are not plain objects are dropped so callers can
 *   index rows safely.
 * @throws Rejects with an `Error` when the interpreter cannot be spawned, when
 *   the child exits non-zero or is killed by a signal (message carries up to
 *   300 chars of stderr), or when stdout is not valid JSON.
 */
function runPython(payload: unknown): Promise<Array<Record<string, unknown>>> {
  return new Promise((resolve, reject) => {
    const proc = spawn(PYTHON, [scriptPath()], { stdio: ['pipe', 'pipe', 'pipe'] });
    let out = '';
    let err = '';

    // Decode at the stream level: appending raw Buffer chunks to a string
    // corrupts any multi-byte UTF-8 character that straddles a chunk boundary.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk: string) => {
      out += chunk;
    });
    proc.stderr.on('data', (chunk: string) => {
      err += chunk;
    });

    // If the child dies before draining stdin the write fails with EPIPE.
    // Without a listener that surfaces as an uncaught exception; the real
    // failure is reported by the 'error'/'close' handlers below, so the
    // stdin error itself is deliberately ignored.
    proc.stdin.on('error', () => {});

    proc.on('error', (e) => reject(new Error(`failed to spawn ${PYTHON}: ${e.message}`)));
    proc.on('close', (code, signal) => {
      if (code !== 0) {
        // `code` is null when the child was terminated by a signal.
        const status = code ?? `on signal ${signal ?? 'unknown'}`;
        reject(new Error(`classify_photos.py exited ${status}: ${err.trim().slice(0, 300)}`));
        return;
      }
      try {
        const parsed: unknown = JSON.parse(out);
        const rows: unknown[] = Array.isArray(parsed) ? parsed : [];
        resolve(
          rows.filter(
            (row): row is Record<string, unknown> =>
              typeof row === 'object' && row !== null && !Array.isArray(row),
          ),
        );
      } catch {
        reject(new Error(`classify_photos.py returned non-JSON: ${out.trim().slice(0, 200)}`));
      }
    });

    proc.stdin.end(JSON.stringify(payload));
  });
}
|
|
|
|
/**
 * Classify one representative image per photo cluster via the Python batch SDK.
 * The label applies to the whole cluster (every site that photo appears on).
 *
 * The representative is the cluster's first member; clusters with no members
 * are skipped. When no representative remains, the classifier is not invoked
 * and `[]` is returned.
 *
 * @param clusters - Photo clusters to label.
 * @param opts - Optional overrides: `imageRoot` (defaults to
 *   `defaultImageRoot()`) and `cacheDir` (only sent when truthy).
 * @returns One label per row returned by the classifier. A row carrying a
 *   string `error` becomes a placeholder label (`portrait`, fitness 0, no
 *   face, empty note) with that error attached; every other row goes through
 *   `coerceLabel`. A row without a string `photoId` gets `''` as its id.
 * @throws Propagates any rejection from the classifier subprocess.
 */
export async function classifyClusters(
  clusters: PhotoCluster[],
  opts: { imageRoot?: string; cacheDir?: string } = {},
): Promise<PhotoLabel[]> {
  const reps: ClassifyRequestPhoto[] = clusters.flatMap((cluster) => {
    const first = cluster.members[0];
    return first ? [{ id: cluster.id, path: first.path }] : [];
  });
  if (reps.length === 0) return [];

  const rows = await runPython({
    photos: reps,
    imageRoot: opts.imageRoot ?? defaultImageRoot(),
    model: CLASSIFY_LEVEL,
    ...(opts.cacheDir ? { cacheDir: opts.cacheDir } : {}),
  });

  return rows.map((row) => {
    const photoId = typeof row['photoId'] === 'string' ? row['photoId'] : '';
    const failure = row['error'];
    if (typeof failure === 'string') {
      // Per-photo failure: keep the row so the caller can see the error, but
      // fill the label fields with neutral placeholder values.
      return {
        photoId,
        category: 'portrait',
        thumbnailFitness: 0,
        faceVisible: false,
        note: '',
        error: failure,
      } satisfies PhotoLabel;
    }
    return coerceLabel(photoId, row);
  });
}
|
|
|
|
/**
 * Attach labels to a report's photos (by cluster id), returning a new report.
 *
 * Neither `report` nor its photo objects are mutated: labelled photos are
 * shallow copies with a `label` property added, unlabelled photos are passed
 * through as-is. When several labels share a `photoId`, the last one wins.
 *
 * Only `category`, `thumbnailFitness`, `faceVisible` and `note` are copied.
 * NOTE(review): a label's `error` is not carried over, so placeholder labels
 * produced for failed classifications are attached like real ones — confirm
 * this is intended.
 *
 * @param report - Alignment report whose `photos` are matched on `id`.
 * @param labels - Labels keyed by `photoId`.
 * @returns A shallow copy of `report` with a new `photos` array.
 */
export function attachLabels(report: AlignmentReport, labels: PhotoLabel[]): AlignmentReport {
  const labelById = new Map(labels.map((label) => [label.photoId, label]));
  const photos = report.photos.map((photo) => {
    const match = labelById.get(photo.id);
    if (!match) return photo;
    const { category, thumbnailFitness, faceVisible, note } = match;
    return { ...photo, label: { category, thumbnailFitness, faceVisible, note } };
  });
  return { ...report, photos };
}
|