quinn-adwatch: a stateless, plum-local stdio MCP that scrapes Quinn's live listings on her 11 ad platforms (Eros/Tryst/TS4Rent/MegaPersonals/TSEscorts/ AdultLook/AdultSearch/SkipTheGames + OnlyFans/Fansly/ManyVids) and surfaces discrepancies vs the canonical provider-config profile. - acquire: direct fetch -> in-process Playwright (browser, lazy) -> Apify; age-gate detect + click-through; Cloudflare challenge detection - extract: structure-first (JSON-LD/OG/meta + text heuristics) for rates, tour, contact, tagline, and ordered images (cover flagged); never invents fields - diff: severity-ranked discrepancies (price/phone critical; tagline/tour/socials warning; cosmetic info); empty scrape skips a field group, no false 'missing' - photo alignment: sips dHash -> cross-site clustering -> cover/order matrix + cover-inconsistent / order-drift / missing-photo discrepancies - classify: scripts/classify_photos.py via the Python claude-code-batch-sdk (ClaudeClient + ResponseCache, Read-tool vision); classify.ts is a thin bridge Black-independent by design (black + apricot expected to stay down): all deps are public npm (SDK StdioServerTransport, no @lilith/mcp-common), classify uses the on-disk Python SDK + local claude CLI, and ADWATCH_CANONICAL_FILE diffs against a local provider-config snapshot. 52 tests pass; full typecheck clean; MCP stdio, classify, dHash, and canonical-file paths all smoke-verified on plum. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
101 lines
3.6 KiB
TypeScript
101 lines
3.6 KiB
TypeScript
/**
|
||
* Perceptual hashing for cross-site photo identity.
|
||
*
|
||
* sha256 (in images.ts) only catches byte-exact dups; the SAME photo recompressed
|
||
* or resized on two platforms has a different sha256 but a near-identical
|
||
* **dHash**. dHash clustering by Hamming distance is what lets ad-watch say
|
||
* "the Tryst cover is photo-A, which sits at position 4 on Eros".
|
||
*
|
||
* Decode path uses macOS **sips** (house preference — no PIL, no extra deps):
|
||
* sips downscales to a 9×8 24-bit BMP, which we parse directly (uncompressed,
|
||
* trivial header) and reduce to a 64-bit difference hash.
|
||
*/
|
||
|
||
import { execFile } from 'node:child_process';
|
||
import { readFile, unlink } from 'node:fs/promises';
|
||
import { tmpdir } from 'node:os';
|
||
import { join } from 'node:path';
|
||
import { promisify } from 'node:util';
|
||
import { randomUUID } from 'node:crypto';
|
||
|
||
// Promise-returning form of execFile, so external commands (here: `sips`) can be awaited.
const exec = promisify(execFile);
|
||
|
||
// dHash works on a (W+1)×H grid; comparing horizontally adjacent columns yields
|
||
// W×H bits. 9×8 → 64 bits.
|
||
/** Width of the downscaled grid requested from sips; 9 columns give 8 adjacent-pair comparisons per row. */
export const DHASH_W = 9;
|
||
/** Height of the downscaled grid requested from sips; each of the 8 rows contributes one run of hash bits. */
export const DHASH_H = 8;
|
||
|
||
/**
 * Decode an uncompressed 24-bit BMP into grayscale luminance values.
 *
 * The result is row-major and top-down regardless of how the file stores its
 * rows: BMP rows are BGR, padded to 4-byte boundaries, and stored bottom-up
 * when the header height is positive — all of that is normalized here.
 *
 * @param buf - Entire BMP file contents.
 * @returns The pixel dimensions plus `width * height` luminance values
 *   (0.299 R + 0.587 G + 0.114 B, left unrounded).
 * @throws Error if the buffer lacks the `BM` signature or a 54-byte header, is
 *   not 24 bits per pixel, declares a non-positive width or a zero height, or
 *   is too short to contain the pixel rows its header declares.
 */
export function parseBmp24ToGray(buf: Buffer): { width: number; height: number; gray: number[] } {
  if (buf.length < 54 || buf[0] !== 0x42 || buf[1] !== 0x4d) {
    throw new Error('not a BMP');
  }
  const dataOffset = buf.readUInt32LE(10);
  const width = buf.readInt32LE(18);
  const rawHeight = buf.readInt32LE(22);
  const bpp = buf.readUInt16LE(28);
  if (bpp !== 24) throw new Error(`expected 24bpp BMP, got ${bpp}`);

  // A negative height only flips the row order; a negative or zero width is never valid.
  const height = Math.abs(rawHeight);
  if (width <= 0 || height === 0) {
    throw new Error(`invalid BMP dimensions: ${width}x${rawHeight}`);
  }
  const bottomUp = rawHeight > 0;
  const rowBytes = width * 3;
  const rowPadded = Math.ceil(rowBytes / 4) * 4;

  // Require exactly the bytes the loop below reads (the final row's padding is
  // never touched). Without this check a truncated file would index past the
  // buffer, read `undefined`, and silently fill the output with NaN.
  const requiredLength = dataOffset + (height - 1) * rowPadded + rowBytes;
  if (buf.length < requiredLength) {
    throw new Error(`truncated BMP: need ${requiredLength} bytes, got ${buf.length}`);
  }

  const gray = new Array<number>(width * height);
  for (let row = 0; row < height; row++) {
    const srcRow = bottomUp ? height - 1 - row : row; // normalize to top-down
    const base = dataOffset + srcRow * rowPadded;
    for (let x = 0; x < width; x++) {
      const p = base + x * 3;
      // Pixels are stored blue, green, red. The bounds check above makes these reads safe.
      const b = buf[p]!;
      const g = buf[p + 1]!;
      const r = buf[p + 2]!;
      gray[row * width + x] = 0.299 * r + 0.587 * g + 0.114 * b;
    }
  }
  return { width, height, gray };
}
|
||
|
||
/**
 * Build a difference hash from a row-major grayscale grid.
 *
 * Every horizontally adjacent pixel pair contributes one bit: 1 when the left
 * pixel is strictly darker than its right-hand neighbour, 0 otherwise. Pairs
 * are visited row by row, left to right, and the first pair ends up in the
 * most significant position, giving `(width - 1) * height` bits in total.
 *
 * @param gray - Luminance values, `width` entries per row.
 * @param width - Number of columns in the grid.
 * @param height - Number of rows in the grid.
 * @returns The accumulated bits as a non-negative bigint.
 */
export function dHashFromGray(gray: number[], width: number, height: number): bigint {
  let bits = 0n;
  for (let row = 0; row < height; row++) {
    const rowStart = row * width;
    // Walk the right-hand member of each pair, so `col - 1` is its left neighbour.
    for (let col = 1; col < width; col++) {
      const risesToTheRight = gray[rowStart + col - 1]! < gray[rowStart + col]!;
      bits <<= 1n;
      if (risesToTheRight) bits |= 1n;
    }
  }
  return bits;
}
|
||
|
||
/**
 * Render a dHash as lowercase hexadecimal, left-padded with zeros to at least
 * 16 digits (the width of a 64-bit hash). Longer values are returned unpadded.
 *
 * @param hash - Hash value to format.
 * @returns The hexadecimal string.
 */
export function dHashHex(hash: bigint): string {
  const digits = hash.toString(16);
  const missing = 16 - digits.length;
  return missing > 0 ? '0'.repeat(missing) + digits : digits;
}
|
||
|
||
/**
 * Hamming distance between two dHashes: the number of bit positions in which
 * they differ.
 *
 * Intended for non-negative hashes; if `a ^ b` is negative the counting loop
 * never runs and 0 is returned.
 *
 * @param a - First hash.
 * @param b - Second hash.
 * @returns Count of set bits in `a ^ b`.
 */
export function hamming(a: bigint, b: bigint): number {
  let diff = a ^ b;
  let setBits = 0;
  // `diff & (diff - 1)` clears the lowest set bit, so each pass accounts for exactly one differing bit.
  while (diff > 0n) {
    diff &= diff - 1n;
    setBits += 1;
  }
  return setBits;
}
|
||
|
||
/**
 * Compute the dHash of an image file: sips writes a DHASH_W×DHASH_H BMP to a
 * uniquely named temp file, which is then parsed and reduced to a difference
 * hash.
 *
 * @param path - Path of the image to hash.
 * @returns The difference hash of the downscaled image.
 * @throws Whatever the sips invocation, the temp-file read, or the BMP parser
 *   rejects/throws with; none of those errors are caught here.
 */
export async function dHashOfFile(path: string): Promise<bigint> {
  const scratchBmp = join(tmpdir(), `adwatch-${randomUUID()}.bmp`);
  // `-z` takes <height> <width> (in that order); `-s format bmp` selects BMP output.
  const sipsArgs = ['-s', 'format', 'bmp', '-z', String(DHASH_H), String(DHASH_W), path, '--out', scratchBmp];
  try {
    await exec('sips', sipsArgs);
    const bitmap = parseBmp24ToGray(await readFile(scratchBmp));
    return dHashFromGray(bitmap.gray, bitmap.width, bitmap.height);
  } finally {
    // Best-effort cleanup: the temp file may not exist if sips failed, so removal errors are ignored.
    await unlink(scratchBmp).catch(() => undefined);
  }
}
|