// lilith-platform.live/codebase/@features/ad-watch/src/classify.ts

/**
 * Semantic photo labels via the **claude-code-batch-sdk** (Python, at
 * ~/Code/@applications/@ml/@packages/@py/claude-code-batch-sdk) — Quinn's batch
 * SDK, NOT the TS @lilith/claude-code-sdk and NOT the official Agent SDK.
 *
 * It's Python (no API key, uses the `claude` CLI with content-addressable
 * caching + concurrency), so the classify step is `scripts/classify_photos.py`
 * and this module is a thin subprocess bridge: pipe the cluster representatives
 * in as JSON, read JSON labels back, coerce, and attach to the report.
 *
 * Verified on plum (claude CLI + the SDK source on disk, black-independent): the
 * script reads an image and returns a valid label.
 */

import { spawn } from 'node:child_process';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import type { AlignmentReport, PhotoCluster } from './align.js';
import { defaultImageRoot } from './images.js';
import { coerceLabel, type PhotoLabel } from './classify-parse.js';

export { PHOTO_CATEGORIES, type PhotoCategory, type PhotoLabel } from './classify-parse.js';

/**
 * Read an environment variable, falling back to `fallback` when it is unset,
 * empty, or whitespace-only. A variable that is exported but blank
 * (e.g. `ADWATCH_PYTHON=`) is treated the same as an unset one, because an
 * empty interpreter name cannot be spawned. A non-blank value is returned
 * exactly as provided (no trimming).
 */
function envOrDefault(name: string, fallback: string): string {
  const raw = process.env[name];
  return raw !== undefined && raw.trim() !== '' ? raw : fallback;
}

/** Python interpreter used to run the classifier script (override: `ADWATCH_PYTHON`). */
const PYTHON = envOrDefault('ADWATCH_PYTHON', 'python3');
/** Value sent as `model` in the classify request (override: `ADWATCH_CLASSIFY_LEVEL`). */
const CLASSIFY_LEVEL = envOrDefault('ADWATCH_CLASSIFY_LEVEL', 'haiku');

/**
 * Locate the Python classifier script relative to this module's own file:
 * one directory up from where this file lives, then `scripts/classify_photos.py`.
 */
function scriptPath(): string {
  const thisFile = fileURLToPath(import.meta.url);
  const moduleDir = dirname(thisFile);
  // Layout: src/classify.ts sits next to ../scripts/classify_photos.py.
  return join(moduleDir, '..', 'scripts', 'classify_photos.py');
}

/** One entry of the `photos` array in the JSON request piped to the Python classifier. */
interface ClassifyRequestPhoto {
  /** Identifier for the entry; `classifyClusters` fills this with the cluster's id. */
  id: string;
  /**
   * Image path; `classifyClusters` takes it from the cluster's first member.
   * Presumably resolved by the script against the request's `imageRoot` — TODO confirm.
   */
  path: string;
  /**
   * Optional content hash. Not populated by `classifyClusters`; presumably a
   * cache key on the Python side — verify against the script before relying on it.
   */
  sha256?: string;
}

/**
 * Run the Python classifier over a request payload; return its raw JSON rows.
 *
 * The payload is serialized as JSON and written to the child's stdin; the
 * child's stdout is parsed as JSON once the process has closed.
 *
 * @param payload Request object; it is passed through `JSON.stringify` as-is.
 * @returns The parsed rows. A top-level JSON value that is not an array yields
 *   `[]`; array entries that are not plain objects are dropped so that the
 *   result really matches the declared row type.
 * @throws (as a rejection) when the interpreter cannot be spawned, when the
 *   script exits non-zero or is killed by a signal (message carries up to 300
 *   chars of stderr), or when stdout is not valid JSON.
 */
function runPython(payload: unknown): Promise<Array<Record<string, unknown>>> {
  return new Promise((resolve, reject) => {
    const proc = spawn(PYTHON, [scriptPath()], { stdio: ['pipe', 'pipe', 'pipe'] });
    let out = '';
    let err = '';

    // Decode at the stream level: appending raw Buffer chunks to a string
    // decodes each chunk on its own and mangles any multi-byte UTF-8
    // character that happens to straddle a chunk boundary.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk: string) => (out += chunk));
    proc.stderr.on('data', (chunk: string) => (err += chunk));

    // If the child goes away before consuming its input, the write below fails
    // with EPIPE. Without a listener that becomes an unhandled 'error' event;
    // the real failure is reported through the 'error'/'close' handlers.
    proc.stdin.on('error', () => {});

    proc.on('error', (e) => reject(new Error(`failed to spawn ${PYTHON}: ${e.message}`)));
    proc.on('close', (code, signal) => {
      if (code !== 0) {
        // `code` is null when the child was terminated by a signal.
        const how =
          code === null ? `was killed by ${signal ?? 'an unknown signal'}` : `exited ${code}`;
        reject(new Error(`classify_photos.py ${how}: ${err.trim().slice(0, 300)}`));
        return;
      }
      let parsed: unknown;
      try {
        parsed = JSON.parse(out);
      } catch {
        reject(new Error(`classify_photos.py returned non-JSON: ${out.trim().slice(0, 200)}`));
        return;
      }
      if (!Array.isArray(parsed)) {
        resolve([]);
        return;
      }
      resolve(
        parsed.filter(
          (row: unknown): row is Record<string, unknown> =>
            typeof row === 'object' && row !== null && !Array.isArray(row),
        ),
      );
    });

    proc.stdin.end(JSON.stringify(payload));
  });
}

/**
 * Label photo clusters by classifying a single representative image from each
 * one through the Python batch SDK script. The resulting label stands for the
 * whole cluster (every site that photo appears on).
 *
 * @param clusters Clusters to label; the first member of each is the
 *   representative, and clusters without members are skipped.
 * @param opts Optional `imageRoot` (defaults to `defaultImageRoot()`) and
 *   `cacheDir` (only forwarded when non-empty).
 * @returns One label per row returned by the script. Rows carrying a string
 *   `error` become placeholder labels with that error attached; all other rows
 *   go through `coerceLabel`. Returns `[]` without running the script when
 *   there is nothing to classify.
 */
export async function classifyClusters(
  clusters: PhotoCluster[],
  opts: { imageRoot?: string; cacheDir?: string } = {},
): Promise<PhotoLabel[]> {
  const representatives: ClassifyRequestPhoto[] = [];
  for (const cluster of clusters) {
    const first = cluster.members[0];
    if (first) {
      representatives.push({ id: cluster.id, path: first.path });
    }
  }
  if (representatives.length === 0) {
    return [];
  }

  // Keys are added in a fixed order so the serialized request stays stable.
  const request: Record<string, unknown> = {
    photos: representatives,
    imageRoot: opts.imageRoot ?? defaultImageRoot(),
    model: CLASSIFY_LEVEL,
  };
  if (opts.cacheDir) {
    request['cacheDir'] = opts.cacheDir;
  }

  const rows = await runPython(request);

  const labels: PhotoLabel[] = [];
  for (const row of rows) {
    const rawId = row['photoId'];
    const photoId = typeof rawId === 'string' ? rawId : '';
    const failure = row['error'];

    if (typeof failure !== 'string') {
      labels.push(coerceLabel(photoId, row));
      continue;
    }

    // The script reported a per-photo failure: emit a neutral placeholder
    // label that carries the error text.
    labels.push({
      photoId,
      category: 'portrait',
      thumbnailFitness: 0,
      faceVisible: false,
      note: '',
      error: failure,
    } satisfies PhotoLabel);
  }
  return labels;
}

/**
 * Attach labels to a report's photos, matching `label.photoId` against each
 * photo's `id`, and return a new report; the input report is not mutated.
 *
 * Photos with no matching label are passed through unchanged. When several
 * labels share a `photoId`, the last one in `labels` wins. Only `category`,
 * `thumbnailFitness`, `faceVisible` and `note` are copied onto the photo.
 *
 * NOTE(review): a label's `error` field is not carried over, so a placeholder
 * label from a failed classification is attached like a regular one —
 * confirm that this is intended.
 */
export function attachLabels(report: AlignmentReport, labels: PhotoLabel[]): AlignmentReport {
  const labelsByPhotoId = new Map<PhotoLabel['photoId'], PhotoLabel>();
  for (const label of labels) {
    labelsByPhotoId.set(label.photoId, label);
  }

  const photos = report.photos.map((photo) => {
    const match = labelsByPhotoId.get(photo.id);
    if (!match) {
      return photo;
    }
    const { category, thumbnailFitness, faceVisible, note } = match;
    return { ...photo, label: { category, thumbnailFitness, faceVisible, note } };
  });

  return { ...report, photos };
}