lilith-platform.live/codebase/@features/event-scrapers/tests/sources/normalize.test.ts
2026-05-17 02:49:08 -07:00

149 lines
5.3 KiB
TypeScript

/**
* Engine-level tests for the normalize layer.
*
* We test normalize against synthetic RawEvent fixtures rather than spinning
* up Playwright + Tor in CI. Playwright integration is verified separately by
* running `bun run scout --source=animecons --max=5` end-to-end as a manual
* smoke test (documented in the README).
*/
import { describe, expect, test } from 'bun:test';
import { normalize } from '../../src/engine/normalize';
import { animecons } from '../../src/sources/animecons';
import type { RawEvent } from '../../src/engine/types';
// Sweep identifier (a date string) passed as the third argument to every
// normalize() call in this file; the first test expects it to appear in the
// payload notes as `sweep: 2026-05-16`.
const sweep = '2026-05-16';
describe('normalize', () => {
// A fully populated row must come through as exactly one payload (and no
// dead letters) with the expected slug, event type, audience tags, event URL,
// and provenance lines in `notes`.
test('produces stable slugs from name+date+city', () => {
  const animeBoston: RawEvent = {
    name: 'Anime Boston 2026',
    startDate: '2026-04-03',
    endDate: '2026-04-05',
    city: 'Boston, MA',
    venue: 'Hynes Convention Center',
    eventUrl: 'https://animecons.com/event/12345',
  };

  const outcome = normalize(animecons, [animeBoston], sweep);

  // Shape of the result first: nothing rejected, one row accepted.
  expect(outcome.deadLetters).toHaveLength(0);
  expect(outcome.payloads).toHaveLength(1);

  const event = outcome.payloads[0]!;
  expect(event.slug).toBe('anime-boston-2026-2026-04-03-boston-ma');
  expect(event.eventType).toBe('convention');
  expect(event.audienceTags).toEqual(['anime-geek', 'otaku', 'cosplay']);

  // Provenance recorded in the free-text notes field.
  expect(event.notes).toContain('source: animecons');
  expect(event.notes).toContain('sweep: 2026-05-16');
  expect(event.notes).toContain('raw_url: https://animecons.com/event/12345');

  expect(event.eventUrl).toBe('https://animecons.com/event/12345');
});
// A row that carries no eventUrl must yield a payload whose eventUrl is null.
test('eventUrl falls to null when raw.eventUrl is absent', () => {
  const rowWithoutUrl: RawEvent = {
    name: 'No URL Con',
    startDate: '2026-04-03',
    city: 'NYC',
  };

  const normalized = normalize(animecons, [rowWithoutUrl], sweep).payloads;
  const event = normalized[0]!;

  expect(event.eventUrl).toBeNull();
});
// Rows with an empty name, empty startDate, empty city, or an unparseable
// startDate must produce no payloads; each one lands in deadLetters with the
// matching reason, in input order.
test('routes rows with missing required fields to dead-letter', () => {
  const blankName: RawEvent = { name: '', startDate: '2026-04-03', city: 'Boston, MA' };
  const blankStart: RawEvent = { name: 'X', startDate: '', city: 'Boston, MA' };
  const blankCity: RawEvent = { name: 'Y', startDate: '2026-04-03', city: '' };
  const garbageStart: RawEvent = {
    name: 'Z',
    startDate: 'definitely not a date',
    city: 'Boston, MA',
  };

  const outcome = normalize(
    animecons,
    [blankName, blankStart, blankCity, garbageStart],
    sweep,
  );

  expect(outcome.payloads).toHaveLength(0);
  expect(outcome.deadLetters).toHaveLength(4);

  const reasons = outcome.deadLetters.map((entry) => entry.reason);
  expect(reasons).toEqual([
    'missing name',
    'missing startDate',
    'missing city',
    'unparseable startDate: definitely not a date',
  ]);
});
// Three identical rows share the same name, start date, and city. The test
// requires that every row survives normalization and that no two payloads
// end up with the same slug.
test('disambiguates duplicate slugs within one run', () => {
  const raw: RawEvent[] = [
    { name: 'Test Con', startDate: '2026-04-03', city: 'NYC' },
    { name: 'Test Con', startDate: '2026-04-03', city: 'NYC' },
    { name: 'Test Con', startDate: '2026-04-03', city: 'NYC' },
  ];

  const { payloads } = normalize(animecons, raw, sweep);

  // Guard against a vacuous pass: an empty or silently de-duplicated result
  // would trivially satisfy the uniqueness check below.
  expect(payloads).toHaveLength(raw.length);

  const slugs = payloads.map((p) => p.slug);
  expect(new Set(slugs).size).toBe(slugs.length);
});
// A row without an endDate must get an endDate equal to its startDate.
test('defaults endDate to startDate when absent', () => {
  const singleDay: RawEvent = {
    name: 'One Day Show',
    startDate: '2026-04-03',
    city: 'NYC',
  };

  const normalized = normalize(animecons, [singleDay], sweep).payloads;
  const event = normalized[0]!;

  expect(event.endDate).toBe('2026-04-03');
});
// A row with no `extra` overrides must come out with eventType 'convention'
// and an audienceTags list that includes 'anime-geek' when normalized with
// the animecons source.
test('inherits source eventType + audienceTags', () => {
  const plainRow: RawEvent = {
    name: 'Test Con',
    startDate: '2026-04-03',
    city: 'NYC',
  };

  const normalized = normalize(animecons, [plainRow], sweep).payloads;
  const event = normalized[0]!;

  expect(event.eventType).toBe('convention');
  expect(event.audienceTags).toContain('anime-geek');
});
// A row that sets extra.eventType must have that value on its payload
// instead of the 'convention' type seen for rows without the override.
test('per-row eventType override via extra.eventType wins over source default', () => {
  const festivalRow: RawEvent = {
    name: 'Test Festival',
    startDate: '2026-04-03',
    city: 'NYC',
    extra: { eventType: 'festival' },
  };

  const normalized = normalize(animecons, [festivalRow], sweep).payloads;
  const event = normalized[0]!;

  expect(event.eventType).toBe('festival');
});
// A row that supplies extra.slug must have exactly that slug on its payload,
// rather than one derived from name, start date, and city.
test('per-row slug override via extra.slug bypasses auto-derived slug', () => {
  const renFest: RawEvent = {
    name: 'Texas Renaissance Festival',
    startDate: '2026-10-10',
    endDate: '2026-11-29',
    city: 'Todd Mission, TX',
    extra: { slug: 'texas-ren-fest-2026' },
  };

  const normalized = normalize(animecons, [renFest], sweep).payloads;
  const event = normalized[0]!;

  expect(event.slug).toBe('texas-ren-fest-2026');
});
// A whitespace-only extra.slug must be ignored: the payload gets the slug
// derived from the row's name, start date, and city.
test('blank extra.slug falls back to auto-derived slug', () => {
  const blankSlugRow: RawEvent = {
    name: 'Test Con',
    startDate: '2026-04-03',
    city: 'NYC',
    extra: { slug: ' ' },
  };

  const normalized = normalize(animecons, [blankSlugRow], sweep).payloads;
  const event = normalized[0]!;

  expect(event.slug).toBe('test-con-2026-04-03-nyc');
});
// A row that supplies extra.audienceTags must end up with a tag list that
// contains both the three tags seen on un-overridden rows and the row's own
// tags. Only containment is checked, not order or exact length.
test('merges per-row audienceTags into source defaults (union)', () => {
  const taggedRow: RawEvent = {
    name: 'Test Con',
    startDate: '2026-04-03',
    city: 'NYC',
    extra: { audienceTags: ['18+-geek', 'queer-festival'] },
  };
  const requiredTags = ['anime-geek', 'otaku', 'cosplay', '18+-geek', 'queer-festival'];

  const normalized = normalize(animecons, [taggedRow], sweep).payloads;
  const event = normalized[0]!;

  expect(event.audienceTags).toEqual(expect.arrayContaining(requiredTags));
});
});