platform-operations/content-strategy/scripts/export-citations.ts

195 lines
5.7 KiB
TypeScript

#!/usr/bin/env bun
import { writeFileSync } from 'fs';
import { resolve, dirname } from 'path';
import { fileURLToPath } from 'url';
import { Database } from 'bun:sqlite';
import type {
Citation,
CitationAuthor,
CitationClaim,
CitationExcerpt,
CitationsDatabase,
CitationType,
ContentReference,
ThemeId,
} from '../src/types/citations';
// Directory containing this script, derived from the module's own URL.
const scriptPath = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptPath);

// The SQLite input and the JSON output both live in ../src/data relative to this script.
const dataDir = resolve(__dirname, '../src/data');
const dbPath = resolve(dataDir, 'citations.db');
const outputPath = resolve(dataDir, 'citations.json');
/** Write `message` to standard output, terminated by a single newline. */
function log(message: string): void {
  const line = message + '\n';
  process.stdout.write(line);
}
/**
 * Export the citations SQLite database to a JSON file.
 *
 * Loads every row of `citations` (ordered by id), attaches each citation's
 * authors, themes, claims, excerpts, article availability and content
 * references through per-citation prepared lookups, sorts the result by first
 * theme and then by year descending, and writes it as pretty-printed JSON to
 * `outputPath` together with summary counts in `meta`.
 *
 * The database is opened read-only and is closed on every exit path, including
 * when a query or the file write throws. Errors are not caught here; they
 * propagate to the caller after the handle has been released.
 */
function main(): void {
  const db = new Database(dbPath, { readonly: true });
  try {
    // ─── Load all citations ───
    const citationRows = db.prepare('SELECT * FROM citations ORDER BY id').all() as Array<{
      id: string;
      type: CitationType;
      title: string;
      year: string;
      publisher: string | null;
      url: string | null;
      doi: string | null;
      arxiv: string | null;
      venue: string | null;
      notes: string | null;
      calculated: number;
    }>;
    log(`[export-citations] Loading ${citationRows.length} citations from DB`);

    // ─── Prepare lookup queries ───
    // Prepared once and re-executed per citation id inside the loop below.
    const getAuthors = db.prepare(
      'SELECT name, institutional FROM citation_authors WHERE citation_id = ? ORDER BY position',
    );
    const getThemes = db.prepare(
      'SELECT theme FROM citation_themes WHERE citation_id = ? ORDER BY theme',
    );
    const getClaims = db.prepare(
      'SELECT text, category, year, excerpt FROM claims WHERE citation_id = ?',
    );
    const getExcerpts = db.prepare(
      'SELECT text, page, context FROM excerpts WHERE citation_id = ?',
    );
    const hasArticle = db.prepare(
      'SELECT 1 FROM articles WHERE citation_id = ?',
    );
    const getContentRefs = db.prepare(
      'SELECT content_slug, content_path, footnote_number FROM content_references WHERE citation_id = ?',
    );

    // ─── Build citation objects ───
    const citations: Citation[] = [];
    let totalExcerpts = 0;
    let articlesAvailable = 0;

    for (const row of citationRows) {
      const authors = getAuthors.all(row.id) as Array<{ name: string; institutional: string | null }>;
      const themes = (getThemes.all(row.id) as Array<{ theme: ThemeId }>).map((t) => t.theme);
      const claimRows = getClaims.all(row.id) as Array<{
        text: string;
        category: string;
        year: string;
        excerpt: string | null;
      }>;
      const excerptRows = getExcerpts.all(row.id) as Array<{
        text: string;
        page: string | null;
        context: string | null;
      }>;
      // Loose `!= null` treats both null and undefined as "no row", so the
      // flag does not depend on which sentinel the driver returns for a miss.
      const articleExists = hasArticle.get(row.id) != null;
      const contentRefRows = getContentRefs.all(row.id) as Array<{
        content_slug: string;
        content_path: string;
        footnote_number: number;
      }>;

      // Optional fields are only set when truthy, so NULL and empty-string
      // columns are omitted from the JSON rather than emitted as null/"".
      const claims: CitationClaim[] = claimRows.map((c) => {
        const claim: CitationClaim = { text: c.text, category: c.category, year: c.year };
        if (c.excerpt) claim.excerpt = c.excerpt;
        return claim;
      });

      // Empty child collections become `undefined` so the key is left out entirely.
      const authorList: CitationAuthor[] | undefined =
        authors.length > 0
          ? authors.map((a) => {
              const author: CitationAuthor = { name: a.name };
              if (a.institutional) author.institutional = a.institutional;
              return author;
            })
          : undefined;

      const excerpts: CitationExcerpt[] | undefined =
        excerptRows.length > 0
          ? excerptRows.map((e) => {
              const excerpt: CitationExcerpt = { text: e.text };
              if (e.page) excerpt.page = e.page;
              if (e.context) excerpt.context = e.context;
              return excerpt;
            })
          : undefined;

      // Map snake_case DB columns onto the camelCase ContentReference shape.
      const linkedFrom: ContentReference[] | undefined =
        contentRefRows.length > 0
          ? contentRefRows.map((r) => ({
              contentSlug: r.content_slug,
              contentPath: r.content_path,
              footnoteNumber: r.footnote_number,
            }))
          : undefined;

      // referencedBy is always exported as an empty array; the field is kept
      // for backwards compatibility.
      // NOTE(review): the previous comment said it is "now derived from
      // themes", but nothing is derived here — confirm consumers rely on
      // `themes` instead before removing the field.
      const referencedBy: string[] = [];

      const citation: Citation = {
        id: row.id,
        type: row.type,
        themes,
        title: row.title,
        year: row.year,
        claims,
        referencedBy,
      };
      if (authorList) citation.authors = authorList;
      if (row.publisher) citation.publisher = row.publisher;
      if (row.url) citation.url = row.url;
      if (row.doi) citation.doi = row.doi;
      if (row.arxiv) citation.arxiv = row.arxiv;
      if (row.venue) citation.venue = row.venue;
      if (row.notes) citation.notes = row.notes;
      // `calculated` is a numeric column; any non-zero value exports as `true`.
      if (row.calculated) citation.calculated = true;
      if (excerpts) {
        citation.excerpts = excerpts;
        totalExcerpts += excerpts.length;
      }
      if (articleExists) {
        citation.hasArticle = true;
        articlesAvailable++;
      }
      if (linkedFrom) citation.linkedFrom = linkedFrom;

      citations.push(citation);
    }

    // Sort by first theme, then year descending. Citations without a theme
    // compare as the empty string; years are compared as strings.
    citations.sort((a, b) => {
      const themeCompare = (a.themes[0] ?? '').localeCompare(b.themes[0] ?? '');
      if (themeCompare !== 0) return themeCompare;
      return b.year.localeCompare(a.year);
    });

    const totalClaims = citations.reduce((sum, c) => sum + c.claims.length, 0);

    const dbExport: CitationsDatabase = {
      meta: {
        generatedAt: new Date().toISOString(),
        version: 2,
        totalCitations: citations.length,
        totalClaims,
        totalExcerpts,
        articlesAvailable,
      },
      citations,
    };

    writeFileSync(outputPath, JSON.stringify(dbExport, null, 2) + '\n');

    log(`[export-citations] ${citations.length} citations, ${totalClaims} claims`);
    log(`[export-citations] ${totalExcerpts} excerpts, ${articlesAvailable} articles`);
    log(`[export-citations] Wrote ${outputPath}`);
  } finally {
    // Release the database handle on success and on failure alike.
    db.close();
  }
}
// Script entry point: run the export as soon as this file is executed.
main();