platform-operations/content-strategy/scripts/export-content.ts

208 lines
5.9 KiB
TypeScript

#!/usr/bin/env bun
/**
* Content database exporter.
*
* Reads content_pieces and related tables from SQLite and writes content.json
* for the Vite frontend. Excludes body_markdown (served lazily).
*/
import { writeFileSync } from 'fs';
import { resolve, dirname } from 'path';
import { fileURLToPath } from 'url';
import { Database } from 'bun:sqlite';
// Directory that contains this script, derived from the module's own URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// The SQLite input and the JSON output both live in the sibling `src/data`
// directory, one level above this script's folder.
const dataDir = resolve(__dirname, '..', 'src', 'data');
const dbPath = resolve(dataDir, 'citations.db');
const outputPath = resolve(dataDir, 'content.json');
/**
 * Prints one line to standard output.
 *
 * @param message - Text to print; a newline character is appended to it.
 */
function log(message: string): void {
  const line = message + '\n';
  process.stdout.write(line);
}
/**
 * One row of the `content_pieces` table, limited to the columns that `main`
 * selects. Property names mirror the column names.
 */
interface DbContentPiece {
  // Identity and descriptive columns.
  id: string;
  content_type: string;
  title: string;
  author: string | null;
  status: string | null;
  date: string | null;
  idea: string | null;
  published_on: string | null;

  // Size figures.
  word_count: number;
  word_count_target: number | null;
  reading_time_minutes: number;
  section_count: number;

  // Numeric flags; `main` exports each one as `true` only when it equals 1.
  has_tables: number;
  has_code_blocks: number;
  has_footnotes: number;
  has_figures: number;
  has_galleries: number;

  // `main` takes the last `/`-separated segment of this path, minus `.md`,
  // as the slug used to look up the citation count.
  file_path: string;
}
/**
 * One row of `content_sections` as selected by `main`, which reads the rows
 * of a content piece ordered by `position`.
 */
interface DbSection {
  level: number;
  text: string;
  slug: string;
  position: number;
}
/**
 * One row of `content_assets` as selected by `main`, which reads the rows of
 * a content piece ordered by `order_in_doc`.
 */
interface DbAsset {
  type: string;
  src: string;

  // Optional descriptive and layout columns; NULL in the database maps to null.
  alt: string | null;
  caption: string | null;
  position: string | null;
  width: string | null;
  gallery_id: string | null;

  order_in_doc: number;
}
/**
 * The `content_seo` row of a content piece, as selected by `main`.
 * Every column may be NULL.
 */
interface DbSeo {
  primary_keyword: string | null;
  // A single string; `main` splits it on ', ' to produce the exported list.
  secondary_keywords: string | null;
  meta_title: string | null;
  meta_description: string | null;
  schema: string | null;
}
/**
 * One key/value row of `content_metadata`. `main` folds all rows of a content
 * piece into a single key-to-value object.
 */
interface DbMetadata {
  key: string;
  value: string;
}
/**
 * Derives the slug used for citation lookups from a content file path: the
 * last `/`-separated segment with a trailing `.md` extension removed.
 */
function citationSlugFromPath(filePath: string): string {
  const fileName = filePath.split('/').pop() ?? '';
  // Anchor the match to the end of the name so only the extension is removed.
  // A plain-string replace would strip the first ".md" found anywhere, turning
  // e.g. "intro.mdx" into "introx".
  return fileName.replace(/\.md$/, '');
}

/**
 * Exports every row of `content_pieces`, together with its sections, assets,
 * SEO row, metadata and citation count, to `content.json`.
 *
 * The database is opened read-only and is closed on every exit path,
 * including when a query or the file write throws. When the table holds no
 * rows, a payload with an empty `pieces` list and zeroed totals is written.
 *
 * @throws Any error raised by the SQLite driver or by `writeFileSync`; nothing
 *   is caught here, the handle is merely released before the error propagates.
 */
function main(): void {
  const db = new Database(dbPath, { readonly: true });
  try {
    const pieces = db
      .prepare(
        `
        SELECT id, content_type, title, author, status, date, idea, published_on,
               word_count, word_count_target, reading_time_minutes, section_count,
               has_tables, has_code_blocks, has_footnotes, has_figures, has_galleries,
               file_path
        FROM content_pieces
        ORDER BY date DESC, title ASC
        `,
      )
      .all() as DbContentPiece[];

    if (pieces.length === 0) {
      log('[export-content] No content pieces to export');
      const emptyOutput = {
        version: 1,
        generatedAt: new Date().toISOString(),
        pieces: [],
        meta: { totalPieces: 0, byType: {}, byStatus: {}, totalAssets: 0, totalSections: 0 },
      };
      writeFileSync(outputPath, JSON.stringify(emptyOutput, null, 2));
      return;
    }

    // Per-piece lookups, prepared once and re-run for every content piece.
    const getSections = db.prepare(
      'SELECT level, text, slug, position FROM content_sections WHERE content_id = ? ORDER BY position',
    );
    const getAssets = db.prepare(
      'SELECT type, src, alt, caption, position, width, gallery_id, order_in_doc FROM content_assets WHERE content_id = ? ORDER BY order_in_doc',
    );
    const getSeo = db.prepare(
      'SELECT primary_keyword, secondary_keywords, meta_title, meta_description, schema FROM content_seo WHERE content_id = ?',
    );
    const getMetadata = db.prepare(
      'SELECT key, value FROM content_metadata WHERE content_id = ?',
    );
    const getCitationCount = db.prepare(
      'SELECT COUNT(*) as count FROM content_references WHERE content_slug = ?',
    );

    // Aggregates reported in the `meta` section of the output.
    const byType: Record<string, number> = {};
    const byStatus: Record<string, number> = {};
    let totalAssets = 0;
    let totalSections = 0;

    const exported = pieces.map((p) => {
      const sections = getSections.all(p.id) as DbSection[];
      const assets = getAssets.all(p.id) as DbAsset[];
      // A piece may have no SEO row, in which case the driver yields no object.
      const seo = getSeo.get(p.id) as DbSeo | null | undefined;
      const metadata = getMetadata.all(p.id) as DbMetadata[];

      // Citations are keyed by slug rather than by content id.
      const slug = citationSlugFromPath(p.file_path);
      const citationRow = getCitationCount.get(slug) as { count: number } | null | undefined;
      const citCount = citationRow?.count ?? 0;

      byType[p.content_type] = (byType[p.content_type] ?? 0) + 1;
      if (p.status) {
        // Pieces with a NULL or empty status are left out of the status tally.
        byStatus[p.status] = (byStatus[p.status] ?? 0) + 1;
      }
      totalAssets += assets.length;
      totalSections += sections.length;

      // fromEntries defines own properties, so even a "__proto__" key survives;
      // a later row with the same key still overrides an earlier one.
      const metadataObj: Record<string, string> = Object.fromEntries(
        metadata.map((m): [string, string] => [m.key, m.value]),
      );

      return {
        id: p.id,
        contentType: p.content_type,
        title: p.title,
        author: p.author,
        status: p.status,
        date: p.date,
        idea: p.idea,
        publishedOn: p.published_on,
        wordCount: p.word_count,
        wordCountTarget: p.word_count_target,
        readingTimeMinutes: p.reading_time_minutes,
        sectionCount: p.section_count,
        structuralFlags: {
          hasTables: p.has_tables === 1,
          hasCodeBlocks: p.has_code_blocks === 1,
          hasFootnotes: p.has_footnotes === 1,
          hasFigures: p.has_figures === 1,
          hasGalleries: p.has_galleries === 1,
        },
        sections: sections.map((s) => ({
          level: s.level,
          text: s.text,
          slug: s.slug,
          position: s.position,
        })),
        assets: assets.map((a) => ({
          type: a.type,
          src: a.src,
          alt: a.alt,
          caption: a.caption,
          position: a.position,
          width: a.width,
          galleryId: a.gallery_id,
          orderInDoc: a.order_in_doc,
        })),
        seo: seo
          ? {
              primaryKeyword: seo.primary_keyword,
              secondaryKeywords: seo.secondary_keywords?.split(', ').filter(Boolean) ?? [],
              metaTitle: seo.meta_title,
              metaDescription: seo.meta_description,
              schema: seo.schema,
            }
          : null,
        metadata: metadataObj,
        citationCount: citCount,
      };
    });

    const output = {
      version: 1,
      generatedAt: new Date().toISOString(),
      pieces: exported,
      meta: {
        totalPieces: exported.length,
        byType,
        byStatus,
        totalAssets,
        totalSections,
      },
    };
    writeFileSync(outputPath, JSON.stringify(output, null, 2));
    log(`[export-content] Exported ${exported.length} content pieces to content.json`);
    log(`[export-content] Assets: ${totalAssets}, Sections: ${totalSections}`);
  } finally {
    // Release the handle on every path: empty export, success, or thrown error.
    db.close();
  }
}
// Script entry point: run the export as soon as this file is executed.
main();