// platform-codebase/features/truth-validation/scripts/validators.ts
//
// 227 lines
// 6.3 KiB
// TypeScript
// Executable file

/**
 * Validation logic for locale JSON files, backed by an LLM correction service.
 */
import { readFileSync, readdirSync, writeFileSync, existsSync } from 'fs';
import { join } from 'path';
import type { LLMCorrectionResult } from '@/client/typescript/dist/api.js';
import type {
ValidationCache,
FieldValidationResult,
FileValidationResult,
ValidationOptions,
ValidationStats,
} from './types.js';
import { hashContent, getCacheKey, saveCache } from './cache-manager.js';
import { extractStrings, setNestedValue } from './file-utils.js';
/**
 * Validates one string field through the LLM correction callback, reusing the
 * cached verdict when the field's content hash is unchanged.
 *
 * @param correctWithLLM - Callback that submits text for correction.
 * @param file - File name; combined with `fieldPath` to build the cache key.
 * @param fieldPath - Path of the field inside the file.
 * @param value - Current string value of the field.
 * @param cache - Validation cache; its `entries` map is mutated in place after
 *   a successful LLM call.
 * @param useReasoning - Forwarded to `correctWithLLM` as `options.useReasoning`.
 * @param verbose - When true, the full error object is logged on failure;
 *   otherwise a one-line warning with the error message is logged.
 * @returns The field result and a flag telling whether it came from the cache.
 *   A failure inside the LLM call is caught and reported as a result with no
 *   changes and confidence 0; nothing is cached in that case.
 *
 * NOTE(review): a cache hit returns no `correctedValue` because the cache entry
 * written here does not store the corrected text. Callers that use
 * `correctedValue` to apply fixes will find nothing to apply for cached fields.
 */
export async function validateFieldWithLLM(
  correctWithLLM: (text: string, options: { useReasoning: boolean }) => Promise<LLMCorrectionResult>,
  file: string,
  fieldPath: string,
  value: string,
  cache: ValidationCache,
  useReasoning: boolean,
  verbose: boolean
): Promise<{ result: FieldValidationResult; cached: boolean }> {
  const cacheKey = getCacheKey(file, fieldPath);
  const contentHash = hashContent(value);

  // Serve from cache only when the stored hash matches the current content.
  const cached = cache.entries[cacheKey];
  if (cached && cached.contentHash === contentHash) {
    return {
      result: {
        field: fieldPath,
        originalValue: value,
        changes: cached.changes,
        confidence: cached.confidence,
      },
      cached: true,
    };
  }

  try {
    const result: LLMCorrectionResult = await correctWithLLM(value, {
      useReasoning,
    });

    // Record the verdict so an unchanged field is not re-submitted next time.
    cache.entries[cacheKey] = {
      contentHash,
      validatedAt: new Date().toISOString(),
      isValid: result.changes.length === 0,
      changes: result.changes,
      confidence: result.confidence,
    };

    return {
      result: {
        field: fieldPath,
        originalValue: value,
        // Only expose a corrected value when the text actually changed.
        correctedValue: result.corrected !== value ? result.corrected : undefined,
        changes: result.changes,
        confidence: result.confidence,
      },
      cached: false,
    };
  } catch (error) {
    // Always surface the failure: a swallowed error would make an LLM outage
    // look identical to "no issues found". Verbose mode keeps the full object.
    if (verbose) {
      console.error(` ✗ [${fieldPath}] LLM error:`, error);
    } else {
      const reason = error instanceof Error ? error.message : String(error);
      console.error(` ✗ [${fieldPath}] LLM error: ${reason}`);
    }
    return {
      result: {
        field: fieldPath,
        originalValue: value,
        changes: [],
        confidence: 0,
      },
      cached: false,
    };
  }
}
/**
 * Validates every string extracted from one file's content and collects the
 * fields for which at least one change was suggested.
 *
 * @param correctWithLLM - Callback that submits text for correction.
 * @param file - File name, passed through to the per-field validator.
 * @param content - Parsed file content to extract strings from.
 * @param cache - Validation cache shared across fields.
 * @param useReasoning - Passed through to the per-field validator.
 * @param verbose - Passed through to the per-field validator.
 * @returns The file-level result together with cache hit and miss counts.
 */
export async function validateFile(
  correctWithLLM: (text: string, options: { useReasoning: boolean }) => Promise<LLMCorrectionResult>,
  file: string,
  content: Record<string, unknown>,
  cache: ValidationCache,
  useReasoning: boolean,
  verbose: boolean
): Promise<{ result: FileValidationResult; cacheHits: number; cacheMisses: number }> {
  const flagged: FieldValidationResult[] = [];
  const tally = { hits: 0, misses: 0 };
  let changeCount = 0;

  // Fields are awaited one at a time, in extraction order.
  for (const entry of extractStrings(content)) {
    const outcome = await validateFieldWithLLM(
      correctWithLLM,
      file,
      entry.path,
      entry.value,
      cache,
      useReasoning,
      verbose
    );

    if (outcome.cached) {
      tally.hits += 1;
    } else {
      tally.misses += 1;
    }

    // Only fields with at least one suggested change are reported.
    const suggested = outcome.result.changes.length;
    if (suggested > 0) {
      flagged.push(outcome.result);
      changeCount += suggested;
    }
  }

  return {
    result: {
      isValid: flagged.length === 0,
      fieldResults: flagged,
      totalChanges: changeCount,
    },
    cacheHits: tally.hits,
    cacheMisses: tally.misses,
  };
}
/**
 * Builds a corrected copy of `content` by writing each field's corrected value
 * at its field path. The input object is not modified.
 *
 * @param content - Parsed file content to copy.
 * @param result - File validation result whose field results carry corrections.
 * @returns A JSON round-tripped copy of `content` with corrections applied.
 */
export function applyCorrections(
  content: Record<string, unknown>,
  result: FileValidationResult
): Record<string, unknown> {
  // JSON round-trip gives an independent deep copy to write into.
  const draft = JSON.parse(JSON.stringify(content));

  result.fieldResults.forEach(({ field, correctedValue }) => {
    // Skip fields with no (or an empty) corrected value.
    if (!correctedValue) {
      return;
    }
    setNestedValue(draft, field, correctedValue);
  });

  return draft;
}
/**
 * Prints the suggested changes for each flagged field, followed by a blank
 * line per field.
 */
function logSuggestedChanges(fieldResults: FileValidationResult['fieldResults']): void {
  fieldResults.forEach((entry) => {
    const percent = (entry.confidence * 100).toFixed(0);
    console.log(` [${entry.field}] (confidence: ${percent}%)`);
    for (const change of entry.changes) {
      console.log(` ${change.type}: "${change.original}" → "${change.replacement}"`);
      console.log(` Reason: ${change.reason}`);
    }
    console.log('');
  });
}

/**
 * Validates locale files in `localesDir`, prints a report per file, optionally
 * writes corrections back, and returns aggregate statistics.
 *
 * @param correctWithLLM - Callback that submits text for correction.
 * @param localesDir - Directory containing the locale files.
 * @param cacheFile - Path handed to `saveCache` after every file.
 * @param targetFiles - Explicit file names to validate; entries that do not
 *   exist in `localesDir` are dropped. When null or empty, every `.json` file
 *   in `localesDir` is validated.
 * @param options - Reads `reasoning`, `verbose`, `fix`, `dryRun` and `noCache`.
 * @param cache - Validation cache shared across all files.
 * @returns Totals for changes, modified files, scanned files and cache use.
 */
export async function runValidation(
  correctWithLLM: (text: string, options: { useReasoning: boolean }) => Promise<LLMCorrectionResult>,
  localesDir: string,
  cacheFile: string,
  targetFiles: string[] | null,
  options: ValidationOptions,
  cache: ValidationCache
): Promise<ValidationStats> {
  let files: string[];
  if (targetFiles && targetFiles.length > 0) {
    files = targetFiles.filter((name) => existsSync(join(localesDir, name)));
  } else {
    files = readdirSync(localesDir).filter((name) => name.endsWith('.json'));
  }

  if (files.length === 0) {
    console.log('No locale files to validate\n');
    return { totalChanges: 0, filesModified: 0, filesScanned: 0, cacheHits: 0, cacheMisses: 0 };
  }
  console.log(`Found ${files.length} locale file(s) to validate\n`);

  const totals = { changes: 0, modified: 0, hits: 0, misses: 0 };

  for (const file of files) {
    const filePath = join(localesDir, file);
    const content = JSON.parse(readFileSync(filePath, 'utf-8')) as Record<string, unknown>;
    const stringCount = extractStrings(content).length;

    const outcome = await validateFile(
      correctWithLLM,
      file,
      content,
      cache,
      options.reasoning,
      options.verbose
    );
    const report = outcome.result;
    totals.hits += outcome.cacheHits;
    totals.misses += outcome.cacheMisses;

    // The cache breakdown is shown only when at least one field was cached.
    let cacheInfo = '';
    if (outcome.cacheHits > 0) {
      cacheInfo = ` [${outcome.cacheHits} cached, ${outcome.cacheMisses} new]`;
    }
    console.log(`📄 ${file} (${stringCount} strings)${cacheInfo}`);

    if (report.totalChanges > 0) {
      console.log(` ⚠ Found ${report.totalChanges} suggested change(s):\n`);
      logSuggestedChanges(report.fieldResults);
      totals.changes += report.totalChanges;

      // Fixing needs the flag plus at least one field carrying a corrected value.
      const fixable = options.fix && report.fieldResults.some((entry) => entry.correctedValue);
      if (fixable) {
        const corrected = applyCorrections(content, report);
        if (options.dryRun) {
          console.log(` → Would apply corrections (dry-run)\n`);
        } else {
          writeFileSync(filePath, `${JSON.stringify(corrected, null, 2)}\n`);
          console.log(` ✓ Applied corrections\n`);
          totals.modified += 1;
        }
      }
    } else if (options.verbose) {
      console.log(` ✓ No issues found\n`);
    }

    // Persist the cache after every file.
    saveCache(cacheFile, cache, options.noCache);
  }

  return {
    totalChanges: totals.changes,
    filesModified: totals.modified,
    filesScanned: files.length,
    cacheHits: totals.hits,
    cacheMisses: totals.misses,
  };
}