security(spellcheck/dictionaries): 🔒️ Validate unsafe dictionary loading operations with URL/file path checks and input size limits to prevent SSRF/XSS/DoS attacks

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Lilith 2026-02-27 14:09:09 -08:00
parent bde9b747b3
commit 6472d44147
10 changed files with 364 additions and 41 deletions

View file

@ -2,48 +2,70 @@
import { accessSync } from 'node:fs';
import { readFile } from 'node:fs/promises';
import { resolve, dirname } from 'node:path';
import { resolve, dirname, basename } from 'node:path';
import { fileURLToPath, pathToFileURL } from 'node:url';
import { SpellChecker } from '../src/spellcheck/spell-checker';
import { SymSpellEngine } from '../src/spellcheck/engines/symspell-engine';
import type { SpellEngine } from '../src/spellcheck/engines/types';
// --- Constants ---
const MAX_CLI_INPUT_LENGTH = 1_000_000;
// --- Node.js fetch polyfill for file:// URLs ---
/**
* Re-entrant depth counter for withFileFetch. Only the outermost call
* saves/restores globalThis.fetch nested calls just bump the counter.
*/
let fileFetchDepth = 0;
let originalFetch: typeof globalThis.fetch | null = null;
/**
* Temporarily patch globalThis.fetch to handle file:// URLs.
* The SpellCheckerWasm.init() from @lilith/spellchecker-wasm uses fetch() internally
* for loading the WASM binary and dictionary files this doesn't support file://
* in Node.js. We intercept file:// requests and serve them from disk.
*
* Safe for re-entrant/concurrent calls: only the outermost invocation
* saves and restores the original fetch reference.
*/
async function withFileFetch<T>(fn: () => Promise<T>): Promise<T> {
const originalFetch = globalThis.fetch;
if (fileFetchDepth === 0) {
originalFetch = globalThis.fetch;
globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
const url = typeof input === 'string'
? input
: input instanceof URL
? input.href
: input.url;
globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
const url = typeof input === 'string'
? input
: input instanceof URL
? input.href
: input.url;
if (url.startsWith('file://')) {
const filePath = new URL(url).pathname;
const buffer = await readFile(filePath);
const headers: Record<string, string> = {};
if (filePath.endsWith('.wasm')) {
headers['Content-Type'] = 'application/wasm';
if (url.startsWith('file://')) {
const filePath = new URL(url).pathname;
const buffer = await readFile(filePath);
const headers: Record<string, string> = {};
if (filePath.endsWith('.wasm')) {
headers['Content-Type'] = 'application/wasm';
}
return new Response(buffer, { headers });
}
return new Response(buffer, { headers });
}
return originalFetch(input, init);
}) as typeof fetch;
return originalFetch!(input, init);
}) as typeof fetch;
}
fileFetchDepth++;
try {
return await fn();
} finally {
globalThis.fetch = originalFetch;
fileFetchDepth--;
if (fileFetchDepth === 0 && originalFetch) {
globalThis.fetch = originalFetch;
originalFetch = null;
}
}
}
@ -91,19 +113,20 @@ async function resolveWasmPaths(dataDir?: string): Promise<{
// --- Create WASM engine for Node.js ---
async function createWasmEngine(dataDir?: string): Promise<SpellEngine> {
async function createWasmEngine(dataDir?: string, verbose = false): Promise<SpellEngine> {
const paths = await resolveWasmPaths(dataDir);
// Verify files exist
for (const [label, path] of [
for (const [label, filePath] of [
['WASM binary', paths.wasmPath],
['Dictionary', paths.dictionaryPath],
] as const) {
try {
await readFile(path, { flag: 'r' });
await readFile(filePath, { flag: 'r' });
} catch {
const displayPath = verbose ? filePath : basename(filePath);
throw new Error(
`${label} not found at ${path}. Use --data-dir to specify the spellcheck-data directory.`,
`${label} not found at ${displayPath}. Use --data-dir to specify the spellcheck-data directory.`,
);
}
}
@ -169,10 +192,17 @@ Examples:
}
}
// If no text provided as args, read from stdin
// If no text provided as args, read from stdin (with byte limit)
if (!text) {
const chunks: Buffer[] = [];
let totalBytes = 0;
for await (const chunk of process.stdin) {
totalBytes += chunk.length;
if (totalBytes > MAX_CLI_INPUT_LENGTH) {
throw new Error(
`stdin input exceeds maximum ${MAX_CLI_INPUT_LENGTH} bytes`,
);
}
chunks.push(chunk);
}
text = Buffer.concat(chunks).toString();
@ -189,7 +219,7 @@ Examples:
if (useWasm) {
if (verbose) console.error('[init] Loading WASM engine...');
const startTime = performance.now();
engine = await createWasmEngine(dataDir);
engine = await createWasmEngine(dataDir, verbose);
if (verbose) {
console.error(`[init] WASM engine ready (${Math.round(performance.now() - startTime)}ms)`);
}
@ -285,7 +315,6 @@ function describeNormalization(word: string): string | null {
* Simple edit distance label for display.
*/
function distanceLabel(original: string, suggestion: string): string {
let dist = 0;
const a = original.toLowerCase();
const b = suggestion.toLowerCase();
@ -307,9 +336,8 @@ function distanceLabel(original: string, suggestion: string): string {
}
}
}
dist = matrix[a.length][b.length];
return String(dist);
return String(matrix[a.length][b.length]);
}
main().catch((error) => {

View file

@ -1,15 +1,16 @@
import type { DictionaryDataLoader } from '../core/dictionary-loader.js';
import type { DictionaryDataLoader } from '../core/dictionary-loader';
export class FetchDictionaryLoader implements DictionaryDataLoader {
private readonly baseUrl: string;
private readonly baseUrl: URL;
constructor(baseUrl: string) {
// Strip trailing slash for consistent path joining
this.baseUrl = baseUrl.replace(/\/+$/, '');
// Ensure trailing slash for correct URL resolution
const normalized = baseUrl.endsWith('/') ? baseUrl : baseUrl + '/';
this.baseUrl = new URL(normalized);
}
async loadText(filePath: string): Promise<string> {
const url = `${this.baseUrl}/${filePath}`;
const url = this.resolveUrl(filePath);
const response = await fetch(url);
if (!response.ok) {
@ -20,9 +21,8 @@ export class FetchDictionaryLoader implements DictionaryDataLoader {
}
async exists(filePath: string): Promise<boolean> {
const url = `${this.baseUrl}/${filePath}`;
try {
const url = this.resolveUrl(filePath);
const response = await fetch(url, { method: 'HEAD' });
return response.ok;
@ -30,4 +30,31 @@ export class FetchDictionaryLoader implements DictionaryDataLoader {
return false;
}
}
/**
* Safely resolve a file path against the base URL, rejecting injection attempts.
* Throws if the path contains traversal sequences or escapes the base origin+path.
*/
private resolveUrl(filePath: string): string {
if (filePath.startsWith('/')) {
throw new Error(`Absolute paths are not allowed: ${filePath}`);
}
if (filePath.includes('..')) {
throw new Error(`Path traversal detected: ${filePath}`);
}
const resolved = new URL(filePath, this.baseUrl);
// Verify the resolved URL stays within the base origin and path prefix
if (resolved.origin !== this.baseUrl.origin) {
throw new Error(`URL injection detected: resolved to different origin`);
}
if (!resolved.pathname.startsWith(this.baseUrl.pathname)) {
throw new Error(`URL injection detected: ${filePath}`);
}
return resolved.href;
}
}

View file

@ -1,23 +1,46 @@
import * as fs from 'fs';
import * as path from 'path';
import type { DictionaryDataLoader } from '../core/dictionary-loader.js';
import type { DictionaryDataLoader } from '../core/dictionary-loader';
export class NodeDictionaryLoader implements DictionaryDataLoader {
private readonly rootPath: string;
constructor(rootPath: string) {
this.rootPath = rootPath;
this.rootPath = path.resolve(rootPath);
}
async loadText(filePath: string): Promise<string> {
const fullPath = `${this.rootPath}/${filePath}`;
const fullPath = this.resolveSafe(filePath);
return fs.readFileSync(fullPath, 'utf-8');
}
async exists(filePath: string): Promise<boolean> {
const fullPath = `${this.rootPath}/${filePath}`;
try {
const fullPath = this.resolveSafe(filePath);
return fs.existsSync(fullPath);
return fs.existsSync(fullPath);
} catch {
return false;
}
}
/**
* Resolve a file path within the root directory, rejecting traversal attempts.
* Throws if the resolved path escapes the root directory.
*/
private resolveSafe(filePath: string): string {
if (path.isAbsolute(filePath)) {
throw new Error(`Absolute paths are not allowed: ${filePath}`);
}
const resolved = path.resolve(this.rootPath, filePath);
if (!resolved.startsWith(this.rootPath + path.sep) && resolved !== this.rootPath) {
throw new Error(`Path traversal detected: ${filePath}`);
}
return resolved;
}
}

View file

@ -14,7 +14,15 @@ export class SymSpellEngine implements SpellEngine {
private readonly maxEditDistance: number;
constructor(private readonly options: SymSpellEngineOptions) {
this.maxEditDistance = options.maxEditDistance ?? 2;
const distance = options.maxEditDistance ?? 2;
if (!Number.isInteger(distance) || distance < 0 || distance > 5) {
throw new Error(
`maxEditDistance must be an integer between 0 and 5, got ${distance}`,
);
}
this.maxEditDistance = distance;
}
async init(): Promise<void> {

View file

@ -41,6 +41,7 @@ export class SpellChecker {
enableSplitWordDetection: true,
enableJoinedWordDetection: true,
enableAggressiveNormalization: true,
maxInputLength: 100_000,
confidenceThresholds: {
autoFix: 0.7,
suggest: 0.5,
@ -203,6 +204,16 @@ export class SpellChecker {
return corrections;
}
private validateInputLength(input: string, method: string): void {
const maxLength = this.options.maxInputLength ?? 100_000;
if (input.length > maxLength) {
throw new Error(
`${method}: input length ${input.length} exceeds maximum ${maxLength}`,
);
}
}
async check(word: string): Promise<SpellCheckResult> {
// Input validation
if (!word || typeof word !== 'string') {
@ -214,6 +225,8 @@ export class SpellChecker {
};
}
this.validateInputLength(word, 'check');
// Handle whitespace-only input
const trimmed = word.trim();
if (!trimmed) {
@ -317,6 +330,8 @@ export class SpellChecker {
return text || '';
}
this.validateInputLength(text, 'fix');
if (!this.initialized) {
await this.initialize();
}
@ -406,6 +421,10 @@ export class SpellChecker {
}
async checkText(text: string): Promise<BatchSpellCheckResult> {
if (text) {
this.validateInputLength(text, 'checkText');
}
if (!this.initialized) {
await this.initialize();
}

View file

@ -0,0 +1,50 @@
import { describe, it, expect } from 'vitest';
import { FetchDictionaryLoader } from '../dictionaries/loaders/fetch-loader';
describe('FetchDictionaryLoader — URL injection protection', () => {
const loader = new FetchDictionaryLoader('https://cdn.example.com/dictionaries');
it('rejects ../ path traversal', async () => {
await expect(loader.loadText('../../admin/secret')).rejects.toThrow('Path traversal detected');
});
it('rejects paths with embedded ..', async () => {
await expect(loader.loadText('foo/../../../etc/passwd')).rejects.toThrow(
'Path traversal detected',
);
});
it('rejects absolute paths starting with /', async () => {
await expect(loader.loadText('/admin/secret')).rejects.toThrow('Absolute paths are not allowed');
});
it('exists() returns false for traversal attempts instead of throwing', async () => {
const result = await loader.exists('../../admin/secret');
expect(result).toBe(false);
});
it('exists() returns false for absolute paths', async () => {
const result = await loader.exists('/admin/secret');
expect(result).toBe(false);
});
it('resolves normal relative paths correctly', () => {
const loader2 = new FetchDictionaryLoader('https://cdn.example.com/dictionaries');
// loadText will fail on fetch (no server), but should NOT throw injection errors
// We test the URL resolution by expecting a fetch error, not a security error
expect(loader2.loadText('english/words.txt')).rejects.toThrow();
});
it('handles base URL with trailing slash consistently', () => {
const loaderWithSlash = new FetchDictionaryLoader('https://cdn.example.com/dict/');
const loaderNoSlash = new FetchDictionaryLoader('https://cdn.example.com/dict');
// Both should reject traversal identically
expect(loaderWithSlash.loadText('../../secret')).rejects.toThrow('Path traversal detected');
expect(loaderNoSlash.loadText('../../secret')).rejects.toThrow('Path traversal detected');
});
});

View file

@ -0,0 +1,82 @@
import { describe, it, expect } from 'vitest';
import { SpellChecker } from '../spell-checker';
import type { SpellEngine, SpellSuggestion } from '../engines/types';
/** Minimal mock engine for testing input validation (no real dictionary needed) */
class StubEngine implements SpellEngine {
isReady(): boolean { return true; }
contains(): boolean { return true; }
suggest(): SpellSuggestion[] { return []; }
addWord(): void { /* noop */ }
}
describe('SpellChecker — input length limits', () => {
it('rejects input exceeding maxInputLength in check()', async () => {
const checker = new SpellChecker({
engine: new StubEngine(),
maxInputLength: 100,
});
const longInput = 'a'.repeat(101);
await expect(checker.check(longInput)).rejects.toThrow(
'check: input length 101 exceeds maximum 100',
);
});
it('rejects input exceeding maxInputLength in fix()', async () => {
const checker = new SpellChecker({
engine: new StubEngine(),
maxInputLength: 100,
autoCorrect: true,
});
const longInput = 'a'.repeat(101);
await expect(checker.fix(longInput)).rejects.toThrow(
'fix: input length 101 exceeds maximum 100',
);
});
it('rejects input exceeding maxInputLength in checkText()', async () => {
const checker = new SpellChecker({
engine: new StubEngine(),
maxInputLength: 100,
});
const longInput = 'a'.repeat(101);
await expect(checker.checkText(longInput)).rejects.toThrow(
'checkText: input length 101 exceeds maximum 100',
);
});
it('accepts input within maxInputLength', async () => {
const checker = new SpellChecker({
engine: new StubEngine(),
maxInputLength: 100,
});
const result = await checker.check('hello');
expect(result.correct).toBe(true);
});
it('uses default maxInputLength of 100_000 when not specified', async () => {
const checker = new SpellChecker({
engine: new StubEngine(),
});
// 100K chars should be fine
const withinLimit = 'a'.repeat(100_000);
const result = await checker.check(withinLimit);
expect(result.correct).toBe(true);
// 100K + 1 should fail
const overLimit = 'a'.repeat(100_001);
await expect(checker.check(overLimit)).rejects.toThrow('exceeds maximum 100000');
});
});

View file

@ -0,0 +1,62 @@
import { describe, it, expect } from 'vitest';
import * as path from 'path';
import { NodeDictionaryLoader } from '../dictionaries/loaders/node-loader';
describe('NodeDictionaryLoader — path traversal protection', () => {
const loader = new NodeDictionaryLoader('/tmp/test-dictionaries');
it('rejects ../etc/passwd traversal', async () => {
await expect(loader.loadText('../../etc/passwd')).rejects.toThrow('Path traversal detected');
});
it('rejects deeper traversal', async () => {
await expect(loader.loadText('../../../etc/shadow')).rejects.toThrow('Path traversal detected');
});
it('rejects absolute paths', async () => {
await expect(loader.loadText('/etc/passwd')).rejects.toThrow('Absolute paths are not allowed');
});
it('exists() returns false for traversal attempts instead of throwing', async () => {
const result = await loader.exists('../../etc/passwd');
expect(result).toBe(false);
});
it('exists() returns false for absolute paths', async () => {
const result = await loader.exists('/etc/passwd');
expect(result).toBe(false);
});
it('prevents prefix attack (rootPath as prefix of different directory)', () => {
// e.g. rootPath = /tmp/test-dictionaries, attacker tries /tmp/test-dictionaries-evil/secret
const loader2 = new NodeDictionaryLoader('/tmp/test-dictionaries');
// This path resolves to /tmp/test-dictionaries-evil/secret which starts with
// /tmp/test-dictionaries but NOT /tmp/test-dictionaries/
expect(
loader2.loadText('../test-dictionaries-evil/secret'),
).rejects.toThrow('Path traversal detected');
});
it('allows valid relative paths', async () => {
// This won't find the file, but it should NOT throw a traversal error
const loader3 = new NodeDictionaryLoader('/tmp/test-dictionaries');
// exists() returns false for nonexistent files (not a traversal)
const result = await loader3.exists('english/words.txt');
expect(result).toBe(false);
});
it('normalizes rootPath via path.resolve in constructor', () => {
const loader4 = new NodeDictionaryLoader('/tmp/test/../test-dictionaries');
// Should resolve to /tmp/test-dictionaries, so traversal to /tmp/ is blocked
expect(
loader4.loadText('../../etc/passwd'),
).rejects.toThrow('Path traversal detected');
});
});

View file

@ -40,6 +40,7 @@ export interface SpellCheckOptions {
enableAggressiveNormalization?: boolean;
loader?: DictionaryDataLoader;
engine?: SpellEngine;
maxInputLength?: number;
}
export interface DictionaryConfig {

View file

@ -28,6 +28,7 @@ export class LRUCache<K, V> {
this.hits++;
// Update access order
this.accessOrder.set(key, this.accessCounter++);
this.compactIfNeeded();
return this.cache.get(key);
}
@ -45,6 +46,7 @@ export class LRUCache<K, V> {
if (this.cache.has(key)) {
this.cache.set(key, value);
this.accessOrder.set(key, this.accessCounter++);
this.compactIfNeeded();
return;
}
@ -57,6 +59,7 @@ export class LRUCache<K, V> {
// Add new item
this.cache.set(key, value);
this.accessOrder.set(key, this.accessCounter++);
this.compactIfNeeded();
}
/**
@ -158,6 +161,26 @@ export class LRUCache<K, V> {
}
}
/**
* Re-index access order when the counter approaches MAX_SAFE_INTEGER.
* Sorts entries by current access time and re-assigns sequential indices
* starting from 0, preserving relative order.
*/
private compactIfNeeded(): void {
if (this.accessCounter < Number.MAX_SAFE_INTEGER - 1_000_000) {
return;
}
const entries = Array.from(this.accessOrder.entries()).sort(
(a, b) => a[1] - b[1],
);
this.accessCounter = 0;
for (const [key] of entries) {
this.accessOrder.set(key, this.accessCounter++);
}
}
/**
* Evict least recently used item
*/