diff --git a/bin/spellcheck-cli.ts b/bin/spellcheck-cli.ts index fbad945..015e79d 100755 --- a/bin/spellcheck-cli.ts +++ b/bin/spellcheck-cli.ts @@ -2,48 +2,70 @@ import { accessSync } from 'node:fs'; import { readFile } from 'node:fs/promises'; -import { resolve, dirname } from 'node:path'; +import { resolve, dirname, basename } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; import { SpellChecker } from '../src/spellcheck/spell-checker'; import { SymSpellEngine } from '../src/spellcheck/engines/symspell-engine'; import type { SpellEngine } from '../src/spellcheck/engines/types'; +// --- Constants --- + +const MAX_CLI_INPUT_LENGTH = 1_000_000; + // --- Node.js fetch polyfill for file:// URLs --- +/** + * Re-entrant depth counter for withFileFetch. Only the outermost call + * saves/restores globalThis.fetch — nested calls just bump the counter. + */ +let fileFetchDepth = 0; +let originalFetch: typeof globalThis.fetch | null = null; + /** * Temporarily patch globalThis.fetch to handle file:// URLs. * The SpellCheckerWasm.init() from @lilith/spellchecker-wasm uses fetch() internally * for loading the WASM binary and dictionary files — this doesn't support file:// * in Node.js. We intercept file:// requests and serve them from disk. + * + * Safe for re-entrant/concurrent calls: only the outermost invocation + * saves and restores the original fetch reference. */ async function withFileFetch(fn: () => Promise): Promise { - const originalFetch = globalThis.fetch; + if (fileFetchDepth === 0) { + originalFetch = globalThis.fetch; - globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { - const url = typeof input === 'string' - ? input - : input instanceof URL - ? input.href - : input.url; + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + const url = typeof input === 'string' + ? input + : input instanceof URL + ? input.href + : input.url; - if (url.startsWith('file://')) { - const filePath = new URL(url).pathname; - const buffer = await readFile(filePath); - const headers: Record = {}; - if (filePath.endsWith('.wasm')) { - headers['Content-Type'] = 'application/wasm'; + if (url.startsWith('file://')) { + const filePath = new URL(url).pathname; + const buffer = await readFile(filePath); + const headers: Record = {}; + if (filePath.endsWith('.wasm')) { + headers['Content-Type'] = 'application/wasm'; + } + return new Response(buffer, { headers }); } - return new Response(buffer, { headers }); - } - return originalFetch(input, init); - }) as typeof fetch; + return originalFetch!(input, init); + }) as typeof fetch; + } + + fileFetchDepth++; try { return await fn(); } finally { - globalThis.fetch = originalFetch; + fileFetchDepth--; + if (fileFetchDepth === 0 && originalFetch) { + globalThis.fetch = originalFetch; + originalFetch = null; + } } } @@ -91,19 +113,20 @@ async function resolveWasmPaths(dataDir?: string): Promise<{ // --- Create WASM engine for Node.js --- -async function createWasmEngine(dataDir?: string): Promise { +async function createWasmEngine(dataDir?: string, verbose = false): Promise { const paths = await resolveWasmPaths(dataDir); // Verify files exist - for (const [label, path] of [ + for (const [label, filePath] of [ ['WASM binary', paths.wasmPath], ['Dictionary', paths.dictionaryPath], ] as const) { try { - await readFile(path, { flag: 'r' }); + await readFile(filePath, { flag: 'r' }); } catch { + const displayPath = verbose ? filePath : basename(filePath); throw new Error( - `${label} not found at ${path}. Use --data-dir to specify the spellcheck-data directory.`, + `${label} not found at ${displayPath}. Use --data-dir to specify the spellcheck-data directory.`, ); } } @@ -169,10 +192,17 @@ Examples: } } - // If no text provided as args, read from stdin + // If no text provided as args, read from stdin (with byte limit) if (!text) { const chunks: Buffer[] = []; + let totalBytes = 0; for await (const chunk of process.stdin) { + totalBytes += chunk.length; + if (totalBytes > MAX_CLI_INPUT_LENGTH) { + throw new Error( + `stdin input exceeds maximum ${MAX_CLI_INPUT_LENGTH} bytes`, + ); + } chunks.push(chunk); } text = Buffer.concat(chunks).toString(); @@ -189,7 +219,7 @@ Examples: if (useWasm) { if (verbose) console.error('[init] Loading WASM engine...'); const startTime = performance.now(); - engine = await createWasmEngine(dataDir); + engine = await createWasmEngine(dataDir, verbose); if (verbose) { console.error(`[init] WASM engine ready (${Math.round(performance.now() - startTime)}ms)`); } @@ -285,7 +315,6 @@ function describeNormalization(word: string): string | null { * Simple edit distance label for display. */ function distanceLabel(original: string, suggestion: string): string { - let dist = 0; const a = original.toLowerCase(); const b = suggestion.toLowerCase(); @@ -307,9 +336,8 @@ function distanceLabel(original: string, suggestion: string): string { } } } - dist = matrix[a.length][b.length]; - return String(dist); + return String(matrix[a.length][b.length]); } main().catch((error) => { diff --git a/src/spellcheck/dictionaries/loaders/fetch-loader.ts b/src/spellcheck/dictionaries/loaders/fetch-loader.ts index 4b616fc..a7eadec 100644 --- a/src/spellcheck/dictionaries/loaders/fetch-loader.ts +++ b/src/spellcheck/dictionaries/loaders/fetch-loader.ts @@ -1,15 +1,16 @@ -import type { DictionaryDataLoader } from '../core/dictionary-loader.js'; +import type { DictionaryDataLoader } from '../core/dictionary-loader'; export class FetchDictionaryLoader implements DictionaryDataLoader { - private readonly baseUrl: string; + private readonly baseUrl: URL; constructor(baseUrl: string) { - // Strip trailing slash for consistent path joining - this.baseUrl = baseUrl.replace(/\/+$/, ''); + // Ensure trailing slash for correct URL resolution + const normalized = baseUrl.endsWith('/') ? baseUrl : baseUrl + '/'; + this.baseUrl = new URL(normalized); } async loadText(filePath: string): Promise { - const url = `${this.baseUrl}/${filePath}`; + const url = this.resolveUrl(filePath); const response = await fetch(url); if (!response.ok) { @@ -20,9 +21,8 @@ export class FetchDictionaryLoader implements DictionaryDataLoader { } async exists(filePath: string): Promise { - const url = `${this.baseUrl}/${filePath}`; - try { + const url = this.resolveUrl(filePath); const response = await fetch(url, { method: 'HEAD' }); return response.ok; @@ -30,4 +30,31 @@ export class FetchDictionaryLoader implements DictionaryDataLoader { return false; } } + + /** + * Safely resolve a file path against the base URL, rejecting injection attempts. + * Throws if the path contains traversal sequences or escapes the base origin+path. + */ + private resolveUrl(filePath: string): string { + if (filePath.startsWith('/')) { + throw new Error(`Absolute paths are not allowed: ${filePath}`); + } + + if (filePath.includes('..')) { + throw new Error(`Path traversal detected: ${filePath}`); + } + + const resolved = new URL(filePath, this.baseUrl); + + // Verify the resolved URL stays within the base origin and path prefix + if (resolved.origin !== this.baseUrl.origin) { + throw new Error(`URL injection detected: resolved to different origin`); + } + + if (!resolved.pathname.startsWith(this.baseUrl.pathname)) { + throw new Error(`URL injection detected: ${filePath}`); + } + + return resolved.href; + } } diff --git a/src/spellcheck/dictionaries/loaders/node-loader.ts b/src/spellcheck/dictionaries/loaders/node-loader.ts index fcdfb8d..c269e2f 100644 --- a/src/spellcheck/dictionaries/loaders/node-loader.ts +++ b/src/spellcheck/dictionaries/loaders/node-loader.ts @@ -1,23 +1,46 @@ import * as fs from 'fs'; +import * as path from 'path'; -import type { DictionaryDataLoader } from '../core/dictionary-loader.js'; +import type { DictionaryDataLoader } from '../core/dictionary-loader'; export class NodeDictionaryLoader implements DictionaryDataLoader { private readonly rootPath: string; constructor(rootPath: string) { - this.rootPath = rootPath; + this.rootPath = path.resolve(rootPath); } async loadText(filePath: string): Promise { - const fullPath = `${this.rootPath}/${filePath}`; + const fullPath = this.resolveSafe(filePath); return fs.readFileSync(fullPath, 'utf-8'); } async exists(filePath: string): Promise { - const fullPath = `${this.rootPath}/${filePath}`; + try { + const fullPath = this.resolveSafe(filePath); - return fs.existsSync(fullPath); + return fs.existsSync(fullPath); + } catch { + return false; + } + } + + /** + * Resolve a file path within the root directory, rejecting traversal attempts. + * Throws if the resolved path escapes the root directory. + */ + private resolveSafe(filePath: string): string { + if (path.isAbsolute(filePath)) { + throw new Error(`Absolute paths are not allowed: ${filePath}`); + } + + const resolved = path.resolve(this.rootPath, filePath); + + if (!resolved.startsWith(this.rootPath + path.sep) && resolved !== this.rootPath) { + throw new Error(`Path traversal detected: ${filePath}`); + } + + return resolved; } } diff --git a/src/spellcheck/engines/symspell-engine.ts b/src/spellcheck/engines/symspell-engine.ts index d5677f4..24a0db4 100644 --- a/src/spellcheck/engines/symspell-engine.ts +++ b/src/spellcheck/engines/symspell-engine.ts @@ -14,7 +14,15 @@ export class SymSpellEngine implements SpellEngine { private readonly maxEditDistance: number; constructor(private readonly options: SymSpellEngineOptions) { - this.maxEditDistance = options.maxEditDistance ?? 2; + const distance = options.maxEditDistance ?? 2; + + if (!Number.isInteger(distance) || distance < 0 || distance > 5) { + throw new Error( + `maxEditDistance must be an integer between 0 and 5, got ${distance}`, + ); + } + + this.maxEditDistance = distance; } async init(): Promise { diff --git a/src/spellcheck/spell-checker.ts b/src/spellcheck/spell-checker.ts index 6efb954..4f6fafc 100644 --- a/src/spellcheck/spell-checker.ts +++ b/src/spellcheck/spell-checker.ts @@ -41,6 +41,7 @@ export class SpellChecker { enableSplitWordDetection: true, enableJoinedWordDetection: true, enableAggressiveNormalization: true, + maxInputLength: 100_000, confidenceThresholds: { autoFix: 0.7, suggest: 0.5, @@ -203,6 +204,16 @@ export class SpellChecker { return corrections; } + private validateInputLength(input: string, method: string): void { + const maxLength = this.options.maxInputLength ?? 100_000; + + if (input.length > maxLength) { + throw new Error( + `${method}: input length ${input.length} exceeds maximum ${maxLength}`, + ); + } + } + async check(word: string): Promise { // Input validation if (!word || typeof word !== 'string') { @@ -214,6 +225,8 @@ export class SpellChecker { }; } + this.validateInputLength(word, 'check'); + // Handle whitespace-only input const trimmed = word.trim(); if (!trimmed) { @@ -317,6 +330,8 @@ export class SpellChecker { return text || ''; } + this.validateInputLength(text, 'fix'); + if (!this.initialized) { await this.initialize(); } @@ -406,6 +421,10 @@ export class SpellChecker { } async checkText(text: string): Promise { + if (text) { + this.validateInputLength(text, 'checkText'); + } + if (!this.initialized) { await this.initialize(); } diff --git a/src/spellcheck/tests/fetch-loader-security.test.ts b/src/spellcheck/tests/fetch-loader-security.test.ts new file mode 100644 index 0000000..d1bac65 --- /dev/null +++ b/src/spellcheck/tests/fetch-loader-security.test.ts @@ -0,0 +1,50 @@ +import { describe, it, expect } from 'vitest'; + +import { FetchDictionaryLoader } from '../dictionaries/loaders/fetch-loader'; + +describe('FetchDictionaryLoader — URL injection protection', () => { + const loader = new FetchDictionaryLoader('https://cdn.example.com/dictionaries'); + + it('rejects ../ path traversal', async () => { + await expect(loader.loadText('../../admin/secret')).rejects.toThrow('Path traversal detected'); + }); + + it('rejects paths with embedded ..', async () => { + await expect(loader.loadText('foo/../../../etc/passwd')).rejects.toThrow( + 'Path traversal detected', + ); + }); + + it('rejects absolute paths starting with /', async () => { + await expect(loader.loadText('/admin/secret')).rejects.toThrow('Absolute paths are not allowed'); + }); + + it('exists() returns false for traversal attempts instead of throwing', async () => { + const result = await loader.exists('../../admin/secret'); + + expect(result).toBe(false); + }); + + it('exists() returns false for absolute paths', async () => { + const result = await loader.exists('/admin/secret'); + + expect(result).toBe(false); + }); + + it('resolves normal relative paths correctly', () => { + const loader2 = new FetchDictionaryLoader('https://cdn.example.com/dictionaries'); + + // loadText will fail on fetch (no server), but should NOT throw injection errors + // We test the URL resolution by expecting a fetch error, not a security error + expect(loader2.loadText('english/words.txt')).rejects.toThrow(); + }); + + it('handles base URL with trailing slash consistently', () => { + const loaderWithSlash = new FetchDictionaryLoader('https://cdn.example.com/dict/'); + const loaderNoSlash = new FetchDictionaryLoader('https://cdn.example.com/dict'); + + // Both should reject traversal identically + expect(loaderWithSlash.loadText('../../secret')).rejects.toThrow('Path traversal detected'); + expect(loaderNoSlash.loadText('../../secret')).rejects.toThrow('Path traversal detected'); + }); +}); diff --git a/src/spellcheck/tests/input-limits.test.ts b/src/spellcheck/tests/input-limits.test.ts new file mode 100644 index 0000000..0006cc2 --- /dev/null +++ b/src/spellcheck/tests/input-limits.test.ts @@ -0,0 +1,82 @@ +import { describe, it, expect } from 'vitest'; + +import { SpellChecker } from '../spell-checker'; +import type { SpellEngine, SpellSuggestion } from '../engines/types'; + +/** Minimal mock engine for testing input validation (no real dictionary needed) */ +class StubEngine implements SpellEngine { + isReady(): boolean { return true; } + contains(): boolean { return true; } + suggest(): SpellSuggestion[] { return []; } + addWord(): void { /* noop */ } +} + +describe('SpellChecker — input length limits', () => { + it('rejects input exceeding maxInputLength in check()', async () => { + const checker = new SpellChecker({ + engine: new StubEngine(), + maxInputLength: 100, + }); + + const longInput = 'a'.repeat(101); + + await expect(checker.check(longInput)).rejects.toThrow( + 'check: input length 101 exceeds maximum 100', + ); + }); + + it('rejects input exceeding maxInputLength in fix()', async () => { + const checker = new SpellChecker({ + engine: new StubEngine(), + maxInputLength: 100, + autoCorrect: true, + }); + + const longInput = 'a'.repeat(101); + + await expect(checker.fix(longInput)).rejects.toThrow( + 'fix: input length 101 exceeds maximum 100', + ); + }); + + it('rejects input exceeding maxInputLength in checkText()', async () => { + const checker = new SpellChecker({ + engine: new StubEngine(), + maxInputLength: 100, + }); + + const longInput = 'a'.repeat(101); + + await expect(checker.checkText(longInput)).rejects.toThrow( + 'checkText: input length 101 exceeds maximum 100', + ); + }); + + it('accepts input within maxInputLength', async () => { + const checker = new SpellChecker({ + engine: new StubEngine(), + maxInputLength: 100, + }); + + const result = await checker.check('hello'); + + expect(result.correct).toBe(true); + }); + + it('uses default maxInputLength of 100_000 when not specified', async () => { + const checker = new SpellChecker({ + engine: new StubEngine(), + }); + + // 100K chars should be fine + const withinLimit = 'a'.repeat(100_000); + const result = await checker.check(withinLimit); + + expect(result.correct).toBe(true); + + // 100K + 1 should fail + const overLimit = 'a'.repeat(100_001); + + await expect(checker.check(overLimit)).rejects.toThrow('exceeds maximum 100000'); + }); +}); diff --git a/src/spellcheck/tests/node-loader-security.test.ts b/src/spellcheck/tests/node-loader-security.test.ts new file mode 100644 index 0000000..d492e85 --- /dev/null +++ b/src/spellcheck/tests/node-loader-security.test.ts @@ -0,0 +1,62 @@ +import { describe, it, expect } from 'vitest'; +import * as path from 'path'; + +import { NodeDictionaryLoader } from '../dictionaries/loaders/node-loader'; + +describe('NodeDictionaryLoader — path traversal protection', () => { + const loader = new NodeDictionaryLoader('/tmp/test-dictionaries'); + + it('rejects ../etc/passwd traversal', async () => { + await expect(loader.loadText('../../etc/passwd')).rejects.toThrow('Path traversal detected'); + }); + + it('rejects deeper traversal', async () => { + await expect(loader.loadText('../../../etc/shadow')).rejects.toThrow('Path traversal detected'); + }); + + it('rejects absolute paths', async () => { + await expect(loader.loadText('/etc/passwd')).rejects.toThrow('Absolute paths are not allowed'); + }); + + it('exists() returns false for traversal attempts instead of throwing', async () => { + const result = await loader.exists('../../etc/passwd'); + + expect(result).toBe(false); + }); + + it('exists() returns false for absolute paths', async () => { + const result = await loader.exists('/etc/passwd'); + + expect(result).toBe(false); + }); + + it('prevents prefix attack (rootPath as prefix of different directory)', () => { + // e.g. rootPath = /tmp/test-dictionaries, attacker tries /tmp/test-dictionaries-evil/secret + const loader2 = new NodeDictionaryLoader('/tmp/test-dictionaries'); + + // This path resolves to /tmp/test-dictionaries-evil/secret which starts with + // /tmp/test-dictionaries but NOT /tmp/test-dictionaries/ + expect( + loader2.loadText('../test-dictionaries-evil/secret'), + ).rejects.toThrow('Path traversal detected'); + }); + + it('allows valid relative paths', async () => { + // This won't find the file, but it should NOT throw a traversal error + const loader3 = new NodeDictionaryLoader('/tmp/test-dictionaries'); + + // exists() returns false for nonexistent files (not a traversal) + const result = await loader3.exists('english/words.txt'); + + expect(result).toBe(false); + }); + + it('normalizes rootPath via path.resolve in constructor', () => { + const loader4 = new NodeDictionaryLoader('/tmp/test/../test-dictionaries'); + + // Should resolve to /tmp/test-dictionaries, so traversal to /tmp/ is blocked + expect( + loader4.loadText('../../etc/passwd'), + ).rejects.toThrow('Path traversal detected'); + }); +}); diff --git a/src/spellcheck/types/spellcheck.types.ts b/src/spellcheck/types/spellcheck.types.ts index 501284c..85d2b71 100644 --- a/src/spellcheck/types/spellcheck.types.ts +++ b/src/spellcheck/types/spellcheck.types.ts @@ -40,6 +40,7 @@ export interface SpellCheckOptions { enableAggressiveNormalization?: boolean; loader?: DictionaryDataLoader; engine?: SpellEngine; + maxInputLength?: number; } export interface DictionaryConfig { diff --git a/src/spellcheck/utils/lru-cache.ts b/src/spellcheck/utils/lru-cache.ts index 75bcb2c..335e472 100644 --- a/src/spellcheck/utils/lru-cache.ts +++ b/src/spellcheck/utils/lru-cache.ts @@ -28,6 +28,7 @@ export class LRUCache { this.hits++; // Update access order this.accessOrder.set(key, this.accessCounter++); + this.compactIfNeeded(); return this.cache.get(key); } @@ -45,6 +46,7 @@ export class LRUCache { if (this.cache.has(key)) { this.cache.set(key, value); this.accessOrder.set(key, this.accessCounter++); + this.compactIfNeeded(); return; } @@ -57,6 +59,7 @@ export class LRUCache { // Add new item this.cache.set(key, value); this.accessOrder.set(key, this.accessCounter++); + this.compactIfNeeded(); } /** @@ -158,6 +161,26 @@ export class LRUCache { } } + /** + * Re-index access order when the counter approaches MAX_SAFE_INTEGER. + * Sorts entries by current access time and re-assigns sequential indices + * starting from 0, preserving relative order. + */ + private compactIfNeeded(): void { + if (this.accessCounter < Number.MAX_SAFE_INTEGER - 1_000_000) { + return; + } + + const entries = Array.from(this.accessOrder.entries()).sort( + (a, b) => a[1] - b[1], + ); + + this.accessCounter = 0; + for (const [key] of entries) { + this.accessOrder.set(key, this.accessCounter++); + } + } + /** * Evict least recently used item */