diff --git a/src/cache/regex-cache.ts b/src/cache/regex-cache.ts index 7431c05..3063b0c 100644 --- a/src/cache/regex-cache.ts +++ b/src/cache/regex-cache.ts @@ -1,12 +1,23 @@ +import { LIMITS } from '../constants/limits'; + /** - * Singleton cache for compiled RegExp objects - * Avoids repeated compilation of the same patterns + * Singleton LRU cache for compiled RegExp objects. + * Avoids repeated compilation of the same patterns. + * + * Bounded to LIMITS.MAX_REGEX_CACHE entries. When full, evicts + * the least-recently-used entry before inserting a new one. + * + * Uses Map insertion order for LRU tracking — accessing an entry + * deletes and re-inserts it, pushing it to the end. */ export class RegexCache { private static instance: RegexCache; private readonly cache = new Map(); + private readonly maxSize: number; - private constructor() {} + private constructor(maxSize: number = LIMITS.MAX_REGEX_CACHE) { + this.maxSize = maxSize; + } static getInstance(): RegexCache { if (!RegexCache.instance) { @@ -17,27 +28,42 @@ export class RegexCache { } /** - * Get a cached RegExp or compile and cache a new one + * Get a cached RegExp or compile and cache a new one. + * Promotes accessed entries to most-recently-used. */ get(pattern: string, flags?: string): RegExp { const key = `${pattern}::${flags || ''}`; - if (!this.cache.has(key)) { - this.cache.set(key, new RegExp(pattern, flags)); + const cached = this.cache.get(key); + if (cached) { + // Promote to most-recently-used by re-inserting + this.cache.delete(key); + this.cache.set(key, cached); + return cached; } - return this.cache.get(key)!; + // Evict LRU (first entry in Map) if at capacity + if (this.cache.size >= this.maxSize) { + const lruKey = this.cache.keys().next().value; + if (lruKey !== undefined) { + this.cache.delete(lruKey); + } + } + + const regex = new RegExp(pattern, flags); + this.cache.set(key, regex); + return regex; } /** - * Clear the cache + * Clear the cache. */ clear(): void { this.cache.clear(); } /** - * Get current cache size + * Get current cache size. */ get size(): number { return this.cache.size; diff --git a/src/errors/error-handler.ts b/src/errors/error-handler.ts index 6ffc7ed..90f22f8 100644 --- a/src/errors/error-handler.ts +++ b/src/errors/error-handler.ts @@ -1,3 +1,4 @@ +import { withTimeout as withTimeoutRace } from '../performance/timeout-wrapper.js'; import { TextProcessingError } from './text-error.js'; export interface ErrorHandlerOptions { @@ -125,13 +126,11 @@ export class ErrorHandler { timeout: number, timeoutError?: Error, ): Promise { - const timeoutPromise = new Promise((_, reject) => { - setTimeout(() => { - reject(timeoutError || new Error(`Operation timed out after ${timeout}ms`)); - }, timeout); - }); - - return Promise.race([fn(), timeoutPromise]); + return withTimeoutRace( + fn(), + timeout, + timeoutError?.message, + ); } getRetryCount(context?: string): number { diff --git a/src/index.ts b/src/index.ts index cea31e9..5a6684d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -40,6 +40,9 @@ export * from './encoders/index.js'; // Error handling export * from './errors/index.js'; +// Security utilities (escapeRegex, assertInputLength, InputLengthExceededError) +export * from './security'; + // Spellcheck utilities export * from './spellcheck/index.js'; diff --git a/src/patterns/pattern-compiler.ts b/src/patterns/pattern-compiler.ts index a4bbf6d..25a8467 100644 --- a/src/patterns/pattern-compiler.ts +++ b/src/patterns/pattern-compiler.ts @@ -1,3 +1,4 @@ +import { escapeRegex } from '../security/escape-regex.js'; import { RegexCache } from '../cache/regex-cache.js'; export class PatternCompiler { @@ -24,7 +25,7 @@ export class PatternCompiler { } escapeForRegex(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return escapeRegex(str); } createWordBoundaryPattern(word: string, flags?: string): RegExp { diff --git a/src/security/escape-regex.ts b/src/security/escape-regex.ts new file mode 100644 index 0000000..ff83194 --- /dev/null +++ b/src/security/escape-regex.ts @@ -0,0 +1,13 @@ +/** + * Escape all regex metacharacters in a string for safe use in `new RegExp()`. + * + * Prevents regex injection when constructing patterns from user-supplied strings. + * + * @example + * ```ts + * const pattern = new RegExp(`\\b${escapeRegex(userInput)}\\b`, 'gi'); + * ``` + */ +export function escapeRegex(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} diff --git a/src/security/index.ts b/src/security/index.ts new file mode 100644 index 0000000..2e94aa3 --- /dev/null +++ b/src/security/index.ts @@ -0,0 +1,2 @@ +export { escapeRegex } from './escape-regex'; +export { InputLengthExceededError, assertInputLength } from './input-length-guard'; diff --git a/src/security/input-length-guard.ts b/src/security/input-length-guard.ts new file mode 100644 index 0000000..5b4f942 --- /dev/null +++ b/src/security/input-length-guard.ts @@ -0,0 +1,52 @@ +import { TextProcessingError } from '../errors/text-error'; + +/** + * Thrown when input text exceeds the configured maximum length. + * + * Carries structured metadata (actual length, maximum, context) for + * programmatic handling — callers can catch this specifically to + * implement fail-open (return empty) or fail-closed (propagate) semantics. + */ +export class InputLengthExceededError extends TextProcessingError { + constructor( + public readonly actualLength: number, + public readonly maxLength: number, + public readonly context: string, + ) { + super( + `${context}: input length ${actualLength} exceeds maximum ${maxLength}`, + 'INPUT_LENGTH_EXCEEDED', + { actualLength, maxLength, context }, + ); + this.name = 'InputLengthExceededError'; + } +} + +/** + * Assert that input text does not exceed the given maximum length. + * + * @throws {InputLengthExceededError} when `text.length > maxLength` + * + * @example + * ```ts + * // Fail-closed (throw propagates): + * assertInputLength(text, 10_000, 'analyze'); + * + * // Fail-open (catch and return empty): + * try { + * assertInputLength(text, 10_000, 'checkText'); + * } catch (e) { + * if (e instanceof InputLengthExceededError) return { errors: [] }; + * throw e; + * } + * ``` + */ +export function assertInputLength( + text: string, + maxLength: number, + context: string, +): void { + if (text.length > maxLength) { + throw new InputLengthExceededError(text.length, maxLength, context); + } +} diff --git a/src/spellcheck/spell-checker.ts b/src/spellcheck/spell-checker.ts index 4f6fafc..dfbcd3a 100644 --- a/src/spellcheck/spell-checker.ts +++ b/src/spellcheck/spell-checker.ts @@ -6,6 +6,8 @@ import { import { CustomDictionary, DictionaryManager } from './dictionaries/core/dictionary-manager.js'; import { TypoManager } from './typos/index.js'; +import { escapeRegex } from '../security/escape-regex.js'; +import { assertInputLength } from '../security/input-length-guard.js'; import type { SpellEngine } from './engines/types.js'; import type { ConfidenceScorerOptions } from './confidence/confidence-scorer.js'; import type { @@ -204,15 +206,6 @@ export class SpellChecker { return corrections; } - private validateInputLength(input: string, method: string): void { - const maxLength = this.options.maxInputLength ?? 100_000; - - if (input.length > maxLength) { - throw new Error( - `${method}: input length ${input.length} exceeds maximum ${maxLength}`, - ); - } - } async check(word: string): Promise { // Input validation @@ -225,7 +218,7 @@ export class SpellChecker { }; } - this.validateInputLength(word, 'check'); + assertInputLength(word, this.options.maxInputLength ?? 100_000, 'check'); // Handle whitespace-only input const trimmed = word.trim(); @@ -330,7 +323,7 @@ export class SpellChecker { return text || ''; } - this.validateInputLength(text, 'fix'); + assertInputLength(text, this.options.maxInputLength ?? 100_000, 'fix'); if (!this.initialized) { await this.initialize(); @@ -396,14 +389,14 @@ export class SpellChecker { // Apply regular word corrections first for (const [original, correction] of corrections) { - const regex = new RegExp(`\\b${this.escapeRegex(original)}\\b`, 'g'); + const regex = new RegExp(`\\b${escapeRegex(original)}\\b`, 'g'); fixedText = fixedText.replace(regex, correction); } // Apply split-word corrections (these operate on word pairs) for (const [original, correction] of splitWordCorrections) { - const escapedOriginal = this.escapeRegex(original); + const escapedOriginal = escapeRegex(original); const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g'); fixedText = fixedText.replace(regex, correction); @@ -411,7 +404,7 @@ export class SpellChecker { // Apply joined-word corrections (single words to multiple words) for (const [original, correction] of joinedWordCorrections) { - const escapedOriginal = this.escapeRegex(original); + const escapedOriginal = escapeRegex(original); const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g'); fixedText = fixedText.replace(regex, correction); @@ -422,7 +415,7 @@ export class SpellChecker { async checkText(text: string): Promise { if (text) { - this.validateInputLength(text, 'checkText'); + assertInputLength(text, this.options.maxInputLength ?? 100_000, 'checkText'); } if (!this.initialized) { @@ -807,9 +800,6 @@ export class SpellChecker { return correction.toLowerCase(); } - private escapeRegex(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - } clearCache(): void { // No-op when using SymSpell engine (no suggestion cache to clear) diff --git a/src/transformers/escaper.ts b/src/transformers/escaper.ts index 1b3429b..4df8b8a 100644 --- a/src/transformers/escaper.ts +++ b/src/transformers/escaper.ts @@ -1,3 +1,4 @@ +import { escapeRegex } from '../security/escape-regex.js'; /* eslint-disable no-control-regex -- Intentional control character handling for escape/unescape operations */ export type EscapeContext = @@ -25,7 +26,7 @@ export class Escaper { case 'shell': return this.escapeShell(text); case 'regex': - return this.escapeRegex(text); + return escapeRegex(text); case 'csv': return this.escapeCsv(text); case 'json': @@ -172,10 +173,6 @@ export class Escaper { return text.replace(/'\\'/g, "'"); } - private escapeRegex(text: string): string { - // Escape regex special characters - return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - } private unescapeRegex(text: string): string { // Remove escape characters from regex special characters diff --git a/src/transformers/template-engine.ts b/src/transformers/template-engine.ts index 055a857..ba8a68c 100644 --- a/src/transformers/template-engine.ts +++ b/src/transformers/template-engine.ts @@ -1,3 +1,4 @@ +import { escapeRegex } from '../security/escape-regex.js'; /* eslint-disable @typescript-eslint/no-explicit-any -- Template engine requires any for dynamic value handling */ export interface TemplateOptions { @@ -67,15 +68,12 @@ export class TemplateEngine { } private createPattern(delimiters: { start: string; end: string }): RegExp { - const escapedStart = this.escapeRegex(delimiters.start); - const escapedEnd = this.escapeRegex(delimiters.end); + const escapedStart = escapeRegex(delimiters.start); + const escapedEnd = escapeRegex(delimiters.end); return new RegExp(`${escapedStart}\\s*([^}]+?)\\s*${escapedEnd}`, 'g'); } - private escapeRegex(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); - } private escapeHtml(text: string): string { const escapes: Record = {