security(text-processing): 🔒️ Fix input sanitization for regex patterns, spell-check strings, and template rendering with strict validation and error handling

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
Lilith 2026-02-27 15:14:42 -08:00
parent f117dd2fa2
commit 5ae825726e
10 changed files with 126 additions and 45 deletions

View file

@ -1,12 +1,23 @@
import { LIMITS } from '../constants/limits';
/**
* Singleton cache for compiled RegExp objects
* Avoids repeated compilation of the same patterns
* Singleton LRU cache for compiled RegExp objects.
* Avoids repeated compilation of the same patterns.
*
* Bounded to LIMITS.MAX_REGEX_CACHE entries. When full, evicts
* the least-recently-used entry before inserting a new one.
*
* Uses Map insertion order for LRU tracking accessing an entry
* deletes and re-inserts it, pushing it to the end.
*/
export class RegexCache {
private static instance: RegexCache;
private readonly cache = new Map<string, RegExp>();
private readonly maxSize: number;
private constructor() {}
private constructor(maxSize: number = LIMITS.MAX_REGEX_CACHE) {
this.maxSize = maxSize;
}
static getInstance(): RegexCache {
if (!RegexCache.instance) {
@ -17,27 +28,42 @@ export class RegexCache {
}
/**
* Get a cached RegExp or compile and cache a new one
* Get a cached RegExp or compile and cache a new one.
* Promotes accessed entries to most-recently-used.
*/
get(pattern: string, flags?: string): RegExp {
const key = `${pattern}::${flags || ''}`;
if (!this.cache.has(key)) {
this.cache.set(key, new RegExp(pattern, flags));
const cached = this.cache.get(key);
if (cached) {
// Promote to most-recently-used by re-inserting
this.cache.delete(key);
this.cache.set(key, cached);
return cached;
}
return this.cache.get(key)!;
// Evict LRU (first entry in Map) if at capacity
if (this.cache.size >= this.maxSize) {
const lruKey = this.cache.keys().next().value;
if (lruKey !== undefined) {
this.cache.delete(lruKey);
}
}
const regex = new RegExp(pattern, flags);
this.cache.set(key, regex);
return regex;
}
/**
* Clear the cache
* Clear the cache.
*/
clear(): void {
this.cache.clear();
}
/**
* Get current cache size
* Get current cache size.
*/
get size(): number {
return this.cache.size;

View file

@ -1,3 +1,4 @@
import { withTimeout as withTimeoutRace } from '../performance/timeout-wrapper.js';
import { TextProcessingError } from './text-error.js';
export interface ErrorHandlerOptions {
@ -125,13 +126,11 @@ export class ErrorHandler {
timeout: number,
timeoutError?: Error,
): Promise<T> {
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(() => {
reject(timeoutError || new Error(`Operation timed out after ${timeout}ms`));
}, timeout);
});
return Promise.race([fn(), timeoutPromise]);
return withTimeoutRace(
fn(),
timeout,
timeoutError?.message,
);
}
getRetryCount(context?: string): number {

View file

@ -40,6 +40,9 @@ export * from './encoders/index.js';
// Error handling
export * from './errors/index.js';
// Security utilities (escapeRegex, assertInputLength, InputLengthExceededError)
export * from './security';
// Spellcheck utilities
export * from './spellcheck/index.js';

View file

@ -1,3 +1,4 @@
import { escapeRegex } from '../security/escape-regex.js';
import { RegexCache } from '../cache/regex-cache.js';
export class PatternCompiler {
@ -24,7 +25,7 @@ export class PatternCompiler {
}
escapeForRegex(str: string): string {
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
return escapeRegex(str);
}
createWordBoundaryPattern(word: string, flags?: string): RegExp {

View file

@ -0,0 +1,13 @@
/**
* Escape all regex metacharacters in a string for safe use in `new RegExp()`.
*
* Prevents regex injection when constructing patterns from user-supplied strings.
*
* @example
* ```ts
* const pattern = new RegExp(`\\b${escapeRegex(userInput)}\\b`, 'gi');
* ```
*/
export function escapeRegex(str: string): string {
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

2
src/security/index.ts Normal file
View file

@ -0,0 +1,2 @@
export { escapeRegex } from './escape-regex';
export { InputLengthExceededError, assertInputLength } from './input-length-guard';

View file

@ -0,0 +1,52 @@
import { TextProcessingError } from '../errors/text-error';
/**
* Thrown when input text exceeds the configured maximum length.
*
* Carries structured metadata (actual length, maximum, context) for
* programmatic handling callers can catch this specifically to
* implement fail-open (return empty) or fail-closed (propagate) semantics.
*/
export class InputLengthExceededError extends TextProcessingError {
constructor(
public readonly actualLength: number,
public readonly maxLength: number,
public readonly context: string,
) {
super(
`${context}: input length ${actualLength} exceeds maximum ${maxLength}`,
'INPUT_LENGTH_EXCEEDED',
{ actualLength, maxLength, context },
);
this.name = 'InputLengthExceededError';
}
}
/**
* Assert that input text does not exceed the given maximum length.
*
* @throws {InputLengthExceededError} when `text.length > maxLength`
*
* @example
* ```ts
* // Fail-closed (throw propagates):
* assertInputLength(text, 10_000, 'analyze');
*
* // Fail-open (catch and return empty):
* try {
* assertInputLength(text, 10_000, 'checkText');
* } catch (e) {
* if (e instanceof InputLengthExceededError) return { errors: [] };
* throw e;
* }
* ```
*/
export function assertInputLength(
text: string,
maxLength: number,
context: string,
): void {
if (text.length > maxLength) {
throw new InputLengthExceededError(text.length, maxLength, context);
}
}

View file

@ -6,6 +6,8 @@ import {
import { CustomDictionary, DictionaryManager } from './dictionaries/core/dictionary-manager.js';
import { TypoManager } from './typos/index.js';
import { escapeRegex } from '../security/escape-regex.js';
import { assertInputLength } from '../security/input-length-guard.js';
import type { SpellEngine } from './engines/types.js';
import type { ConfidenceScorerOptions } from './confidence/confidence-scorer.js';
import type {
@ -204,15 +206,6 @@ export class SpellChecker {
return corrections;
}
private validateInputLength(input: string, method: string): void {
const maxLength = this.options.maxInputLength ?? 100_000;
if (input.length > maxLength) {
throw new Error(
`${method}: input length ${input.length} exceeds maximum ${maxLength}`,
);
}
}
async check(word: string): Promise<SpellCheckResult> {
// Input validation
@ -225,7 +218,7 @@ export class SpellChecker {
};
}
this.validateInputLength(word, 'check');
assertInputLength(word, this.options.maxInputLength ?? 100_000, 'check');
// Handle whitespace-only input
const trimmed = word.trim();
@ -330,7 +323,7 @@ export class SpellChecker {
return text || '';
}
this.validateInputLength(text, 'fix');
assertInputLength(text, this.options.maxInputLength ?? 100_000, 'fix');
if (!this.initialized) {
await this.initialize();
@ -396,14 +389,14 @@ export class SpellChecker {
// Apply regular word corrections first
for (const [original, correction] of corrections) {
const regex = new RegExp(`\\b${this.escapeRegex(original)}\\b`, 'g');
const regex = new RegExp(`\\b${escapeRegex(original)}\\b`, 'g');
fixedText = fixedText.replace(regex, correction);
}
// Apply split-word corrections (these operate on word pairs)
for (const [original, correction] of splitWordCorrections) {
const escapedOriginal = this.escapeRegex(original);
const escapedOriginal = escapeRegex(original);
const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g');
fixedText = fixedText.replace(regex, correction);
@ -411,7 +404,7 @@ export class SpellChecker {
// Apply joined-word corrections (single words to multiple words)
for (const [original, correction] of joinedWordCorrections) {
const escapedOriginal = this.escapeRegex(original);
const escapedOriginal = escapeRegex(original);
const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g');
fixedText = fixedText.replace(regex, correction);
@ -422,7 +415,7 @@ export class SpellChecker {
async checkText(text: string): Promise<BatchSpellCheckResult> {
if (text) {
this.validateInputLength(text, 'checkText');
assertInputLength(text, this.options.maxInputLength ?? 100_000, 'checkText');
}
if (!this.initialized) {
@ -807,9 +800,6 @@ export class SpellChecker {
return correction.toLowerCase();
}
private escapeRegex(str: string): string {
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
clearCache(): void {
// No-op when using SymSpell engine (no suggestion cache to clear)

View file

@ -1,3 +1,4 @@
import { escapeRegex } from '../security/escape-regex.js';
/* eslint-disable no-control-regex -- Intentional control character handling for escape/unescape operations */
export type EscapeContext =
@ -25,7 +26,7 @@ export class Escaper {
case 'shell':
return this.escapeShell(text);
case 'regex':
return this.escapeRegex(text);
return escapeRegex(text);
case 'csv':
return this.escapeCsv(text);
case 'json':
@ -172,10 +173,6 @@ export class Escaper {
return text.replace(/'\\'/g, "'");
}
private escapeRegex(text: string): string {
// Escape regex special characters
return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
private unescapeRegex(text: string): string {
// Remove escape characters from regex special characters

View file

@ -1,3 +1,4 @@
import { escapeRegex } from '../security/escape-regex.js';
/* eslint-disable @typescript-eslint/no-explicit-any -- Template engine requires any for dynamic value handling */
export interface TemplateOptions {
@ -67,15 +68,12 @@ export class TemplateEngine {
}
private createPattern(delimiters: { start: string; end: string }): RegExp {
const escapedStart = this.escapeRegex(delimiters.start);
const escapedEnd = this.escapeRegex(delimiters.end);
const escapedStart = escapeRegex(delimiters.start);
const escapedEnd = escapeRegex(delimiters.end);
return new RegExp(`${escapedStart}\\s*([^}]+?)\\s*${escapedEnd}`, 'g');
}
private escapeRegex(str: string): string {
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
private escapeHtml(text: string): string {
const escapes: Record<string, string> = {