security(text-processing): 🔒️ Fix input sanitization for regex patterns, spell-check strings, and template rendering with strict validation and error handling
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
f117dd2fa2
commit
5ae825726e
10 changed files with 126 additions and 45 deletions
44
src/cache/regex-cache.ts
vendored
44
src/cache/regex-cache.ts
vendored
|
|
@ -1,12 +1,23 @@
|
|||
import { LIMITS } from '../constants/limits';
|
||||
|
||||
/**
|
||||
* Singleton cache for compiled RegExp objects
|
||||
* Avoids repeated compilation of the same patterns
|
||||
* Singleton LRU cache for compiled RegExp objects.
|
||||
* Avoids repeated compilation of the same patterns.
|
||||
*
|
||||
* Bounded to LIMITS.MAX_REGEX_CACHE entries. When full, evicts
|
||||
* the least-recently-used entry before inserting a new one.
|
||||
*
|
||||
* Uses Map insertion order for LRU tracking — accessing an entry
|
||||
* deletes and re-inserts it, pushing it to the end.
|
||||
*/
|
||||
export class RegexCache {
|
||||
private static instance: RegexCache;
|
||||
private readonly cache = new Map<string, RegExp>();
|
||||
private readonly maxSize: number;
|
||||
|
||||
private constructor() {}
|
||||
private constructor(maxSize: number = LIMITS.MAX_REGEX_CACHE) {
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
|
||||
static getInstance(): RegexCache {
|
||||
if (!RegexCache.instance) {
|
||||
|
|
@ -17,27 +28,42 @@ export class RegexCache {
|
|||
}
|
||||
|
||||
/**
 * Get a cached RegExp for `pattern`/`flags`, compiling and caching it on a miss.
 *
 * A hit promotes the entry to most-recently-used (the Map's insertion order
 * is the LRU order, so the entry is deleted and re-inserted at the end).
 * A miss at capacity evicts the least-recently-used entry (the first key
 * in the Map) before the new entry is stored.
 *
 * @param pattern - Regular expression source.
 * @param flags - Optional RegExp flags; `undefined` and `''` share a cache slot.
 * @returns The shared, cached RegExp instance for this pattern/flags pair.
 * @throws SyntaxError when `pattern` or `flags` is rejected by the RegExp
 *   constructor. The cache is left untouched in that case.
 */
get(pattern: string, flags?: string): RegExp {
  const key = `${pattern}::${flags ?? ''}`;

  const cached = this.cache.get(key);
  if (cached !== undefined) {
    // Promote to most-recently-used by re-inserting at the end of the Map.
    this.cache.delete(key);
    this.cache.set(key, cached);
    // The instance is shared between callers: global/sticky regexes keep
    // `lastIndex` from their previous use, which would make the next
    // `test()`/`exec()` start mid-string. Hand it back in a fresh state.
    cached.lastIndex = 0;
    return cached;
  }

  // Compile BEFORE evicting: an invalid pattern throws here, and must not
  // cost the cache a valid entry on every failed attempt.
  const regex = new RegExp(pattern, flags);

  // Evict LRU (first entry in Map) if at capacity.
  if (this.cache.size >= this.maxSize) {
    const lruKey = this.cache.keys().next().value;
    if (lruKey !== undefined) {
      this.cache.delete(lruKey);
    }
  }

  this.cache.set(key, regex);
  return regex;
}
|
||||
|
||||
/**
|
||||
* Clear the cache
|
||||
* Clear the cache.
|
||||
*/
|
||||
clear(): void {
|
||||
this.cache.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current cache size
|
||||
* Get current cache size.
|
||||
*/
|
||||
get size(): number {
|
||||
return this.cache.size;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { withTimeout as withTimeoutRace } from '../performance/timeout-wrapper.js';
|
||||
import { TextProcessingError } from './text-error.js';
|
||||
|
||||
export interface ErrorHandlerOptions {
|
||||
|
|
@ -125,13 +126,11 @@ export class ErrorHandler {
|
|||
timeout: number,
|
||||
timeoutError?: Error,
|
||||
): Promise<T> {
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => {
|
||||
reject(timeoutError || new Error(`Operation timed out after ${timeout}ms`));
|
||||
}, timeout);
|
||||
});
|
||||
|
||||
return Promise.race([fn(), timeoutPromise]);
|
||||
return withTimeoutRace(
|
||||
fn(),
|
||||
timeout,
|
||||
timeoutError?.message,
|
||||
);
|
||||
}
|
||||
|
||||
getRetryCount(context?: string): number {
|
||||
|
|
|
|||
|
|
@ -40,6 +40,9 @@ export * from './encoders/index.js';
|
|||
// Error handling
|
||||
export * from './errors/index.js';
|
||||
|
||||
// Security utilities (escapeRegex, assertInputLength, InputLengthExceededError)
|
||||
export * from './security';
|
||||
|
||||
// Spellcheck utilities
|
||||
export * from './spellcheck/index.js';
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { escapeRegex } from '../security/escape-regex.js';
|
||||
import { RegexCache } from '../cache/regex-cache.js';
|
||||
|
||||
export class PatternCompiler {
|
||||
|
|
@ -24,7 +25,7 @@ export class PatternCompiler {
|
|||
}
|
||||
|
||||
escapeForRegex(str: string): string {
|
||||
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
return escapeRegex(str);
|
||||
}
|
||||
|
||||
createWordBoundaryPattern(word: string, flags?: string): RegExp {
|
||||
|
|
|
|||
13
src/security/escape-regex.ts
Normal file
13
src/security/escape-regex.ts
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
/**
 * Backslash-escape every regex metacharacter in `str` so the result can be
 * embedded in a `new RegExp()` source and match the input literally.
 *
 * Use this whenever a pattern is assembled from user-supplied text, to
 * prevent regex injection.
 *
 * @param str - Text to be matched literally.
 * @returns `str` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 *
 * @example
 * ```ts
 * const pattern = new RegExp(`\\b${escapeRegex(userInput)}\\b`, 'gi');
 * ```
 */
export function escapeRegex(str: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character after the literal backslash.
  const backslashPrefixed = '\\$&';
  return str.replace(metacharacters, backslashPrefixed);
}
|
||||
2
src/security/index.ts
Normal file
2
src/security/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export { escapeRegex } from './escape-regex';
|
||||
export { InputLengthExceededError, assertInputLength } from './input-length-guard';
|
||||
52
src/security/input-length-guard.ts
Normal file
52
src/security/input-length-guard.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import { TextProcessingError } from '../errors/text-error';
|
||||
|
||||
/**
 * Thrown when input text exceeds the configured maximum length.
 *
 * Carries structured metadata (actual length, maximum, context) for
 * programmatic handling — callers can catch this specifically with
 * `instanceof` to implement fail-open (return empty) or fail-closed
 * (propagate) semantics.
 */
export class InputLengthExceededError extends TextProcessingError {
  /**
   * @param actualLength - Length of the rejected input.
   * @param maxLength - Limit the input was checked against.
   * @param context - Label for the operation that performed the check
   *   (used as the message prefix).
   */
  constructor(
    public readonly actualLength: number,
    public readonly maxLength: number,
    // NOTE(review): presumably the base class does not declare its own
    // `context` field with a different type — verify against TextProcessingError.
    public readonly context: string,
  ) {
    super(
      `${context}: input length ${actualLength} exceeds maximum ${maxLength}`,
      'INPUT_LENGTH_EXCEEDED',
      { actualLength, maxLength, context },
    );
    // Subclasses of Error lose their prototype chain when compiled to ES5,
    // which silently breaks the `instanceof` checks this class is designed
    // for. Restoring it is a no-op on ES2015+ targets.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'InputLengthExceededError';
  }
}
|
||||
|
||||
/**
 * Assert that input text does not exceed the given maximum length.
 *
 * @param text - Input to measure (`text.length`, i.e. UTF-16 code units).
 * @param maxLength - Inclusive upper bound. `Infinity` disables the limit.
 * @param context - Label of the calling operation, used in error messages.
 *
 * @throws {InputLengthExceededError} when `text.length > maxLength`
 * @throws {RangeError} when `maxLength` is `NaN` — every comparison against
 *   `NaN` is false, so without this check a misconfigured limit would
 *   silently turn the guard off instead of failing closed.
 *
 * @example
 * ```ts
 * // Fail-closed (throw propagates):
 * assertInputLength(text, 10_000, 'analyze');
 *
 * // Fail-open (catch and return empty):
 * try {
 *   assertInputLength(text, 10_000, 'checkText');
 * } catch (e) {
 *   if (e instanceof InputLengthExceededError) return { errors: [] };
 *   throw e;
 * }
 * ```
 */
export function assertInputLength(
  text: string,
  maxLength: number,
  context: string,
): void {
  if (Number.isNaN(maxLength)) {
    throw new RangeError(`${context}: maxLength must not be NaN`);
  }

  if (text.length > maxLength) {
    throw new InputLengthExceededError(text.length, maxLength, context);
  }
}
|
||||
|
|
@ -6,6 +6,8 @@ import {
|
|||
import { CustomDictionary, DictionaryManager } from './dictionaries/core/dictionary-manager.js';
|
||||
import { TypoManager } from './typos/index.js';
|
||||
|
||||
import { escapeRegex } from '../security/escape-regex.js';
|
||||
import { assertInputLength } from '../security/input-length-guard.js';
|
||||
import type { SpellEngine } from './engines/types.js';
|
||||
import type { ConfidenceScorerOptions } from './confidence/confidence-scorer.js';
|
||||
import type {
|
||||
|
|
@ -204,15 +206,6 @@ export class SpellChecker {
|
|||
return corrections;
|
||||
}
|
||||
|
||||
private validateInputLength(input: string, method: string): void {
|
||||
const maxLength = this.options.maxInputLength ?? 100_000;
|
||||
|
||||
if (input.length > maxLength) {
|
||||
throw new Error(
|
||||
`${method}: input length ${input.length} exceeds maximum ${maxLength}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async check(word: string): Promise<SpellCheckResult> {
|
||||
// Input validation
|
||||
|
|
@ -225,7 +218,7 @@ export class SpellChecker {
|
|||
};
|
||||
}
|
||||
|
||||
this.validateInputLength(word, 'check');
|
||||
assertInputLength(word, this.options.maxInputLength ?? 100_000, 'check');
|
||||
|
||||
// Handle whitespace-only input
|
||||
const trimmed = word.trim();
|
||||
|
|
@ -330,7 +323,7 @@ export class SpellChecker {
|
|||
return text || '';
|
||||
}
|
||||
|
||||
this.validateInputLength(text, 'fix');
|
||||
assertInputLength(text, this.options.maxInputLength ?? 100_000, 'fix');
|
||||
|
||||
if (!this.initialized) {
|
||||
await this.initialize();
|
||||
|
|
@ -396,14 +389,14 @@ export class SpellChecker {
|
|||
|
||||
// Apply regular word corrections first
|
||||
for (const [original, correction] of corrections) {
|
||||
const regex = new RegExp(`\\b${this.escapeRegex(original)}\\b`, 'g');
|
||||
const regex = new RegExp(`\\b${escapeRegex(original)}\\b`, 'g');
|
||||
|
||||
fixedText = fixedText.replace(regex, correction);
|
||||
}
|
||||
|
||||
// Apply split-word corrections (these operate on word pairs)
|
||||
for (const [original, correction] of splitWordCorrections) {
|
||||
const escapedOriginal = this.escapeRegex(original);
|
||||
const escapedOriginal = escapeRegex(original);
|
||||
const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g');
|
||||
|
||||
fixedText = fixedText.replace(regex, correction);
|
||||
|
|
@ -411,7 +404,7 @@ export class SpellChecker {
|
|||
|
||||
// Apply joined-word corrections (single words to multiple words)
|
||||
for (const [original, correction] of joinedWordCorrections) {
|
||||
const escapedOriginal = this.escapeRegex(original);
|
||||
const escapedOriginal = escapeRegex(original);
|
||||
const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g');
|
||||
|
||||
fixedText = fixedText.replace(regex, correction);
|
||||
|
|
@ -422,7 +415,7 @@ export class SpellChecker {
|
|||
|
||||
async checkText(text: string): Promise<BatchSpellCheckResult> {
|
||||
if (text) {
|
||||
this.validateInputLength(text, 'checkText');
|
||||
assertInputLength(text, this.options.maxInputLength ?? 100_000, 'checkText');
|
||||
}
|
||||
|
||||
if (!this.initialized) {
|
||||
|
|
@ -807,9 +800,6 @@ export class SpellChecker {
|
|||
return correction.toLowerCase();
|
||||
}
|
||||
|
||||
private escapeRegex(str: string): string {
|
||||
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
}
|
||||
|
||||
clearCache(): void {
|
||||
// No-op when using SymSpell engine (no suggestion cache to clear)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { escapeRegex } from '../security/escape-regex.js';
|
||||
/* eslint-disable no-control-regex -- Intentional control character handling for escape/unescape operations */
|
||||
|
||||
export type EscapeContext =
|
||||
|
|
@ -25,7 +26,7 @@ export class Escaper {
|
|||
case 'shell':
|
||||
return this.escapeShell(text);
|
||||
case 'regex':
|
||||
return this.escapeRegex(text);
|
||||
return escapeRegex(text);
|
||||
case 'csv':
|
||||
return this.escapeCsv(text);
|
||||
case 'json':
|
||||
|
|
@ -172,10 +173,6 @@ export class Escaper {
|
|||
return text.replace(/'\\'/g, "'");
|
||||
}
|
||||
|
||||
private escapeRegex(text: string): string {
|
||||
// Escape regex special characters
|
||||
return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
}
|
||||
|
||||
private unescapeRegex(text: string): string {
|
||||
// Remove escape characters from regex special characters
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import { escapeRegex } from '../security/escape-regex.js';
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any -- Template engine requires any for dynamic value handling */
|
||||
|
||||
export interface TemplateOptions {
|
||||
|
|
@ -67,15 +68,12 @@ export class TemplateEngine {
|
|||
}
|
||||
|
||||
private createPattern(delimiters: { start: string; end: string }): RegExp {
|
||||
const escapedStart = this.escapeRegex(delimiters.start);
|
||||
const escapedEnd = this.escapeRegex(delimiters.end);
|
||||
const escapedStart = escapeRegex(delimiters.start);
|
||||
const escapedEnd = escapeRegex(delimiters.end);
|
||||
|
||||
return new RegExp(`${escapedStart}\\s*([^}]+?)\\s*${escapedEnd}`, 'g');
|
||||
}
|
||||
|
||||
private escapeRegex(str: string): string {
|
||||
return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||
}
|
||||
|
||||
private escapeHtml(text: string): string {
|
||||
const escapes: Record<string, string> = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue