docs(docs): 📝 Revise README.md to improve onboarding clarity and remove TEST_PLAN.md
Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
This commit is contained in:
parent
c832a606d3
commit
098b7742ad
2 changed files with 284 additions and 477 deletions
438
README.md
438
README.md
|
|
@ -2,56 +2,85 @@
|
|||
|
||||
High-performance text processing utilities for deterministic text manipulation.
|
||||
|
||||
## Features
|
||||
|
||||
- **Extractors**: URL, path, code block extraction
|
||||
- **Sanitizers**: ANSI stripping, HTML cleaning
|
||||
- **Splitters**: Sentence and chunk splitting
|
||||
- **Validators**: Email, JSON, URL validation
|
||||
- **Transformers**: Case conversion, truncation, redaction, templates
|
||||
- **Spellcheck**: Full spell checking with auto-correction
|
||||
- **Performance**: Timeout wrappers, complexity checking
|
||||
- **Caching**: Regex caching for repeated patterns
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pnpm add @lilith/text-processing-utils
|
||||
```
|
||||
|
||||
## Quick Start
|
||||
## Modules
|
||||
|
||||
| Module | Classes | Purpose |
|
||||
|--------|---------|---------|
|
||||
| [Spellcheck](#spellcheck) | `SpellChecker`, `SymSpellEngine`, `ConfidenceScorer` | Engine-based spell checking with confidence scoring |
|
||||
| [Extractors](#extractors) | `UrlExtractor`, `PathExtractor`, `CodeBlockExtractor` | Extract structured data from text |
|
||||
| [Sanitizers](#sanitizers) | `AnsiStripper`, `HtmlStripper`, `MarkdownStripper`, `ControlCharStripper` | Strip formatting and control characters |
|
||||
| [Splitters](#splitters) | `SentenceSplitter`, `ChunkSplitter` | Split text into sentences or sized chunks |
|
||||
| [Validators](#validators) | `EmailValidator`, `JSONValidator` | Validate text formats |
|
||||
| [Transformers](#transformers) | `CaseTransformer`, `Redactor`, `TemplateEngine`, `Truncator` | Transform, redact, and template text |
|
||||
| [Normalizers](#normalizers) | `UnicodeNormalizer`, `WhitespaceNormalizer`, `TerminalNormalizer` | Normalize text representations |
|
||||
| [Comparators](#comparators) | `DiffGenerator`, `FuzzyMatcher`, `SimilarityScorer` | Compare and diff text |
|
||||
| [Encoders](#encoders) | `Base64Encoder`, `StreamingEncoder`, `TerminalEncoder` | Encode text for transport |
|
||||
| [Metrics](#metrics) | `TextAnalyzer`, `ReadabilityScorer`, `CodeMetricsAnalyzer` | Analyze text statistics and readability |
|
||||
| [Performance](#performance) | `withTimeout`, `BatchProcessor`, `StreamProcessor`, `Throttler`, `Debouncer` | Async control flow utilities |
|
||||
| [Errors](#errors) | `ErrorHandler`, `TextProcessingError` | Structured error handling |
|
||||
| [Cache](#cache) | `RegexCache` | Compiled regex caching |
|
||||
|
||||
---
|
||||
|
||||
## Spellcheck
|
||||
|
||||
Engine-first spell checking with multi-factor confidence scoring, bigram context rescoring, and pattern-based split/joined word detection.
|
||||
|
||||
Full API reference: **[docs/spellcheck.md](docs/spellcheck.md)**
|
||||
|
||||
```typescript
|
||||
import {
|
||||
UrlExtractor,
|
||||
SentenceSplitter,
|
||||
EmailValidator,
|
||||
SpellChecker,
|
||||
} from '@lilith/text-processing-utils';
|
||||
import { SpellChecker, SymSpellEngine } from '@lilith/text-processing-utils';
|
||||
|
||||
// Extract URLs
|
||||
const extractor = new UrlExtractor();
|
||||
const urls = extractor.extract('Visit https://example.com for more');
|
||||
const engine = new SymSpellEngine({
|
||||
wasmUrl: '/spellcheck-data/spellchecker-wasm.wasm',
|
||||
dictionaryUrl: '/spellcheck-data/frequency-dictionary.txt',
|
||||
bigramUrl: '/spellcheck-data/frequency-bigrams.txt',
|
||||
});
|
||||
await engine.init();
|
||||
|
||||
// Split sentences
|
||||
const splitter = new SentenceSplitter();
|
||||
const sentences = splitter.split('Hello world. How are you?');
|
||||
const checker = new SpellChecker({ engine, autoCorrect: true });
|
||||
await checker.initialize();
|
||||
|
||||
// Validate email
|
||||
const validator = new EmailValidator();
|
||||
const isValid = validator.validate('user@example.com');
|
||||
// Single word
|
||||
const result = await checker.check('recieve');
|
||||
// { word: 'recieve', correct: false, suggestions: ['receive', ...], confidence: 0.87 }
|
||||
|
||||
// Spellcheck
|
||||
const checker = new SpellChecker();
|
||||
const result = checker.check('teh quick brwon fox');
|
||||
// Auto-correct (only high-confidence fixes applied)
|
||||
const fixed = await checker.fix('teh quikc brwon fox');
|
||||
// 'the quick brown fox'
|
||||
|
||||
// Full diagnostic with positions, severities, split/joined word detection
|
||||
const report = await checker.checkText('teh quikc fox ist he best');
|
||||
// { errors: [...], stats: { totalWords: 6, misspelledWords: 2, ... } }
|
||||
```
|
||||
|
||||
### Feature System
|
||||
|
||||
9 pluggable detectors for grammar, capitalization, punctuation, homophones, redundancy, and more:
|
||||
|
||||
```typescript
|
||||
import { FeatureManager, GrammarPatternFeature, CapitalizationFeature } from '@lilith/text-processing-utils';
|
||||
|
||||
const manager = new FeatureManager();
|
||||
manager.addFeature(new GrammarPatternFeature());
|
||||
manager.addFeature(new CapitalizationFeature());
|
||||
await manager.initializeAll();
|
||||
|
||||
const results = await manager.checkText('i went too the store.');
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Extractors
|
||||
|
||||
### UrlExtractor
|
||||
|
||||
Extract URLs from text:
|
||||
|
||||
```typescript
|
||||
import { UrlExtractor } from '@lilith/text-processing-utils';
|
||||
|
||||
|
|
@ -62,8 +91,6 @@ const urls = extractor.extract('Check out https://example.com and http://test.or
|
|||
|
||||
### PathExtractor
|
||||
|
||||
Extract file paths:
|
||||
|
||||
```typescript
|
||||
import { PathExtractor } from '@lilith/text-processing-utils';
|
||||
|
||||
|
|
@ -73,8 +100,6 @@ const paths = extractor.extract('Open /home/user/file.txt or C:\\Users\\file.txt
|
|||
|
||||
### CodeBlockExtractor
|
||||
|
||||
Extract code blocks from markdown:
|
||||
|
||||
```typescript
|
||||
import { CodeBlockExtractor } from '@lilith/text-processing-utils';
|
||||
|
||||
|
|
@ -83,12 +108,12 @@ const blocks = extractor.extract(markdown);
|
|||
// [{ language: 'typescript', code: '...' }]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Sanitizers
|
||||
|
||||
### AnsiStripper
|
||||
|
||||
Remove ANSI escape codes:
|
||||
|
||||
```typescript
|
||||
import { AnsiStripper } from '@lilith/text-processing-utils';
|
||||
|
||||
|
|
@ -97,36 +122,74 @@ const clean = stripper.strip('\x1b[31mRed text\x1b[0m');
|
|||
// 'Red text'
|
||||
```
|
||||
|
||||
### HtmlStripper
|
||||
|
||||
```typescript
|
||||
import { HtmlStripper } from '@lilith/text-processing-utils';
|
||||
|
||||
const stripper = new HtmlStripper();
|
||||
const clean = stripper.strip('<p>Hello <b>world</b></p>');
|
||||
// 'Hello world'
|
||||
```
|
||||
|
||||
### MarkdownStripper
|
||||
|
||||
```typescript
|
||||
import { MarkdownStripper } from '@lilith/text-processing-utils';
|
||||
|
||||
const stripper = new MarkdownStripper();
|
||||
const clean = stripper.strip('# Hello **world**');
|
||||
// 'Hello world'
|
||||
```
|
||||
|
||||
### ControlCharStripper
|
||||
|
||||
```typescript
|
||||
import { ControlCharStripper } from '@lilith/text-processing-utils';
|
||||
|
||||
const stripper = new ControlCharStripper();
|
||||
const clean = stripper.strip('Hello\x00World\x01');
|
||||
// 'HelloWorld'
|
||||
```
|
||||
|
||||
### SanitizerFactory
|
||||
|
||||
```typescript
|
||||
import { SanitizerFactory } from '@lilith/text-processing-utils';
|
||||
|
||||
const sanitizer = SanitizerFactory.create('html');
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Splitters
|
||||
|
||||
### SentenceSplitter
|
||||
|
||||
Split text into sentences:
|
||||
|
||||
```typescript
|
||||
import { SentenceSplitter } from '@lilith/text-processing-utils';
|
||||
|
||||
const splitter = new SentenceSplitter();
|
||||
const sentences = splitter.split('Hello world. How are you? I am fine.');
|
||||
// ['Hello world.', 'How are you?', 'I am fine.']
|
||||
const sentences = splitter.split('Hello world. How are you? Fine.');
|
||||
// ['Hello world.', 'How are you?', 'Fine.']
|
||||
```
|
||||
|
||||
### ChunkSplitter
|
||||
|
||||
Split text into chunks with configurable size:
|
||||
|
||||
```typescript
|
||||
import { ChunkSplitter } from '@lilith/text-processing-utils';
|
||||
|
||||
const splitter = new ChunkSplitter({
|
||||
maxChunkSize: 1000,
|
||||
overlap: 100,
|
||||
splitOn: 'sentence', // 'character' | 'word' | 'sentence' | 'paragraph'
|
||||
splitOn: 'sentence',
|
||||
});
|
||||
|
||||
const chunks = splitter.split(longText);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Validators
|
||||
|
||||
### EmailValidator
|
||||
|
|
@ -148,44 +211,27 @@ const validator = new JSONValidator();
|
|||
validator.validate('{"key": "value"}'); // true
|
||||
validator.validate('{invalid}'); // false
|
||||
|
||||
// Get parsed JSON or null
|
||||
const json = validator.parse(text);
|
||||
const json = validator.parse(text); // parsed object or null
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Transformers
|
||||
|
||||
### CaseTransformer
|
||||
|
||||
Convert text case:
|
||||
|
||||
```typescript
|
||||
import { CaseTransformer } from '@lilith/text-processing-utils';
|
||||
|
||||
const transformer = new CaseTransformer();
|
||||
transformer.toUpperCase('hello'); // 'HELLO'
|
||||
transformer.toLowerCase('HELLO'); // 'hello'
|
||||
transformer.toTitleCase('hello world'); // 'Hello World'
|
||||
transformer.toCamelCase('hello world'); // 'helloWorld'
|
||||
transformer.toSnakeCase('helloWorld'); // 'hello_world'
|
||||
transformer.toKebabCase('helloWorld'); // 'hello-world'
|
||||
```
|
||||
|
||||
### Truncator
|
||||
|
||||
Truncate text with ellipsis:
|
||||
|
||||
```typescript
|
||||
import { Truncator } from '@lilith/text-processing-utils';
|
||||
|
||||
const truncator = new Truncator();
|
||||
truncator.truncate('Hello world', 8); // 'Hello...'
|
||||
truncator.truncate('Hello world', 8, { suffix: '…' }); // 'Hello w…'
|
||||
transformer.toTitleCase('hello world'); // 'Hello World'
|
||||
transformer.toCamelCase('hello world'); // 'helloWorld'
|
||||
transformer.toSnakeCase('helloWorld'); // 'hello_world'
|
||||
transformer.toKebabCase('helloWorld'); // 'hello-world'
|
||||
```
|
||||
|
||||
### Redactor
|
||||
|
||||
Redact sensitive information:
|
||||
|
||||
```typescript
|
||||
import { Redactor } from '@lilith/text-processing-utils';
|
||||
|
||||
|
|
@ -203,8 +249,6 @@ const clean = redactor.redact('Email me at user@example.com');
|
|||
|
||||
### TemplateEngine
|
||||
|
||||
Simple template interpolation:
|
||||
|
||||
```typescript
|
||||
import { TemplateEngine } from '@lilith/text-processing-utils';
|
||||
|
||||
|
|
@ -213,133 +257,219 @@ const result = engine.render('Hello {{name}}!', { name: 'World' });
|
|||
// 'Hello World!'
|
||||
```
|
||||
|
||||
## Spellcheck
|
||||
|
||||
Engine-first spell checking with multi-factor confidence scoring, bigram context rescoring, and pattern-based split/joined word detection. Full API reference: **[docs/spellcheck.md](docs/spellcheck.md)**
|
||||
|
||||
### With SymSpell Engine (recommended)
|
||||
### Truncator
|
||||
|
||||
```typescript
|
||||
import { SpellChecker, SymSpellEngine } from '@lilith/text-processing-utils';
|
||||
import { Truncator } from '@lilith/text-processing-utils';
|
||||
|
||||
const engine = new SymSpellEngine({
|
||||
wasmUrl: '/spellcheck-data/spellchecker-wasm.wasm',
|
||||
dictionaryUrl: '/spellcheck-data/frequency-dictionary.txt',
|
||||
bigramUrl: '/spellcheck-data/frequency-bigrams.txt',
|
||||
});
|
||||
await engine.init();
|
||||
|
||||
const checker = new SpellChecker({ engine, autoCorrect: true });
|
||||
await checker.initialize();
|
||||
|
||||
// Single word check
|
||||
const result = await checker.check('recieve');
|
||||
// { word: 'recieve', correct: false, suggestions: ['receive', ...], confidence: 0.87 }
|
||||
|
||||
// Auto-correct sentence (applies only high-confidence fixes)
|
||||
const fixed = await checker.fix('teh quikc brwon fox');
|
||||
// 'the quick brown fox'
|
||||
|
||||
// Full diagnostic with positions and severities
|
||||
const report = await checker.checkText('teh quikc fox ist he best');
|
||||
// { errors: [{ type: 'misspelling', word: 'teh', ... }, { type: 'split-word', word: 'ist he', ... }], stats: { ... } }
|
||||
const truncator = new Truncator();
|
||||
truncator.truncate('Hello world', 8); // 'Hello...'
|
||||
```
|
||||
|
||||
### Feature System
|
||||
---
|
||||
|
||||
14 pluggable detectors for grammar, capitalization, punctuation, homophones, redundancy, and more:
|
||||
## Normalizers
|
||||
|
||||
### UnicodeNormalizer
|
||||
|
||||
```typescript
|
||||
import { FeatureManager, GrammarPatternFeature, CapitalizationFeature } from '@lilith/text-processing-utils';
|
||||
import { UnicodeNormalizer } from '@lilith/text-processing-utils';
|
||||
|
||||
const manager = new FeatureManager();
|
||||
manager.addFeature(new GrammarPatternFeature());
|
||||
manager.addFeature(new CapitalizationFeature());
|
||||
await manager.initializeAll();
|
||||
|
||||
const results = await manager.checkText('i went too the store.');
|
||||
const normalizer = new UnicodeNormalizer();
|
||||
const normalized = normalizer.normalize('caf\u00e9'); // NFC normalization
|
||||
```
|
||||
|
||||
### WhitespaceNormalizer
|
||||
|
||||
```typescript
|
||||
import { WhitespaceNormalizer } from '@lilith/text-processing-utils';
|
||||
|
||||
const normalizer = new WhitespaceNormalizer();
|
||||
const clean = normalizer.normalize('hello world\t\n');
|
||||
```
|
||||
|
||||
### TerminalNormalizer
|
||||
|
||||
```typescript
|
||||
import { TerminalNormalizer } from '@lilith/text-processing-utils';
|
||||
|
||||
const normalizer = new TerminalNormalizer();
|
||||
const clean = normalizer.normalize(terminalOutput);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Comparators
|
||||
|
||||
### FuzzyMatcher
|
||||
|
||||
```typescript
|
||||
import { FuzzyMatcher } from '@lilith/text-processing-utils';
|
||||
|
||||
const matcher = new FuzzyMatcher();
|
||||
const matches = matcher.match('hello', ['helo', 'world', 'help']);
|
||||
```
|
||||
|
||||
### SimilarityScorer
|
||||
|
||||
```typescript
|
||||
import { SimilarityScorer } from '@lilith/text-processing-utils';
|
||||
|
||||
const scorer = new SimilarityScorer();
|
||||
const score = scorer.score('hello', 'helo'); // 0.0 - 1.0
|
||||
```
|
||||
|
||||
### DiffGenerator
|
||||
|
||||
```typescript
|
||||
import { DiffGenerator } from '@lilith/text-processing-utils';
|
||||
|
||||
const diff = new DiffGenerator();
|
||||
const changes = diff.generate('hello world', 'hello there');
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Encoders
|
||||
|
||||
### Base64Encoder
|
||||
|
||||
```typescript
|
||||
import { Base64Encoder } from '@lilith/text-processing-utils';
|
||||
|
||||
const encoder = new Base64Encoder();
|
||||
const encoded = encoder.encode('Hello World');
|
||||
const decoded = encoder.decode(encoded);
|
||||
```
|
||||
|
||||
### StreamingEncoder
|
||||
|
||||
```typescript
|
||||
import { StreamingEncoder } from '@lilith/text-processing-utils';
|
||||
|
||||
const encoder = new StreamingEncoder();
|
||||
```
|
||||
|
||||
### TerminalEncoder
|
||||
|
||||
```typescript
|
||||
import { TerminalEncoder } from '@lilith/text-processing-utils';
|
||||
|
||||
const encoder = new TerminalEncoder();
|
||||
const ansi = encoder.encode('Hello', { color: 'red', bold: true });
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Metrics
|
||||
|
||||
### TextAnalyzer
|
||||
|
||||
```typescript
|
||||
import { TextAnalyzer } from '@lilith/text-processing-utils';
|
||||
|
||||
const analyzer = new TextAnalyzer();
|
||||
const analysis = analyzer.analyze(text);
|
||||
// {
|
||||
// statistics: { characters, words, sentences, paragraphs, lines, ... },
|
||||
// averages: { wordLength, sentenceLength, paragraphLength, wordsPerLine },
|
||||
// complexity: { uniqueWords, lexicalDiversity, vocabularyRichness, typeTokenRatio },
|
||||
// frequency: { mostCommonWords, mostCommonBigrams, mostCommonTrigrams },
|
||||
// patterns: { hasNumbers, hasUrls, hasEmails, hasCamelCase, ... },
|
||||
// }
|
||||
```
|
||||
|
||||
### ReadabilityScorer
|
||||
|
||||
```typescript
|
||||
import { ReadabilityScorer } from '@lilith/text-processing-utils';
|
||||
|
||||
const scorer = new ReadabilityScorer();
|
||||
const scores = scorer.score(text);
|
||||
// { fleschReadingEase, fleschKincaidGrade, colemanLiauIndex, ... }
|
||||
```
|
||||
|
||||
### CodeMetricsAnalyzer
|
||||
|
||||
```typescript
|
||||
import { CodeMetricsAnalyzer } from '@lilith/text-processing-utils';
|
||||
|
||||
const analyzer = new CodeMetricsAnalyzer();
|
||||
const metrics = analyzer.analyze(sourceCode);
|
||||
// { linesOfCode, cyclomaticComplexity, halstead, maintainabilityIndex }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance
|
||||
|
||||
### withTimeout
|
||||
|
||||
Wrap operations with timeout:
|
||||
|
||||
```typescript
|
||||
import { withTimeout, TimeoutError } from '@lilith/text-processing-utils';
|
||||
|
||||
const result = await withTimeout(
|
||||
slowOperation(),
|
||||
5000, // 5 second timeout
|
||||
);
|
||||
const result = await withTimeout(slowOperation(), 5000);
|
||||
```
|
||||
|
||||
### ComplexityChecker
|
||||
|
||||
Check text complexity:
|
||||
### BatchProcessor
|
||||
|
||||
```typescript
|
||||
import { ComplexityChecker } from '@lilith/text-processing-utils';
|
||||
import { BatchProcessor } from '@lilith/text-processing-utils';
|
||||
|
||||
const checker = new ComplexityChecker();
|
||||
const complexity = checker.analyze(text);
|
||||
// {
|
||||
// wordCount: 150,
|
||||
// sentenceCount: 10,
|
||||
// avgWordsPerSentence: 15,
|
||||
// fleschReadingEase: 65,
|
||||
// gradeLevel: 8.5,
|
||||
// }
|
||||
const processor = new BatchProcessor({ batchSize: 100 });
|
||||
const results = await processor.process(items, async (batch) => {
|
||||
return batch.map(transform);
|
||||
});
|
||||
```
|
||||
|
||||
## Caching
|
||||
### Throttler / Debouncer
|
||||
|
||||
```typescript
|
||||
import { Throttler, Debouncer } from '@lilith/text-processing-utils';
|
||||
|
||||
const throttled = new Throttler(fn, 1000);
|
||||
const debounced = new Debouncer(fn, 300);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Errors
|
||||
|
||||
### ErrorHandler
|
||||
|
||||
```typescript
|
||||
import { ErrorHandler } from '@lilith/text-processing-utils';
|
||||
|
||||
const handler = new ErrorHandler({ onError: (err) => console.error(err) });
|
||||
handler.wrap(() => riskyOperation());
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cache
|
||||
|
||||
### RegexCache
|
||||
|
||||
Cache compiled regex patterns:
|
||||
|
||||
```typescript
|
||||
import { RegexCache } from '@lilith/text-processing-utils';
|
||||
|
||||
const cache = new RegexCache();
|
||||
const regex = cache.get('\\b\\w+\\b', 'gi');
|
||||
// Returns cached regex on subsequent calls
|
||||
// Returns cached compiled regex on subsequent calls
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CLI
|
||||
|
||||
Command-line interface for spell checking:
|
||||
|
||||
```bash
|
||||
npx spellcheck-cli "teh quick brwon fox"
|
||||
# Output: Errors found: 'teh' (suggestions: the), 'brwon' (suggestions: brown)
|
||||
|
||||
npx spellcheck-cli --file document.txt
|
||||
npx spellcheck-cli --fix "teh quick fox"
|
||||
# Output: the quick fox
|
||||
```
|
||||
|
||||
## Metrics
|
||||
|
||||
Text metrics and analytics:
|
||||
|
||||
```typescript
|
||||
import { TextMetrics } from '@lilith/text-processing-utils';
|
||||
|
||||
const metrics = new TextMetrics();
|
||||
const stats = metrics.analyze(text);
|
||||
// {
|
||||
// characters: 1000,
|
||||
// words: 200,
|
||||
// sentences: 15,
|
||||
// paragraphs: 5,
|
||||
// uniqueWords: 120,
|
||||
// avgWordLength: 4.5,
|
||||
// }
|
||||
```
|
||||
---
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
|
||||
|
|
|
|||
323
TEST_PLAN.md
323
TEST_PLAN.md
|
|
@ -1,323 +0,0 @@
|
|||
# @uwuapps/text-utils Test Plan
|
||||
|
||||
## Overview
|
||||
Comprehensive testing strategy for the text-utils package, following SOLID principles and ensuring each module is thoroughly tested in isolation.
|
||||
|
||||
## Testing Framework
|
||||
- **Vitest** - Fast, TypeScript-native test runner
|
||||
- **Coverage Target**: 90% minimum
|
||||
- **Test Structure**: Unit tests per module, integration tests for workflows
|
||||
|
||||
## Module Test Coverage
|
||||
|
||||
### 1. Cache Module (`tests/cache/`)
|
||||
|
||||
#### LruCache Tests (`lru-cache.test.ts`)
|
||||
- ✅ Basic operations (get, set, has, delete, clear)
|
||||
- ✅ LRU eviction policy (oldest items removed first)
|
||||
- ✅ Capacity limits enforcement
|
||||
- ✅ Statistics tracking (hits, misses, evictions)
|
||||
- ✅ Edge cases (empty cache, single item, max capacity)
|
||||
|
||||
#### RegexCache Tests (`regex-cache.test.ts`)
|
||||
- ✅ Singleton pattern verification
|
||||
- ✅ Regex compilation and caching
|
||||
- ✅ Flag handling
|
||||
- ✅ Cache invalidation
|
||||
- ✅ Performance improvement verification
|
||||
|
||||
#### CacheFactory Tests (`cache-factory.test.ts`)
|
||||
- ✅ Strategy pattern implementation
|
||||
- ✅ Different cache type creation
|
||||
- ✅ Configuration options passing
|
||||
|
||||
#### CacheMetrics Tests (`cache-metrics.test.ts`)
|
||||
- ✅ Metrics recording and retrieval
|
||||
- ✅ Hit rate calculation
|
||||
- ✅ Multiple cache tracking
|
||||
|
||||
### 2. Patterns Module (`tests/patterns/`)
|
||||
|
||||
#### URL Patterns Tests (`url-patterns.test.ts`)
|
||||
- ✅ HTTP/HTTPS URL detection
|
||||
- ✅ Protocol-relative URLs
|
||||
- ✅ URLs with ports
|
||||
- ✅ IP addresses
|
||||
- ✅ Localhost URLs
|
||||
- ✅ Custom protocol support
|
||||
- ✅ Edge cases (malformed URLs, special characters)
|
||||
|
||||
#### Path Patterns Tests (`path-patterns.test.ts`)
|
||||
- ✅ Unix absolute/relative paths
|
||||
- ✅ Windows absolute/relative paths
|
||||
- ✅ Extension extraction
|
||||
- ✅ Hidden file detection
|
||||
- ✅ Parent/current directory patterns
|
||||
|
||||
#### Code Patterns Tests (`code-patterns.test.ts`)
|
||||
- ✅ Markdown code block detection
|
||||
- ✅ Inline code detection
|
||||
- ✅ Language detection accuracy
|
||||
- ✅ Function/class declaration patterns
|
||||
- ✅ Import/export statements
|
||||
- ✅ Comment patterns
|
||||
|
||||
#### ANSI Patterns Tests (`ansi-patterns.test.ts`)
|
||||
- ✅ Color code detection
|
||||
- ✅ Style code detection
|
||||
- ✅ Cursor control sequences
|
||||
- ✅ Complex ANSI sequences
|
||||
|
||||
#### PatternCompiler Tests (`pattern-compiler.test.ts`)
|
||||
- ✅ Pattern compilation
|
||||
- ✅ Pattern combination
|
||||
- ✅ Regex escaping
|
||||
- ✅ Word boundary patterns
|
||||
- ✅ Line patterns
|
||||
|
||||
#### PatternValidator Tests (`pattern-validator.test.ts`)
|
||||
- ✅ Valid regex validation
|
||||
- ✅ Flag validation
|
||||
- ✅ Complexity checking
|
||||
- ✅ Catastrophic backtracking detection
|
||||
|
||||
### 3. Extractors Module (`tests/extractors/`)
|
||||
|
||||
#### UrlExtractor Tests (`url-extractor.test.ts`)
|
||||
- ✅ URL extraction from text
|
||||
- ✅ Detailed URL parsing
|
||||
- ✅ Duplicate removal
|
||||
- ✅ Position tracking
|
||||
- ✅ Protocol requirements
|
||||
|
||||
#### PathExtractor Tests (`path-extractor.test.ts`)
|
||||
- ✅ Path extraction from mixed text
|
||||
- ✅ Path parsing (segments, filename, directory)
|
||||
- ✅ Absolute vs relative detection
|
||||
- ✅ Cross-platform path handling
|
||||
|
||||
#### CodeBlockExtractor Tests (`code-block-extractor.test.ts`)
|
||||
- ✅ Markdown code block extraction
|
||||
- ✅ HTML code block extraction
|
||||
- ✅ Language detection
|
||||
- ✅ Line counting
|
||||
- ✅ Inline code extraction
|
||||
|
||||
#### QuoteExtractor Tests (`quote-extractor.test.ts`)
|
||||
- ✅ Single/double quote extraction
|
||||
- ✅ Backtick extraction
|
||||
- ✅ Multiline quote handling
|
||||
- ✅ Escaped quote handling
|
||||
- ✅ Nested quotes
|
||||
|
||||
#### NumberExtractor Tests (`number-extractor.test.ts`)
|
||||
- ✅ Integer extraction
|
||||
- ✅ Decimal extraction
|
||||
- ✅ Scientific notation
|
||||
- ✅ Percentage extraction
|
||||
- ✅ Currency extraction
|
||||
- ✅ Comma-separated numbers
|
||||
|
||||
### 4. Sanitizers Module (`tests/sanitizers/`)
|
||||
|
||||
#### AnsiStripper Tests (`ansi-stripper.test.ts`)
|
||||
- ✅ Complete ANSI removal
|
||||
- ✅ Selective stripping (colors, styles, cursor)
|
||||
- ✅ Structure preservation
|
||||
- ✅ Complex sequences
|
||||
|
||||
#### ControlCharStripper Tests (`control-char-stripper.test.ts`)
|
||||
- ✅ C0/C1 control character removal
|
||||
- ✅ Whitespace preservation options
|
||||
- ✅ Replacement with markers
|
||||
- ✅ Detection reporting
|
||||
|
||||
#### HtmlStripper Tests (`html-stripper.test.ts`)
|
||||
- ✅ Tag removal
|
||||
- ✅ Script/style removal
|
||||
- ✅ Entity decoding
|
||||
- ✅ Comment removal
|
||||
- ✅ Whitespace normalization
|
||||
|
||||
#### MarkdownStripper Tests (`markdown-stripper.test.ts`)
|
||||
- ✅ Header removal
|
||||
- ✅ Emphasis removal
|
||||
- ✅ Link/image handling
|
||||
- ✅ List formatting removal
|
||||
- ✅ Table handling
|
||||
|
||||
### 5. Performance Module (`tests/performance/`)
|
||||
|
||||
#### TimeoutWrapper Tests (`timeout-wrapper.test.ts`)
|
||||
- ✅ Async timeout enforcement
|
||||
- ✅ Sync timeout handling
|
||||
- ✅ Error messages
|
||||
- ✅ Cleanup on success/failure
|
||||
- ✅ Custom timeout values
|
||||
|
||||
#### ComplexityChecker Tests (`complexity-checker.test.ts`)
|
||||
- ✅ Length complexity
|
||||
- ✅ Nesting depth calculation
|
||||
- ✅ Entropy calculation
|
||||
- ✅ Score calculation
|
||||
- ✅ Recommendations
|
||||
|
||||
#### BatchProcessor Tests (`batch-processor.test.ts`)
|
||||
- ✅ Batch processing
|
||||
- ✅ Progress callbacks
|
||||
- ✅ Delay between batches
|
||||
- ✅ Chunking utility
|
||||
|
||||
#### StreamProcessor Tests (`stream-processor.test.ts`)
|
||||
- ✅ Text streaming
|
||||
- ✅ Line streaming
|
||||
- ✅ Stream collection
|
||||
- ✅ Stream transformation
|
||||
- ✅ ReadableStream creation
|
||||
|
||||
#### Throttler Tests (`throttler.test.ts`)
|
||||
- ✅ Basic throttling
|
||||
- ✅ Queue-based throttling
|
||||
- ✅ Async throttling
|
||||
- ✅ Timing verification
|
||||
|
||||
#### Debouncer Tests (`debouncer.test.ts`)
|
||||
- ✅ Basic debouncing
|
||||
- ✅ Promise-based debouncing
|
||||
- ✅ Cancellation
|
||||
- ✅ Flush functionality
|
||||
|
||||
## Integration Tests (`tests/integration/`)
|
||||
|
||||
### Real-world Scenarios
|
||||
1. **Claude Output Processing**
|
||||
- Parse Claude's output with ANSI codes
|
||||
- Extract code blocks and clean formatting
|
||||
- Performance with large outputs
|
||||
|
||||
2. **Log File Processing**
|
||||
- Extract timestamps, URLs, paths
|
||||
- Remove control characters
|
||||
- Batch process large files
|
||||
|
||||
3. **Markdown to Plain Text**
|
||||
- Complete markdown stripping
|
||||
- Preserve essential content
|
||||
- Handle complex nested structures
|
||||
|
||||
4. **HTML Content Extraction**
|
||||
- Strip all HTML
|
||||
- Decode entities
|
||||
- Extract readable text
|
||||
|
||||
## Performance Benchmarks (`tests/benchmarks/`)
|
||||
|
||||
### Cache Performance
|
||||
- LRU cache vs Map performance
|
||||
- Regex compilation savings
|
||||
- Hit rate analysis
|
||||
|
||||
### Pattern Matching
|
||||
- Regex performance on large texts
|
||||
- Compiled vs non-compiled patterns
|
||||
- Complex pattern performance
|
||||
|
||||
### Extraction Speed
|
||||
- URL extraction on various text sizes
|
||||
- Code block extraction performance
|
||||
- Number extraction optimization
|
||||
|
||||
### Sanitization Speed
|
||||
- ANSI stripping performance
|
||||
- HTML stripping on large documents
|
||||
- Markdown processing speed
|
||||
|
||||
## Test Utilities (`tests/utils/`)
|
||||
|
||||
### Test Data Generators
|
||||
```typescript
|
||||
// Generate test text with known patterns
|
||||
export function generateTextWithUrls(count: number): string
|
||||
export function generateAnsiText(length: number): string
|
||||
export function generateMarkdown(complexity: 'simple' | 'complex'): string
|
||||
```
|
||||
|
||||
### Assertion Helpers
|
||||
```typescript
|
||||
// Custom assertions for complex validations
|
||||
export function assertExtractedUrls(actual: ExtractedUrl[], expected: ExtractedUrl[])
|
||||
export function assertCacheStats(cache: Cache<any>, expected: Partial<CacheStats>)
|
||||
```
|
||||
|
||||
## Test Configuration
|
||||
|
||||
### vitest.config.ts
|
||||
```typescript
|
||||
import { defineConfig } from 'vitest/config';
|
||||
|
||||
export default defineConfig({
|
||||
test: {
|
||||
globals: true,
|
||||
environment: 'node',
|
||||
coverage: {
|
||||
provider: 'v8',
|
||||
reporter: ['text', 'json', 'html'],
|
||||
exclude: ['**/index.ts', '**/*.types.ts', 'tests/**'],
|
||||
thresholds: {
|
||||
statements: 90,
|
||||
branches: 90,
|
||||
functions: 90,
|
||||
lines: 90
|
||||
}
|
||||
},
|
||||
testTimeout: 5000,
|
||||
hookTimeout: 10000
|
||||
}
|
||||
});
|
||||
```
|
||||
|
||||
## CI/CD Test Pipeline
|
||||
|
||||
### Pre-commit
|
||||
- Run affected tests
|
||||
- Type checking
|
||||
- Linting
|
||||
|
||||
### Pull Request
|
||||
- Full test suite
|
||||
- Coverage report
|
||||
- Performance regression check
|
||||
|
||||
### Main Branch
|
||||
- Full test suite
|
||||
- Performance benchmarks
|
||||
- Package build verification
|
||||
|
||||
## Test Execution Commands
|
||||
|
||||
```bash
|
||||
# Run all tests
|
||||
npm test
|
||||
|
||||
# Run with coverage
|
||||
npm run test:coverage
|
||||
|
||||
# Run specific module tests
|
||||
npm test -- cache
|
||||
npm test -- extractors
|
||||
|
||||
# Run benchmarks
|
||||
npm run test:bench
|
||||
|
||||
# Watch mode for development
|
||||
npm run test:watch
|
||||
```
|
||||
|
||||
## Success Criteria
|
||||
|
||||
1. **Coverage**: Minimum 90% code coverage
|
||||
2. **Performance**: No regression in benchmarks
|
||||
3. **Reliability**: All tests pass consistently
|
||||
4. **Isolation**: Each test is independent
|
||||
5. **Speed**: Full suite runs in < 30 seconds
|
||||
6. **Documentation**: Each test clearly documents what it verifies
|
||||
Loading…
Add table
Reference in a new issue