272 lines
No EOL
8 KiB
TypeScript
272 lines
No EOL
8 KiB
TypeScript
/**
|
|
* Performance benchmarks for spellcheck optimizations
|
|
* Run with: npm run benchmark
|
|
*/
|
|
|
|
import { LevenshteinDistance } from '../src/spellcheck/algorithms/levenshtein';
|
|
import { OptimizedLevenshtein } from '../src/spellcheck/algorithms/levenshtein-optimized';
|
|
import { DamerauLevenshtein } from '../src/spellcheck/algorithms/damerau-levenshtein';
|
|
import { Soundex, Metaphone } from '../src/spellcheck/algorithms/phonetic';
|
|
import { BloomFilter } from '../src/spellcheck/utils/bloom-filter';
|
|
import { LRUCache } from '../src/spellcheck/utils/lru-cache';
|
|
import { Trie } from '../src/spellcheck/algorithms/trie';
|
|
|
|
// Test data
|
|
const TEST_WORDS = [
|
|
'hello', 'world', 'javascript', 'typescript', 'programming',
|
|
'algorithm', 'performance', 'optimization', 'benchmark', 'testing'
|
|
];
|
|
|
|
const MISSPELLED_WORDS = [
|
|
'helo', 'wrold', 'javascrpt', 'typscript', 'programing',
|
|
'algoritm', 'performence', 'optimizaton', 'benchmrk', 'testng'
|
|
];
|
|
|
|
const LARGE_DICTIONARY = Array.from({ length: 10000 }, (_, i) =>
|
|
`word${i}${String.fromCharCode(97 + (i % 26))}`
|
|
);
|
|
|
|
/**
|
|
* Benchmark runner
|
|
*/
|
|
class Benchmark {
|
|
private results: Map<string, number[]> = new Map();
|
|
|
|
async run(name: string, fn: () => void | Promise<void>, iterations: number = 1000): Promise<void> {
|
|
const times: number[] = [];
|
|
|
|
// Warm up
|
|
for (let i = 0; i < 10; i++) {
|
|
await fn();
|
|
}
|
|
|
|
// Actual benchmark
|
|
for (let i = 0; i < iterations; i++) {
|
|
const start = performance.now();
|
|
await fn();
|
|
const end = performance.now();
|
|
times.push(end - start);
|
|
}
|
|
|
|
this.results.set(name, times);
|
|
}
|
|
|
|
report(): void {
|
|
console.log('\n=== SpellCheck Performance Benchmarks ===\n');
|
|
|
|
for (const [name, times] of this.results) {
|
|
const avg = times.reduce((a, b) => a + b, 0) / times.length;
|
|
const min = Math.min(...times);
|
|
const max = Math.max(...times);
|
|
const median = this.median(times);
|
|
|
|
console.log(`${name}:`);
|
|
console.log(` Average: ${avg.toFixed(3)}ms`);
|
|
console.log(` Median: ${median.toFixed(3)}ms`);
|
|
console.log(` Min: ${min.toFixed(3)}ms`);
|
|
console.log(` Max: ${max.toFixed(3)}ms`);
|
|
console.log('');
|
|
}
|
|
}
|
|
|
|
private median(arr: number[]): number {
|
|
const sorted = [...arr].sort((a, b) => a - b);
|
|
const mid = Math.floor(sorted.length / 2);
|
|
return sorted.length % 2 === 0
|
|
? (sorted[mid - 1] + sorted[mid]) / 2
|
|
: sorted[mid];
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Run all benchmarks
|
|
*/
|
|
async function runBenchmarks() {
|
|
const benchmark = new Benchmark();
|
|
|
|
console.log('Preparing benchmarks...');
|
|
|
|
// Initialize algorithms
|
|
const oldLevenshtein = new LevenshteinDistance();
|
|
const newLevenshtein = new OptimizedLevenshtein();
|
|
const damerauLevenshtein = new DamerauLevenshtein();
|
|
const soundex = new Soundex();
|
|
const metaphone = new Metaphone();
|
|
const bloomFilter = new BloomFilter(10000, 0.01);
|
|
const lruCache = new LRUCache<string, number>(1000);
|
|
const trie = new Trie();
|
|
|
|
// Populate data structures
|
|
for (const word of LARGE_DICTIONARY) {
|
|
bloomFilter.add(word);
|
|
trie.insert(word);
|
|
}
|
|
|
|
// === Levenshtein Distance Benchmarks ===
|
|
console.log('\nRunning Levenshtein benchmarks...');
|
|
|
|
await benchmark.run('Old Levenshtein (10 words)', () => {
|
|
for (let i = 0; i < TEST_WORDS.length; i++) {
|
|
oldLevenshtein.calculate(TEST_WORDS[i], MISSPELLED_WORDS[i]);
|
|
}
|
|
});
|
|
|
|
await benchmark.run('Optimized Levenshtein (10 words)', () => {
|
|
for (let i = 0; i < TEST_WORDS.length; i++) {
|
|
newLevenshtein.calculate(TEST_WORDS[i], MISSPELLED_WORDS[i]);
|
|
}
|
|
});
|
|
|
|
await benchmark.run('Optimized Levenshtein with early termination', () => {
|
|
for (let i = 0; i < TEST_WORDS.length; i++) {
|
|
newLevenshtein.calculate(TEST_WORDS[i], MISSPELLED_WORDS[i], 2);
|
|
}
|
|
});
|
|
|
|
await benchmark.run('Damerau-Levenshtein (10 words)', () => {
|
|
for (let i = 0; i < TEST_WORDS.length; i++) {
|
|
damerauLevenshtein.calculate(TEST_WORDS[i], MISSPELLED_WORDS[i]);
|
|
}
|
|
});
|
|
|
|
// === Phonetic Algorithm Benchmarks ===
|
|
console.log('\nRunning phonetic algorithm benchmarks...');
|
|
|
|
await benchmark.run('Soundex encoding (10 words)', () => {
|
|
for (const word of TEST_WORDS) {
|
|
soundex.encode(word);
|
|
}
|
|
});
|
|
|
|
await benchmark.run('Metaphone encoding (10 words)', () => {
|
|
for (const word of TEST_WORDS) {
|
|
metaphone.encode(word);
|
|
}
|
|
});
|
|
|
|
// === Bloom Filter Benchmarks ===
|
|
console.log('\nRunning Bloom filter benchmarks...');
|
|
|
|
await benchmark.run('Bloom filter lookup (1000 checks)', () => {
|
|
for (let i = 0; i < 1000; i++) {
|
|
bloomFilter.mightContain(`test${i}`);
|
|
}
|
|
});
|
|
|
|
await benchmark.run('Bloom filter negative lookups', () => {
|
|
for (let i = 0; i < 1000; i++) {
|
|
bloomFilter.definitelyNotContains(`notinset${i}`);
|
|
}
|
|
});
|
|
|
|
// === LRU Cache Benchmarks ===
|
|
console.log('\nRunning LRU cache benchmarks...');
|
|
|
|
await benchmark.run('LRU cache set/get (1000 ops)', () => {
|
|
for (let i = 0; i < 500; i++) {
|
|
lruCache.set(`key${i}`, i);
|
|
lruCache.get(`key${i % 100}`);
|
|
}
|
|
});
|
|
|
|
// === Trie Benchmarks ===
|
|
console.log('\nRunning Trie benchmarks...');
|
|
|
|
await benchmark.run('Trie search (1000 lookups)', () => {
|
|
for (let i = 0; i < 1000; i++) {
|
|
trie.search(`word${i % 10000}a`);
|
|
}
|
|
});
|
|
|
|
await benchmark.run('Trie prefix search', () => {
|
|
for (let i = 0; i < 100; i++) {
|
|
trie.getSuggestions(`word${i}`, 10);
|
|
}
|
|
});
|
|
|
|
// === Memory Usage Comparison ===
|
|
console.log('\nCalculating memory usage...');
|
|
|
|
const memoryBefore = process.memoryUsage().heapUsed;
|
|
|
|
// Create large data structures
|
|
const largeTrie = new Trie();
|
|
const largeBloom = new BloomFilter(100000, 0.01);
|
|
|
|
for (let i = 0; i < 50000; i++) {
|
|
const word = `testword${i}`;
|
|
largeTrie.insert(word);
|
|
largeBloom.add(word);
|
|
}
|
|
|
|
const memoryAfter = process.memoryUsage().heapUsed;
|
|
const memoryUsed = (memoryAfter - memoryBefore) / 1024 / 1024;
|
|
|
|
console.log(`\nMemory used for 50,000 words:`);
|
|
console.log(` Trie + Bloom Filter: ${memoryUsed.toFixed(2)} MB`);
|
|
|
|
// Report results
|
|
benchmark.report();
|
|
|
|
// === Accuracy Comparison ===
|
|
console.log('\n=== Accuracy Comparison ===\n');
|
|
|
|
const testPairs = [
|
|
['their', 'there'],
|
|
['definitely', 'definately'],
|
|
['occurrence', 'occurence'],
|
|
['separate', 'seperate'],
|
|
['receive', 'recieve']
|
|
];
|
|
|
|
for (const [correct, misspelled] of testPairs) {
|
|
const levDist = oldLevenshtein.calculate(correct, misspelled);
|
|
const optLevDist = newLevenshtein.calculate(correct, misspelled);
|
|
const damLevDist = damerauLevenshtein.calculate(correct, misspelled);
|
|
const soundexMatch = soundex.soundsLike(correct, misspelled);
|
|
const metaphoneMatch = metaphone.soundsLike(correct, misspelled);
|
|
|
|
console.log(`${misspelled} -> ${correct}:`);
|
|
console.log(` Levenshtein: ${levDist}`);
|
|
console.log(` Optimized Levenshtein: ${optLevDist}`);
|
|
console.log(` Damerau-Levenshtein: ${damLevDist}`);
|
|
console.log(` Soundex match: ${soundexMatch}`);
|
|
console.log(` Metaphone match: ${metaphoneMatch}`);
|
|
console.log('');
|
|
}
|
|
|
|
// === Cache Performance ===
|
|
console.log('=== Cache Performance ===\n');
|
|
|
|
const cacheTest = new LRUCache<string, number>(100);
|
|
let hits = 0;
|
|
let misses = 0;
|
|
|
|
// Simulate realistic cache usage
|
|
for (let i = 0; i < 1000; i++) {
|
|
const key = `key${Math.floor(Math.random() * 150)}`;
|
|
if (cacheTest.has(key)) {
|
|
cacheTest.get(key);
|
|
hits++;
|
|
} else {
|
|
cacheTest.set(key, i);
|
|
misses++;
|
|
}
|
|
}
|
|
|
|
const stats = cacheTest.getStats();
|
|
console.log(`Cache Statistics:`);
|
|
console.log(` Size: ${stats.size}/${stats.maxSize}`);
|
|
console.log(` Hits: ${hits}`);
|
|
console.log(` Misses: ${misses}`);
|
|
console.log(` Hit Rate: ${((hits / (hits + misses)) * 100).toFixed(1)}%`);
|
|
|
|
console.log('\n=== Benchmark Complete ===');
|
|
}
|
|
|
|
// Run if executed directly
|
|
if (require.main === module) {
|
|
runBenchmarks().catch(console.error);
|
|
}
|
|
|
|
export { runBenchmarks, Benchmark }; |