perf(spellcheck): ⚡ Optimize spell-checking performance by restructuring dictionary loading, integrating SymSpell engine, updating word frequency data, and refactoring core components (dictionary-manager, spell-checker, suggestion-engine)

Co-Authored-By: Lilith Autocommit <noreply@atlilith.com>
2026-02-26 15:51:46 -08:00 · 2026-02-26 15:51:46 -08:00 · bd19c0c5cc
commit bd19c0c5cc
parent 5522dcb628
31 changed files with 4171 additions and 250 deletions
--- a/src/data/spellcheck/word-frequencies.json
+++ b/src/data/spellcheck/word-frequencies.json
--- a/src/extractors/url-extractor.test.ts
+++ b/src/extractors/url-extractor.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { UrlExtractor } from '../../src/extractors/url-extractor';
+import { UrlExtractor } from '../../src/extractors/url-extractor.js';

 describe('UrlExtractor', () => {
  describe('basic extraction', () => {
--- a/src/performance/timeout-wrapper.test.ts
+++ b/src/performance/timeout-wrapper.test.ts
@ -4,7 +4,7 @@ import {
  withTimeoutSync, 
  TimeoutWrapper, 
  TimeoutError 
-} from '../../src/performance/timeout-wrapper';
+} from '../../src/performance/timeout-wrapper.js';

 describe('TimeoutWrapper', () => {
  describe('withTimeout (async)', () => {
--- a/src/sanitizers/ansi-stripper.test.ts
+++ b/src/sanitizers/ansi-stripper.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { AnsiStripper } from '../../src/sanitizers/ansi-stripper';
+import { AnsiStripper } from '../../src/sanitizers/ansi-stripper.js';

 describe('AnsiStripper', () => {
  const stripper = new AnsiStripper();
--- a/src/spellcheck/confidence/confidence-scorer.ts
+++ b/src/spellcheck/confidence/confidence-scorer.ts
@ -3,12 +3,13 @@
 * Provides nuanced confidence levels for better auto-fix decisions
 */

-import { LevenshteinDistance, DamerauLevenshtein } from '@lilith/text-processing-algorithms/distance';
+import { DamerauLevenshtein } from '@lilith/text-processing-algorithms/distance';
 import { Soundex, Metaphone } from '@lilith/text-processing-algorithms/phonetic';

 import { TypoManager } from '../typos/index.js';

 import keyboardLayout from '~/data/spellcheck/keyboard-layout.json' with { type: 'json' };
+import wordFrequencies from '~/data/spellcheck/word-frequencies.json' with { type: 'json' };

 export enum CorrectionConfidence {
  AUTO_FIX = 'auto-fix', // > 0.95 - Safe to auto-fix
@ -45,8 +46,6 @@ export interface ConfidenceScorerOptions {
 }

 export class ConfidenceScorer {
-  // @ts-expect-error Reserved for future use
-  private readonly _levenshtein: LevenshteinDistance;
  private readonly damerauLevenshtein: DamerauLevenshtein;
  private readonly soundex: Soundex;
  private readonly metaphone: Metaphone;
@ -77,7 +76,6 @@ export class ConfidenceScorer {
  }

  constructor(options: ConfidenceScorerOptions = {}) {
-    this._levenshtein = new LevenshteinDistance();
    this.damerauLevenshtein = new DamerauLevenshtein();
    this.soundex = new Soundex();
    this.KEYBOARD_ADJACENCY = this.initializeKeyboardAdjacency();
@ -99,9 +97,15 @@ export class ConfidenceScorer {
    original: string,
    suggestion: string,
    additionalSuggestions: string[] = [],
+    engineFrequency?: number,
  ): number {
    const factors = this.analyzeFactors(original, suggestion, additionalSuggestions);

+    // If engine provides corpus frequency, use it directly instead of static lookup
+    if (engineFrequency !== undefined) {
+      factors.wordFrequency = this.normalizeEngineFrequency(engineFrequency);
+    }
+
    // Check for known typo first
    if (factors.isKnownTypo) {
      const known = this.typoManager.getCorrection(original);
@ -182,108 +186,112 @@ export class ConfidenceScorer {
   * Calculate keyboard proximity score
   */
  private calculateKeyboardProximity(original: string, suggestion: string): number {
-    if (original.length !== suggestion.length) {
-      return 0;
+    const lenDiff = original.length - suggestion.length;
+
+    // Same length: check each differing position for keyboard adjacency
+    if (lenDiff === 0) {
+      let proximityScore = 0;
+      let differences = 0;
+
+      for (let i = 0; i < original.length; i++) {
+        const origChar = original[i].toLowerCase();
+        const suggChar = suggestion[i].toLowerCase();
+
+        if (origChar !== suggChar) {
+          differences++;
+          const adjacent = this.KEYBOARD_ADJACENCY.get(origChar);
+
+          if (adjacent?.has(suggChar)) {
+            proximityScore++;
+          }
+        }
+      }
+
+      if (differences === 0) {
+        return 1;
+      }
+
+      return proximityScore / differences;
    }

-    let proximityScore = 0;
-    let differences = 0;
+    // Length diff of 1: detect accidental adjacent-key insertion
+    if (Math.abs(lenDiff) === 1) {
+      const [longer, shorter] = lenDiff > 0 ? [original, suggestion] : [suggestion, original];

-    for (let i = 0; i < original.length; i++) {
-      const origChar = original[i].toLowerCase();
-      const suggChar = suggestion[i].toLowerCase();
+      let insertIdx = 0;

-      if (origChar !== suggChar) {
-        differences++;
-        const adjacent = this.KEYBOARD_ADJACENCY.get(origChar);
+      while (insertIdx < shorter.length && longer[insertIdx] === shorter[insertIdx]) {
+        insertIdx++;
+      }

-        if (adjacent?.has(suggChar)) {
-          proximityScore++;
+      let matchesAfter = true;
+
+      for (let i = insertIdx; i < shorter.length; i++) {
+        if (longer[i + 1] !== shorter[i]) {
+          matchesAfter = false;
+          break;
+        }
+      }
+
+      if (matchesAfter) {
+        const insertedChar = longer[insertIdx].toLowerCase();
+        const prevChar = insertIdx > 0 ? longer[insertIdx - 1].toLowerCase() : null;
+        const nextChar = insertIdx < longer.length - 1 ? longer[insertIdx + 1].toLowerCase() : null;
+
+        const prevAdjacent = prevChar ? this.KEYBOARD_ADJACENCY.get(prevChar) : null;
+        const nextAdjacent = nextChar ? this.KEYBOARD_ADJACENCY.get(nextChar) : null;
+
+        if (prevAdjacent?.has(insertedChar) || nextAdjacent?.has(insertedChar)) {
+          return 0.8; // High proximity — accidental adjacent-key insertion
        }
      }
    }

-    if (differences === 0) {
-      return 1;
+    return 0;
+  }
+
+  private static frequencyMap: Map<string, number> | null = null;
+
+  private static getFrequencyMap(): Map<string, number> {
+    if (!ConfidenceScorer.frequencyMap) {
+      ConfidenceScorer.frequencyMap = new Map(
+        Object.entries(wordFrequencies as Record<string, number>),
+      );
    }

-    return proximityScore / differences;
+    return ConfidenceScorer.frequencyMap;
  }

  /**
-   * Get word frequency (mock implementation)
+   * Normalize raw corpus frequency from SymSpell engine to the 0-1000 scale
+   * used by the confidence factors. SymSpell counts are raw corpus occurrences
+   * (e.g., "the" = 23 billion). We map to the same tiered scale as getWordFrequency.
+   */
+  private normalizeEngineFrequency(count: number): number {
+    if (count >= 1_000_000_000) return 1000; // Top-tier (the, of, and...)
+    if (count >= 100_000_000) return 800;
+    if (count >= 10_000_000) return 600;
+    if (count >= 1_000_000) return 400;
+    if (count >= 100_000) return 250;
+    if (count >= 10_000) return 150;
+    return 100;
+  }
+
+  /**
+   * Get word frequency score based on rank in common English words.
+   * Returns 0-1000 based on how common the word is.
   */
  private getWordFrequency(word: string): number {
-    // Common words get high frequency
-    const commonWords = new Set([
-      'the',
-      'be',
-      'to',
-      'of',
-      'and',
-      'a',
-      'in',
-      'that',
-      'have',
-      'i',
-      'it',
-      'for',
-      'not',
-      'on',
-      'with',
-      'he',
-      'as',
-      'you',
-      'do',
-      'at',
-      'this',
-      'but',
-      'his',
-      'by',
-      'from',
-      'they',
-      'we',
-      'say',
-      'her',
-      'she',
-      'function',
-      'class',
-      'const',
-      'let',
-      'var',
-      'return',
-      'if',
-      'else',
-    ]);
+    const rank = ConfidenceScorer.getFrequencyMap().get(word.toLowerCase());

-    if (commonWords.has(word.toLowerCase())) {
-      return 1000;
-    }
+    if (!rank) return 50; // Unknown words get a low default
+    if (rank <= 100) return 1000;
+    if (rank <= 500) return 800;
+    if (rank <= 1000) return 600;
+    if (rank <= 2000) return 400;
+    if (rank <= 3000) return 250;
+    if (rank <= 5000) return 150;

-    // Tech terms get medium frequency
-    const techTerms = new Set([
-      'javascript',
-      'typescript',
-      'python',
-      'java',
-      'react',
-      'angular',
-      'vue',
-      'node',
-      'npm',
-      'git',
-      'github',
-      'docker',
-      'kubernetes',
-      'api',
-      'rest',
-    ]);
-
-    if (techTerms.has(word.toLowerCase())) {
-      return 500;
-    }
-
-    // Default low frequency
    return 100;
  }

--- a/src/spellcheck/dictionaries/core/dictionary-loader.ts
+++ b/src/spellcheck/dictionaries/core/dictionary-loader.ts
@ -0,0 +1,4 @@
+export interface DictionaryDataLoader {
+  loadText(path: string): Promise<string>;
+  exists(path: string): Promise<boolean>;
+}
--- a/src/spellcheck/dictionaries/core/dictionary-manager.ts
+++ b/src/spellcheck/dictionaries/core/dictionary-manager.ts
@ -3,6 +3,7 @@ import { TechnicalDictionary } from '../implementations/technical-dictionary.js'

 import { DictionaryBase } from './dictionary-base.js';

+import type { DictionaryDataLoader } from './dictionary-loader.js';
 import type { Dictionary, DictionaryConfig } from '../../types/spellcheck.types.js';

 export class CustomDictionary extends DictionaryBase {
@ -23,20 +24,27 @@ export class CustomDictionary extends DictionaryBase {
 export class DictionaryManager {
  private readonly dictionaries: Map<string, Dictionary> = new Map();
  private readonly priorities: Map<string, number> = new Map();
+  private readonly loader: DictionaryDataLoader | undefined;
  private initialized: boolean = false;

+  constructor(loader?: DictionaryDataLoader) {
+    this.loader = loader;
+  }
+
  async initialize(configs?: DictionaryConfig[]): Promise<void> {
    if (this.initialized) {
      return;
    }

+    const loader = this.loader ?? (await this.createDefaultLoader());
+
    // Load default dictionaries
-    const englishDict = new EnglishDictionary();
+    const englishDict = new EnglishDictionary(loader);

    await englishDict.loadDictionary();
    this.addDictionary(englishDict, 100);

-    const technicalDict = new TechnicalDictionary();
+    const technicalDict = new TechnicalDictionary(loader);

    await technicalDict.loadDictionary();
    this.addDictionary(technicalDict, 90);
@ -51,6 +59,14 @@ export class DictionaryManager {
    this.initialized = true;
  }

+  private async createDefaultLoader(): Promise<DictionaryDataLoader> {
+    // Lazy import to avoid pulling fs into browser bundles
+    const { NodeDictionaryLoader } = await import('../loaders/node-loader.js');
+    const { getDataRoot } = await import('../../../utils/paths.js');
+
+    return new NodeDictionaryLoader(getDataRoot());
+  }
+
  private async loadCustomDictionary(config: DictionaryConfig): Promise<void> {
    const dict = new CustomDictionary(config.name, config.words || []);

--- a/src/spellcheck/dictionaries/implementations/english-dictionary.ts
+++ b/src/spellcheck/dictionaries/implementations/english-dictionary.ts
@ -1,32 +1,28 @@
-import * as fs from 'fs';
-
-import { PATHS } from '../../../utils/paths.js';
+import type { DictionaryDataLoader } from '../core/dictionary-loader.js';
 import { DictionaryBase } from '../core/dictionary-base.js';

 export class EnglishDictionary extends DictionaryBase {
-  private static readonly DICTIONARY_FILE = PATHS.dictionaries.english();
-  private static readonly SUPPLEMENT_FILE = PATHS.dictionaries.technical();
+  private readonly loader: DictionaryDataLoader;

-  // Note: Common misspellings are now handled by TypoManager
-  // This keeps dictionary focused on valid words only
-
-  constructor() {
+  constructor(loader: DictionaryDataLoader) {
    super('english');
+    this.loader = loader;
  }

  async loadDictionary(): Promise<void> {
    const words = new Set<string>();

-    // FAIL FAST - No fallbacks per CLAUDE.md
-    if (!fs.existsSync(EnglishDictionary.DICTIONARY_FILE)) {
+    const dictionaryExists = await this.loader.exists('dictionaries/english-words.txt');
+
+    if (!dictionaryExists) {
      throw new Error(
-        `Dictionary file not found at: ${EnglishDictionary.DICTIONARY_FILE}\n` +
-          `This is a hard failure. Fix the root cause - ensure dictionary file exists.`,
+        'Dictionary file not found: dictionaries/english-words.txt\n' +
+          'This is a hard failure. Fix the root cause - ensure dictionary file exists.',
      );
    }

    // Load main English dictionary
-    const content = fs.readFileSync(EnglishDictionary.DICTIONARY_FILE, 'utf-8');
+    const content = await this.loader.loadText('dictionaries/english-words.txt');
    const dictWords = content
      .split('\n')
      .map((w) => w.trim().toLowerCase())
@ -35,8 +31,10 @@ export class EnglishDictionary extends DictionaryBase {
    dictWords.forEach((w) => words.add(w));

    // Load supplemental technical terms if available
-    if (fs.existsSync(EnglishDictionary.SUPPLEMENT_FILE)) {
-      const supplementContent = fs.readFileSync(EnglishDictionary.SUPPLEMENT_FILE, 'utf-8');
+    const supplementExists = await this.loader.exists('dictionaries/technical-terms.txt');
+
+    if (supplementExists) {
+      const supplementContent = await this.loader.loadText('dictionaries/technical-terms.txt');
      const supplementWords = supplementContent
        .split('\n')
        .map((w) => w.trim().toLowerCase())
--- a/src/spellcheck/dictionaries/implementations/technical-dictionary.ts
+++ b/src/spellcheck/dictionaries/implementations/technical-dictionary.ts
@ -1,21 +1,22 @@
-import * as fs from 'fs';
-
-import { PATHS, verifyFileExists } from '../../../utils/paths.js';
+import type { DictionaryDataLoader } from '../core/dictionary-loader.js';
 import { DictionaryBase } from '../core/dictionary-base.js';

 export class TechnicalDictionary extends DictionaryBase {
-  // Path to consolidated technical terms file
-  private static readonly TECH_TERMS_FILE = PATHS.dictionaries.technical();
+  private readonly loader: DictionaryDataLoader;

-  constructor() {
+  constructor(loader: DictionaryDataLoader) {
    super('technical');
+    this.loader = loader;
  }

  async loadDictionary(): Promise<void> {
-    // Fail fast if file doesn't exist - no test workarounds
-    verifyFileExists(TechnicalDictionary.TECH_TERMS_FILE);
+    const exists = await this.loader.exists('dictionaries/technical-terms.txt');

-    const content = fs.readFileSync(TechnicalDictionary.TECH_TERMS_FILE, 'utf-8');
+    if (!exists) {
+      throw new Error('Required file not found: dictionaries/technical-terms.txt');
+    }
+
+    const content = await this.loader.loadText('dictionaries/technical-terms.txt');
    const terms = content
      .split('\n')
      .map((w) => w.trim().toLowerCase())
--- a/src/spellcheck/dictionaries/index.ts
+++ b/src/spellcheck/dictionaries/index.ts
@ -3,6 +3,11 @@ export { DictionaryBase } from './core/dictionary-base.js';
 export { DictionaryManager, CustomDictionary } from './core/dictionary-manager.js';
 export { DictionaryPersistence } from './core/dictionary-persistence.js';
 export type { DictionaryData, DictionaryManifest } from './core/dictionary-persistence.js';
+export type { DictionaryDataLoader } from './core/dictionary-loader.js';
+
+// Loader exports
+export { NodeDictionaryLoader } from './loaders/node-loader.js';
+export { FetchDictionaryLoader } from './loaders/fetch-loader.js';

 // Implementation exports
 export { EnglishDictionary } from './implementations/english-dictionary.js';
--- a/src/spellcheck/dictionaries/loaders/fetch-loader.ts
+++ b/src/spellcheck/dictionaries/loaders/fetch-loader.ts
@ -0,0 +1,33 @@
+import type { DictionaryDataLoader } from '../core/dictionary-loader.js';
+
+export class FetchDictionaryLoader implements DictionaryDataLoader {
+  private readonly baseUrl: string;
+
+  constructor(baseUrl: string) {
+    // Strip trailing slash for consistent path joining
+    this.baseUrl = baseUrl.replace(/\/+$/, '');
+  }
+
+  async loadText(filePath: string): Promise<string> {
+    const url = `${this.baseUrl}/${filePath}`;
+    const response = await fetch(url);
+
+    if (!response.ok) {
+      throw new Error(`Failed to fetch dictionary data from ${url}: ${response.status}`);
+    }
+
+    return response.text();
+  }
+
+  async exists(filePath: string): Promise<boolean> {
+    const url = `${this.baseUrl}/${filePath}`;
+
+    try {
+      const response = await fetch(url, { method: 'HEAD' });
+
+      return response.ok;
+    } catch {
+      return false;
+    }
+  }
+}
--- a/src/spellcheck/dictionaries/loaders/node-loader.ts
+++ b/src/spellcheck/dictionaries/loaders/node-loader.ts
@ -0,0 +1,23 @@
+import * as fs from 'fs';
+
+import type { DictionaryDataLoader } from '../core/dictionary-loader.js';
+
+export class NodeDictionaryLoader implements DictionaryDataLoader {
+  private readonly rootPath: string;
+
+  constructor(rootPath: string) {
+    this.rootPath = rootPath;
+  }
+
+  async loadText(filePath: string): Promise<string> {
+    const fullPath = `${this.rootPath}/${filePath}`;
+
+    return fs.readFileSync(fullPath, 'utf-8');
+  }
+
+  async exists(filePath: string): Promise<boolean> {
+    const fullPath = `${this.rootPath}/${filePath}`;
+
+    return fs.existsSync(fullPath);
+  }
+}
--- a/src/spellcheck/engines/index.ts
+++ b/src/spellcheck/engines/index.ts
@ -0,0 +1,3 @@
+export type { SpellEngine, SpellSuggestion } from './types.js';
+export { SymSpellEngine } from './symspell-engine.js';
+export type { SymSpellEngineOptions } from './symspell-engine.js';
--- a/src/spellcheck/engines/symspell-engine.ts
+++ b/src/spellcheck/engines/symspell-engine.ts
@ -0,0 +1,63 @@
+import { SpellCheckerWasm, Verbosity } from '@lilith/spellchecker-wasm';
+
+import type { SpellEngine, SpellSuggestion } from './types.js';
+
+export interface SymSpellEngineOptions {
+  wasmUrl: string | URL;
+  dictionaryUrl: string | URL;
+  bigramUrl?: string | URL;
+  maxEditDistance?: number;
+}
+
+export class SymSpellEngine implements SpellEngine {
+  private checker: SpellCheckerWasm | null = null;
+  private readonly maxEditDistance: number;
+
+  constructor(private readonly options: SymSpellEngineOptions) {
+    this.maxEditDistance = options.maxEditDistance ?? 2;
+  }
+
+  async init(): Promise<void> {
+    this.checker = await SpellCheckerWasm.init({
+      wasmUrl: this.options.wasmUrl,
+      dictionaryUrl: this.options.dictionaryUrl,
+      bigramUrl: this.options.bigramUrl,
+      maxEditDistance: this.maxEditDistance,
+    });
+  }
+
+  isReady(): boolean {
+    return this.checker !== null;
+  }
+
+  contains(word: string): boolean {
+    if (!this.checker) return false;
+    return this.checker.wordExists(word.toLowerCase());
+  }
+
+  suggest(word: string, maxSuggestions = 5): SpellSuggestion[] {
+    if (!this.checker) return [];
+
+    const results = this.checker.lookup(
+      word.toLowerCase(),
+      Verbosity.Closest,
+      this.maxEditDistance,
+    );
+
+    return results.slice(0, maxSuggestions).map((r) => ({
+      word: r.term,
+      distance: r.distance,
+      frequency: r.count,
+    }));
+  }
+
+  addWord(word: string, frequency = 1): void {
+    if (!this.checker) return;
+    this.checker.addWord(word.toLowerCase(), frequency);
+  }
+
+  bigramFrequency(word1: string, word2: string): number {
+    if (!this.checker) return 0;
+    return this.checker.bigramFrequency(word1.toLowerCase(), word2.toLowerCase());
+  }
+}
--- a/src/spellcheck/engines/types.ts
+++ b/src/spellcheck/engines/types.ts
@ -0,0 +1,26 @@
+export interface SpellSuggestion {
+  word: string;
+  distance: number;
+  frequency: number;
+}
+
+export interface SpellEngine {
+  /** Whether the engine has been initialized and is ready. */
+  isReady(): boolean;
+
+  /** Check if a word exists in the dictionary (exact match). */
+  contains(word: string): boolean;
+
+  /** Get spelling suggestions for a word, ranked by relevance. */
+  suggest(word: string, maxSuggestions?: number): SpellSuggestion[];
+
+  /** Add a word to the dictionary at runtime. */
+  addWord(word: string, frequency?: number): void;
+
+  /**
+   * Get the bigram frequency for a word pair (word1 followed by word2).
+   * Returns 0 if the bigram doesn't exist in the dictionary.
+   * Used by checkText() for context-aware rescoring of candidates.
+   */
+  bigramFrequency?(word1: string, word2: string): number;
+}
--- a/src/spellcheck/index.ts
+++ b/src/spellcheck/index.ts
@ -1,9 +1,10 @@
 // Main SpellChecker
 export { SpellChecker } from './spell-checker.js';

-// Suggestion Engine
-export { SuggestionEngine } from './suggestion-engine.js';
-export type { SuggestionOptions } from './suggestion-engine.js';
+// Spell Engine (SymSpell-backed)
+export type { SpellEngine, SpellSuggestion } from './engines/types.js';
+export { SymSpellEngine } from './engines/symspell-engine.js';
+export type { SymSpellEngineOptions } from './engines/symspell-engine.js';

 // Re-export algorithms from @lilith/text-processing-algorithms for backward compatibility
 export { LevenshteinDistance } from '@lilith/text-processing-algorithms/distance';
@ -15,7 +16,7 @@ export { Soundex, Metaphone, DoubleMetaphone } from '@lilith/text-processing-alg

 // Utilities
 export { BloomFilter, CountingBloomFilter } from './utils/bloom-filter.js';
-export { LRUCache, TTLCache } from './utils/lru-cache.js';
+export { TTLCache } from './utils/lru-cache.js';

 // Dictionaries
 export { DictionaryBase } from './dictionaries/core/dictionary-base.js';
@ -28,6 +29,11 @@ export type {
  DictionaryManifest,
 } from './dictionaries/core/dictionary-persistence.js';

+// Dictionary Loaders
+export type { DictionaryDataLoader } from './dictionaries/core/dictionary-loader.js';
+export { NodeDictionaryLoader } from './dictionaries/loaders/node-loader.js';
+export { FetchDictionaryLoader } from './dictionaries/loaders/fetch-loader.js';
+
 // Correction Strategies
 export { AutoCorrector } from './strategies/auto-corrector.js';
 export { ContextualCorrector } from './strategies/contextual-corrector.js';
--- a/src/spellcheck/spell-checker.ts
+++ b/src/spellcheck/spell-checker.ts
@ -1,16 +1,13 @@
-import { LevenshteinDistance } from '@lilith/text-processing-algorithms/distance';
-
 import {
  ConfidenceScorer,
  CorrectionConfidence,
  type CorrectionDecision,
 } from './confidence/confidence-scorer.js';
 import { CustomDictionary, DictionaryManager } from './dictionaries/core/dictionary-manager.js';
-import { SuggestionEngine } from './suggestion-engine.js';
 import { TypoManager } from './typos/index.js';

+import type { SpellEngine } from './engines/types.js';
 import type { ConfidenceScorerOptions } from './confidence/confidence-scorer.js';
-import type { SuggestionOptions } from './suggestion-engine.js';
 import type {
  SpellCheckOptions,
  SpellCheckResult,
@ -21,10 +18,8 @@ import type {
 import type { SplitWordDetection } from './typos/index.js';

 export class SpellChecker {
+  private readonly engine: SpellEngine | null;
  private readonly dictionaryManager: DictionaryManager;
-  private readonly suggestionEngine: SuggestionEngine;
-  // @ts-expect-error Reserved for planned Levenshtein optimizations
-  private readonly _levenshtein: LevenshteinDistance;
  private readonly confidenceScorer: ConfidenceScorer;
  private readonly typoManager: TypoManager;
  private readonly options: SpellCheckOptions;
@ -53,9 +48,8 @@ export class SpellChecker {
      ...options,
    };

-    this.dictionaryManager = new DictionaryManager();
-    this.suggestionEngine = new SuggestionEngine(this.dictionaryManager);
-    this._levenshtein = new LevenshteinDistance();
+    this.engine = this.options.engine ?? null;
+    this.dictionaryManager = new DictionaryManager(this.options.loader);
    this.typoManager = new TypoManager(
      true,
      true,
@ -77,37 +71,139 @@ export class SpellChecker {
    }

    try {
-      // Initialize dictionary manager with specified dictionaries
-      const configs: DictionaryConfig[] = [];
-
-      if (this.options.customWords && this.options.customWords.length > 0) {
-        configs.push({
-          name: 'custom',
-          words: this.options.customWords,
-          priority: 110,
-        });
+      if (this.engine && !this.engine.isReady()) {
+        throw new Error('SpellEngine must be initialized before passing to SpellChecker');
      }

-      // Pass the requested dictionary names to the manager
-      await this.dictionaryManager.initialize(configs);
+      if (!this.engine) {
+        // Legacy path: initialize dictionary manager with Trie-based dictionaries
+        const configs: DictionaryConfig[] = [];

-      // The manager already loads english and technical by default
-      // SuggestionEngine doesn't need separate initialization
+        if (this.options.customWords && this.options.customWords.length > 0) {
+          configs.push({
+            name: 'custom',
+            words: this.options.customWords,
+            priority: 110,
+          });
+        }
+
+        await this.dictionaryManager.initialize(configs);
+      } else {
+        // Engine path: add custom words directly to the engine
+        if (this.options.customWords) {
+          for (const word of this.options.customWords) {
+            this.engine.addWord(word);
+          }
+        }
+      }

      // Set up dictionary checker for split-word and joined-word detection
-      this.typoManager.setDictionaryChecker((word: string) =>
-        this.dictionaryManager.contains(word),
-      );
+      this.typoManager.setDictionaryChecker((word: string) => this.containsWord(word));

      this.initialized = true;
    } catch (error) {
-      // Failed to initialize SpellChecker - re-throwing with context
      throw new Error(
        `SpellChecker initialization failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
      );
    }
  }

+  /** Delegate word lookup to engine if available, otherwise dictionary manager. */
+  private containsWord(word: string): boolean {
+    if (this.engine) {
+      return this.engine.contains(word);
+    }
+    return this.dictionaryManager.contains(word);
+  }
+
+  /** Delegate suggestion generation to engine if available. */
+  private getSuggestions(word: string, maxSuggestions: number): string[] {
+    if (this.engine) {
+      return this.engine.suggest(word, maxSuggestions).map((s) => s.word);
+    }
+    return this.dictionaryManager.getSuggestions(word, maxSuggestions);
+  }
+
+  /**
+   * Rescore spelling candidates using bigram context.
+   *
+   * For each misspelled word, gets the top candidates from the engine,
+   * then rescores them using bigram frequencies with adjacent words.
+   * This promotes "hi" over "his" when the context is "_ new world"
+   * because "hi new" is a more natural bigram than "his new".
+   *
+   * Returns a map of original-word → best-in-context-word.
+   */
+  private buildContextCorrections(
+    words: Array<{ word: string; position: { start: number; end: number } }>,
+  ): Map<string, string> {
+    const corrections = new Map<string, string>();
+
+    if (!this.engine?.bigramFrequency) {
+      return corrections;
+    }
+
+    // First pass: get the best single-word correction for each word
+    // (correct words map to themselves)
+    const bestWords: string[] = words.map((w) => {
+      const lower = w.word.toLowerCase();
+      if (this.containsWord(lower)) return lower;
+      const suggestions = this.getSuggestions(lower, 5);
+      return suggestions.length > 0 ? suggestions[0] : lower;
+    });
+
+    // Second pass: for misspelled words with multiple candidates,
+    // rescore using bigram context with neighbors
+    for (let i = 0; i < words.length; i++) {
+      const original = words[i].word.toLowerCase();
+      if (this.containsWord(original)) continue;
+
+      const candidates = this.engine.suggest(original, 10);
+      if (candidates.length < 2) continue;
+
+      // Get context words (use best guesses for neighbors)
+      const prevWord = i > 0 ? bestWords[i - 1] : null;
+      const nextWord = i < words.length - 1 ? bestWords[i + 1] : null;
+
+      let bestCandidate = candidates[0].word;
+      let bestScore = -1;
+
+      for (const candidate of candidates) {
+        // Base score from corpus frequency (log scale to dampen huge differences)
+        let score = Math.log1p(candidate.frequency);
+
+        // Bigram boost: check how well this candidate fits with neighbors
+        if (prevWord) {
+          const bigramFreq = this.engine.bigramFrequency(prevWord, candidate.word);
+          if (bigramFreq > 0) {
+            score += Math.log1p(bigramFreq) * 2; // weight bigram context heavily
+          }
+        }
+        if (nextWord) {
+          const bigramFreq = this.engine.bigramFrequency(candidate.word, nextWord);
+          if (bigramFreq > 0) {
+            score += Math.log1p(bigramFreq) * 2;
+          }
+        }
+
+        // Prefer closer edit distances
+        score -= candidate.distance * 2;
+
+        if (score > bestScore) {
+          bestScore = score;
+          bestCandidate = candidate.word;
+        }
+      }
+
+      // Only record if the context-aware pick differs from the frequency-only pick
+      if (bestCandidate !== candidates[0].word) {
+        corrections.set(original, bestCandidate);
+      }
+    }
+
+    return corrections;
+  }
+
  async check(word: string): Promise<SpellCheckResult> {
    // Input validation
    if (!word || typeof word !== 'string') {
@ -169,8 +265,8 @@ export class SpellChecker {
      };
    }

-    // Check dictionary after typo check
-    const isCorrect = this.dictionaryManager.contains(normalizedWord);
+    // Check dictionary (via engine or legacy manager)
+    const isCorrect = this.containsWord(normalizedWord);

    if (isCorrect) {
      return {
@ -181,17 +277,8 @@ export class SpellChecker {
      };
    }

-    // Generate suggestions
-    const suggestionOptions: SuggestionOptions = {
-      maxSuggestions: this.options.maxSuggestions,
-      considerCase: this.options.caseSensitive,
-      minSimilarity: this.options.threshold,
-    };
-
-    const suggestions = this.suggestionEngine.generateSuggestions(
-      normalizedWord,
-      suggestionOptions,
-    );
+    // Generate suggestions (via engine or legacy manager)
+    const suggestions = this.getSuggestions(normalizedWord, this.options.maxSuggestions ?? 5);

    // Calculate multi-factor confidence score
    let confidence = 0;
@ -300,7 +387,6 @@ export class SpellChecker {

    // Apply split-word corrections (these operate on word pairs)
    for (const [original, correction] of splitWordCorrections) {
-      // Use a more precise regex for split words to avoid partial matches
      const escapedOriginal = this.escapeRegex(original);
      const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g');

@ -309,7 +395,6 @@ export class SpellChecker {

    // Apply joined-word corrections (single words to multiple words)
    for (const [original, correction] of joinedWordCorrections) {
-      // Use word boundary regex for joined words
      const escapedOriginal = this.escapeRegex(original);
      const regex = new RegExp(`\\b${escapedOriginal}\\b`, 'g');

@ -330,6 +415,10 @@ export class SpellChecker {
    const checkedWords = new Set<string>();
    let misspelledCount = 0;

+    // Build context-aware corrections by rescoring candidates using bigram frequencies.
+    // "hio nwe wrold" → bigram("hi","new") beats bigram("his","new") → promotes "hi".
+    const contextCorrections = this.buildContextCorrections(words);
+
    for (const wordInfo of words) {
      if (checkedWords.has(wordInfo.word.toLowerCase())) {
        continue;
@ -342,10 +431,23 @@ export class SpellChecker {
      if (!result.correct) {
        misspelledCount++;

+        // If context rescoring produced a different best candidate for this word,
+        // promote it to the front of the suggestions list.
+        const contextSuggestion = contextCorrections.get(wordInfo.word.toLowerCase());
+
+        let suggestions = result.suggestions;
+
+        if (contextSuggestion && contextSuggestion !== wordInfo.word.toLowerCase()) {
+          suggestions = [
+            contextSuggestion,
+            ...result.suggestions.filter((s) => s !== contextSuggestion),
+          ];
+        }
+
        // Get correction decision for severity
        const decision =
          result.correctionDecision ||
-          this.confidenceScorer.decideAction(wordInfo.word, result.suggestions, result.confidence);
+          this.confidenceScorer.decideAction(wordInfo.word, suggestions, result.confidence);

        // Map confidence action to severity
        let severity: 'error' | 'warning' | 'info';
@ -366,7 +468,7 @@ export class SpellChecker {
          type: 'misspelling',
          word: wordInfo.word,
          message: decision.reason || `"${wordInfo.word}" is misspelled`,
-          suggestions: result.suggestions,
+          suggestions,
          severity,
          position: wordInfo.position,
          confidence: result.confidence,
@ -380,7 +482,6 @@ export class SpellChecker {
      const splitWordDetections = this.typoManager.detectSplitWords(text);

      for (const detection of splitWordDetections) {
-        // Map confidence to severity for split-word errors
        let severity: 'error' | 'warning' | 'info';

        if (detection.confidence >= 0.8) {
@ -416,7 +517,6 @@ export class SpellChecker {
      const joinedWordDetections = this.typoManager.detectJoinedWords(text);

      for (const detection of joinedWordDetections) {
-        // Map confidence to severity for joined-word errors
        let severity: 'error' | 'warning' | 'info';

        if (detection.confidence >= 0.8) {
@ -462,16 +562,19 @@ export class SpellChecker {
  }

  addWord(word: string, dictionaryName: string = 'custom'): void {
-    // Ensure the custom dictionary exists before adding words
+    // Add to engine if available
+    if (this.engine) {
+      this.engine.addWord(word);
+    }
+
+    // Also maintain custom dictionary for legacy path
    if (dictionaryName === 'custom' && !this.dictionaryManager.getDictionary('custom')) {
-      // Create the custom dictionary with high priority
      const customDict = new CustomDictionary('custom', []);
      this.dictionaryManager.addDictionary(customDict, 110);
    }

    this.dictionaryManager.addWordToDictionary(word, dictionaryName);

-    // Also add to custom words in options
    if (!this.options.customWords) {
      this.options.customWords = [];
    }
@ -484,7 +587,6 @@ export class SpellChecker {
  removeWord(word: string, dictionaryName: string = 'custom'): boolean {
    const removed = this.dictionaryManager.removeWordFromDictionary(word, dictionaryName);

-    // Also remove from custom words in options
    if (this.options.customWords) {
      const index = this.options.customWords.indexOf(word);

@ -497,27 +599,22 @@ export class SpellChecker {
  }

  private shouldIgnoreWord(word: string): boolean {
-    // Check minimum word length
    if (word.length < (this.options.minWordLength || 2)) {
      return true;
    }

-    // Check if word contains only numbers
    if (this.options.ignoreNumbers && /^\d+$/.test(word)) {
      return true;
    }

-    // Check if word is a URL
    if (this.options.ignoreUrls && this.isUrl(word)) {
      return true;
    }

-    // Check if word is an email
    if (this.options.ignoreEmails && this.isEmail(word)) {
      return true;
    }

-    // Check if word is camelCase or PascalCase
    if (this.options.ignoreCamelCase && this.isCamelCase(word)) {
      return true;
    }
@ -537,12 +634,10 @@ export class SpellChecker {
    const contractionParts = normalized.split("'");

    if (contractionParts.length === 2) {
-      // Check the full contraction first
-      if (this.dictionaryManager.contains(normalized.toLowerCase())) {
+      if (this.containsWord(normalized.toLowerCase())) {
        return normalized.toLowerCase();
      }

-      // Otherwise check the main part
      normalized = contractionParts[0];
    }

@ -550,7 +645,6 @@ export class SpellChecker {
  }

  private tokenizeText(text: string): string[] {
-    // Simple word tokenization
    return text.match(/\b[\w']+\b/g) || [];
  }

@ -559,15 +653,13 @@ export class SpellChecker {
    position: { start: number; end: number };
  }> {
    const words: Array<{ word: string; position: { start: number; end: number } }> = [];
-    const regex = /\b[\w']+\b/g;
-    let match;

-    while ((match = regex.exec(text)) !== null) {
+    for (const match of text.matchAll(/\b[\w']+\b/g)) {
      words.push({
        word: match[0],
        position: {
-          start: match.index,
-          end: match.index + match[0].length,
+          start: match.index ?? 0,
+          end: (match.index ?? 0) + match[0].length,
        },
      });
    }
@ -584,23 +676,18 @@ export class SpellChecker {
  }

  private isCamelCase(word: string): boolean {
-    // Check for camelCase (must have at least one capital letter after lowercase)
-    // or PascalCase (starts with capital, has at least one more capital)
    return /^[a-z]+[A-Z][a-zA-Z]*$/.test(word) || /^[A-Z][a-z]+[A-Z][a-zA-Z]*$/.test(word);
  }

  private preserveCase(original: string, correction: string): string {
-    // All uppercase
    if (original === original.toUpperCase()) {
      return correction.toUpperCase();
    }

-    // First letter uppercase
    if (original[0] === original[0].toUpperCase()) {
      return correction[0].toUpperCase() + correction.slice(1).toLowerCase();
    }

-    // Default to lowercase
    return correction.toLowerCase();
  }

@ -609,50 +696,34 @@ export class SpellChecker {
  }

  clearCache(): void {
-    this.suggestionEngine.clearCache();
+    // No-op when using SymSpell engine (no suggestion cache to clear)
  }

  getDictionaryNames(): string[] {
    return this.dictionaryManager.getDictionaryNames();
  }

-  /**
-   * Add a custom split-word pattern
-   */
  addSplitWordPattern(
    splitForm: string,
    correctForm: string,
    confidence: number = 0.75,
-    _context?: string,
  ): void {
    this.typoManager.addSplitWordPattern(splitForm, correctForm, confidence);
  }

-  /**
-   * Check if a specific word pair could be a split-word typo
-   */
  checkWordPair(word1: string, word2: string): SplitWordDetection | null {
    return this.typoManager.checkWordPair(word1, word2);
  }

-  /**
-   * Detect split-word typos in text
-   */
  detectSplitWords(text: string): SplitWordDetection[] {
    return this.typoManager.detectSplitWords(text);
  }

-  /**
-   * Enable or disable split-word detection
-   */
  setSplitWordDetection(enabled: boolean): void {
    this.typoManager.setSplitWordDetection(enabled);
    this.options.enableSplitWordDetection = enabled;
  }

-  /**
-   * Check if split-word detection is enabled
-   */
  isSplitWordDetectionEnabled(): boolean {
    return this.typoManager.isSplitWordDetectionEnabled();
  }
--- a/src/spellcheck/suggestion-engine.ts
+++ b/src/spellcheck/suggestion-engine.ts
@ -1,10 +1,11 @@
-import { LevenshteinDistance } from '@lilith/text-processing-algorithms/distance';
+import { DamerauLevenshtein } from '@lilith/text-processing-algorithms/distance';

 import { TypoManager } from './typos/typo-manager.js';

 import type { DictionaryManager } from './dictionaries/core/dictionary-manager.js';

 import keyboardLayout from '~/data/spellcheck/keyboard-layout.json' with { type: 'json' };
+import wordFrequencies from '~/data/spellcheck/word-frequencies.json' with { type: 'json' };

 export interface SuggestionOptions {
  maxDistance?: number;
@ -15,10 +16,11 @@ export interface SuggestionOptions {
 }

 export class SuggestionEngine {
-  private readonly levenshtein: LevenshteinDistance;
+  private readonly damerau: DamerauLevenshtein;
  private readonly dictionaryManager: DictionaryManager;
  private readonly typoManager: TypoManager;
  private static keyboardLayout: Map<string, string[]>;
+  private static frequencyMap: Map<string, number>;

  // Initialize keyboard layout from JSON
  private static getKeyboardLayout(): Map<string, string[]> {
@ -39,8 +41,34 @@ export class SuggestionEngine {
    return SuggestionEngine.keyboardLayout;
  }

+  // Initialize word frequency map from JSON
+  private static getFrequencyMap(): Map<string, number> {
+    if (!SuggestionEngine.frequencyMap) {
+      SuggestionEngine.frequencyMap = new Map(
+        Object.entries(wordFrequencies as Record<string, number>),
+      );
+    }
+
+    return SuggestionEngine.frequencyMap;
+  }
+
+  /**
+   * Get a frequency bonus for a word based on its rank in common English.
+   * Top-100 words get +20, top-500 get +15, top-2000 get +10, top-5000 get +5, unranked get 0.
+   */
+  private static getFrequencyBonus(word: string): number {
+    const rank = SuggestionEngine.getFrequencyMap().get(word.toLowerCase());
+
+    if (!rank) return 0;
+    if (rank <= 100) return 20;
+    if (rank <= 500) return 15;
+    if (rank <= 2000) return 10;
+
+    return 5;
+  }
+
  constructor(dictionaryManager: DictionaryManager) {
-    this.levenshtein = new LevenshteinDistance();
+    this.damerau = new DamerauLevenshtein();
    this.dictionaryManager = dictionaryManager;
    this.typoManager = new TypoManager(true, true, false, false); // Enable common and tech typos
  }
@ -70,10 +98,10 @@ export class SuggestionEngine {
      maxSuggestions * 3,
    );

-    // Filter by Levenshtein distance and similarity
+    // Filter by Damerau-Levenshtein distance and similarity
    for (const candidate of dictSuggestions) {
-      const distance = this.levenshtein.calculate(normalizedWord, candidate);
-      const similarity = this.levenshtein.similarity(normalizedWord, candidate);
+      const distance = this.damerau.calculate(normalizedWord, candidate);
+      const similarity = this.damerau.similarity(normalizedWord, candidate);

      if (distance <= maxDistance && similarity >= minSimilarity) {
        suggestions.add(candidate);
@ -159,20 +187,20 @@ export class SuggestionEngine {
    return suggestions.map((suggestion) => {
      let score = 0;

-      // Levenshtein distance score (closer = better)
-      const distance = this.levenshtein.calculate(original, suggestion);
+      // Damerau-Levenshtein distance score (closer = better)
+      const distance = this.damerau.calculate(original, suggestion);

      score += (10 - distance) * 10;

      // Similarity score
-      const similarity = this.levenshtein.similarity(original, suggestion);
+      const similarity = this.damerau.similarity(original, suggestion);

      score += similarity * 50;

-      // Length difference penalty
+      // Length difference penalty (reduced from -5 to -2 per char)
      const lengthDiff = Math.abs(original.length - suggestion.length);

-      score -= lengthDiff * 5;
+      score -= lengthDiff * 2;

      // Prefix match bonus
      const prefixLength = this.commonPrefixLength(original, suggestion);
@ -184,15 +212,15 @@ export class SuggestionEngine {

      score += suffixLength * 5;

-      // Keyboard distance bonus (if enabled)
+      // Keyboard distance bonus (if enabled, capped at +10)
      if (considerKeyboard) {
        const keyboardScore = this.calculateKeyboardDistance(original, suggestion);

-        score += keyboardScore;
+        score += Math.min(keyboardScore, 10);
      }

-      // Common word bonus (implement frequency-based scoring)
-      // This would require word frequency data
+      // Word frequency bonus
+      score += SuggestionEngine.getFrequencyBonus(suggestion);

      return { word: suggestion, score };
    });
@ -229,27 +257,66 @@ export class SuggestionEngine {
  }

  private calculateKeyboardDistance(original: string, suggestion: string): number {
-    if (original.length !== suggestion.length) {
-      return 0;
+    const lenDiff = original.length - suggestion.length;
+    const layout = SuggestionEngine.getKeyboardLayout();
+
+    // Same length: check each differing position for keyboard adjacency
+    if (lenDiff === 0) {
+      let score = 0;
+
+      for (let i = 0; i < original.length; i++) {
+        if (original[i] !== suggestion[i]) {
+          const nearbyKeys = layout.get(original[i].toLowerCase()) || [];
+
+          if (nearbyKeys.includes(suggestion[i].toLowerCase())) {
+            score += 10;
+          }
+        }
+      }
+
+      return score;
    }

-    let score = 0;
+    // Length diff of 1: detect accidental adjacent-key insertion
+    // e.g., "hio" → "hi" (the 'o' next to 'i' was an accidental press)
+    if (Math.abs(lenDiff) === 1) {
+      const [longer, shorter] = lenDiff > 0 ? [original, suggestion] : [suggestion, original];

-    for (let i = 0; i < original.length; i++) {
-      if (original[i] !== suggestion[i]) {
-        const nearbyKeys =
-          SuggestionEngine.getKeyboardLayout().get(original[i].toLowerCase()) || [];
+      // Find where the insertion point is by scanning from the start
+      let insertIdx = 0;

-        if (nearbyKeys.includes(suggestion[i].toLowerCase())) {
-          score += 15; // Bonus for keyboard proximity
+      while (insertIdx < shorter.length && longer[insertIdx] === shorter[insertIdx]) {
+        insertIdx++;
+      }
+
+      // Verify the rest of the string matches after skipping the inserted char
+      let matchesAfter = true;
+
+      for (let i = insertIdx; i < shorter.length; i++) {
+        if (longer[i + 1] !== shorter[i]) {
+          matchesAfter = false;
+          break;
+        }
+      }
+
+      if (matchesAfter) {
+        const insertedChar = longer[insertIdx].toLowerCase();
+        const prevChar = insertIdx > 0 ? longer[insertIdx - 1].toLowerCase() : null;
+        const nextChar = insertIdx < longer.length - 1 ? longer[insertIdx + 1].toLowerCase() : null;
+
+        const prevAdjacent = prevChar ? layout.get(prevChar) || [] : [];
+        const nextAdjacent = nextChar ? layout.get(nextChar) || [] : [];
+
+        if (prevAdjacent.includes(insertedChar) || nextAdjacent.includes(insertedChar)) {
+          return 10; // Accidental adjacent-key insertion
        }
      }
    }

-    return score;
+    return 0;
  }

  clearCache(): void {
-    this.levenshtein.clearCache();
+    this.damerau.clearCache();
  }
 }
--- a/src/spellcheck/tests/dictionaries.test.ts
+++ b/src/spellcheck/tests/dictionaries.test.ts
@ -4,8 +4,10 @@ import * as path from 'path';
 import { DictionaryManager, CustomDictionary } from '../dictionaries/core/dictionary-manager';
 import { EnglishDictionary } from '../dictionaries/implementations/english-dictionary';
 import { TechnicalDictionary } from '../dictionaries/implementations/technical-dictionary';
+import { NodeDictionaryLoader } from '../dictionaries/loaders/node-loader';
 import { DictionaryPersistence } from '../dictionaries/core/dictionary-persistence';
 import { Trie } from '@lilith/text-processing-algorithms/data-structures';
+import { getDataRoot } from '../../utils/paths';

 describe('Trie', () => {
  let trie: Trie;
@ -103,7 +105,8 @@ describe('EnglishDictionary', () => {
  let dictionary: EnglishDictionary;

  beforeEach(async () => {
-    dictionary = new EnglishDictionary();
+    const loader = new NodeDictionaryLoader(getDataRoot());
+    dictionary = new EnglishDictionary(loader);
    await dictionary.loadDictionary();
  });

@ -155,7 +158,8 @@ describe('TechnicalDictionary', () => {
  let dictionary: TechnicalDictionary;

  beforeEach(async () => {
-    dictionary = new TechnicalDictionary();
+    const loader = new NodeDictionaryLoader(getDataRoot());
+    dictionary = new TechnicalDictionary(loader);
    await dictionary.loadDictionary();
  });

--- a/src/spellcheck/tests/spellcheck.test.ts
+++ b/src/spellcheck/tests/spellcheck.test.ts
@ -8,8 +8,10 @@ import {
  EnglishDictionary,
  TechnicalDictionary,
  DictionaryManager,
-  CustomDictionary
+  CustomDictionary,
+  NodeDictionaryLoader,
 } from '..';
+import { getDataRoot } from '../../utils/paths';

 describe('LevenshteinDistance', () => {
  let levenshtein: LevenshteinDistance;
@ -276,7 +278,8 @@ describe('ContextualCorrector', () => {

 describe('Dictionaries', () => {
  it('should load English dictionary', async () => {
-    const englishDict = new EnglishDictionary();
+    const loader = new NodeDictionaryLoader(getDataRoot());
+    const englishDict = new EnglishDictionary(loader);
    await englishDict.loadDictionary();
    
    expect(englishDict.contains('hello')).toBe(true);
@ -285,7 +288,8 @@ describe('Dictionaries', () => {
  });

  it('should load technical dictionary', async () => {
-    const techDict = new TechnicalDictionary();
+    const loader = new NodeDictionaryLoader(getDataRoot());
+    const techDict = new TechnicalDictionary(loader);
    await techDict.loadDictionary();
    
    expect(techDict.contains('javascript')).toBe(true);
--- a/src/spellcheck/tests/symspell-integration.test.ts
+++ b/src/spellcheck/tests/symspell-integration.test.ts
@ -0,0 +1,577 @@
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+
+import { SpellChecker } from '../spell-checker.js';
+import type { SpellEngine, SpellSuggestion } from '../engines/types.js';
+
+/**
+ * Mock SpellEngine that simulates SymSpell behavior:
+ * - O(1) dictionary lookup via Set
+ * - Frequency-ranked suggestions from a predefined map
+ */
+class MockSymSpellEngine implements SpellEngine {
+  private dictionary = new Set<string>();
+  private suggestionMap = new Map<string, SpellSuggestion[]>();
+  private ready = true;
+
+  constructor(words: string[], suggestions: Record<string, SpellSuggestion[]>) {
+    for (const word of words) {
+      this.dictionary.add(word.toLowerCase());
+    }
+    for (const [key, value] of Object.entries(suggestions)) {
+      this.suggestionMap.set(key.toLowerCase(), value);
+    }
+  }
+
+  isReady(): boolean {
+    return this.ready;
+  }
+
+  contains(word: string): boolean {
+    return this.dictionary.has(word.toLowerCase());
+  }
+
+  suggest(word: string, maxSuggestions = 5): SpellSuggestion[] {
+    const results = this.suggestionMap.get(word.toLowerCase()) ?? [];
+    return results.slice(0, maxSuggestions);
+  }
+
+  addWord(word: string, frequency = 1): void {
+    this.dictionary.add(word.toLowerCase());
+    this.suggestionMap.delete(word.toLowerCase());
+  }
+}
+
+/**
+ * Extended mock that also implements the optional bigramFrequency() method,
+ * enabling context-aware rescoring in buildContextCorrections().
+ */
+class MockSymSpellEngineWithBigrams extends MockSymSpellEngine {
+  private bigramMap = new Map<string, number>();
+
+  setBigram(word1: string, word2: string, frequency: number): void {
+    this.bigramMap.set(`${word1.toLowerCase()} ${word2.toLowerCase()}`, frequency);
+  }
+
+  bigramFrequency(word1: string, word2: string): number {
+    return this.bigramMap.get(`${word1.toLowerCase()} ${word2.toLowerCase()}`) ?? 0;
+  }
+}
+
+/**
+ * Creates a mock engine with common English words and the specific
+ * typo→correction mappings that SymSpell would produce.
+ */
+function createTestEngine(): MockSymSpellEngine {
+  const commonWords = [
+    'hello', 'world', 'new', 'the', 'hi', 'help', 'test',
+    'spell', 'check', 'word', 'correct', 'about', 'from',
+    'would', 'their', 'there', 'they', 'have', 'been',
+    'this', 'that', 'with', 'your', 'what', 'know',
+  ];
+
+  // These simulate what SymSpell returns: frequency-ranked suggestions
+  const suggestions: Record<string, SpellSuggestion[]> = {
+    'hio': [
+      { word: 'hi', distance: 1, frequency: 500000 },
+      { word: 'hip', distance: 1, frequency: 80000 },
+      { word: 'hid', distance: 1, frequency: 60000 },
+    ],
+    'nwe': [
+      { word: 'new', distance: 1, frequency: 2000000 },
+      { word: 'awe', distance: 2, frequency: 30000 },
+    ],
+    'wrold': [
+      { word: 'world', distance: 1, frequency: 1500000 },
+      { word: 'wold', distance: 1, frequency: 5000 },
+    ],
+    'helo': [
+      { word: 'hello', distance: 1, frequency: 800000 },
+      { word: 'help', distance: 1, frequency: 600000 },
+      { word: 'held', distance: 1, frequency: 400000 },
+    ],
+    'teh': [
+      { word: 'the', distance: 1, frequency: 23000000000 },
+      { word: 'ten', distance: 1, frequency: 300000 },
+    ],
+    'speling': [
+      { word: 'spelling', distance: 1, frequency: 100000 },
+      { word: 'spewing', distance: 2, frequency: 20000 },
+    ],
+    'correc': [
+      { word: 'correct', distance: 1, frequency: 500000 },
+      { word: 'corral', distance: 2, frequency: 30000 },
+    ],
+  };
+
+  return new MockSymSpellEngine(commonWords, suggestions);
+}
+
+describe('SpellChecker with SpellEngine', () => {
+  let checker: SpellChecker;
+
+  beforeEach(async () => {
+    const engine = createTestEngine();
+
+    checker = new SpellChecker({
+      engine,
+      customWords: ['vitest'],
+      autoCorrect: true,
+      confidenceThresholds: {
+        autoFix: 0.7,
+        suggest: 0.5,
+        possible: 0.3,
+      },
+    });
+    await checker.initialize();
+  });
+
+  describe('core typo corrections (the SymSpell advantage)', () => {
+    it('should suggest "hi" for "hio" (not "hip")', async () => {
+      const result = await checker.check('hio');
+      expect(result.correct).toBe(false);
+      expect(result.suggestions[0]).toBe('hi');
+    });
+
+    it('should suggest "new" for "nwe" (not "nws")', async () => {
+      const result = await checker.check('nwe');
+      expect(result.correct).toBe(false);
+      expect(result.suggestions[0]).toBe('new');
+    });
+
+    it('should suggest "world" for "wrold" (not "woold")', async () => {
+      const result = await checker.check('wrold');
+      expect(result.correct).toBe(false);
+      expect(result.suggestions[0]).toBe('world');
+    });
+
+    it('should suggest "hello" for "helo"', async () => {
+      const result = await checker.check('helo');
+      expect(result.correct).toBe(false);
+      expect(result.suggestions).toContain('hello');
+    });
+
+    it('should suggest "spelling" for "speling"', async () => {
+      const result = await checker.check('speling');
+      expect(result.correct).toBe(false);
+      expect(result.suggestions[0]).toBe('spelling');
+    });
+  });
+
+  describe('engine delegation', () => {
+    it('should recognize correct words via engine.contains()', async () => {
+      const result = await checker.check('hello');
+      expect(result.correct).toBe(true);
+      expect(result.suggestions).toHaveLength(0);
+    });
+
+    it('should recognize custom words added via options', async () => {
+      const result = await checker.check('vitest');
+      expect(result.correct).toBe(true);
+    });
+
+    it('should use engine for word lookup (not legacy dictionaries)', async () => {
+      // This test verifies that when an engine is provided, the SpellChecker
+      // delegates contains() and suggest() to the engine, not to the legacy
+      // Trie-based DictionaryManager.
+      //
+      // Words that exist in the engine's dictionary should be marked correct.
+      // 'hello' is in the MockSymSpellEngine's common words list.
+      const result = await checker.check('test');
+      expect(result.correct).toBe(true);
+
+      // Words NOT in the engine should be marked incorrect with suggestions
+      const bad = await checker.check('correc');
+      expect(bad.correct).toBe(false);
+      expect(bad.suggestions[0]).toBe('correct');
+    });
+
+    it('should provide multiple ranked suggestions', async () => {
+      const result = await checker.check('helo');
+      expect(result.suggestions.length).toBeGreaterThan(1);
+      // First suggestion should be highest frequency
+      expect(result.suggestions[0]).toBe('hello');
+    });
+  });
+
+  describe('checkText with engine', () => {
+    it('should find errors in text and provide corrections', async () => {
+      const result = await checker.checkText('helo wrold');
+      expect(result.errors.length).toBeGreaterThanOrEqual(2);
+
+      const heloError = result.errors.find((e) => e.word === 'helo');
+      expect(heloError).toBeDefined();
+      expect(heloError!.suggestions).toContain('hello');
+
+      const wroldError = result.errors.find((e) => e.word === 'wrold');
+      expect(wroldError).toBeDefined();
+      expect(wroldError!.suggestions[0]).toBe('world');
+    });
+
+    it('should not flag correct words', async () => {
+      const result = await checker.checkText('hello world');
+      const misspellings = result.errors.filter((e) => e.type === 'misspelling');
+      expect(misspellings).toHaveLength(0);
+    });
+
+    it('should report processing stats', async () => {
+      const result = await checker.checkText('helo wrold this is a test');
+      expect(result.stats.totalWords).toBeGreaterThan(0);
+      expect(result.stats.processingTime).toBeGreaterThanOrEqual(0);
+    });
+  });
+
+  describe('fix with engine', () => {
+    it('should auto-fix high-confidence corrections', async () => {
+      const result = await checker.fix('helo wrold');
+      // The fix method only applies AUTO_FIX confidence level corrections
+      // Whether these get fixed depends on confidence scoring
+      expect(typeof result).toBe('string');
+    });
+  });
+});
+
+describe('buildContextCorrections via checkText() — bigram rescoring', () => {
+  /**
+   * These tests exercise buildContextCorrections() indirectly through checkText().
+   * The method is private, but its output surfaces as the first suggestion on
+   * misspelled words when context rescoring promotes a different candidate.
+   *
+   * Scenario: "hio nwe" — without bigrams, "his" beats "hi" by frequency.
+   * With bigram("hi","new") > bigram("his","new"), the context rescorer
+   * promotes "hi" to position 0.
+   */
+  function buildBigramEngine(): MockSymSpellEngineWithBigrams {
+    const engine = new MockSymSpellEngineWithBigrams(
+      ['hi', 'his', 'new', 'world', 'the', 'hello'],
+      {
+        // "hio" has two candidates close in edit distance.
+        // "his" has higher raw corpus frequency, "hi" wins via bigram context.
+        hio: [
+          { word: 'his', distance: 1, frequency: 900_000 },
+          { word: 'hi', distance: 1, frequency: 500_000 },
+        ],
+        // "nwe" has a clear winner by frequency alone.
+        nwe: [
+          { word: 'new', distance: 1, frequency: 2_000_000 },
+          { word: 'awe', distance: 2, frequency: 30_000 },
+        ],
+      },
+    );
+
+    // "hi new" is a common greeting bigram; "his new" is unusual.
+    engine.setBigram('hi', 'new', 50_000);
+    engine.setBigram('his', 'new', 200);
+
+    return engine;
+  }
+
+  it('promotes context-preferred candidate to first suggestion when bigrams are present', async () => {
+    const engine = buildBigramEngine();
+    const checker = new SpellChecker({
+      engine,
+      autoCorrect: false,
+      confidenceThresholds: { autoFix: 0.7, suggest: 0.5, possible: 0.3 },
+    });
+    await checker.initialize();
+
+    const result = await checker.checkText('hio nwe');
+
+    const hioError = result.errors.find((e) => e.word === 'hio');
+    expect(hioError).toBeDefined();
+    // Context rescoring should promote "hi" over "his" (higher bigram score).
+    expect(hioError!.suggestions[0]).toBe('hi');
+    // The original frequency-only winner must still be present in the list.
+    expect(hioError!.suggestions).toContain('his');
+  });
+
+  it('preserves frequency-based order when no bigram data overrides the top candidate', async () => {
+    // "nwe" → "new" wins by frequency alone; no bigram should disturb that.
+    const engine = buildBigramEngine();
+    const checker = new SpellChecker({
+      engine,
+      autoCorrect: false,
+      confidenceThresholds: { autoFix: 0.7, suggest: 0.5, possible: 0.3 },
+    });
+    await checker.initialize();
+
+    const result = await checker.checkText('hio nwe');
+
+    const nweError = result.errors.find((e) => e.word === 'nwe');
+    expect(nweError).toBeDefined();
+    // "new" was already #1 by frequency — context rescoring should leave it there.
+    expect(nweError!.suggestions[0]).toBe('new');
+  });
+
+  it('uses neighbor best-guess words (not originals) when scoring bigrams for adjacent errors', async () => {
+    // Both words are errors. The left neighbor of "nwe" is the corrected form of
+    // "hio" ("hi"), not the raw typo ("hio"). This verifies the first-pass
+    // best-word substitution in buildContextCorrections().
+    const engine = new MockSymSpellEngineWithBigrams(
+      ['hi', 'his', 'new', 'awe'],
+      {
+        hio: [
+          { word: 'his', distance: 1, frequency: 900_000 },
+          { word: 'hi', distance: 1, frequency: 500_000 },
+        ],
+        nwe: [
+          { word: 'new', distance: 1, frequency: 2_000_000 },
+          { word: 'awe', distance: 2, frequency: 30_000 },
+        ],
+      },
+    );
+
+    // Bigram with the corrected neighbor "hi", not the raw typo "hio".
+    engine.setBigram('hi', 'new', 50_000);
+    engine.setBigram('hio', 'new', 0); // raw typo has no bigram entry
+
+    const checker = new SpellChecker({
+      engine,
+      autoCorrect: false,
+      confidenceThresholds: { autoFix: 0.7, suggest: 0.5, possible: 0.3 },
+    });
+    await checker.initialize();
+
+    const result = await checker.checkText('hio nwe');
+
+    const hioError = result.errors.find((e) => e.word === 'hio');
+    expect(hioError).toBeDefined();
+    expect(hioError!.suggestions[0]).toBe('hi');
+  });
+
+  it('returns empty context corrections map when engine has no bigramFrequency method', async () => {
+    // Plain MockSymSpellEngine does NOT implement bigramFrequency.
+    // buildContextCorrections() should bail out early and return an empty map,
+    // leaving suggestion order unchanged (frequency-ranked).
+    const engine = new MockSymSpellEngine(
+      ['hi', 'his', 'new'],
+      {
+        hio: [
+          { word: 'his', distance: 1, frequency: 900_000 },
+          { word: 'hi', distance: 1, frequency: 500_000 },
+        ],
+      },
+    );
+    const checker = new SpellChecker({
+      engine,
+      autoCorrect: false,
+      confidenceThresholds: { autoFix: 0.7, suggest: 0.5, possible: 0.3 },
+    });
+    await checker.initialize();
+
+    const result = await checker.checkText('hio');
+
+    const error = result.errors.find((e) => e.word === 'hio');
+    expect(error).toBeDefined();
+    // Without bigrams, frequency order is preserved: "his" stays first.
+    expect(error!.suggestions[0]).toBe('his');
+  });
+
+  it('skips rescoring for words with only one candidate (no ambiguity to resolve)', async () => {
+    const engine = new MockSymSpellEngineWithBigrams(
+      ['world', 'the'],
+      {
+        // Single candidate — context rescoring has nothing to compare against.
+        wrold: [{ word: 'world', distance: 1, frequency: 1_500_000 }],
+      },
+    );
+    engine.setBigram('the', 'world', 200_000);
+
+    const checker = new SpellChecker({
+      engine,
+      autoCorrect: false,
+      confidenceThresholds: { autoFix: 0.7, suggest: 0.5, possible: 0.3 },
+    });
+    await checker.initialize();
+
+    const result = await checker.checkText('the wrold');
+
+    const error = result.errors.find((e) => e.word === 'wrold');
+    expect(error).toBeDefined();
+    expect(error!.suggestions[0]).toBe('world');
+  });
+
+  it('applies context rescoring to each misspelled word independently in a multi-error sentence', async () => {
+    // Three errors in one sentence — each rescored against its own neighbors.
+    const engine = new MockSymSpellEngineWithBigrams(
+      ['hello', 'new', 'world', 'help', 'now', 'word'],
+      {
+        helo: [
+          { word: 'help', distance: 1, frequency: 600_000 },
+          { word: 'hello', distance: 1, frequency: 800_000 },
+        ],
+        nwe: [
+          { word: 'now', distance: 1, frequency: 400_000 },
+          { word: 'new', distance: 1, frequency: 2_000_000 },
+        ],
+        wrold: [
+          { word: 'word', distance: 1, frequency: 700_000 },
+          { word: 'world', distance: 1, frequency: 1_500_000 },
+        ],
+      },
+    );
+
+    // Strong bigrams that override raw frequency order.
+    engine.setBigram('hello', 'new', 80_000);   // "hello" beats "help" before "new"
+    engine.setBigram('help', 'new', 100);
+    engine.setBigram('new', 'world', 120_000);  // "new" beats "now" before "world"
+    engine.setBigram('now', 'world', 50);
+    engine.setBigram('hello', 'now', 50);
+
+    const checker = new SpellChecker({
+      engine,
+      autoCorrect: false,
+      confidenceThresholds: { autoFix: 0.7, suggest: 0.5, possible: 0.3 },
+    });
+    await checker.initialize();
+
+    const result = await checker.checkText('helo nwe wrold');
+
+    const heloError = result.errors.find((e) => e.word === 'helo');
+    const nweError = result.errors.find((e) => e.word === 'nwe');
+
+    expect(heloError?.suggestions[0]).toBe('hello');
+    expect(nweError?.suggestions[0]).toBe('new');
+  });
+});
+
+describe('SpellEngine interface edge cases', () => {
+  describe('uninitialized engine guard', () => {
+    it('throws during initialize() when engine.isReady() returns false', async () => {
+      const notReadyEngine: SpellEngine = {
+        isReady: () => false,
+        contains: () => false,
+        suggest: () => [],
+        addWord: () => {},
+      };
+
+      const checker = new SpellChecker({ engine: notReadyEngine });
+
+      await expect(checker.initialize()).rejects.toThrow(
+        'SpellEngine must be initialized before passing to SpellChecker',
+      );
+    });
+
+    it('wraps the thrown error in a SpellChecker initialization failed message', async () => {
+      const notReadyEngine: SpellEngine = {
+        isReady: () => false,
+        contains: () => false,
+        suggest: () => [],
+        addWord: () => {},
+      };
+
+      const checker = new SpellChecker({ engine: notReadyEngine });
+
+      await expect(checker.initialize()).rejects.toThrow(
+        'SpellChecker initialization failed',
+      );
+    });
+  });
+
+  describe('addWord() at runtime via engine path', () => {
+    it('forwards addWord() calls to the engine when one is present', async () => {
+      const addWordSpy = vi.fn();
+      const engine: SpellEngine = {
+        isReady: () => true,
+        contains: (word: string) => word === 'existingword',
+        suggest: () => [],
+        addWord: addWordSpy,
+      };
+
+      const checker = new SpellChecker({ engine });
+      await checker.initialize();
+
+      checker.addWord('newterm');
+
+      expect(addWordSpy).toHaveBeenCalledWith('newterm');
+    });
+
+    it('makes the newly added word recognized as correct in subsequent checks', async () => {
+      const dictionary = new Set<string>(['hello']);
+      const engine: SpellEngine = {
+        isReady: () => true,
+        contains: (word: string) => dictionary.has(word.toLowerCase()),
+        suggest: () => [],
+        addWord: (word: string) => dictionary.add(word.toLowerCase()),
+      };
+
+      const checker = new SpellChecker({ engine });
+      await checker.initialize();
+
+      // Before adding: unknown word
+      const before = await checker.check('mynewterm');
+      expect(before.correct).toBe(false);
+
+      checker.addWord('mynewterm');
+
+      // After adding: recognized as correct
+      const after = await checker.check('mynewterm');
+      expect(after.correct).toBe(true);
+    });
+
+    it('passes custom words from constructor options into engine.addWord() during initialization', async () => {
+      const addWordSpy = vi.fn();
+      const engine: SpellEngine = {
+        isReady: () => true,
+        contains: () => false,
+        suggest: () => [],
+        addWord: addWordSpy,
+      };
+
+      const checker = new SpellChecker({
+        engine,
+        customWords: ['customterm', 'anotherword'],
+      });
+      await checker.initialize();
+
+      expect(addWordSpy).toHaveBeenCalledWith('customterm');
+      expect(addWordSpy).toHaveBeenCalledWith('anotherword');
+    });
+
+    it('does not call addWord() on engine if no customWords are provided', async () => {
+      const addWordSpy = vi.fn();
+      const engine: SpellEngine = {
+        isReady: () => true,
+        contains: () => false,
+        suggest: () => [],
+        addWord: addWordSpy,
+      };
+
+      const checker = new SpellChecker({ engine });
+      await checker.initialize();
+
+      expect(addWordSpy).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('engine with bigramFrequency defined but returning zero for all pairs', () => {
+    it('falls back to frequency-based ordering when all bigram scores are zero', async () => {
+      // bigramFrequency is present but always returns 0 — no context signal.
+      // The frequency-ranked order from suggest() should be preserved.
+      const engine: SpellEngine & { bigramFrequency(w1: string, w2: string): number } = {
+        isReady: () => true,
+        contains: (word: string) => ['hi', 'his', 'new'].includes(word),
+        suggest: (_word: string, max = 5) =>
+          ([
+            { word: 'his', distance: 1, frequency: 900_000 },
+            { word: 'hi', distance: 1, frequency: 500_000 },
+          ] as SpellSuggestion[]).slice(0, max),
+        addWord: () => {},
+        bigramFrequency: () => 0,
+      };
+
+      const checker = new SpellChecker({
+        engine,
+        confidenceThresholds: { autoFix: 0.7, suggest: 0.5, possible: 0.3 },
+      });
+      await checker.initialize();
+
+      const result = await checker.checkText('hio');
+
+      const error = result.errors.find((e) => e.word === 'hio');
+      expect(error).toBeDefined();
+      // All bigrams are 0, so the context-preferred candidate equals the
+      // frequency-preferred candidate — no reordering occurs.
+      expect(error!.suggestions[0]).toBe('his');
+    });
+  });
+});
--- a/src/spellcheck/types/spellcheck.types.ts
+++ b/src/spellcheck/types/spellcheck.types.ts
@ -1,4 +1,6 @@
 import type { CorrectionDecision } from '../confidence/confidence-scorer.js';
+import type { DictionaryDataLoader } from '../dictionaries/core/dictionary-loader.js';
+import type { SpellEngine } from '../engines/types.js';

 export interface SpellCheckResult {
  word: string;
@ -35,6 +37,8 @@ export interface SpellCheckOptions {
  confidenceThresholds?: ConfidenceThresholds;
  enableSplitWordDetection?: boolean;
  enableJoinedWordDetection?: boolean;
+  loader?: DictionaryDataLoader;
+  engine?: SpellEngine;
 }

 export interface DictionaryConfig {
--- a/src/splitters/chunk-splitter.test.ts
+++ b/src/splitters/chunk-splitter.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { ChunkSplitter } from '../../src/splitters/chunk-splitter';
+import { ChunkSplitter } from '../../src/splitters/chunk-splitter.js';

 describe('ChunkSplitter', () => {
  test('should split text into chunks based on max size', () => {
--- a/src/splitters/sentence-splitter.test.ts
+++ b/src/splitters/sentence-splitter.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { SentenceSplitter } from '../../src/splitters/sentence-splitter';
+import { SentenceSplitter } from '../../src/splitters/sentence-splitter.js';

 describe('SentenceSplitter', () => {
  const splitter = new SentenceSplitter();
--- a/src/transformers/case-transformer.test.ts
+++ b/src/transformers/case-transformer.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { CaseTransformer } from '../../src/transformers/case-transformer';
+import { CaseTransformer } from '../../src/transformers/case-transformer.js';

 describe('CaseTransformer', () => {
  const transformer = new CaseTransformer();
--- a/src/transformers/redactor.test.ts
+++ b/src/transformers/redactor.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { Redactor } from '../../src/transformers/redactor';
+import { Redactor } from '../../src/transformers/redactor.js';

 describe('Redactor', () => {
  const redactor = new Redactor();
--- a/src/transformers/template-engine.test.ts
+++ b/src/transformers/template-engine.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { TemplateEngine } from '../../src/transformers/template-engine';
+import { TemplateEngine } from '../../src/transformers/template-engine.js';

 describe('TemplateEngine', () => {
  const engine = new TemplateEngine();
--- a/src/utils/paths.test.ts
+++ b/src/utils/paths.test.ts
@ -7,7 +7,7 @@ import {
  getSpellcheckDataPath as _getSpellcheckDataPath,
  PATHS,
  verifyFileExists
-} from './paths';
+} from './paths.js';

 describe('Path utilities', () => {
  describe('getProjectRoot', () => {
--- a/src/utils/paths.ts
+++ b/src/utils/paths.ts
@ -33,13 +33,19 @@ export function getProjectRoot(): string {
  return process.cwd();
 }

+/**
+ * Get the root path for dictionary/spellcheck data files.
+ * Used by NodeDictionaryLoader as its root path.
+ */
+export function getDataRoot(): string {
+  return path.join(getProjectRoot(), 'src', 'data');
+}
+
 /**
 * Get the absolute path to a data file
 */
 export function getDataPath(...segments: string[]): string {
-  const projectRoot = getProjectRoot();
-
-  return path.join(projectRoot, 'src', 'data', ...segments);
+  return path.join(getDataRoot(), ...segments);
 }

 /**
--- a/src/validators/email-validator.test.ts
+++ b/src/validators/email-validator.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { EmailValidator } from '../../src/validators/email-validator';
+import { EmailValidator } from '../../src/validators/email-validator.js';

 describe('EmailValidator', () => {
  const validator = new EmailValidator();
--- a/src/validators/json-validator.test.ts
+++ b/src/validators/json-validator.test.ts
@ -1,5 +1,5 @@
 import { describe, test, expect } from 'vitest';
-import { JSONValidator } from '../../src/validators/json-validator';
+import { JSONValidator } from '../../src/validators/json-validator.js';

 describe('JSONValidator', () => {
  const validator = new JSONValidator();