lilith-platform.live/codebase/@features/api/tests/processors/pii-extractor.regex.test.ts
2026-04-27 04:59:46 -07:00

127 lines
4.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { describe, expect, test } from 'bun:test';
import { extractFromBody } from '@/processors/pii-extractor/regex-tier';
/**
 * Spec for `extractFromBody` from the regex tier of the PII extractor.
 *
 * The positive cases pin the value, confidence, and field reported for
 * self-introduction phrasings ("my name is", "my name's", "I'm", "I am",
 * "this is"). The negative cases pin that everyday filler phrases, empty
 * input, lowercase names, and stopwords yield no extraction at all.
 */
describe('regex-tier extractFromBody', () => {
  /**
   * Runs the extractor on `body`, asserts that exactly one extraction was
   * produced, and returns it.
   *
   * Centralising the length check keeps every positive test from needing a
   * non-null assertion on `results[0]`.
   */
  const extractSingle = (body: string) => {
    const results = extractFromBody(body);
    expect(results).toHaveLength(1);
    const only = results[0];
    if (only === undefined) {
      // Only reachable if the length assertion above did not throw.
      throw new Error('expected exactly one extraction');
    }
    return only;
  };

  // -----------------------------------------------------------------------
  // Positive: self-introduction should yield a name extraction
  // -----------------------------------------------------------------------
  test('my name is X → 0.95 confidence', () => {
    const extraction = extractSingle('Hey, my name is John');
    expect(extraction.value).toBe('John');
    expect(extraction.confidence).toBe(0.95);
    expect(extraction.field).toBe('name');
  });

  test("my name's X → 0.95 confidence", () => {
    const extraction = extractSingle("Hi! my name's Sarah");
    expect(extraction.value).toBe('Sarah');
    expect(extraction.confidence).toBe(0.95);
  });

  test("I'm X (single name) → 0.7 confidence", () => {
    const extraction = extractSingle("I'm Michael, looking to meet");
    expect(extraction.value).toBe('Michael');
    expect(extraction.confidence).toBe(0.7);
  });

  test("I'm X Y (two names) → 0.85 confidence", () => {
    const extraction = extractSingle("Hi, I'm John Smith from the Bay Area");
    expect(extraction.value).toBe('John Smith');
    expect(extraction.confidence).toBe(0.85);
  });

  test("I am X → extracted", () => {
    const extraction = extractSingle('I am Robert, a regular visitor');
    expect(extraction.value).toBe('Robert');
  });

  test("this is X → extracted", () => {
    const extraction = extractSingle('this is David, wanted to reach out');
    expect(extraction.value).toBe('David');
  });

  // The right single quotation mark (U+2019) is written as an escape so the
  // character cannot be silently dropped or confused with an ASCII apostrophe.
  test("curly apostrophe I\u2019m X → extracted", () => {
    const extraction = extractSingle("I\u2019m Carlos, nice to meet you");
    expect(extraction.value).toBe('Carlos');
  });

  test('my name is X Y (two names, highest confidence wins)', () => {
    const extraction = extractSingle('my name is Alex Kim, hope that helps');
    expect(extraction.value).toBe('Alex Kim');
    expect(extraction.confidence).toBe(0.95);
  });

  // -----------------------------------------------------------------------
  // Negative: filler/stopword phrases must NOT produce extractions
  // -----------------------------------------------------------------------
  test("I'm tired → no extraction", () => {
    expect(extractFromBody("I'm tired")).toHaveLength(0);
  });

  test("I'm running late → no extraction", () => {
    expect(extractFromBody("I'm running late, be there soon")).toHaveLength(0);
  });

  test("I'm ok → no extraction", () => {
    expect(extractFromBody("I'm ok with that")).toHaveLength(0);
  });

  test("I'm sorry → no extraction", () => {
    expect(extractFromBody("I'm sorry for the delay")).toHaveLength(0);
  });

  test("I'm here → no extraction", () => {
    expect(extractFromBody("I'm here, buzz me in")).toHaveLength(0);
  });

  test("this is annoying → no extraction", () => {
    expect(extractFromBody('this is annoying')).toHaveLength(0);
  });

  test("I'm free tomorrow → no extraction", () => {
    expect(extractFromBody("I'm free tomorrow afternoon")).toHaveLength(0);
  });

  test("I'm new to the area → no extraction", () => {
    expect(extractFromBody("I'm new to the area")).toHaveLength(0);
  });

  test("I'm ready → no extraction", () => {
    expect(extractFromBody("I'm ready when you are")).toHaveLength(0);
  });

  test("I'm excited → no extraction", () => {
    expect(extractFromBody("I'm excited to meet")).toHaveLength(0);
  });

  test("I'm confused → no extraction", () => {
    expect(extractFromBody("I'm confused about the location")).toHaveLength(0);
  });

  test('empty body → no extraction', () => {
    expect(extractFromBody('')).toHaveLength(0);
  });

  test('no intro pattern → no extraction', () => {
    expect(extractFromBody('Hey, what are your rates?')).toHaveLength(0);
  });

  test('only lowercase name → no extraction (pattern requires capital)', () => {
    expect(extractFromBody("i'm john")).toHaveLength(0);
  });

  test("I'm Back → stopword 'back' filtered out", () => {
    expect(extractFromBody("I'm Back from vacation")).toHaveLength(0);
  });
});