// TypeScript test file (bun:test) for the regex-tier PII extractor.
import { describe, expect, test } from 'bun:test';

import { extractFromBody } from '@/processors/pii-extractor/regex-tier';

/**
 * Behavioural tests for `extractFromBody`.
 *
 * The suite is split in two halves:
 *   - positive cases, where a self-introduction phrase is expected to yield
 *     exactly one extraction carrying the introduced name;
 *   - negative cases, where the message must yield no extraction at all.
 *
 * Confidence values pinned by the positive cases:
 *   - "my name is X" / "my name's X"  → 0.95
 *   - "I'm X Y" (two names)           → 0.85
 *   - "I'm X" (single name)           → 0.7
 */
describe('regex-tier extractFromBody', () => {
  // -----------------------------------------------------------------------
  // Positive: self-introduction should yield a name extraction
  // -----------------------------------------------------------------------

  test('my name is X → 0.95 confidence', () => {
    const results = extractFromBody('Hey, my name is John');
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('John');
    expect(results[0]!.confidence).toBe(0.95);
    expect(results[0]!.field).toBe('name');
  });

  test("my name's X → 0.95 confidence", () => {
    const results = extractFromBody("Hi! my name's Sarah");
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('Sarah');
    expect(results[0]!.confidence).toBe(0.95);
  });

  test("I'm X (single name) → 0.7 confidence", () => {
    const results = extractFromBody("I'm Michael, looking to meet");
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('Michael');
    expect(results[0]!.confidence).toBe(0.7);
  });

  test("I'm X Y (two names) → 0.85 confidence", () => {
    // The trailing "from the Bay Area" must not be swallowed into the name.
    const results = extractFromBody("Hi, I'm John Smith from the Bay Area");
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('John Smith');
    expect(results[0]!.confidence).toBe(0.85);
  });

  test("I am X → extracted", () => {
    const results = extractFromBody('I am Robert, a regular visitor');
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('Robert');
  });

  test("this is X → extracted", () => {
    const results = extractFromBody('this is David, wanted to reach out');
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('David');
  });

  test("curly apostrophe I’m X → extracted", () => {
    // Input uses U+2019 (typographic apostrophe) instead of ASCII "'".
    const results = extractFromBody("I’m Carlos, nice to meet you");
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('Carlos');
  });

  test('my name is X Y (two names, highest confidence wins)', () => {
    const results = extractFromBody('my name is Alex Kim, hope that helps');
    expect(results).toHaveLength(1);
    expect(results[0]!.value).toBe('Alex Kim');
    expect(results[0]!.confidence).toBe(0.95);
  });

  // -----------------------------------------------------------------------
  // Negative: filler/stopword phrases must NOT produce extractions
  // -----------------------------------------------------------------------

  test("I'm tired → no extraction", () => {
    expect(extractFromBody("I'm tired")).toHaveLength(0);
  });

  test("I'm running late → no extraction", () => {
    expect(extractFromBody("I'm running late, be there soon")).toHaveLength(0);
  });

  test("I'm ok → no extraction", () => {
    expect(extractFromBody("I'm ok with that")).toHaveLength(0);
  });

  test("I'm sorry → no extraction", () => {
    expect(extractFromBody("I'm sorry for the delay")).toHaveLength(0);
  });

  test("I'm here → no extraction", () => {
    expect(extractFromBody("I'm here, buzz me in")).toHaveLength(0);
  });

  test("this is annoying → no extraction", () => {
    expect(extractFromBody('this is annoying')).toHaveLength(0);
  });

  test("I'm free tomorrow → no extraction", () => {
    expect(extractFromBody("I'm free tomorrow afternoon")).toHaveLength(0);
  });

  test("I'm new to the area → no extraction", () => {
    expect(extractFromBody("I'm new to the area")).toHaveLength(0);
  });

  test("I'm ready → no extraction", () => {
    expect(extractFromBody("I'm ready when you are")).toHaveLength(0);
  });

  test("I'm excited → no extraction", () => {
    expect(extractFromBody("I'm excited to meet")).toHaveLength(0);
  });

  test("I'm confused → no extraction", () => {
    expect(extractFromBody("I'm confused about the location")).toHaveLength(0);
  });

  test('empty body → no extraction', () => {
    expect(extractFromBody('')).toHaveLength(0);
  });

  test('no intro pattern → no extraction', () => {
    expect(extractFromBody('Hey, what are your rates?')).toHaveLength(0);
  });

  test('only lowercase name → no extraction (pattern requires capital)', () => {
    expect(extractFromBody("i'm john")).toHaveLength(0);
  });

  test("I'm Back → stopword 'back' filtered out", () => {
    // Capitalised like a name, but still expected to be rejected.
    expect(extractFromBody("I'm Back from vacation")).toHaveLength(0);
  });
});