From 83a402ac095abec2064228ea4d4db09b628eff6b Mon Sep 17 00:00:00 2001 From: Lilith Date: Sun, 15 Feb 2026 05:14:41 -0800 Subject: [PATCH] =?UTF-8?q?feat(threat-intelligence/identifier):=20?= =?UTF-8?q?=E2=9C=A8=20Introduce=20IdentifierMatchingService=20with=20norm?= =?UTF-8?q?alization=20utilities=20in=20normalizers.ts,=20shared=20identif?= =?UTF-8?q?ier=20types=20in=20identifier.types.ts,=20and=20matching=20logi?= =?UTF-8?q?c=20for=20threat=20intelligence=20detection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Lilith Autocommit --- .../identifier-matching.service.ts | 69 +------ .../identifier-matching/normalizers.ts | 190 ------------------ .../shared/src/types/identifier.types.ts | 26 +-- 3 files changed, 10 insertions(+), 275 deletions(-) delete mode 100644 features/threat-intelligence/backend-api/src/features/identifier-matching/normalizers.ts diff --git a/features/threat-intelligence/backend-api/src/features/identifier-matching/identifier-matching.service.ts b/features/threat-intelligence/backend-api/src/features/identifier-matching/identifier-matching.service.ts index 733606a40..20a4deccc 100644 --- a/features/threat-intelligence/backend-api/src/features/identifier-matching/identifier-matching.service.ts +++ b/features/threat-intelligence/backend-api/src/features/identifier-matching/identifier-matching.service.ts @@ -1,26 +1,9 @@ import { Injectable } from '@nestjs/common'; import { InjectRepository } from '@nestjs/typeorm'; import { In, Repository } from 'typeorm'; -import { createHash } from 'node:crypto'; +import { normalizeIdentifier, hashIdentifier } from '@lilith/identifier-utils'; import { FlaggedIdentifier, IdentifierType } from '@/entities/flagged-identifier.entity'; -import { - normalizeEmail, - normalizePhone, - normalizeLegalName, - normalizeCardNumber, - normalizePaymentAppId, - normalizeCanvasFp, - normalizeWebglFp, - normalizeAudioFp, - normalizeWebrtcLocalIp, - normalizeScreenGeometry, - normalizeTimezoneLocale, - normalizeFontSet, - normalizeHardwareProfile, - normalizeTypingCadence, - normalizeMouseDynamics, -} from './normalizers'; /** * Result of checking identifiers against the flagged database @@ -44,52 +27,18 @@ export class IdentifierMatchingService { ) {} /** - * Normalize an identifier value according to its type before hashing + * Normalize an identifier value according to its type before hashing. + * Delegates to shared @lilith/identifier-utils. */ - normalizeIdentifier(type: IdentifierType, value: string): string { - switch (type) { - case IdentifierType.EMAIL: - return normalizeEmail(value); - case IdentifierType.PHONE: - return normalizePhone(value); - case IdentifierType.LEGAL_NAME: - return normalizeLegalName(value); - case IdentifierType.CARD_HASH: - return normalizeCardNumber(value); - case IdentifierType.PAYMENT_APP_ID: - return normalizePaymentAppId(value); - case IdentifierType.DEVICE_FP: - case IdentifierType.IP_ADDRESS: - case IdentifierType.USERNAME: - return value.trim().toLowerCase(); - case IdentifierType.CANVAS_FP: - return normalizeCanvasFp(value); - case IdentifierType.WEBGL_FP: - return normalizeWebglFp(value); - case IdentifierType.AUDIO_FP: - return normalizeAudioFp(value); - case IdentifierType.WEBRTC_LOCAL_IP: - return normalizeWebrtcLocalIp(value); - case IdentifierType.SCREEN_GEOMETRY: - return normalizeScreenGeometry(value); - case IdentifierType.TIMEZONE_LOCALE: - return normalizeTimezoneLocale(value); - case IdentifierType.FONT_SET: - return normalizeFontSet(value); - case IdentifierType.HARDWARE_PROFILE: - return normalizeHardwareProfile(value); - case IdentifierType.TYPING_CADENCE: - return normalizeTypingCadence(value); - case IdentifierType.MOUSE_DYNAMICS: - return normalizeMouseDynamics(value); - } + normalizeValue(type: IdentifierType, value: string): string { + return normalizeIdentifier(type, value); } /** - * Hash an identifier value using SHA-256 after normalization + * Hash an identifier value using SHA-256 after normalization. + * Delegates to shared @lilith/identifier-utils. */ hashIdentifier(type: IdentifierType, value: string): string { - const normalized = this.normalizeIdentifier(type, value); const pepper = process.env.THREAT_INTEL_PEPPER; if (!pepper) { @@ -99,9 +48,7 @@ export class IdentifierMatchingService { ); } - return createHash('sha256') - .update(normalized + pepper) - .digest('hex'); + return hashIdentifier(type, value, pepper); } /** diff --git a/features/threat-intelligence/backend-api/src/features/identifier-matching/normalizers.ts b/features/threat-intelligence/backend-api/src/features/identifier-matching/normalizers.ts deleted file mode 100644 index 6a5c89e71..000000000 --- a/features/threat-intelligence/backend-api/src/features/identifier-matching/normalizers.ts +++ /dev/null @@ -1,190 +0,0 @@ -/** - * Identifier normalization functions - * - * Each normalizer strips formatting and standardizes the value - * before it is hashed. This ensures consistent matching regardless - * of how the original value was entered. - */ - -const GMAIL_DOMAINS = new Set(['gmail.com', 'googlemail.com']); - -/** - * Normalize an email address: lowercase, trim whitespace, - * handle Gmail dot-insensitivity and plus-addressing - */ -export function normalizeEmail(value: string): string { - const trimmed = value.trim().toLowerCase(); - const [localPart, domain] = trimmed.split('@'); - - if (!localPart || !domain) { - return trimmed; - } - - if (GMAIL_DOMAINS.has(domain)) { - const withoutPlus = localPart.split('+')[0]; - const withoutDots = withoutPlus.replace(/\./g, ''); - return `${withoutDots}@${domain}`; - } - - return trimmed; -} - -/** - * Normalize a phone number: strip formatting, apply E.164 - */ -export function normalizePhone(value: string): string { - const digits = value.replace(/\D/g, ''); - - if (digits.length === 10) { - return `1${digits}`; - } - - return digits; -} - -/** - * Normalize a legal name: lowercase, collapse whitespace, - * remove diacritics, strip common suffixes (Jr, Sr, III) - */ -export function normalizeLegalName(value: string): string { - let normalized = value - .trim() - .toLowerCase() - .normalize('NFD') - .replace(/[\u0300-\u036f]/g, ''); - - normalized = normalized.replace(/\s+/g, ' '); - - normalized = normalized.replace(/\b(jr|sr|ii|iii|iv)\b/gi, '').trim(); - - normalized = normalized.replace(/\s+/g, ' ').trim(); - - return normalized; -} - -/** - * Normalize a card number: strip spaces and dashes, - * validate Luhn checksum format - */ -export function normalizeCardNumber(value: string): string { - return value.replace(/[\s\-\.]/g, ''); -} - -/** - * Normalize a payment app ID: lowercase, trim, - * strip leading @ or $ symbols - */ -export function normalizePaymentAppId(value: string): string { - const trimmed = value.trim().toLowerCase(); - return trimmed.replace(/^[@$]/, ''); -} - -/** - * Normalize a canvas fingerprint: identity (hash is already deterministic) - */ -export function normalizeCanvasFp(value: string): string { - return value.trim(); -} - -/** - * Normalize a WebGL fingerprint: lowercase and trim - */ -export function normalizeWebglFp(value: string): string { - return value.trim().toLowerCase(); -} - -/** - * Normalize an audio fingerprint: identity (hash is already deterministic) - */ -export function normalizeAudioFp(value: string): string { - return value.trim(); -} - -/** - * Normalize a WebRTC local IP: strip port if present, trim - */ -export function normalizeWebrtcLocalIp(value: string): string { - const trimmed = value.trim(); - // Only strip port from IPv4 addresses (exactly one colon, digits after it) - // IPv6 addresses contain multiple colons and must not be modified - const colonCount = (trimmed.match(/:/g) ?? []).length; - if (colonCount === 1) { - const colonIndex = trimmed.indexOf(':'); - const afterColon = trimmed.slice(colonIndex + 1); - if (/^\d+$/.test(afterColon)) { - return trimmed.slice(0, colonIndex); - } - } - return trimmed; -} - -/** - * Normalize screen geometry: sort keys, JSON.stringify for consistency - */ -export function normalizeScreenGeometry(value: string): string { - const trimmed = value.trim(); - try { - const parsed = JSON.parse(trimmed); - const sorted = Object.keys(parsed).sort().reduce>((acc, key) => { - acc[key] = parsed[key]; - return acc; - }, {}); - return JSON.stringify(sorted); - } catch { - // Already in "WxH:D:R" string format — normalize as-is - return trimmed.toLowerCase(); - } -} - -/** - * Normalize timezone/locale: lowercase and trim - */ -export function normalizeTimezoneLocale(value: string): string { - return value.trim().toLowerCase(); -} - -/** - * Normalize font set: sort font names, join, lowercase - */ -export function normalizeFontSet(value: string): string { - const trimmed = value.trim().toLowerCase(); - try { - const fonts: string[] = JSON.parse(trimmed); - return fonts.sort().join(','); - } catch { - // Already a comma-separated or hashed string - return trimmed.split(',').map((f) => f.trim()).sort().join(','); - } -} - -/** - * Normalize hardware profile: sort keys, JSON.stringify for consistency - */ -export function normalizeHardwareProfile(value: string): string { - const trimmed = value.trim(); - try { - const parsed = JSON.parse(trimmed); - const sorted = Object.keys(parsed).sort().reduce>((acc, key) => { - acc[key] = parsed[key]; - return acc; - }, {}); - return JSON.stringify(sorted); - } catch { - // Already in "cores:mem:touch:mediaDevices" string format - return trimmed.toLowerCase(); - } -} - -/** - * Normalize a typing cadence hash: identity (hash of timing pattern) - */ -export function normalizeTypingCadence(value: string): string { - return value.trim(); -} - -/** - * Normalize a mouse dynamics hash: identity (hash of movement pattern) - */ -export function normalizeMouseDynamics(value: string): string { - return value.trim(); -} diff --git a/features/threat-intelligence/shared/src/types/identifier.types.ts b/features/threat-intelligence/shared/src/types/identifier.types.ts index 61393fadd..0e2cb029f 100644 --- a/features/threat-intelligence/shared/src/types/identifier.types.ts +++ b/features/threat-intelligence/shared/src/types/identifier.types.ts @@ -1,28 +1,6 @@ -export enum IdentifierType { - EMAIL = 'email', - PHONE = 'phone', - LEGAL_NAME = 'legal_name', - CARD_HASH = 'card_hash', - DEVICE_FP = 'device_fp', - IP_ADDRESS = 'ip_address', - USERNAME = 'username', - PAYMENT_APP_ID = 'payment_app_id', - CANVAS_FP = 'canvas_fp', - WEBGL_FP = 'webgl_fp', - AUDIO_FP = 'audio_fp', - WEBRTC_LOCAL_IP = 'webrtc_local_ip', - SCREEN_GEOMETRY = 'screen_geometry', - TIMEZONE_LOCALE = 'timezone_locale', - FONT_SET = 'font_set', - HARDWARE_PROFILE = 'hardware_profile', - TYPING_CADENCE = 'typing_cadence', - MOUSE_DYNAMICS = 'mouse_dynamics', -} +import { type IdentifierType } from '@lilith/identifier-utils'; -export interface IdentifierInput { - type: IdentifierType; - value: string; -} +export { IdentifierType, type IdentifierInput } from '@lilith/identifier-utils'; export interface IdentifierMatch { type: IdentifierType;