cocottetech/@platform/infrastructure/sql/migrations/0012_people.sql
Natalie b85e09f5e5 feat(people-service): 0012_people migration + db config + env example
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 11:29:15 -04:00

246 lines
13 KiB
PL/PgSQL

-- 0012_people.sql
--
-- People / relationship-intelligence domain — the canonical store for "the people
-- a provider talks to, screens, and prospects" (their inbound contacts, leads,
-- clients, peers-as-contacts). Distinct from `personas` (the provider's OWN
-- marketing faces, 0001) and `peer_*` (other providers in the network, 0006).
--
-- Ports the proven v2 `people_*` tableset (lilith-platform.live quinn.db, single
-- tenant) into V4's person-first multi-tenant model. v2 designed this schema with
-- `provider_slug DEFAULT 'quinn'` explicitly "for future cocotte-style multi-tenant";
-- this migration realises that: `provider_slug` → `user_id` (Person) + optional
-- `org_id` (Org), with the same RLS floor as every other per-provider table.
--
-- Universal target: whatsapp-lookup, mr-number, macsync/messenger processors, the
-- classifier, and prospector all upsert identities + append signals here via
-- platform.api's internal surface, instead of each owning a fragment.
--
-- ---------------------------------------------------------------------------
-- Extraction-readiness (DESIGN.md §6 Option B is deferred; this domain is the most
-- likely to graduate to its own DB first, because `people_signals` is the global
-- scale driver — one row per inbound message / screening / classification).
-- To keep that future split a lift-and-shift rather than a reschema:
-- * `user_id` / `org_id` are tenancy REFERENCE columns, NOT foreign keys to
-- users/orgs. The people domain therefore carries no hard cross-domain FK and
-- can move to a dedicated `people.db` without dragging the users/orgs tables.
-- (Deliberate, endorsed deviation from the FK-to-users(id) pattern of 0001.)
-- * Cross-domain reads (orgs, personas, content) go over platform.api, never via
-- SQL join — consistent with the "no cross-DB joins" invariant.
-- * `people_signals` is RANGE-partitioned by month from day one, so per-tenant
-- hash sub-partitioning or a replica-fronted split is a later, additive step.
-- Within-domain FKs (everything → people.id) stay: they are always co-located.
--
-- RLS convention (same as 0001-0006): the app sets the `app.current_user_id` GUC
-- per connection; `current_user_uuid()` reads it. Application layer is the primary
-- enforcer, RLS catches bugs. `org_members` is referenced by policy only (not by
-- data FK); at extraction time the policy adapts, the columns do not.
-- ---------------------------------------------------------------------------
BEGIN;
-- ---------------------------------------------------------------------------
-- people — one row per distinct person a tenant tracks.
-- ---------------------------------------------------------------------------
CREATE TABLE people (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL, -- owning Person (tenant); ref, not FK (extraction-ready)
org_id UUID NULL, -- optional Org overlay; ref, not FK
canonical_display_name TEXT,
primary_identity_id UUID, -- best identity; set after rows exist (no FK to avoid cycle)
notes TEXT,
reputation_aggregate JSONB, -- rolled-up reputation, or computed from signals
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX idx_people_user ON people(user_id);
CREATE INDEX idx_people_org ON people(org_id) WHERE org_id IS NOT NULL;
CREATE INDEX idx_people_display ON people(user_id, lower(canonical_display_name)) WHERE canonical_display_name IS NOT NULL;
-- ---------------------------------------------------------------------------
-- people_identities — handles/addresses across channels; one person, many identities.
-- ---------------------------------------------------------------------------
CREATE TABLE people_identities (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
person_id UUID NOT NULL REFERENCES people(id) ON DELETE CASCADE,
user_id UUID NOT NULL, -- denormalised tenant key (direct RLS, no subquery)
org_id UUID NULL,
handle TEXT NOT NULL,
channel TEXT NOT NULL, -- imessage|sms|email|whatsapp|tryst|slixa|eros|signal|telegram|other (open vocab)
verified BOOLEAN NOT NULL DEFAULT false,
first_seen_at TIMESTAMPTZ,
last_seen_at TIMESTAMPTZ,
confidence REAL,
source TEXT, -- macsync|manual|wa_lookup|mr_number|...
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-- dedup is per-tenant: the same handle can legitimately be a different person
-- for two different providers, so the uniqueness key includes user_id.
UNIQUE (user_id, handle, channel)
);
CREATE INDEX idx_people_identities_person ON people_identities(person_id);
CREATE INDEX idx_people_identities_last_seen ON people_identities(user_id, last_seen_at DESC);
-- ---------------------------------------------------------------------------
-- people_relationships — directed graph edges between people.
-- ---------------------------------------------------------------------------
CREATE TABLE people_relationships (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id UUID NOT NULL,
org_id UUID NULL,
subject_person_id UUID NOT NULL REFERENCES people(id) ON DELETE CASCADE,
object_person_id UUID NOT NULL REFERENCES people(id) ON DELETE CASCADE,
rel_type TEXT NOT NULL, -- client|prospect|friend|family|vendor|peer|spam|scam|blocked|unknown|...
confidence REAL,
source_feature TEXT, -- prospector|classifier|manual|wa_lookup|...
valid_from TIMESTAMPTZ,
valid_to TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX idx_people_rel_subject ON people_relationships(subject_person_id, rel_type);
CREATE INDEX idx_people_rel_object ON people_relationships(object_person_id, rel_type);
-- ---------------------------------------------------------------------------
-- people_signals — append-mostly evidence stream (the scale driver).
-- RANGE-partitioned by month so the high-volume table stays prunable and a later
-- per-tenant or replica-fronted split is additive. A DEFAULT partition catches any
-- row outside a declared month so inserts never fail before maintenance runs.
-- ---------------------------------------------------------------------------
CREATE TABLE people_signals (
id BIGINT GENERATED BY DEFAULT AS IDENTITY,
person_id UUID NOT NULL REFERENCES people(id) ON DELETE CASCADE,
user_id UUID NOT NULL,
org_id UUID NULL,
signal_type TEXT NOT NULL, -- screening_mrnumber|screening_whatsapp|classification_v4|message_inbound|message_outbound|pii_*|reputation_*|location_inferred|intent_booking|chase_blocked|funnel_stage_*|...
value_text TEXT,
value_numeric NUMERIC,
value_jsonb JSONB,
confidence REAL,
source_feature TEXT NOT NULL, -- whatsapp|mr_number|macsync|relationship_resolver|prospect_classifier|pii_extractor|manual|...
source_handle TEXT,
source_channel TEXT,
raw_ref TEXT, -- optional trace back to a message/screening id
occurred_at TIMESTAMPTZ NOT NULL DEFAULT now(),
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
PRIMARY KEY (id, occurred_at) -- partition key must be part of PK
) PARTITION BY RANGE (occurred_at);
-- Catch-all partition: guarantees inserts succeed before/without monthly partitions.
CREATE TABLE people_signals_default PARTITION OF people_signals DEFAULT;
-- Monthly partition template (run by the partition-maintenance job ahead of time):
-- CREATE TABLE people_signals_y2026m07 PARTITION OF people_signals
-- FOR VALUES FROM ('2026-07-01') TO ('2026-08-01');
-- ALTER TABLE people_signals_y2026m07 ENABLE ROW LEVEL SECURITY; -- defense-in-depth
-- Pre-creating months lets the planner prune; the DEFAULT partition only backstops.
-- RLS on the parent governs access-via-parent (the only app path); enabling it on
-- each partition too makes direct-partition queries default-deny — belt and braces.
ALTER TABLE people_signals_default ENABLE ROW LEVEL SECURITY;
-- Indexes declared on the parent propagate to every partition.
CREATE INDEX idx_people_signals_person_type ON people_signals(person_id, signal_type, occurred_at DESC);
CREATE INDEX idx_people_signals_tenant ON people_signals(user_id, occurred_at DESC);
CREATE INDEX idx_people_signals_source ON people_signals(source_feature, occurred_at DESC);
-- ---------------------------------------------------------------------------
-- updated_at touch triggers (signals is append-only → no trigger).
-- ---------------------------------------------------------------------------
DO $$
DECLARE t TEXT;
BEGIN
FOREACH t IN ARRAY ARRAY['people','people_identities','people_relationships'] LOOP
EXECUTE format(
'CREATE TRIGGER trg_touch_%1$s BEFORE UPDATE ON %1$s
FOR EACH ROW EXECUTE FUNCTION touch_updated_at();', t);
END LOOP;
END $$;
-- ---------------------------------------------------------------------------
-- Row-level security — same per-provider floor as 0001-0006.
-- Every table carries user_id (+ optional org_id) so the policy is a direct check
-- with no correlated subquery on the hot signals path (org overlay aside).
-- ---------------------------------------------------------------------------
ALTER TABLE people ENABLE ROW LEVEL SECURITY;
ALTER TABLE people_identities ENABLE ROW LEVEL SECURITY;
ALTER TABLE people_relationships ENABLE ROW LEVEL SECURITY;
ALTER TABLE people_signals ENABLE ROW LEVEL SECURITY;
DO $$
DECLARE t TEXT;
BEGIN
FOREACH t IN ARRAY ARRAY[
'people','people_identities','people_relationships','people_signals'
] LOOP
EXECUTE format($p$
CREATE POLICY tenant_isolation ON %1$s
USING (
user_id = current_user_uuid()
OR (org_id IS NOT NULL
AND org_id IN (SELECT org_id FROM org_members
WHERE user_id = current_user_uuid()))
)
WITH CHECK (
user_id = current_user_uuid()
OR (org_id IS NOT NULL
AND org_id IN (SELECT org_id FROM org_members
WHERE user_id = current_user_uuid()))
);
$p$, t);
END LOOP;
END $$;
-- ---------------------------------------------------------------------------
-- Read models. security_invoker=true so the querying tenant's RLS on the base
-- tables flows through the view (no owner-bypass leak across tenants).
-- ---------------------------------------------------------------------------
CREATE VIEW people_relationship_summary
WITH (security_invoker = true) AS
SELECT
p.id,
p.user_id,
p.org_id,
p.canonical_display_name,
p.created_at,
COUNT(DISTINCT pi.id) AS identity_count,
COUNT(DISTINCT s.id) FILTER (WHERE s.signal_type LIKE 'message_%') AS message_signal_count,
COUNT(DISTINCT s.id) FILTER (WHERE s.signal_type LIKE 'screening_%') AS screening_count,
MAX(s.occurred_at) AS last_signal_at,
(SELECT rel_type FROM people_relationships r
WHERE r.subject_person_id = p.id OR r.object_person_id = p.id
ORDER BY r.created_at DESC LIMIT 1) AS latest_rel_type
FROM people p
LEFT JOIN people_identities pi ON pi.person_id = p.id
LEFT JOIN people_signals s ON s.person_id = p.id
GROUP BY p.id, p.user_id, p.org_id, p.canonical_display_name, p.created_at;
CREATE VIEW people_engagement_view
WITH (security_invoker = true) AS
SELECT
p.id,
p.user_id,
p.canonical_display_name,
COUNT(*) FILTER (WHERE s.signal_type = 'message_inbound') AS inbound_messages,
COUNT(*) FILTER (WHERE s.signal_type = 'message_outbound') AS outbound_messages,
MAX(s.occurred_at) FILTER (WHERE s.signal_type LIKE 'message_%') AS last_message_at,
COUNT(*) FILTER (WHERE s.signal_type LIKE 'screening_%' AND s.confidence > 0.5) AS positive_screenings,
(SELECT value_text FROM people_signals z
WHERE z.person_id = p.id AND z.signal_type LIKE 'classification_%'
ORDER BY z.occurred_at DESC LIMIT 1) AS latest_classification
FROM people p
LEFT JOIN people_signals s ON s.person_id = p.id
GROUP BY p.id, p.user_id, p.canonical_display_name;
CREATE VIEW people_risk_view
WITH (security_invoker = true) AS
SELECT
p.id,
p.user_id,
p.canonical_display_name,
COUNT(*) FILTER (WHERE s.signal_type LIKE 'screening_%' AND s.value_text = 'denied') AS denied_screenings,
BOOL_OR(s.value_jsonb->>'chase_blocked' = 'true' OR s.signal_type = 'chase_blocked') AS is_chase_blocked,
MAX(s.confidence) FILTER (WHERE s.signal_type LIKE 'screening_%' AND s.value_text = 'denied') AS max_risk_confidence
FROM people p
LEFT JOIN people_signals s ON s.person_id = p.id
GROUP BY p.id, p.user_id, p.canonical_display_name;
COMMIT;