-- 0004_investigation_bureau.sql -- -- W3.1 — Foundation for the Investigation Bureau agentic runtime. -- -- Adds the 7 tables, sequences, RLS policies, audit trigger, and the -- minimal-privilege `investigator` role used by the new `investigator-runtime` -- container. None of these existed before; the previous "8 detectives" was -- branding-only — this is where it becomes a real motor. -- -- IMPORTANT (same as migration 0003): apply as `supabase_admin`, not -- `postgres`, because public.chunks / .entities are owned by supabase_admin. -- A non-supabase_admin postgres user gets "must be owner" on the FK to -- public.chunks below. -- -- Idempotent. Safe to re-run. -- -- Spec references: docs/adrs/ADR-002-investigation-bureau-runtime.md -- + agentic-layer-spec.md (sec 3.3, 4, 5, 9). BEGIN; -- ───────────────────────────────────────────────────────────────────────── -- ID sequences. We allocate human-readable IDs like E-0042, H-0007, R-0028, -- W-0001, G-0001 by nextval'ing these from the writer tools. -- ───────────────────────────────────────────────────────────────────────── CREATE SEQUENCE IF NOT EXISTS public.evidence_id_seq START 1; CREATE SEQUENCE IF NOT EXISTS public.hypothesis_id_seq START 1; CREATE SEQUENCE IF NOT EXISTS public.contradiction_id_seq START 1; CREATE SEQUENCE IF NOT EXISTS public.witness_id_seq START 1; CREATE SEQUENCE IF NOT EXISTS public.gap_id_seq START 1; CREATE SEQUENCE IF NOT EXISTS public.residual_uncertainty_id_seq START 1; -- ───────────────────────────────────────────────────────────────────────── -- 1. investigation_jobs — the queue + audit trail of every investigation. -- Workers LISTEN on the channel below and UPDATE status as they progress. -- ───────────────────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS public.investigation_jobs ( job_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), kind TEXT NOT NULL, payload JSONB NOT NULL DEFAULT '{}'::jsonb, triggered_by TEXT, status TEXT NOT NULL DEFAULT 'queued' CHECK (status IN ('queued','running','complete','failed','aborted')), worker_id TEXT, budget_used_usd NUMERIC(10,4) DEFAULT 0, started_at TIMESTAMPTZ, finished_at TIMESTAMPTZ, outputs JSONB, error TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS investigation_jobs_status_idx ON public.investigation_jobs (status, created_at); CREATE OR REPLACE FUNCTION public.notify_new_investigation_job() RETURNS TRIGGER AS $$ BEGIN PERFORM pg_notify('investigation_jobs', NEW.job_id::TEXT); RETURN NEW; END $$ LANGUAGE plpgsql; DROP TRIGGER IF EXISTS investigation_jobs_notify ON public.investigation_jobs; CREATE TRIGGER investigation_jobs_notify AFTER INSERT ON public.investigation_jobs FOR EACH ROW EXECUTE FUNCTION public.notify_new_investigation_job(); -- ───────────────────────────────────────────────────────────────────────── -- 2. evidence — Locard's chain of custody, one row per discovered evidence. -- `source_chunk_pk` FK to public.chunks ensures we never store an -- evidence pointing at a chunk that doesn't exist. ON DELETE RESTRICT so a -- chunk can't be silently removed under our feet. -- ───────────────────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS public.evidence ( evidence_pk BIGSERIAL PRIMARY KEY, evidence_id TEXT UNIQUE NOT NULL, verbatim_excerpt TEXT NOT NULL, source_chunk_pk BIGINT REFERENCES public.chunks(chunk_pk) ON DELETE RESTRICT, source_page_id TEXT NOT NULL, bbox JSONB, grade TEXT NOT NULL CHECK (grade IN ('A','B','C')), custody_steps JSONB NOT NULL, custody_gaps JSONB, confidence_band TEXT CHECK (confidence_band IN ('high','medium','low','speculation')), related_hypotheses JSONB DEFAULT '[]'::jsonb, created_by TEXT NOT NULL DEFAULT 'locard@detective', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS evidence_grade_idx ON public.evidence (grade); CREATE INDEX IF NOT EXISTS evidence_chunk_idx ON public.evidence (source_chunk_pk); -- ───────────────────────────────────────────────────────────────────────── -- 3. hypotheses — Holmes-style propositions with prior/posterior + Tetlock band. -- ───────────────────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS public.hypotheses ( hypothesis_pk BIGSERIAL PRIMARY KEY, hypothesis_id TEXT UNIQUE NOT NULL, question TEXT NOT NULL, position TEXT NOT NULL, argument_for TEXT, argument_against TEXT, evidence_refs JSONB DEFAULT '[]'::jsonb, prior NUMERIC(4,3), posterior NUMERIC(4,3), confidence_band TEXT CHECK (confidence_band IN ('high','medium','low','speculation')), status TEXT NOT NULL DEFAULT 'open' CHECK (status IN ('open','closed','dormant','superseded')), superseded_by TEXT REFERENCES public.hypotheses(hypothesis_id) ON DELETE SET NULL, created_by TEXT NOT NULL DEFAULT 'holmes@detective', reviewed_by TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS hypotheses_status_idx ON public.hypotheses (status); -- Trigram index is nice-to-have for full-text similarity over the question -- field. Wrap in a DO block so missing pg_trgm extension doesn't kill the run. DO $$ BEGIN IF EXISTS (SELECT 1 FROM pg_extension WHERE extname='pg_trgm') THEN EXECUTE 'CREATE INDEX IF NOT EXISTS hypotheses_question_trgm ON public.hypotheses USING GIN (question gin_trgm_ops)'; END IF; END $$; -- ───────────────────────────────────────────────────────────────────────── -- 4. contradictions — Dupin-style cross-chunk contradiction graph nodes. -- ───────────────────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS public.contradictions ( contradiction_pk BIGSERIAL PRIMARY KEY, contradiction_id TEXT UNIQUE NOT NULL, -- R-NNNN (R = relation, per CLAUDE.md naming) topic TEXT NOT NULL, chunks JSONB NOT NULL, -- [{chunk_pk, position, ...}] detected_by TEXT NOT NULL DEFAULT 'dupin@detective', resolution_status TEXT NOT NULL DEFAULT 'open' CHECK (resolution_status IN ('open','resolved','irreconcilable')), notes TEXT, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- ───────────────────────────────────────────────────────────────────────── -- 5. witnesses — Poirot-style witness credibility analyses, FK to people entity. -- ───────────────────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS public.witnesses ( witness_pk BIGSERIAL PRIMARY KEY, witness_id TEXT UNIQUE NOT NULL, -- W-NNNN person_entity_pk BIGINT REFERENCES public.entities(entity_pk) ON DELETE RESTRICT, credibility TEXT CHECK (credibility IN ('high','medium','low','speculation')), access_to_event TEXT, -- prose: how did they witness it? bias_notes TEXT, corroboration_refs JSONB DEFAULT '[]'::jsonb, -- [{evidence_id, doc_id, ...}] verdict TEXT, -- one-line summary created_by TEXT NOT NULL DEFAULT 'poirot@detective', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- ───────────────────────────────────────────────────────────────────────── -- 6. gaps — known unknowns: what was NOT possible to determine. -- ───────────────────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS public.gaps ( gap_pk BIGSERIAL PRIMARY KEY, gap_id TEXT UNIQUE NOT NULL, -- G-NNNN description TEXT NOT NULL, scope JSONB, -- {doc_id?, hypothesis_id?, ...} suggested_next_move TEXT, -- "FOIA request X" / "consult archive Y" status TEXT NOT NULL DEFAULT 'open' CHECK (status IN ('open','partial','closed')), created_by TEXT NOT NULL DEFAULT 'chief-detective', created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- ───────────────────────────────────────────────────────────────────────── -- 7. residual_uncertainties — Schneier red-team + Taleb anti-fragile notes. -- ───────────────────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS public.residual_uncertainties ( ru_pk BIGSERIAL PRIMARY KEY, ru_id TEXT UNIQUE NOT NULL, -- RU-NNNN hypothesis_id TEXT REFERENCES public.hypotheses(hypothesis_id) ON DELETE CASCADE, scope TEXT NOT NULL, -- 'hypothesis' | 'document' | 'global' description TEXT NOT NULL, black_swan_check TEXT, -- Taleb: what would invalidate this? failure_mode TEXT, -- Schneier: how does this break? created_by TEXT NOT NULL, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- ───────────────────────────────────────────────────────────────────────── -- RLS — all new tables are public read; writes only via the investigator role. -- ───────────────────────────────────────────────────────────────────────── ALTER TABLE public.investigation_jobs ENABLE ROW LEVEL SECURITY; ALTER TABLE public.evidence ENABLE ROW LEVEL SECURITY; ALTER TABLE public.hypotheses ENABLE ROW LEVEL SECURITY; ALTER TABLE public.contradictions ENABLE ROW LEVEL SECURITY; ALTER TABLE public.witnesses ENABLE ROW LEVEL SECURITY; ALTER TABLE public.gaps ENABLE ROW LEVEL SECURITY; ALTER TABLE public.residual_uncertainties ENABLE ROW LEVEL SECURITY; DROP POLICY IF EXISTS investigation_jobs_read ON public.investigation_jobs; DROP POLICY IF EXISTS evidence_read ON public.evidence; DROP POLICY IF EXISTS hypotheses_read ON public.hypotheses; DROP POLICY IF EXISTS contradictions_read ON public.contradictions; DROP POLICY IF EXISTS witnesses_read ON public.witnesses; DROP POLICY IF EXISTS gaps_read ON public.gaps; DROP POLICY IF EXISTS residual_uncertainties_read ON public.residual_uncertainties; CREATE POLICY investigation_jobs_read ON public.investigation_jobs FOR SELECT USING (TRUE); CREATE POLICY evidence_read ON public.evidence FOR SELECT USING (TRUE); CREATE POLICY hypotheses_read ON public.hypotheses FOR SELECT USING (TRUE); CREATE POLICY contradictions_read ON public.contradictions FOR SELECT USING (TRUE); CREATE POLICY witnesses_read ON public.witnesses FOR SELECT USING (TRUE); CREATE POLICY gaps_read ON public.gaps FOR SELECT USING (TRUE); CREATE POLICY residual_uncertainties_read ON public.residual_uncertainties FOR SELECT USING (TRUE); GRANT SELECT ON public.investigation_jobs, public.evidence, public.hypotheses, public.contradictions, public.witnesses, public.gaps, public.residual_uncertainties TO anon, authenticated; -- ───────────────────────────────────────────────────────────────────────── -- `investigator` role — minimum privilege. The new container connects with -- THIS role, NOT service_role. Per gate #1 of sa-security-engineer. -- -- Capability matrix: -- - SELECT on read corpus (chunks, entities, entity_mentions, relations, -- documents) so the worker can answer questions. -- - INSERT/UPDATE on the new write surfaces (jobs + 6 investigation tables). -- - NO SELECT on auth.users, profiles, messages — the worker must never -- see PII or chat content. -- ───────────────────────────────────────────────────────────────────────── DO $$ BEGIN IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='investigator') THEN CREATE ROLE investigator WITH LOGIN CONNECTION LIMIT 4 PASSWORD 'placeholder_set_by_bootstrap'; -- rotated by the deploy script -- ALTER ROLE investigator SET search_path = public, pg_catalog; END IF; END $$; GRANT CONNECT ON DATABASE postgres TO investigator; GRANT USAGE ON SCHEMA public TO investigator; GRANT SELECT ON public.chunks, public.entities, public.entity_mentions, public.relations, public.documents TO investigator; GRANT SELECT, INSERT, UPDATE ON public.investigation_jobs, public.evidence, public.hypotheses, public.contradictions, public.witnesses, public.gaps, public.residual_uncertainties TO investigator; GRANT USAGE, SELECT, UPDATE ON public.evidence_id_seq, public.hypothesis_id_seq, public.contradiction_id_seq, public.witness_id_seq, public.gap_id_seq, public.residual_uncertainty_id_seq TO investigator; COMMIT;