disclosure-bureau/infra/supabase/migrations/0004_investigation_bureau.sql

276 lines
16 KiB
MySQL
Raw Permalink Normal View History

W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard Migrations: - 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence, hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id sequences, pg_notify trigger on investigation_jobs, RLS read-only public, investigator role with least-privilege grants (no service_role). - 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE policies bound to investigator + service_role + postgres (RLS with only a SELECT policy was silently blocking the worker's claim UPDATE). investigator-runtime/ (new Bun + TS container): - src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool, healthcheck file, graceful SIGTERM shutdown. - src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard). Marks job failed when all per-item outputs error; surfaces first errors. - src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool + dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc), claude -p subprocess with quota detection (api_error_status=429). - src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps), resolve chunk_pk via FK, verify verbatim_excerpt actually appears in chunk content, INSERT + render case/evidence/E-NNNN.md + audit. - src/detectives/locard.ts: load chunk → call Claude with locard.md system prompt → parse strict JSON → call writeEvidence locally. - Dockerfile installs `claude` CLI (OAuth) at build time. Compose: - new `investigator` service builds from investigator-runtime/, connects with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap. Web: - /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1). POST creates a job, GET polls status. For W3.6 it becomes the chat tool. End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch → claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed isQuota error, marks job failed with surfaced reason. Architecture proven; quota reset enables real evidence creation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 22:49:33 +00:00
-- 0004_investigation_bureau.sql
--
-- W3.1 — Foundation for the Investigation Bureau agentic runtime.
--
-- Adds the 7 tables, sequences, RLS policies, audit trigger, and the
-- minimal-privilege `investigator` role used by the new `investigator-runtime`
-- container. None of these existed before; the previous "8 detectives" was
-- branding-only — this is where it becomes a real motor.
--
-- IMPORTANT (same as migration 0003): apply as `supabase_admin`, not
-- `postgres`, because public.chunks / .entities are owned by supabase_admin.
-- A non-supabase_admin postgres user gets "must be owner" on the FK to
-- public.chunks below.
--
-- Idempotent. Safe to re-run.
--
-- Spec references: docs/adrs/ADR-002-investigation-bureau-runtime.md
-- + agentic-layer-spec.md (sec 3.3, 4, 5, 9).
BEGIN;
-- ─────────────────────────────────────────────────────────────────────────
-- ID sequences. We allocate human-readable IDs like E-0042, H-0007, R-0028,
-- W-0001, G-0001 by nextval'ing these from the writer tools.
-- ─────────────────────────────────────────────────────────────────────────
CREATE SEQUENCE IF NOT EXISTS public.evidence_id_seq START 1;
CREATE SEQUENCE IF NOT EXISTS public.hypothesis_id_seq START 1;
CREATE SEQUENCE IF NOT EXISTS public.contradiction_id_seq START 1;
CREATE SEQUENCE IF NOT EXISTS public.witness_id_seq START 1;
CREATE SEQUENCE IF NOT EXISTS public.gap_id_seq START 1;
CREATE SEQUENCE IF NOT EXISTS public.residual_uncertainty_id_seq START 1;
-- ─────────────────────────────────────────────────────────────────────────
-- 1. investigation_jobs — the queue + audit trail of every investigation.
-- Workers LISTEN on the channel below and UPDATE status as they progress.
-- ─────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS public.investigation_jobs (
job_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
kind TEXT NOT NULL,
payload JSONB NOT NULL DEFAULT '{}'::jsonb,
triggered_by TEXT,
status TEXT NOT NULL DEFAULT 'queued'
CHECK (status IN ('queued','running','complete','failed','aborted')),
worker_id TEXT,
budget_used_usd NUMERIC(10,4) DEFAULT 0,
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
outputs JSONB,
error TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS investigation_jobs_status_idx
ON public.investigation_jobs (status, created_at);
CREATE OR REPLACE FUNCTION public.notify_new_investigation_job() RETURNS TRIGGER AS $$
BEGIN
PERFORM pg_notify('investigation_jobs', NEW.job_id::TEXT);
RETURN NEW;
END
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS investigation_jobs_notify ON public.investigation_jobs;
CREATE TRIGGER investigation_jobs_notify
AFTER INSERT ON public.investigation_jobs
FOR EACH ROW EXECUTE FUNCTION public.notify_new_investigation_job();
-- ─────────────────────────────────────────────────────────────────────────
-- 2. evidence — Locard's chain of custody, one row per discovered evidence.
-- `source_chunk_pk` FK to public.chunks ensures we never store an
-- evidence pointing at a chunk that doesn't exist. ON DELETE RESTRICT so a
-- chunk can't be silently removed under our feet.
-- ─────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS public.evidence (
evidence_pk BIGSERIAL PRIMARY KEY,
evidence_id TEXT UNIQUE NOT NULL,
verbatim_excerpt TEXT NOT NULL,
source_chunk_pk BIGINT REFERENCES public.chunks(chunk_pk) ON DELETE RESTRICT,
source_page_id TEXT NOT NULL,
bbox JSONB,
grade TEXT NOT NULL CHECK (grade IN ('A','B','C')),
custody_steps JSONB NOT NULL,
custody_gaps JSONB,
confidence_band TEXT CHECK (confidence_band IN ('high','medium','low','speculation')),
related_hypotheses JSONB DEFAULT '[]'::jsonb,
created_by TEXT NOT NULL DEFAULT 'locard@detective',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS evidence_grade_idx ON public.evidence (grade);
CREATE INDEX IF NOT EXISTS evidence_chunk_idx ON public.evidence (source_chunk_pk);
-- ─────────────────────────────────────────────────────────────────────────
-- 3. hypotheses — Holmes-style propositions with prior/posterior + Tetlock band.
-- ─────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS public.hypotheses (
hypothesis_pk BIGSERIAL PRIMARY KEY,
hypothesis_id TEXT UNIQUE NOT NULL,
question TEXT NOT NULL,
position TEXT NOT NULL,
argument_for TEXT,
argument_against TEXT,
evidence_refs JSONB DEFAULT '[]'::jsonb,
prior NUMERIC(4,3),
posterior NUMERIC(4,3),
confidence_band TEXT CHECK (confidence_band IN ('high','medium','low','speculation')),
status TEXT NOT NULL DEFAULT 'open'
CHECK (status IN ('open','closed','dormant','superseded')),
superseded_by TEXT REFERENCES public.hypotheses(hypothesis_id) ON DELETE SET NULL,
created_by TEXT NOT NULL DEFAULT 'holmes@detective',
reviewed_by TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS hypotheses_status_idx ON public.hypotheses (status);
-- Trigram index is nice-to-have for full-text similarity over the question
-- field. Wrap in a DO block so missing pg_trgm extension doesn't kill the run.
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_extension WHERE extname='pg_trgm') THEN
EXECUTE 'CREATE INDEX IF NOT EXISTS hypotheses_question_trgm ON public.hypotheses USING GIN (question gin_trgm_ops)';
END IF;
END
$$;
-- ─────────────────────────────────────────────────────────────────────────
-- 4. contradictions — Dupin-style cross-chunk contradiction graph nodes.
-- ─────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS public.contradictions (
contradiction_pk BIGSERIAL PRIMARY KEY,
contradiction_id TEXT UNIQUE NOT NULL, -- R-NNNN (R = relation, per CLAUDE.md naming)
topic TEXT NOT NULL,
chunks JSONB NOT NULL, -- [{chunk_pk, position, ...}]
detected_by TEXT NOT NULL DEFAULT 'dupin@detective',
resolution_status TEXT NOT NULL DEFAULT 'open'
CHECK (resolution_status IN ('open','resolved','irreconcilable')),
notes TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─────────────────────────────────────────────────────────────────────────
-- 5. witnesses — Poirot-style witness credibility analyses, FK to people entity.
-- ─────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS public.witnesses (
witness_pk BIGSERIAL PRIMARY KEY,
witness_id TEXT UNIQUE NOT NULL, -- W-NNNN
person_entity_pk BIGINT REFERENCES public.entities(entity_pk) ON DELETE RESTRICT,
credibility TEXT CHECK (credibility IN ('high','medium','low','speculation')),
access_to_event TEXT, -- prose: how did they witness it?
bias_notes TEXT,
corroboration_refs JSONB DEFAULT '[]'::jsonb, -- [{evidence_id, doc_id, ...}]
verdict TEXT, -- one-line summary
created_by TEXT NOT NULL DEFAULT 'poirot@detective',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─────────────────────────────────────────────────────────────────────────
-- 6. gaps — known unknowns: what was NOT possible to determine.
-- ─────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS public.gaps (
gap_pk BIGSERIAL PRIMARY KEY,
gap_id TEXT UNIQUE NOT NULL, -- G-NNNN
description TEXT NOT NULL,
scope JSONB, -- {doc_id?, hypothesis_id?, ...}
suggested_next_move TEXT, -- "FOIA request X" / "consult archive Y"
status TEXT NOT NULL DEFAULT 'open'
CHECK (status IN ('open','partial','closed')),
created_by TEXT NOT NULL DEFAULT 'chief-detective',
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─────────────────────────────────────────────────────────────────────────
-- 7. residual_uncertainties — Schneier red-team + Taleb anti-fragile notes.
-- ─────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS public.residual_uncertainties (
ru_pk BIGSERIAL PRIMARY KEY,
ru_id TEXT UNIQUE NOT NULL, -- RU-NNNN
hypothesis_id TEXT REFERENCES public.hypotheses(hypothesis_id) ON DELETE CASCADE,
scope TEXT NOT NULL, -- 'hypothesis' | 'document' | 'global'
description TEXT NOT NULL,
black_swan_check TEXT, -- Taleb: what would invalidate this?
failure_mode TEXT, -- Schneier: how does this break?
created_by TEXT NOT NULL,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- ─────────────────────────────────────────────────────────────────────────
-- RLS — all new tables are public read; writes only via the investigator role.
-- ─────────────────────────────────────────────────────────────────────────
ALTER TABLE public.investigation_jobs ENABLE ROW LEVEL SECURITY;
ALTER TABLE public.evidence ENABLE ROW LEVEL SECURITY;
ALTER TABLE public.hypotheses ENABLE ROW LEVEL SECURITY;
ALTER TABLE public.contradictions ENABLE ROW LEVEL SECURITY;
ALTER TABLE public.witnesses ENABLE ROW LEVEL SECURITY;
ALTER TABLE public.gaps ENABLE ROW LEVEL SECURITY;
ALTER TABLE public.residual_uncertainties ENABLE ROW LEVEL SECURITY;
DROP POLICY IF EXISTS investigation_jobs_read ON public.investigation_jobs;
DROP POLICY IF EXISTS evidence_read ON public.evidence;
DROP POLICY IF EXISTS hypotheses_read ON public.hypotheses;
DROP POLICY IF EXISTS contradictions_read ON public.contradictions;
DROP POLICY IF EXISTS witnesses_read ON public.witnesses;
DROP POLICY IF EXISTS gaps_read ON public.gaps;
DROP POLICY IF EXISTS residual_uncertainties_read ON public.residual_uncertainties;
CREATE POLICY investigation_jobs_read ON public.investigation_jobs FOR SELECT USING (TRUE);
CREATE POLICY evidence_read ON public.evidence FOR SELECT USING (TRUE);
CREATE POLICY hypotheses_read ON public.hypotheses FOR SELECT USING (TRUE);
CREATE POLICY contradictions_read ON public.contradictions FOR SELECT USING (TRUE);
CREATE POLICY witnesses_read ON public.witnesses FOR SELECT USING (TRUE);
CREATE POLICY gaps_read ON public.gaps FOR SELECT USING (TRUE);
CREATE POLICY residual_uncertainties_read ON public.residual_uncertainties FOR SELECT USING (TRUE);
GRANT SELECT ON
public.investigation_jobs,
public.evidence,
public.hypotheses,
public.contradictions,
public.witnesses,
public.gaps,
public.residual_uncertainties
TO anon, authenticated;
-- ─────────────────────────────────────────────────────────────────────────
-- `investigator` role — minimum privilege. The new container connects with
-- THIS role, NOT service_role. Per gate #1 of sa-security-engineer.
--
-- Capability matrix:
-- - SELECT on read corpus (chunks, entities, entity_mentions, relations,
-- documents) so the worker can answer questions.
-- - INSERT/UPDATE on the new write surfaces (jobs + 6 investigation tables).
-- - NO SELECT on auth.users, profiles, messages — the worker must never
-- see PII or chat content.
-- ─────────────────────────────────────────────────────────────────────────
DO $$
BEGIN
IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='investigator') THEN
CREATE ROLE investigator WITH LOGIN
CONNECTION LIMIT 4
PASSWORD 'placeholder_set_by_bootstrap'; -- rotated by the deploy script
-- ALTER ROLE investigator SET search_path = public, pg_catalog;
END IF;
END
$$;
GRANT CONNECT ON DATABASE postgres TO investigator;
GRANT USAGE ON SCHEMA public TO investigator;
GRANT SELECT ON
public.chunks,
public.entities,
public.entity_mentions,
public.relations,
public.documents
TO investigator;
GRANT SELECT, INSERT, UPDATE ON
public.investigation_jobs,
public.evidence,
public.hypotheses,
public.contradictions,
public.witnesses,
public.gaps,
public.residual_uncertainties
TO investigator;
GRANT USAGE, SELECT, UPDATE ON
public.evidence_id_seq,
public.hypothesis_id_seq,
public.contradiction_id_seq,
public.witness_id_seq,
public.gap_id_seq,
public.residual_uncertainty_id_seq
TO investigator;
COMMIT;