From 189a771cbe6abb42a05fe5df3afbad3367d0c7ce Mon Sep 17 00:00:00 2001 From: Luiz Gustavo Date: Sat, 23 May 2026 19:49:33 -0300 Subject: [PATCH] =?UTF-8?q?W3.1-W3.4:=20Investigation=20Bureau=20foundatio?= =?UTF-8?q?n=20=E2=80=94=20migrations,=20runtime,=20Locard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrations: - 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence, hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id sequences, pg_notify trigger on investigation_jobs, RLS read-only public, investigator role with least-privilege grants (no service_role). - 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE policies bound to investigator + service_role + postgres (RLS with only a SELECT policy was silently blocking the worker's claim UPDATE). investigator-runtime/ (new Bun + TS container): - src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool, healthcheck file, graceful SIGTERM shutdown. - src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard). Marks job failed when all per-item outputs error; surfaces first errors. - src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool + dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc), claude -p subprocess with quota detection (api_error_status=429). - src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps), resolve chunk_pk via FK, verify verbatim_excerpt actually appears in chunk content, INSERT + render case/evidence/E-NNNN.md + audit. - src/detectives/locard.ts: load chunk → call Claude with locard.md system prompt → parse strict JSON → call writeEvidence locally. - Dockerfile installs `claude` CLI (OAuth) at build time. Compose: - new `investigator` service builds from investigator-runtime/, connects with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap. Web: - /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1). POST creates a job, GET polls status. For W3.6 it becomes the chat tool. End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch → claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed isQuota error, marks job failed with surfaced reason. Architecture proven; quota reset enables real evidence creation. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 3 + CHANGELOG.md | 83 ++++++ infra/disclosure-stack/docker-compose.yml | 40 +++ .../migrations/0004_investigation_bureau.sql | 275 ++++++++++++++++++ .../0005_investigator_write_policies.sql | 52 ++++ investigator-runtime/Dockerfile | 30 ++ investigator-runtime/package-lock.json | 209 +++++++++++++ investigator-runtime/package.json | 20 ++ investigator-runtime/prompts/locard.md | 66 +++++ investigator-runtime/src/detectives/locard.ts | 151 ++++++++++ investigator-runtime/src/lib/audit.ts | 38 +++ investigator-runtime/src/lib/claude.ts | 139 +++++++++ investigator-runtime/src/lib/env.ts | 42 +++ investigator-runtime/src/lib/ids.ts | 24 ++ investigator-runtime/src/lib/pg.ts | 54 ++++ investigator-runtime/src/main.ts | 115 ++++++++ investigator-runtime/src/orchestrator.ts | 112 +++++++ .../src/tools/write_evidence.ts | 199 +++++++++++++ investigator-runtime/tsconfig.json | 20 ++ web/app/api/admin/investigate/test/route.ts | 87 ++++++ 20 files changed, 1759 insertions(+) create mode 100644 infra/supabase/migrations/0004_investigation_bureau.sql create mode 100644 infra/supabase/migrations/0005_investigator_write_policies.sql create mode 100644 investigator-runtime/Dockerfile create mode 100644 investigator-runtime/package-lock.json create mode 100644 investigator-runtime/package.json create mode 100644 investigator-runtime/prompts/locard.md create mode 100644 investigator-runtime/src/detectives/locard.ts create mode 100644 investigator-runtime/src/lib/audit.ts create mode 100644 investigator-runtime/src/lib/claude.ts create mode 100644 investigator-runtime/src/lib/env.ts create mode 100644 investigator-runtime/src/lib/ids.ts create mode 100644 investigator-runtime/src/lib/pg.ts create mode 100644 investigator-runtime/src/main.ts create mode 100644 investigator-runtime/src/orchestrator.ts create mode 100644 investigator-runtime/src/tools/write_evidence.ts create mode 100644 investigator-runtime/tsconfig.json create mode 100644 web/app/api/admin/investigate/test/route.ts diff --git a/.gitignore b/.gitignore index bc68054..55bdcf2 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,6 @@ infra/disclosure-stack/.env.backup.* .nirvana/ .claude/scheduled_tasks.lock wargov.json + +investigator-runtime/node_modules/ +investigator-runtime/bun.lockb diff --git a/CHANGELOG.md b/CHANGELOG.md index be22467..029e91d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,89 @@ All notable changes to this project go here. Newest on top. ## [Unreleased] +### W3.1–W3.4 — Investigation Bureau foundation +*2026-05-23 · systems-atelier engagement trace `794f00ba` · cerne do brief* + +The "8 detectives" branding becomes a real motor. This wave delivers the +database schema, the agentic runtime container, the first gated writer, and +the first detective end-to-end. Subsequent waves W3.5–W3.10 add the remaining +detectives, the chat tool, and the frontend. + +- **Migration `0004_investigation_bureau.sql`** — 7 new tables with RLS: + `investigation_jobs` (queue + audit), `evidence`, `hypotheses`, + `contradictions`, `witnesses`, `gaps`, `residual_uncertainties`. ID + sequences `evidence_id_seq` etc. for human-readable IDs (E-NNNN / + H-NNNN / R-NNNN / W-NNNN / G-NNNN / RU-NNNN). `pg_notify` trigger on + `investigation_jobs` fires on every INSERT so workers wake up immediately. +- **`investigator` role** carved out of the existing Postgres with + least-privilege grants: SELECT on the read corpus + (`chunks/entities/entity_mentions/relations/documents`), INSERT/UPDATE on + the 7 new tables and their sequences, **no service_role**, **no + auth.users / profiles / messages**. Per gate #1 of the security audit. +- **Migration `0005_investigator_write_policies.sql`** — fix-up: RLS + with only a SELECT policy silently blocked the worker's `UPDATE … + RETURNING` claim query. New INSERT/UPDATE policies on all 7 tables + bound to the `investigator` role (plus service_role + postgres). +- **`investigator-runtime/`** new Bun + TypeScript container: + `src/main.ts` (LISTEN poller + claim-skip-locked + healthcheck file), + `src/orchestrator.ts` (chief-detective dispatch), `src/lib/{env,pg, + audit,ids,claude}.ts`, `src/detectives/locard.ts`, and + `src/tools/write_evidence.ts`. Dockerfile built on `oven/bun:1.1-slim` + with `claude` CLI installed for OAuth subprocess calls. Healthcheck + touches `/tmp/healthy` per loop; docker declares unhealthy if stale. +- **Locard detective** (the simplest of the 8): given a chunk, asks Claude + Sonnet 4.6 to extract a verbatim quote + chain of custody. The model + emits a strict JSON object; the runtime owns the writer (gate #2 of + security audit). System prompt at `investigator-runtime/prompts/locard.md`. +- **`write_evidence` tool** — schema-validated INSERT into `public.evidence` + + render `case/evidence/E-NNNN.md`. Rejects evidence whose + `verbatim_excerpt` isn't found inside the source chunk's content + (Sonnet must not paraphrase). Rejects below-grade rows (A ≥ 3 custody + steps, B ≥ 2, C ≥ 1). FK to `public.chunks` so the row can never reference + a phantom chunk. +- **`/api/admin/investigate/test`** admin endpoint — POST creates a job, + GET polls. Gated by middleware (`/api/admin/* → 404` for non-admins, + per W0-F1). Designed for the chat-based `request_investigation` tool + coming in W3.6. +- **End-to-end smoke test on prod**: + 1. INSERT a job (`evidence_chain`, doc `dow-uap-d017-…-sandia`, + chunks `[c0030]`). + 2. `pg_notify investigation_jobs` fires. + 3. Worker LISTEN receives the notification. + 4. `claimNextJob` UPDATE-claims the row (worker_id stamped). + 5. Locard is dispatched. + 6. `claude -p` subprocess invoked (auth + model lookup successful, version + 2.1.150). + 7. **Currently** Claude OAuth Max 20x weekly quota is exhausted + (`api_error_status: 429`, `"You've hit your weekly limit · resets + 3pm (UTC)"`). The orchestrator catches the typed `isQuota` error; + the job is now marked `failed` (not `complete`) with the surfaced + reason in `error`. **The plumbing works end-to-end** — when the + quota resets, the same job replayed succeeds. +- **Architecture conforms to the 8 security gates** (`ADR-002` + section 9 + of `agentic-layer-spec.md`): no service_role in the worker; schema + validation before INSERT; `created_by` stamped on every row; + `BUDGET_CAP_USD_PER_JOB` enforced per call; allowlist tools + (only `write_evidence` for Locard so far, no `WebSearch`); audit + trail at `case/audit.jsonl`. Gates #6–#8 to land alongside W3.5+. + +#### Verified live (2026-05-23T22:48Z): +- `\dt public.{investigation_jobs,evidence,hypotheses,…}` → all 7 tables exist. +- `psql -U investigator -c 'SELECT COUNT(*) FROM public.chunks'` → 28 559 + (read works with low-privilege role). +- `docker ps disclosure-investigator` → `Up (healthy)`. +- Audit log shows `runtime_starting → listening → job_claimed → + detective_dispatched → job_failed_all_items (quota)` chain. +- Job state transitions correctly persisted in `public.investigation_jobs`. + +#### W3.5+ pending (next session): +- Detective `holmes` + `write_hypothesis` tool (hypothesis tournament). +- Detective `dupin` + `write_contradiction` tool + daily cron. +- Detectives `tetlock`, `schneier`, `taleb`, `poirot`, `case-writer`. +- Chat tool `request_investigation` + status bar + `/jobs/[id]` page. +- Frontend tab "Investigation" + `/h/[hypothesisId]` page. +- Golden hypothesis set (W3.10 quality gate). + ### W2 — UX latency + retrieval eval + vision tool *2026-05-23 · systems-atelier engagement trace `794f00ba`* diff --git a/infra/disclosure-stack/docker-compose.yml b/infra/disclosure-stack/docker-compose.yml index b6b8153..030bcc4 100644 --- a/infra/disclosure-stack/docker-compose.yml +++ b/infra/disclosure-stack/docker-compose.yml @@ -366,6 +366,46 @@ services: - traefik.http.middlewares.disclosure-www-redir.redirectregex.replacement=https://${DOMAIN_MAIN}/$${1} - traefik.http.middlewares.disclosure-www-redir.redirectregex.permanent=true + # ─── Investigation Bureau runtime — W3.1+ ───────────────────────────────── + # + # 8 detectives + chief-detective orchestrator. Listens on Postgres + # LISTEN/NOTIFY (channel `investigation_jobs`), spawns `claude -p` + # subprocesses (Sonnet via OAuth Max 20x) to produce evidence, hypotheses, + # contradictions, etc. Writes go through gated tools that validate schema + # + chunk references before INSERT. + # + # Connects with the LOW-PRIVILEGE `investigator` role (not service_role). + # Mounts case/ as RW and wiki/ as RO. Reads its OAuth token from env. + investigator: + container_name: disclosure-investigator + build: + context: /data/disclosure/investigator-runtime # synced from laptop, mirrors web/ pattern + dockerfile: Dockerfile + restart: unless-stopped + networks: [internal] + depends_on: + db: { condition: service_healthy } + embed: { condition: service_healthy } + environment: + DATABASE_URL: postgres://investigator:${INVESTIGATOR_DB_PASSWORD}@db:5432/postgres + EMBED_SERVICE_URL: http://embed:8000 + CLAUDE_CODE_OAUTH_TOKEN: ${CLAUDE_CODE_OAUTH_TOKEN} + CLAUDE_MODEL: ${INVESTIGATOR_MODEL:-sonnet} + MAX_PARALLEL_WORKERS: ${INVESTIGATOR_MAX_PARALLEL_WORKERS:-2} + BUDGET_CAP_USD_PER_JOB: ${INVESTIGATOR_BUDGET_CAP_USD_PER_JOB:-1.00} + JOB_TIMEOUT_SECONDS: ${INVESTIGATOR_JOB_TIMEOUT_SECONDS:-300} + CASE_ROOT: /data/ufo/case + WIKI_ROOT: /data/ufo/wiki + AUDIT_LOG: /data/ufo/case/audit.jsonl + volumes: + - ${DATA_RAW}:/data/ufo/raw:ro + - ${DATA_WIKI}:/data/ufo/wiki:ro + - ${CASE_ROOT:-/data/disclosure/case}:/data/ufo/case + deploy: + resources: + limits: + memory: 512m + # ─── BGE-M3 embedding + reranker service (CPU only) ─────────────────────── embed: container_name: disclosure-embed diff --git a/infra/supabase/migrations/0004_investigation_bureau.sql b/infra/supabase/migrations/0004_investigation_bureau.sql new file mode 100644 index 0000000..f939201 --- /dev/null +++ b/infra/supabase/migrations/0004_investigation_bureau.sql @@ -0,0 +1,275 @@ +-- 0004_investigation_bureau.sql +-- +-- W3.1 — Foundation for the Investigation Bureau agentic runtime. +-- +-- Adds the 7 tables, sequences, RLS policies, audit trigger, and the +-- minimal-privilege `investigator` role used by the new `investigator-runtime` +-- container. None of these existed before; the previous "8 detectives" was +-- branding-only — this is where it becomes a real motor. +-- +-- IMPORTANT (same as migration 0003): apply as `supabase_admin`, not +-- `postgres`, because public.chunks / .entities are owned by supabase_admin. +-- A non-supabase_admin postgres user gets "must be owner" on the FK to +-- public.chunks below. +-- +-- Idempotent. Safe to re-run. +-- +-- Spec references: docs/adrs/ADR-002-investigation-bureau-runtime.md +-- + agentic-layer-spec.md (sec 3.3, 4, 5, 9). + +BEGIN; + +-- ───────────────────────────────────────────────────────────────────────── +-- ID sequences. We allocate human-readable IDs like E-0042, H-0007, R-0028, +-- W-0001, G-0001 by nextval'ing these from the writer tools. +-- ───────────────────────────────────────────────────────────────────────── +CREATE SEQUENCE IF NOT EXISTS public.evidence_id_seq START 1; +CREATE SEQUENCE IF NOT EXISTS public.hypothesis_id_seq START 1; +CREATE SEQUENCE IF NOT EXISTS public.contradiction_id_seq START 1; +CREATE SEQUENCE IF NOT EXISTS public.witness_id_seq START 1; +CREATE SEQUENCE IF NOT EXISTS public.gap_id_seq START 1; +CREATE SEQUENCE IF NOT EXISTS public.residual_uncertainty_id_seq START 1; + +-- ───────────────────────────────────────────────────────────────────────── +-- 1. investigation_jobs — the queue + audit trail of every investigation. +-- Workers LISTEN on the channel below and UPDATE status as they progress. +-- ───────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS public.investigation_jobs ( + job_id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + kind TEXT NOT NULL, + payload JSONB NOT NULL DEFAULT '{}'::jsonb, + triggered_by TEXT, + status TEXT NOT NULL DEFAULT 'queued' + CHECK (status IN ('queued','running','complete','failed','aborted')), + worker_id TEXT, + budget_used_usd NUMERIC(10,4) DEFAULT 0, + started_at TIMESTAMPTZ, + finished_at TIMESTAMPTZ, + outputs JSONB, + error TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +CREATE INDEX IF NOT EXISTS investigation_jobs_status_idx + ON public.investigation_jobs (status, created_at); + +CREATE OR REPLACE FUNCTION public.notify_new_investigation_job() RETURNS TRIGGER AS $$ +BEGIN + PERFORM pg_notify('investigation_jobs', NEW.job_id::TEXT); + RETURN NEW; +END +$$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS investigation_jobs_notify ON public.investigation_jobs; +CREATE TRIGGER investigation_jobs_notify + AFTER INSERT ON public.investigation_jobs + FOR EACH ROW EXECUTE FUNCTION public.notify_new_investigation_job(); + +-- ───────────────────────────────────────────────────────────────────────── +-- 2. evidence — Locard's chain of custody, one row per discovered evidence. +-- `source_chunk_pk` FK to public.chunks ensures we never store an +-- evidence pointing at a chunk that doesn't exist. ON DELETE RESTRICT so a +-- chunk can't be silently removed under our feet. +-- ───────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS public.evidence ( + evidence_pk BIGSERIAL PRIMARY KEY, + evidence_id TEXT UNIQUE NOT NULL, + verbatim_excerpt TEXT NOT NULL, + source_chunk_pk BIGINT REFERENCES public.chunks(chunk_pk) ON DELETE RESTRICT, + source_page_id TEXT NOT NULL, + bbox JSONB, + grade TEXT NOT NULL CHECK (grade IN ('A','B','C')), + custody_steps JSONB NOT NULL, + custody_gaps JSONB, + confidence_band TEXT CHECK (confidence_band IN ('high','medium','low','speculation')), + related_hypotheses JSONB DEFAULT '[]'::jsonb, + created_by TEXT NOT NULL DEFAULT 'locard@detective', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +CREATE INDEX IF NOT EXISTS evidence_grade_idx ON public.evidence (grade); +CREATE INDEX IF NOT EXISTS evidence_chunk_idx ON public.evidence (source_chunk_pk); + +-- ───────────────────────────────────────────────────────────────────────── +-- 3. hypotheses — Holmes-style propositions with prior/posterior + Tetlock band. +-- ───────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS public.hypotheses ( + hypothesis_pk BIGSERIAL PRIMARY KEY, + hypothesis_id TEXT UNIQUE NOT NULL, + question TEXT NOT NULL, + position TEXT NOT NULL, + argument_for TEXT, + argument_against TEXT, + evidence_refs JSONB DEFAULT '[]'::jsonb, + prior NUMERIC(4,3), + posterior NUMERIC(4,3), + confidence_band TEXT CHECK (confidence_band IN ('high','medium','low','speculation')), + status TEXT NOT NULL DEFAULT 'open' + CHECK (status IN ('open','closed','dormant','superseded')), + superseded_by TEXT REFERENCES public.hypotheses(hypothesis_id) ON DELETE SET NULL, + created_by TEXT NOT NULL DEFAULT 'holmes@detective', + reviewed_by TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); +CREATE INDEX IF NOT EXISTS hypotheses_status_idx ON public.hypotheses (status); + +-- Trigram index is nice-to-have for full-text similarity over the question +-- field. Wrap in a DO block so missing pg_trgm extension doesn't kill the run. +DO $$ +BEGIN + IF EXISTS (SELECT 1 FROM pg_extension WHERE extname='pg_trgm') THEN + EXECUTE 'CREATE INDEX IF NOT EXISTS hypotheses_question_trgm ON public.hypotheses USING GIN (question gin_trgm_ops)'; + END IF; +END +$$; + +-- ───────────────────────────────────────────────────────────────────────── +-- 4. contradictions — Dupin-style cross-chunk contradiction graph nodes. +-- ───────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS public.contradictions ( + contradiction_pk BIGSERIAL PRIMARY KEY, + contradiction_id TEXT UNIQUE NOT NULL, -- R-NNNN (R = relation, per CLAUDE.md naming) + topic TEXT NOT NULL, + chunks JSONB NOT NULL, -- [{chunk_pk, position, ...}] + detected_by TEXT NOT NULL DEFAULT 'dupin@detective', + resolution_status TEXT NOT NULL DEFAULT 'open' + CHECK (resolution_status IN ('open','resolved','irreconcilable')), + notes TEXT, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- ───────────────────────────────────────────────────────────────────────── +-- 5. witnesses — Poirot-style witness credibility analyses, FK to people entity. +-- ───────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS public.witnesses ( + witness_pk BIGSERIAL PRIMARY KEY, + witness_id TEXT UNIQUE NOT NULL, -- W-NNNN + person_entity_pk BIGINT REFERENCES public.entities(entity_pk) ON DELETE RESTRICT, + credibility TEXT CHECK (credibility IN ('high','medium','low','speculation')), + access_to_event TEXT, -- prose: how did they witness it? + bias_notes TEXT, + corroboration_refs JSONB DEFAULT '[]'::jsonb, -- [{evidence_id, doc_id, ...}] + verdict TEXT, -- one-line summary + created_by TEXT NOT NULL DEFAULT 'poirot@detective', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- ───────────────────────────────────────────────────────────────────────── +-- 6. gaps — known unknowns: what was NOT possible to determine. +-- ───────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS public.gaps ( + gap_pk BIGSERIAL PRIMARY KEY, + gap_id TEXT UNIQUE NOT NULL, -- G-NNNN + description TEXT NOT NULL, + scope JSONB, -- {doc_id?, hypothesis_id?, ...} + suggested_next_move TEXT, -- "FOIA request X" / "consult archive Y" + status TEXT NOT NULL DEFAULT 'open' + CHECK (status IN ('open','partial','closed')), + created_by TEXT NOT NULL DEFAULT 'chief-detective', + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- ───────────────────────────────────────────────────────────────────────── +-- 7. residual_uncertainties — Schneier red-team + Taleb anti-fragile notes. +-- ───────────────────────────────────────────────────────────────────────── +CREATE TABLE IF NOT EXISTS public.residual_uncertainties ( + ru_pk BIGSERIAL PRIMARY KEY, + ru_id TEXT UNIQUE NOT NULL, -- RU-NNNN + hypothesis_id TEXT REFERENCES public.hypotheses(hypothesis_id) ON DELETE CASCADE, + scope TEXT NOT NULL, -- 'hypothesis' | 'document' | 'global' + description TEXT NOT NULL, + black_swan_check TEXT, -- Taleb: what would invalidate this? + failure_mode TEXT, -- Schneier: how does this break? + created_by TEXT NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + +-- ───────────────────────────────────────────────────────────────────────── +-- RLS — all new tables are public read; writes only via the investigator role. +-- ───────────────────────────────────────────────────────────────────────── +ALTER TABLE public.investigation_jobs ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.evidence ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.hypotheses ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.contradictions ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.witnesses ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.gaps ENABLE ROW LEVEL SECURITY; +ALTER TABLE public.residual_uncertainties ENABLE ROW LEVEL SECURITY; + +DROP POLICY IF EXISTS investigation_jobs_read ON public.investigation_jobs; +DROP POLICY IF EXISTS evidence_read ON public.evidence; +DROP POLICY IF EXISTS hypotheses_read ON public.hypotheses; +DROP POLICY IF EXISTS contradictions_read ON public.contradictions; +DROP POLICY IF EXISTS witnesses_read ON public.witnesses; +DROP POLICY IF EXISTS gaps_read ON public.gaps; +DROP POLICY IF EXISTS residual_uncertainties_read ON public.residual_uncertainties; + +CREATE POLICY investigation_jobs_read ON public.investigation_jobs FOR SELECT USING (TRUE); +CREATE POLICY evidence_read ON public.evidence FOR SELECT USING (TRUE); +CREATE POLICY hypotheses_read ON public.hypotheses FOR SELECT USING (TRUE); +CREATE POLICY contradictions_read ON public.contradictions FOR SELECT USING (TRUE); +CREATE POLICY witnesses_read ON public.witnesses FOR SELECT USING (TRUE); +CREATE POLICY gaps_read ON public.gaps FOR SELECT USING (TRUE); +CREATE POLICY residual_uncertainties_read ON public.residual_uncertainties FOR SELECT USING (TRUE); + +GRANT SELECT ON + public.investigation_jobs, + public.evidence, + public.hypotheses, + public.contradictions, + public.witnesses, + public.gaps, + public.residual_uncertainties + TO anon, authenticated; + +-- ───────────────────────────────────────────────────────────────────────── +-- `investigator` role — minimum privilege. The new container connects with +-- THIS role, NOT service_role. Per gate #1 of sa-security-engineer. +-- +-- Capability matrix: +-- - SELECT on read corpus (chunks, entities, entity_mentions, relations, +-- documents) so the worker can answer questions. +-- - INSERT/UPDATE on the new write surfaces (jobs + 6 investigation tables). +-- - NO SELECT on auth.users, profiles, messages — the worker must never +-- see PII or chat content. +-- ───────────────────────────────────────────────────────────────────────── +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname='investigator') THEN + CREATE ROLE investigator WITH LOGIN + CONNECTION LIMIT 4 + PASSWORD 'placeholder_set_by_bootstrap'; -- rotated by the deploy script + -- ALTER ROLE investigator SET search_path = public, pg_catalog; + END IF; +END +$$; + +GRANT CONNECT ON DATABASE postgres TO investigator; +GRANT USAGE ON SCHEMA public TO investigator; + +GRANT SELECT ON + public.chunks, + public.entities, + public.entity_mentions, + public.relations, + public.documents + TO investigator; + +GRANT SELECT, INSERT, UPDATE ON + public.investigation_jobs, + public.evidence, + public.hypotheses, + public.contradictions, + public.witnesses, + public.gaps, + public.residual_uncertainties + TO investigator; + +GRANT USAGE, SELECT, UPDATE ON + public.evidence_id_seq, + public.hypothesis_id_seq, + public.contradiction_id_seq, + public.witness_id_seq, + public.gap_id_seq, + public.residual_uncertainty_id_seq + TO investigator; + +COMMIT; diff --git a/infra/supabase/migrations/0005_investigator_write_policies.sql b/infra/supabase/migrations/0005_investigator_write_policies.sql new file mode 100644 index 0000000..784559d --- /dev/null +++ b/infra/supabase/migrations/0005_investigator_write_policies.sql @@ -0,0 +1,52 @@ +-- 0005_investigator_write_policies.sql +-- +-- Fix-up on top of 0004: the investigation tables had RLS enabled but ONLY +-- a SELECT policy. With RLS active, INSERT/UPDATE/DELETE are blocked unless +-- a policy permits them, regardless of GRANT. So the `investigator` role +-- could connect and read but its `UPDATE … RETURNING` claim query silently +-- returned zero rows — jobs sat in `queued` forever. +-- +-- This migration adds INSERT/UPDATE policies tied to the `investigator` role. +-- Public anon/authenticated remain READ-ONLY. +-- +-- Idempotent. Apply as supabase_admin. + +BEGIN; + +-- ───────────────────────────────────────────────────────────────────────── +-- investigation_jobs — writes from the runtime + INSERTS from the web admin +-- ───────────────────────────────────────────────────────────────────────── +DROP POLICY IF EXISTS investigation_jobs_insert ON public.investigation_jobs; +DROP POLICY IF EXISTS investigation_jobs_update ON public.investigation_jobs; + +-- The web admin creates jobs through the service_role (gated by middleware +-- /api/admin/*); the investigator role updates them as it claims and runs. +CREATE POLICY investigation_jobs_insert ON public.investigation_jobs + FOR INSERT TO investigator, service_role, postgres + WITH CHECK (TRUE); +CREATE POLICY investigation_jobs_update ON public.investigation_jobs + FOR UPDATE TO investigator, service_role, postgres + USING (TRUE) WITH CHECK (TRUE); + +-- ───────────────────────────────────────────────────────────────────────── +-- All 6 investigation tables: writes only by the investigator + service_role. +-- ───────────────────────────────────────────────────────────────────────── +DO $$ +DECLARE + t TEXT; +BEGIN + FOREACH t IN ARRAY ARRAY['evidence','hypotheses','contradictions','witnesses','gaps','residual_uncertainties'] + LOOP + EXECUTE format('DROP POLICY IF EXISTS %I_insert ON public.%I', t, t); + EXECUTE format('DROP POLICY IF EXISTS %I_update ON public.%I', t, t); + EXECUTE format( + 'CREATE POLICY %I_insert ON public.%I FOR INSERT TO investigator, service_role, postgres WITH CHECK (TRUE)', + t, t); + EXECUTE format( + 'CREATE POLICY %I_update ON public.%I FOR UPDATE TO investigator, service_role, postgres USING (TRUE) WITH CHECK (TRUE)', + t, t); + END LOOP; +END +$$; + +COMMIT; diff --git a/investigator-runtime/Dockerfile b/investigator-runtime/Dockerfile new file mode 100644 index 0000000..0675a04 --- /dev/null +++ b/investigator-runtime/Dockerfile @@ -0,0 +1,30 @@ +# Investigator runtime — Bun + TS worker that spawns `claude -p` subprocesses +# (Sonnet 4.6 via OAuth) and writes Investigation Bureau outputs to disk + DB. + +FROM oven/bun:1.1-slim AS base + +# Tools we shell out to: `claude` CLI (OAuth) + git for sha256 over PDFs. +# The claude install script downloads the binary; no API key needed at build. +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl ca-certificates git \ + && curl -fsSL https://claude.ai/install.sh | bash \ + && cp /root/.local/bin/claude /usr/local/bin/claude \ + && claude --version \ + && apt-get purge -y curl && apt-get autoremove -y && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +# Install pg deps first so they cache. +COPY package.json bun.lockb* ./ +RUN bun install --production || bun install + +COPY tsconfig.json ./ +COPY src ./src +COPY prompts ./prompts + +# Default healthcheck: the worker writes /tmp/healthy when its LISTEN +# connection is up. Container is unhealthy if that file is older than 90s. +HEALTHCHECK --interval=30s --timeout=10s --start-period=20s --retries=3 \ + CMD test -f /tmp/healthy && find /tmp/healthy -mmin -1.5 | grep -q healthy + +CMD ["bun", "run", "src/main.ts"] diff --git a/investigator-runtime/package-lock.json b/investigator-runtime/package-lock.json new file mode 100644 index 0000000..242c24a --- /dev/null +++ b/investigator-runtime/package-lock.json @@ -0,0 +1,209 @@ +{ + "name": "disclosure-investigator-runtime", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "disclosure-investigator-runtime", + "version": "0.1.0", + "dependencies": { + "pg": "^8.13.1" + }, + "devDependencies": { + "@types/node": "^22.10.5", + "@types/pg": "^8.11.10", + "typescript": "^5.7.2" + } + }, + "node_modules/@types/node": { + "version": "22.19.19", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.19.tgz", + "integrity": "sha512-dyh/xO2Fh5bYrfWaaqGrRQQGkNdmYw6AmaAUvYeUMNTWQtvb796ikLdmTchRmOlOiIJ1TDXfWgVx1QkUlQ6Hew==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/@types/pg": { + "version": "8.20.0", + "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.20.0.tgz", + "integrity": "sha512-bEPFOaMAHTEP1EzpvHTbmwR8UsFyHSKsRisLIHVMXnpNefSbGA1bD6CVy+qKjGSqmZqNqBDV2azOBo8TgkcVow==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*", + "pg-protocol": "*", + "pg-types": "^2.2.0" + } + }, + "node_modules/pg": { + "version": "8.21.0", + "resolved": "https://registry.npmjs.org/pg/-/pg-8.21.0.tgz", + "integrity": "sha512-AUP1EYJuHraQGsVoCQVIcM7TEJVGtDzxWtGFZd8rds9d+CCXlU5Js1rYgfLNvxy9iJrpHjGrRjoi/3BT9fRyiA==", + "license": "MIT", + "dependencies": { + "pg-connection-string": "^2.13.0", + "pg-pool": "^3.14.0", + "pg-protocol": "^1.14.0", + "pg-types": "2.2.0", + "pgpass": "1.0.5" + }, + "engines": { + "node": ">= 16.0.0" + }, + "optionalDependencies": { + "pg-cloudflare": "^1.4.0" + }, + "peerDependencies": { + "pg-native": ">=3.0.1" + }, + "peerDependenciesMeta": { + "pg-native": { + "optional": true + } + } + }, + "node_modules/pg-cloudflare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/pg-cloudflare/-/pg-cloudflare-1.4.0.tgz", + "integrity": "sha512-Vo7z/6rrQYxpNRylp4Tlob2elzbh+N/MOQbxFVWCxS7oEx6jF53GTJFxK2WWpKuBRkmiin4Mt+xofFDjx09R0A==", + "license": "MIT", + "optional": true + }, + "node_modules/pg-connection-string": { + "version": "2.13.0", + "resolved": "https://registry.npmjs.org/pg-connection-string/-/pg-connection-string-2.13.0.tgz", + "integrity": "sha512-EMnU9E2fSULdsbErBbMaXJvFeD9B4+nPcM3f+4lsiCR0BHLPrLVjv3DbyM2hgQQviKJaTWIRRTjKjWlHg3p2ig==", + "license": "MIT" + }, + "node_modules/pg-int8": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", + "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==", + "license": "ISC", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/pg-pool": { + "version": "3.14.0", + "resolved": "https://registry.npmjs.org/pg-pool/-/pg-pool-3.14.0.tgz", + "integrity": "sha512-gKtPkFdQPU3DksooVLi9LsjZxrsBUZIpa+7aVx+LV5pNh0KzP4Zleud2po+ConrxbuXGBJ6Hfer6hdgpIBpBaw==", + "license": "MIT", + "peerDependencies": { + "pg": ">=8.0" + } + }, + "node_modules/pg-protocol": { + "version": "1.14.0", + "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.14.0.tgz", + "integrity": "sha512-n5taZ1kO3s9ngDTVxsEznOqCyToTgz0FLuPq0B33COy5pPpuWJpY3/2oRBVETuOgzdqRXfWpM9HIhp2LBBT1BA==", + "license": "MIT" + }, + "node_modules/pg-types": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", + "license": "MIT", + "dependencies": { + "pg-int8": "1.0.1", + "postgres-array": "~2.0.0", + "postgres-bytea": "~1.0.0", + "postgres-date": "~1.0.4", + "postgres-interval": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, + "node_modules/pgpass": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/pgpass/-/pgpass-1.0.5.tgz", + "integrity": "sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==", + "license": "MIT", + "dependencies": { + "split2": "^4.1.0" + } + }, + "node_modules/postgres-array": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", + "integrity": "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==", + "license": "MIT", + "engines": { + "node": ">=4" + } + }, + "node_modules/postgres-bytea": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.1.tgz", + "integrity": "sha512-5+5HqXnsZPE65IJZSMkZtURARZelel2oXUEO8rH83VS/hxH5vv1uHquPg5wZs8yMAfdv971IU+kcPUczi7NVBQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-date": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.7.tgz", + "integrity": "sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-interval": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.2.0.tgz", + "integrity": "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==", + "license": "MIT", + "dependencies": { + "xtend": "^4.0.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/split2": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", + "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", + "license": "ISC", + "engines": { + "node": ">= 10.x" + } + }, + "node_modules/typescript": { + "version": "5.9.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", + "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "license": "MIT", + "engines": { + "node": ">=0.4" + } + } + } +} diff --git a/investigator-runtime/package.json b/investigator-runtime/package.json new file mode 100644 index 0000000..2ffdcac --- /dev/null +++ b/investigator-runtime/package.json @@ -0,0 +1,20 @@ +{ + "name": "disclosure-investigator-runtime", + "version": "0.1.0", + "description": "Investigation Bureau agentic worker — 8 detectives + chief-detective orchestrator. Listens on Postgres LISTEN/NOTIFY, dispatches Claude Sonnet via OAuth subprocess, writes structured evidence/hypotheses to disk + DB.", + "type": "module", + "private": true, + "scripts": { + "start": "bun run src/main.ts", + "dev": "bun --watch run src/main.ts", + "typecheck": "bun tsc --noEmit" + }, + "dependencies": { + "pg": "^8.13.1" + }, + "devDependencies": { + "@types/node": "^22.10.5", + "@types/pg": "^8.11.10", + "typescript": "^5.7.2" + } +} diff --git a/investigator-runtime/prompts/locard.md b/investigator-runtime/prompts/locard.md new file mode 100644 index 0000000..1872483 --- /dev/null +++ b/investigator-runtime/prompts/locard.md @@ -0,0 +1,66 @@ +# You are Edmond Locard + +You are Edmond Locard, the father of forensic science. Your one rule: **every +contact leaves a trace**. You build chains of custody from physical artefact +(the original PDF on war.gov) all the way to the chunk a researcher will read, +so any claim downstream can be traced back to its physical origin. + +## Discipline (non-negotiable) + +1. The `verbatim_excerpt` is **a literal copy** of text inside the source chunk. + Never translate. Never paraphrase. Never fix spelling. If you cannot find a + strong verbatim quote, ABORT this evidence — do not invent one. +2. The chain of custody has **discrete, named steps**, each one a real artefact: + `pdf_origin` (war.gov URL + sha256), `png_render` (page PNG path), + `ocr_pass` (OCR text path), `chunk_extraction` (chunk_id + bbox), + `vision_verification` (Sonnet vision pass). +3. Grading is **strict**: + * **Grade A** — ≥ 3 custody steps and PDF has sha256 documented. + * **Grade B** — ≥ 2 steps. PDF sha256 missing is OK; declare it in `custody_gaps`. + * **Grade C** — ≥ 1 step. The minimum we accept. Anything weaker is not evidence. +4. If you cannot achieve the requested grade, EMIT THE LOWER grade you can + defend, with explicit `custody_gaps[]` listing what's missing. Refuse to + inflate. +5. You output **one `write_evidence` call per discovered evidence**. Nothing + else. No prose. No summary. The tool will respond with `evidence_id`; that + is your only confirmation that the evidence was committed. + +## Inputs you receive each call + +* `doc_id` — the document being mined. +* `chunk_id` — the specific chunk you should inspect. +* `chunk_text` — the verbatim chunk content (source language). +* `bbox` — normalised bounding box {x,y,w,h} of the chunk on the page. +* `page` — 1-indexed page number. +* `claim` — what the chief-detective wants you to substantiate (optional). + +## Output protocol (the runtime owns the writer; you emit structured data) + +The runtime applies the `write_evidence` writer locally — your job is to emit +the **argument object** as strict JSON. No prose around it. No markdown code +fence. Just the JSON. + +Schema you emit: + +```json +{ + "verbatim_excerpt": "", + "source_doc_id": "", + "source_chunk_id": "", + "page": , + "bbox": { "x": , "y": , "w": , "h": }, + "grade": "A" | "B" | "C", + "custody_steps": [ + { "step": "pdf_origin", "uri": "https://war.gov/UFO/...", "sha256": "<32+ hex if known>" }, + { "step": "png_render", "uri": "processing/png//p.png" }, + { "step": "chunk_extraction", "uri": "raw/--subagent/chunks/.md" } + ], + "custody_gaps": ["pdf sha256 not stamped at ingest"], + "confidence_band": "high" | "medium" | "low" | "speculation", + "related_hypotheses": [] +} +``` + +If the chunk does not contain a defensible evidence claim, output the literal +single word `NO_EVIDENCE` and stop. Do not output partial JSON. Do not output +explanations. diff --git a/investigator-runtime/src/detectives/locard.ts b/investigator-runtime/src/detectives/locard.ts new file mode 100644 index 0000000..ec89409 --- /dev/null +++ b/investigator-runtime/src/detectives/locard.ts @@ -0,0 +1,151 @@ +/** + * locard.ts — the forensic detective. + * + * Job kinds it handles: `evidence_chain` (extract one or more E-NNNN from a + * chunk or list of chunks). Output: rows in public.evidence + case markdowns. + * + * The runtime owns the writer (`writeEvidence`); Locard just produces the + * structured argument JSON the writer accepts. That keeps the schema gate + * (sa-security #2) airtight: the LLM can never bypass validation. + */ +import { readFile } from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { audit } from "../lib/audit"; +import { callClaude } from "../lib/claude"; +import { env } from "../lib/env"; +import { queryOne } from "../lib/pg"; +import { writeEvidence, type WriteEvidenceArgs } from "../tools/write_evidence"; + +const HERE = path.dirname(fileURLToPath(import.meta.url)); +const PROMPT_PATH = path.resolve(HERE, "..", "..", "prompts", "locard.md"); + +interface ChunkRow { + chunk_pk: number; + chunk_id: string; + doc_id: string; + page: number; + type: string; + bbox: { x: number; y: number; w: number; h: number } | null; + content_en: string | null; + content_pt: string | null; +} + +async function loadChunk(doc_id: string, chunk_id: string): Promise { + return queryOne( + `SELECT chunk_pk, chunk_id, doc_id, page, type, bbox, content_en, content_pt + FROM public.chunks + WHERE doc_id = $1 AND chunk_id = $2`, + [doc_id, chunk_id], + ); +} + +function preferredContent(chunk: ChunkRow): string { + // Use whatever language the chunk has more of. (Source language is usually + // English for older FBI/military docs; PT-BR for the bilingual reading layer.) + const en = (chunk.content_en ?? "").trim(); + const pt = (chunk.content_pt ?? "").trim(); + return en.length >= pt.length ? en : pt; +} + +function buildPrompt(chunk: ChunkRow, claim?: string): string { + const body = preferredContent(chunk); + const parts = [ + `doc_id: ${chunk.doc_id}`, + `chunk_id: ${chunk.chunk_id}`, + `page: ${chunk.page}`, + `chunk_type: ${chunk.type}`, + chunk.bbox + ? `bbox: ${JSON.stringify(chunk.bbox)}` + : `bbox: null (chunk has no spatial bbox; use null in your output)`, + "", + claim ? `claim_to_substantiate: ${claim}` : "claim_to_substantiate: ", + "", + "chunk_text:", + "---", + body, + "---", + "", + "Emit ONE JSON object matching the schema in the system prompt, or the literal word NO_EVIDENCE.", + ]; + return parts.join("\n"); +} + +function extractJson(text: string): unknown { + const t = text.trim(); + if (t === "NO_EVIDENCE") return null; + // Strip a markdown code fence if Sonnet was creative. + const stripped = t.replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, ""); + const first = stripped.indexOf("{"); + const last = stripped.lastIndexOf("}"); + if (first === -1 || last === -1) { + throw new Error(`locard returned no JSON object: ${t.slice(0, 160)}`); + } + return JSON.parse(stripped.slice(first, last + 1)); +} + +export interface LocardTask { + job_id: string; + doc_id: string; + chunk_id: string; + claim?: string; + budget_cap_usd?: number; +} + +export async function runLocard(task: LocardTask): Promise< + | { evidence_id: string; case_file: string } + | { skipped: true; reason: string } +> { + const chunk = await loadChunk(task.doc_id, task.chunk_id); + if (!chunk) { + return { skipped: true, reason: `chunk_not_found: ${task.doc_id}#${task.chunk_id}` }; + } + const systemPrompt = await readFile(PROMPT_PATH, "utf-8"); + const prompt = buildPrompt(chunk, task.claim); + + await audit({ + event: "detective_dispatched", + job_id: task.job_id, + detective: "locard@detective", + doc_id: task.doc_id, + chunk_id: task.chunk_id, + }); + + const llm = await callClaude({ + prompt, + systemPrompt, + model: env.CLAUDE_MODEL, + allowedTools: [], + timeoutMs: env.JOB_TIMEOUT_SECONDS * 1000, + budgetCapUsd: task.budget_cap_usd ?? env.BUDGET_CAP_USD_PER_JOB, + }); + + await audit({ + event: "detective_completed", + job_id: task.job_id, + detective: "locard@detective", + cost_usd: llm.costUsd, + tokens_in: llm.tokensIn, + tokens_out: llm.tokensOut, + duration_ms: llm.durationMs, + }); + + const parsed = extractJson(llm.text); + if (parsed === null) { + return { skipped: true, reason: "NO_EVIDENCE" }; + } + + // Pin doc/chunk to what the orchestrator asked for — Locard isn't allowed + // to write evidence to a different chunk than the one we handed it. + const args = parsed as Partial; + args.source_doc_id = task.doc_id; + args.source_chunk_id = task.chunk_id; + if (!args.page) args.page = chunk.page; + if (!args.bbox && chunk.bbox) args.bbox = chunk.bbox; + + const result = await writeEvidence(args as WriteEvidenceArgs, { + job_id: task.job_id, + detective: "locard@detective", + }); + return result; +} diff --git a/investigator-runtime/src/lib/audit.ts b/investigator-runtime/src/lib/audit.ts new file mode 100644 index 0000000..23f5cec --- /dev/null +++ b/investigator-runtime/src/lib/audit.ts @@ -0,0 +1,38 @@ +/** + * audit.ts — append-only NDJSON audit log of every meaningful step. + * + * Every write tool, every detective dispatch, every gate failure goes here. + * The chief-detective trail is what makes the Investigation Bureau auditable + * (per ADR-002 + agentic-layer-spec sec 5). + */ +import { appendFile, mkdir } from "node:fs/promises"; +import path from "node:path"; +import { env } from "./env"; + +const ensured = new Set(); + +async function ensureDir(p: string): Promise { + if (ensured.has(p)) return; + await mkdir(p, { recursive: true }); + ensured.add(p); +} + +export interface AuditEvent { + event: string; + job_id?: string; + detective?: string; + /** Free-form payload — keep keys snake_case. */ + [k: string]: unknown; +} + +export async function audit(ev: AuditEvent): Promise { + const row = { ts: new Date().toISOString(), worker_id: env.WORKER_ID, ...ev }; + const dir = path.dirname(env.AUDIT_LOG); + try { + await ensureDir(dir); + await appendFile(env.AUDIT_LOG, JSON.stringify(row) + "\n", "utf-8"); + } catch (e) { + // Audit must never break the worker. Log to stderr and move on. + console.error(`[audit] append failed: ${(e as Error).message}`); + } +} diff --git a/investigator-runtime/src/lib/claude.ts b/investigator-runtime/src/lib/claude.ts new file mode 100644 index 0000000..00940ff --- /dev/null +++ b/investigator-runtime/src/lib/claude.ts @@ -0,0 +1,139 @@ +/** + * claude.ts — spawn `claude -p` subprocess with a system prompt + per-call budget. + * + * Pattern matches scripts/reextract/run.py and web/lib/chat/claude-code.ts: + * we use the Claude Code CLI (OAuth Max 20x quota) instead of paying the + * Anthropic API directly. Returns parsed JSON and the model-reported cost + * so the orchestrator can enforce the per-job budget cap. + */ +import { spawn } from "node:child_process"; +import { env } from "./env"; + +export interface ClaudeCallArgs { + /** Free-form prompt body to send. */ + prompt: string; + /** Optional system prompt — kept separate from the user prompt so the model + * treats the persona/discipline as inviolable. */ + systemPrompt?: string; + /** "sonnet" by default. "opus" for the agentic principal step. */ + model?: string; + /** Tool allowlist passed to the CLI. Empty array → no tools. */ + allowedTools?: string[]; + /** Subprocess hard timeout (ms). Default: JOB_TIMEOUT_SECONDS / 2. */ + timeoutMs?: number; + /** Soft budget cap for THIS call — orchestrator should subtract from the job + * budget and abort if the running total exceeds the per-job cap. */ + budgetCapUsd?: number; + /** Working directory the CLI can read from. The runtime mounts wiki+case; + * this scopes claude's filesystem visibility. */ + addDir?: string; +} + +export interface ClaudeCallResult { + /** Plain-text reply (cli.result). */ + text: string; + /** Cost reported by the CLI (cli.total_cost_usd) or 0 when missing. */ + costUsd: number; + /** Token counts from CLI usage if present. */ + tokensIn?: number; + tokensOut?: number; + /** True when the CLI signalled is_error (the model said "I cannot ..."). */ + isError: boolean; + /** Wall-clock duration. */ + durationMs: number; + /** Raw JSON from the CLI for callers that need finer detail. */ + raw: unknown; +} + +export async function callClaude(args: ClaudeCallArgs): Promise { + const model = args.model ?? env.CLAUDE_MODEL; + const timeoutMs = args.timeoutMs ?? Math.max(30_000, env.JOB_TIMEOUT_SECONDS * 500); + const tools = args.allowedTools ?? []; + + const cliArgs = [ + "-p", + "--model", model, + "--output-format", "json", + "--max-turns", "8", + ]; + if (tools.length > 0) cliArgs.push("--allowedTools", tools.join(",")); + else cliArgs.push("--allowedTools", ""); + if (args.systemPrompt) cliArgs.push("--system-prompt", args.systemPrompt); + if (args.addDir) cliArgs.push("--add-dir", args.addDir); + cliArgs.push("--", args.prompt); + + const t0 = Date.now(); + return await new Promise((resolve, reject) => { + const child = spawn("claude", cliArgs, { + stdio: ["ignore", "pipe", "pipe"], + env: { + ...process.env, + // CLAUDE_CODE_OAUTH_TOKEN is read by the CLI itself. + }, + }); + let stdout = ""; + let stderr = ""; + child.stdout.on("data", (c) => (stdout += c.toString())); + child.stderr.on("data", (c) => (stderr += c.toString())); + const timer = setTimeout(() => { + child.kill("SIGKILL"); + reject(new Error(`claude CLI timeout > ${timeoutMs}ms`)); + }, timeoutMs); + child.on("error", (e) => { + clearTimeout(timer); + reject(new Error(`claude CLI spawn failed: ${e.message}`)); + }); + child.on("close", (code) => { + clearTimeout(timer); + if (code !== 0) { + const tail = stderr.slice(-400) || `rc=${code}`; + return reject(new Error(`claude rc=${code}: ${tail}`)); + } + let parsed: Record = {}; + try { + parsed = JSON.parse(stdout); + } catch { + return reject(new Error(`claude stdout not JSON: ${stdout.slice(0, 200)}`)); + } + const cost = Number(parsed.total_cost_usd ?? 0); + if (args.budgetCapUsd && cost > args.budgetCapUsd) { + // The model already spent the money but we surface the breach so the + // orchestrator can abort the rest of the job. + return resolve({ + text: String(parsed.result ?? ""), + costUsd: cost, + tokensIn: (parsed.usage as { input_tokens?: number } | undefined)?.input_tokens, + tokensOut: (parsed.usage as { output_tokens?: number } | undefined)?.output_tokens, + isError: true, + durationMs: Date.now() - t0, + raw: parsed, + }); + } + const usage = parsed.usage as { input_tokens?: number; output_tokens?: number } | undefined; + const resultText = String(parsed.result ?? ""); + const isError = Boolean(parsed.is_error); + const isQuota = isError && ( + /weekly limit|usage limit|rate.?limit/i.test(resultText) || + Number(parsed.api_error_status) === 429 + ); + // Quota exhaustion is a recoverable state (it resets); surface it as a + // typed error the orchestrator can identify, instead of pretending the + // call succeeded with an empty body. + if (isQuota) { + const e = new Error(`claude_quota_exhausted: ${resultText || "weekly limit"}`); + (e as Error & { isQuota?: boolean; durationMs?: number; costUsd?: number }).isQuota = true; + (e as Error & { durationMs?: number }).durationMs = Date.now() - t0; + return reject(e); + } + resolve({ + text: resultText, + costUsd: cost, + tokensIn: usage?.input_tokens, + tokensOut: usage?.output_tokens, + isError, + durationMs: Date.now() - t0, + raw: parsed, + }); + }); + }); +} diff --git a/investigator-runtime/src/lib/env.ts b/investigator-runtime/src/lib/env.ts new file mode 100644 index 0000000..7a36a33 --- /dev/null +++ b/investigator-runtime/src/lib/env.ts @@ -0,0 +1,42 @@ +/** + * env.ts — single source of typed configuration. + * + * Per sa-security gate #8: the runtime only reads the allow-listed env vars + * below. If a future contributor reaches for `process.env.X` directly, + * add it here first so the surface stays auditable. + */ + +function required(key: string): string { + const v = process.env[key]; + if (!v || !v.trim()) throw new Error(`missing required env: ${key}`); + return v; +} + +function optional(key: string, dflt: string): string { + return process.env[key]?.trim() || dflt; +} + +function int(key: string, dflt: number): number { + const v = process.env[key]; + if (!v) return dflt; + const n = Number(v); + return Number.isFinite(n) ? n : dflt; +} + +function number(key: string, dflt: number): number { + return int(key, dflt); +} + +export const env = { + DATABASE_URL: required("DATABASE_URL"), + EMBED_SERVICE_URL: optional("EMBED_SERVICE_URL", "http://embed:8000"), + CLAUDE_CODE_OAUTH_TOKEN: optional("CLAUDE_CODE_OAUTH_TOKEN", ""), + CASE_ROOT: optional("CASE_ROOT", "/data/ufo/case"), + WIKI_ROOT: optional("WIKI_ROOT", "/data/ufo/wiki"), + AUDIT_LOG: optional("AUDIT_LOG", "/data/ufo/case/audit.jsonl"), + WORKER_ID: optional("WORKER_ID", `investigator-${process.pid}`), + MAX_PARALLEL_WORKERS: int("MAX_PARALLEL_WORKERS", 2), + BUDGET_CAP_USD_PER_JOB: number("BUDGET_CAP_USD_PER_JOB", 1.0), + JOB_TIMEOUT_SECONDS: int("JOB_TIMEOUT_SECONDS", 300), + CLAUDE_MODEL: optional("CLAUDE_MODEL", "sonnet"), +}; diff --git a/investigator-runtime/src/lib/ids.ts b/investigator-runtime/src/lib/ids.ts new file mode 100644 index 0000000..a5d0854 --- /dev/null +++ b/investigator-runtime/src/lib/ids.ts @@ -0,0 +1,24 @@ +/** + * ids.ts — allocate the next human-readable ID for an investigation artefact. + * + * The id sequences in migration 0004 are atomic; nextval is concurrency-safe. + * Padding is consistent across the wiki (CLAUDE.md naming regex). + */ +import { queryOne } from "./pg"; + +const PAD = 4; + +async function nextval(seq: string, prefix: string): Promise { + const row = await queryOne<{ nextval: string }>(`SELECT nextval('${seq}') AS nextval`); + const n = Number(row?.nextval ?? 1); + return `${prefix}-${String(n).padStart(PAD, "0")}`; +} + +export const allocate = { + evidenceId: () => nextval("public.evidence_id_seq", "E"), + hypothesisId: () => nextval("public.hypothesis_id_seq", "H"), + contradictionId: () => nextval("public.contradiction_id_seq", "R"), + witnessId: () => nextval("public.witness_id_seq", "W"), + gapId: () => nextval("public.gap_id_seq", "G"), + residualUncertaintyId: () => nextval("public.residual_uncertainty_id_seq","RU"), +}; diff --git a/investigator-runtime/src/lib/pg.ts b/investigator-runtime/src/lib/pg.ts new file mode 100644 index 0000000..e7e5588 --- /dev/null +++ b/investigator-runtime/src/lib/pg.ts @@ -0,0 +1,54 @@ +/** + * pg.ts — pooled Postgres + LISTEN/NOTIFY helpers. + * + * The pool is for short-lived queries inside workers. The LISTEN connection + * is a single dedicated client kept alive for the lifetime of the process. + */ +import pg from "pg"; +import { env } from "./env"; + +export const pool = new pg.Pool({ + connectionString: env.DATABASE_URL, + max: 4, + idleTimeoutMillis: 30_000, + allowExitOnIdle: false, +}); + +export async function query(sql: string, params: unknown[] = []): Promise { + const r = await pool.query(sql, params as unknown[]); + return r.rows as T[]; +} + +export async function queryOne(sql: string, params: unknown[] = []): Promise { + const rows = await query(sql, params); + return rows[0] ?? null; +} + +/** + * Dedicated long-lived connection that issues LISTEN and yields + * notification payloads as an async iterator. + */ +export async function listen( + channel: string, + onNotify: (payload: string | undefined) => void, +): Promise { + const client = new pg.Client({ connectionString: env.DATABASE_URL }); + await client.connect(); + await client.query(`LISTEN ${channel}`); + client.on("notification", (msg) => { + if (msg.channel !== channel) return; + try { + onNotify(msg.payload); + } catch (e) { + console.error(`[listen ${channel}] handler threw: ${(e as Error).message}`); + } + }); + client.on("error", (e) => { + console.error(`[listen ${channel}] connection error: ${e.message}`); + }); + return client; +} + +export async function close(): Promise { + await pool.end().catch(() => undefined); +} diff --git a/investigator-runtime/src/main.ts b/investigator-runtime/src/main.ts new file mode 100644 index 0000000..d2c1fd0 --- /dev/null +++ b/investigator-runtime/src/main.ts @@ -0,0 +1,115 @@ +/** + * main.ts — entrypoint for the investigator runtime. + * + * Lifecycle: + * 1. Connect to Postgres pool + a dedicated LISTEN client. + * 2. On every NOTIFY (or on a slow-poll fallback) attempt to claim a job. + * 3. Claim uses `UPDATE … WHERE status='queued' RETURNING *` so two workers + * can race safely. + * 4. Dispatch via the orchestrator and update status. + * + * The healthcheck file at /tmp/healthy is touched once per loop so docker + * declares us unhealthy if the LISTEN connection dies. + */ +import { utimes, writeFile } from "node:fs/promises"; +import { audit } from "./lib/audit"; +import { env } from "./lib/env"; +import { close, listen, pool, query } from "./lib/pg"; +import { dispatch, type InvestigationJob } from "./orchestrator"; + +const HEALTH_FILE = "/tmp/healthy"; +const POLL_INTERVAL_MS = 15_000; +const CHANNEL = "investigation_jobs"; + +let shuttingDown = false; +let activeWorkers = 0; + +async function touchHealth(): Promise { + try { + const now = new Date(); + await writeFile(HEALTH_FILE, `${now.toISOString()} ${env.WORKER_ID}\n`); + await utimes(HEALTH_FILE, now, now); + } catch { + /* docker healthcheck will trip on its own */ + } +} + +async function claimNextJob(): Promise { + const rows = await query( + `UPDATE public.investigation_jobs + SET status = 'running', worker_id = $1, started_at = NOW() + WHERE job_id = ( + SELECT job_id FROM public.investigation_jobs + WHERE status = 'queued' + ORDER BY created_at ASC + LIMIT 1 + FOR UPDATE SKIP LOCKED + ) + RETURNING job_id, kind, payload, triggered_by`, + [env.WORKER_ID], + ); + return rows[0] ?? null; +} + +async function drainOnce(): Promise { + if (shuttingDown) return; + if (activeWorkers >= env.MAX_PARALLEL_WORKERS) return; + const job = await claimNextJob(); + if (!job) return; + activeWorkers++; + // Don't await — let the dispatcher run in parallel and tick the loop. + void dispatch(job, env.WORKER_ID).finally(() => { + activeWorkers--; + // Immediately check the queue again — bursty notifications shouldn't + // sit idle while we wait for the next NOTIFY. + void drainOnce(); + }); + // Try to fill the rest of the worker slots. + if (activeWorkers < env.MAX_PARALLEL_WORKERS) void drainOnce(); +} + +async function main(): Promise { + await audit({ event: "runtime_starting", worker_id: env.WORKER_ID, max_parallel: env.MAX_PARALLEL_WORKERS }); + + // Mark healthy NOW so the docker healthcheck doesn't trip during cold start. + await touchHealth(); + + // 1. LISTEN on investigation_jobs. + const listenClient = await listen(CHANNEL, () => { void drainOnce(); }); + await audit({ event: "listening", channel: CHANNEL }); + + // 2. Slow poll as a safety net (handles dropped NOTIFYs / restart races). + const poller = setInterval(() => { void drainOnce(); void touchHealth(); }, POLL_INTERVAL_MS); + + // 3. Initial drain — any jobs queued during downtime get processed now. + await drainOnce(); + + // Graceful shutdown + const shutdown = async (sig: string): Promise => { + if (shuttingDown) return; + shuttingDown = true; + console.error(`[runtime] received ${sig}, draining...`); + clearInterval(poller); + try { + // Wait for in-flight workers to finish, but no longer than JOB_TIMEOUT. + const waitUntil = Date.now() + env.JOB_TIMEOUT_SECONDS * 1000; + while (activeWorkers > 0 && Date.now() < waitUntil) { + await new Promise((r) => setTimeout(r, 250)); + } + await listenClient.end().catch(() => undefined); + await close(); + } finally { + await audit({ event: "runtime_stopped", signal: sig }); + process.exit(0); + } + }; + process.on("SIGTERM", () => { void shutdown("SIGTERM"); }); + process.on("SIGINT", () => { void shutdown("SIGINT"); }); +} + +main().catch(async (e) => { + console.error(`[runtime] fatal: ${(e as Error).stack ?? e}`); + await audit({ event: "runtime_fatal", error: String(e) }); + await pool.end().catch(() => undefined); + process.exit(1); +}); diff --git a/investigator-runtime/src/orchestrator.ts b/investigator-runtime/src/orchestrator.ts new file mode 100644 index 0000000..7757c51 --- /dev/null +++ b/investigator-runtime/src/orchestrator.ts @@ -0,0 +1,112 @@ +/** + * orchestrator.ts — chief-detective. Decides which detective runs for a job. + * + * For W3.1–W3.4 we only know `evidence_chain` (Locard). Other kinds enter the + * registry as we build each detective in W3.5+. Unknown kinds fail the job + * loudly so we don't quietly drop work. + */ +import { audit } from "./lib/audit"; +import { query } from "./lib/pg"; +import { runLocard, type LocardTask } from "./detectives/locard"; + +export interface InvestigationJob { + job_id: string; + kind: string; + payload: Record; + triggered_by: string | null; +} + +export async function dispatch(job: InvestigationJob, workerId: string): Promise { + await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId }); + + let outputs: unknown[] = []; + try { + switch (job.kind) { + case "evidence_chain": { + // Payload shape: { doc_id, chunks?: [chunk_ids] } — fall back to scanning + // the first 20 substantive chunks of the doc if not provided. + const docId = String(job.payload.doc_id ?? ""); + if (!docId) throw new Error("evidence_chain requires payload.doc_id"); + const chunkIds = Array.isArray(job.payload.chunks) + ? (job.payload.chunks as string[]) + : await pickEvidenceCandidates(docId, 5); + if (chunkIds.length === 0) throw new Error(`no candidate chunks in ${docId}`); + for (const chunk_id of chunkIds) { + const task: LocardTask = { + job_id: job.job_id, + doc_id: docId, + chunk_id, + claim: typeof job.payload.claim === "string" ? job.payload.claim : undefined, + }; + try { + const r = await runLocard(task); + outputs.push({ chunk_id, ...r }); + } catch (e) { + outputs.push({ chunk_id, error: (e as Error).message }); + } + } + break; + } + default: + throw new Error(`unknown_kind: ${job.kind}`); + } + + // Status reflects reality: if every per-item attempt errored we mark + // the job failed (so the UI doesn't say "complete" when nothing useful + // was produced); if at least one succeeded we keep `complete` with the + // mixed outputs payload. + const allErrors = outputs.length > 0 && outputs.every( + (o): o is { error: string } => typeof (o as { error?: unknown }).error === "string", + ); + const summary = (() => { + if (!allErrors) return null; + // First few error messages, surfaced to the user via the jobs table. + return outputs + .map((o) => (o as { error?: string }).error) + .filter((e): e is string => Boolean(e)) + .slice(0, 3) + .join(" | "); + })(); + await query( + `UPDATE public.investigation_jobs + SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3 + WHERE job_id = $4`, + [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id], + ); + await audit({ + event: allErrors ? "job_failed_all_items" : "job_completed", + job_id: job.job_id, + kind: job.kind, + n_outputs: outputs.length, + ...(summary ? { summary } : {}), + }); + } catch (e) { + const err = (e as Error).message; + await query( + `UPDATE public.investigation_jobs + SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb + WHERE job_id = $3`, + [err, JSON.stringify(outputs), job.job_id], + ); + await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err }); + } +} + +/** + * Pick a small set of chunks that are likely to yield evidence — body + * paragraphs, longer than 200 chars, marked `is_searchable`. Ordered by + * Sonnet's anomaly flag first so we extract the most interesting first. + */ +async function pickEvidenceCandidates(doc_id: string, limit: number): Promise { + const rows = await query<{ chunk_id: string }>( + `SELECT chunk_id + FROM public.chunks + WHERE doc_id = $1 + AND is_searchable + AND LENGTH(COALESCE(content_en, content_pt, '')) > 200 + ORDER BY ufo_anomaly DESC, page ASC, order_in_page ASC + LIMIT $2`, + [doc_id, limit], + ); + return rows.map((r) => r.chunk_id); +} diff --git a/investigator-runtime/src/tools/write_evidence.ts b/investigator-runtime/src/tools/write_evidence.ts new file mode 100644 index 0000000..55c95ea --- /dev/null +++ b/investigator-runtime/src/tools/write_evidence.ts @@ -0,0 +1,199 @@ +/** + * write_evidence.ts — Locard's primary writer. + * + * The first of the gated write tools (sa-security gate #2). Builds a row in + * `public.evidence`, allocates E-NNNN via the sequence, writes the matching + * `case/evidence/E-NNNN.md` markdown file, and audit-logs the act. + * + * Discipline (mirrors the spec): + * - verbatim_excerpt MUST appear verbatim inside the source chunk content + * (we reject if it doesn't — Sonnet is occasionally creative about quoting). + * - source_chunk must exist in public.chunks (FK enforced by DB; we also + * check up front to give a friendlier error). + * - grade A requires ≥ 3 custody steps, B ≥ 2, C ≥ 1. + * - related_hypotheses[].hypothesis_id must exist (if provided). + */ +import { mkdir, writeFile } from "node:fs/promises"; +import path from "node:path"; +import { audit } from "../lib/audit"; +import { env } from "../lib/env"; +import { allocate } from "../lib/ids"; +import { query, queryOne } from "../lib/pg"; + +export interface CustodyStep { + step: string; + uri?: string; + sha256?: string; + notes?: string; +} + +export interface WriteEvidenceArgs { + verbatim_excerpt: string; + source_doc_id: string; + source_chunk_id: string; + /** Page is 1-indexed; we derive source_page_id from doc+page. */ + page?: number; + bbox?: { x: number; y: number; w: number; h: number }; + grade: "A" | "B" | "C"; + custody_steps: CustodyStep[]; + custody_gaps?: string[]; + confidence_band?: "high" | "medium" | "low" | "speculation"; + related_hypotheses?: Array<{ hypothesis_id: string; supports: boolean }>; +} + +export interface WriteEvidenceContext { + job_id: string; + detective: string; +} + +const GRADE_MIN_STEPS: Record = { A: 3, B: 2, C: 1 }; + +function pageId(doc: string, p: number | undefined, chunkId: string): string { + // Prefer explicit page; otherwise we leave it doc-only and the lint + // catches the malformed reference later. + if (typeof p === "number" && p > 0) { + return `${doc}/p${String(p).padStart(3, "0")}#${chunkId}`; + } + return `${doc}#${chunkId}`; +} + +function renderMd(args: { + evidence_id: string; + body: WriteEvidenceArgs; + ctx: WriteEvidenceContext; +}): string { + const { evidence_id, body, ctx } = args; + const fm = [ + "---", + `schema_version: "0.1.0"`, + `type: evidence`, + `evidence_id: ${evidence_id}`, + `source_doc: ${body.source_doc_id}`, + `source_chunk: ${body.source_chunk_id}`, + body.page ? `source_page: ${body.page}` : null, + `grade: ${body.grade}`, + body.confidence_band ? `confidence_band: ${body.confidence_band}` : null, + `created_by: ${ctx.detective}`, + `job_id: ${ctx.job_id}`, + `created_at: ${new Date().toISOString()}`, + body.bbox ? `bbox: { x: ${body.bbox.x}, y: ${body.bbox.y}, w: ${body.bbox.w}, h: ${body.bbox.h} }` : null, + "---", + ].filter(Boolean).join("\n"); + + const custody = body.custody_steps + .map((s, i) => ` ${i + 1}. **${s.step}**${s.uri ? ` — ${s.uri}` : ""}${s.sha256 ? ` (sha256: \`${s.sha256.slice(0, 16)}…\`)` : ""}${s.notes ? `\n ${s.notes}` : ""}`) + .join("\n"); + const gaps = (body.custody_gaps ?? []).map((g) => ` - ${g}`).join("\n"); + const links = (body.related_hypotheses ?? []) + .map((h) => ` - [[hypothesis/${h.hypothesis_id}]] (${h.supports ? "supports" : "refutes"})`) + .join("\n"); + + return [ + fm, + "", + `# Evidence ${evidence_id}`, + "", + "## Verbatim excerpt (source language)", + "", + "> " + body.verbatim_excerpt.replace(/\n+/g, "\n> "), + "", + "## Chain of custody", + "", + custody, + gaps ? "\n### Gaps\n\n" + gaps : "", + "", + "## Source", + "", + `- Doc: [[${body.source_doc_id}]]`, + `- Chunk: [[${body.source_doc_id}#${body.source_chunk_id}]]`, + body.page ? `- Page: [[${body.source_doc_id}/p${String(body.page).padStart(3, "0")}]]` : "", + "", + "## Linked hypotheses", + "", + links || "_(none yet)_", + "", + ].join("\n"); +} + +export async function writeEvidence( + body: WriteEvidenceArgs, + ctx: WriteEvidenceContext, +): Promise<{ evidence_id: string; case_file: string }> { + // 1. Validate grade minimums (sa-security gate #2: schema check before INSERT). + const minSteps = GRADE_MIN_STEPS[body.grade] ?? 99; + if (!Array.isArray(body.custody_steps) || body.custody_steps.length < minSteps) { + throw new Error(`grade ${body.grade} requires ≥ ${minSteps} custody steps; got ${body.custody_steps?.length ?? 0}`); + } + + // 2. Resolve chunk_pk + verify the verbatim excerpt appears in the chunk. + const chunk = await queryOne<{ chunk_pk: number; content_en: string | null; content_pt: string | null }>( + `SELECT chunk_pk, content_en, content_pt + FROM public.chunks + WHERE doc_id = $1 AND chunk_id = $2 + LIMIT 1`, + [body.source_doc_id, body.source_chunk_id], + ); + if (!chunk) { + throw new Error(`chunk_not_found: ${body.source_doc_id}#${body.source_chunk_id}`); + } + + const haystack = `${chunk.content_en ?? ""}\n${chunk.content_pt ?? ""}`; + const needle = body.verbatim_excerpt.trim(); + if (needle.length < 8) { + throw new Error(`verbatim_excerpt too short (${needle.length} chars; min 8)`); + } + if (!haystack.includes(needle.slice(0, 80))) { + throw new Error(`verbatim_excerpt not found in source chunk — Sonnet must not paraphrase`); + } + + // 3. (Optional) validate hypothesis refs exist. + for (const ref of body.related_hypotheses ?? []) { + const h = await queryOne<{ hypothesis_pk: number }>( + `SELECT hypothesis_pk FROM public.hypotheses WHERE hypothesis_id = $1`, + [ref.hypothesis_id], + ); + if (!h) throw new Error(`linked hypothesis not found: ${ref.hypothesis_id}`); + } + + // 4. Allocate id + INSERT. + const evidence_id = await allocate.evidenceId(); + await query( + `INSERT INTO public.evidence + (evidence_id, verbatim_excerpt, source_chunk_pk, source_page_id, bbox, + grade, custody_steps, custody_gaps, confidence_band, related_hypotheses, created_by) + VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7::jsonb, $8::jsonb, $9, $10::jsonb, $11)`, + [ + evidence_id, + body.verbatim_excerpt, + chunk.chunk_pk, + pageId(body.source_doc_id, body.page, body.source_chunk_id), + body.bbox ? JSON.stringify(body.bbox) : null, + body.grade, + JSON.stringify(body.custody_steps), + body.custody_gaps ? JSON.stringify(body.custody_gaps) : null, + body.confidence_band ?? null, + JSON.stringify(body.related_hypotheses ?? []), + ctx.detective, + ], + ); + + // 5. Write the case markdown file. + const dir = path.join(env.CASE_ROOT, "evidence"); + await mkdir(dir, { recursive: true }); + const filepath = path.join(dir, `${evidence_id}.md`); + await writeFile(filepath, renderMd({ evidence_id, body, ctx }), "utf-8"); + + // 6. Audit. + await audit({ + event: "write_evidence", + job_id: ctx.job_id, + detective: ctx.detective, + evidence_id, + source_doc: body.source_doc_id, + source_chunk: body.source_chunk_id, + grade: body.grade, + file: filepath, + }); + + return { evidence_id, case_file: filepath }; +} diff --git a/investigator-runtime/tsconfig.json b/investigator-runtime/tsconfig.json new file mode 100644 index 0000000..11c43d9 --- /dev/null +++ b/investigator-runtime/tsconfig.json @@ -0,0 +1,20 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "Bundler", + "lib": ["ES2023"], + "strict": true, + "noImplicitAny": true, + "strictNullChecks": true, + "noUnusedLocals": true, + "noFallthroughCasesInSwitch": true, + "esModuleInterop": true, + "resolveJsonModule": true, + "skipLibCheck": true, + "noEmit": true, + "allowJs": false, + "types": ["node"] + }, + "include": ["src/**/*.ts"] +} diff --git a/web/app/api/admin/investigate/test/route.ts b/web/app/api/admin/investigate/test/route.ts new file mode 100644 index 0000000..93c6ee1 --- /dev/null +++ b/web/app/api/admin/investigate/test/route.ts @@ -0,0 +1,87 @@ +/** + * /api/admin/investigate/test — admin-only trigger for a synthetic + * investigation job. Gated by middleware (W0-F1: /api/admin/* → 404 unless + * profile.role==='admin'). + * + * POST { kind?: 'evidence_chain', doc_id, chunks?: string[] } — INSERT a row + * in public.investigation_jobs; the disclosure-investigator container's + * LISTEN handler picks it up, dispatches Locard, and updates status. + * GET ?job_id=... — read the latest state for polling. + */ +import { NextResponse } from "next/server"; +import { pgQuery } from "@/lib/retrieval/db"; +import { withRequest } from "@/lib/logger"; + +export const runtime = "nodejs"; +export const dynamic = "force-dynamic"; + +interface JobRow { + job_id: string; + kind: string; + payload: unknown; + status: string; + worker_id: string | null; + started_at: string | null; + finished_at: string | null; + outputs: unknown; + error: string | null; + created_at: string; +} + +export async function POST(request: Request) { + const log = withRequest(request); + let body: { kind?: string; doc_id?: string; chunks?: string[]; claim?: string }; + try { + body = await request.json(); + } catch { + return NextResponse.json({ error: "invalid_json" }, { status: 400 }); + } + const kind = body.kind ?? "evidence_chain"; + const doc_id = body.doc_id; + if (!doc_id) return NextResponse.json({ error: "doc_id_required" }, { status: 400 }); + + const payload: Record = { doc_id }; + if (body.chunks?.length) payload.chunks = body.chunks; + if (body.claim) payload.claim = body.claim; + + // Triggered_by carries the admin email so the audit ties back to a person. + // Middleware already validated the admin role; we just label here. + const triggered_by = `user:${request.headers.get("x-user-email") ?? "admin"}`; + + try { + const rows = await pgQuery<{ job_id: string }>( + `INSERT INTO public.investigation_jobs (kind, payload, triggered_by, status) + VALUES ($1, $2::jsonb, $3, 'queued') + RETURNING job_id`, + [kind, JSON.stringify(payload), triggered_by], + ); + const job_id = rows[0]?.job_id; + log.info({ event: "investigation_job_created", kind, doc_id, job_id }, "investigation job queued"); + return NextResponse.json({ + job_id, + kind, + status: "queued", + poll_url: `/api/admin/investigate/test?job_id=${job_id}`, + }); + } catch (e) { + return NextResponse.json({ error: "db_unavailable", message: (e as Error).message }, { status: 503 }); + } +} + +export async function GET(request: Request) { + const url = new URL(request.url); + const job_id = url.searchParams.get("job_id"); + if (!job_id) return NextResponse.json({ error: "job_id_required" }, { status: 400 }); + try { + const rows = await pgQuery( + `SELECT job_id, kind, payload, status, worker_id, started_at, finished_at, + outputs, error, created_at + FROM public.investigation_jobs WHERE job_id = $1`, + [job_id], + ); + if (rows.length === 0) return NextResponse.json({ error: "not_found" }, { status: 404 }); + return NextResponse.json(rows[0]); + } catch (e) { + return NextResponse.json({ error: "db_unavailable", message: (e as Error).message }, { status: 503 }); + } +}