disclosure-bureau/investigator-runtime/src/orchestrator.ts

/**
 * orchestrator.ts — chief-detective. Decides which detective runs for a job.
 *
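 * Started in W3.1–W3.4 knowing only `evidence_chain` (Locard); the registry
 * has since grown, one kind per detective: `hypothesis_tournament` (Holmes),
 * `contradiction_scan` (Dupin), `red_team_review` (Schneier),
 * `witness_analysis` (Poirot), `outlier_scan` (Taleb),
 * `calibrate_hypothesis` (Tetlock) and `case_report` (Case-Writer). Unknown
 * kinds fail the job loudly so we don't quietly drop work.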
 */
import { audit } from "./lib/audit";
import { query } from "./lib/pg";
import { runLocard, type LocardTask } from "./detectives/locard";
import { runHolmes, type HolmesTask } from "./detectives/holmes";
import { runDupin, type DupinTask } from "./detectives/dupin";
import { runSchneier, type SchneierTask } from "./detectives/schneier";
import { runPoirot, type PoirotTask } from "./detectives/poirot";
import { runTaleb, type TalebTask } from "./detectives/taleb";
import { runTetlock, type TetlockTask } from "./detectives/tetlock";
import { runCaseWriter, type CaseWriterTask } from "./detectives/case_writer";

/**
 * One unit of work handed to `dispatch`.
 */
export interface InvestigationJob {
  /** Identifier of the job; `dispatch` uses it to update the matching row in `public.investigation_jobs`. */
  job_id: string;
  /** Selects which detective runs (e.g. `"evidence_chain"`); kinds `dispatch` does not know fail the job. */
  kind: string;
  /** Kind-specific arguments. Untyped on purpose: each `dispatch` branch validates the fields it needs. */
  payload: Record<string, unknown>;
  /** Not read in this file — presumably identifies who or what requested the job; confirm against the producer. */
  triggered_by: string | null;
}

/** Extracts a printable message from any thrown value, not only `Error` instances. */
function errorMessage(thrown: unknown): string {
  return thrown instanceof Error ? thrown.message : String(thrown);
}

/** Reads `payload[key]` when it is a string (optionally trimmed); otherwise `undefined`. */
function optionalString(
  payload: Record<string, unknown>,
  key: string,
  trim = false,
): string | undefined {
  const value = payload[key];
  if (typeof value !== "string") return undefined;
  return trim ? value.trim() : value;
}

/** Reads `payload[key]` when it is a number; otherwise `undefined`. */
function optionalNumber(payload: Record<string, unknown>, key: string): number | undefined {
  const value = payload[key];
  return typeof value === "number" ? value : undefined;
}

/** Coerces `payload[key]` to a trimmed string and throws `<kind> requires payload.<key>` when it is empty. */
function requiredText(job: InvestigationJob, key: string): string {
  const text = String(job.payload[key] ?? "").trim();
  if (!text) throw new Error(`${job.kind} requires payload.${key}`);
  return text;
}

/** Resolves `payload.lang`: exactly `"en"` yields `"en"`, every other value yields `"pt"`. */
function resolveLang(payload: Record<string, unknown>): "en" | "pt" {
  return payload.lang === "en" ? "en" : "pt";
}

/**
 * Runs the detective that matches `job.kind` and records the outcome.
 *
 * Flow: audit `job_claimed` → validate the payload and run the detective →
 * write `status`, `finished_at`, `outputs` and `error` to
 * `public.investigation_jobs` → audit the result. A job whose outputs are
 * all per-item errors is stored as `failed`; anything thrown while
 * dispatching (bad payload, unknown kind, detective failure) is also stored
 * as `failed`, together with whatever outputs were collected so far.
 *
 * @param job      The job to run; `job.payload` is validated per kind.
 * @param workerId Identifier of the calling worker; only used in the `job_claimed` audit event.
 * @returns Resolves once the job row and audit trail are written. Detective
 *          failures are recorded, not rethrown; errors from the failure-path
 *          `query`/`audit` calls themselves do propagate.
 */
export async function dispatch(job: InvestigationJob, workerId: string): Promise<void> {
  await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId });

  const { payload } = job;
  const outputs: unknown[] = [];
  try {
    switch (job.kind) {
      case "evidence_chain": {
        // Payload shape: { doc_id, chunks?: [chunk_ids], claim? } — when no
        // chunk list is given, fall back to up to 5 candidates chosen by
        // pickEvidenceCandidates.
        const docId = String(payload.doc_id ?? "");
        if (!docId) throw new Error("evidence_chain requires payload.doc_id");
        const requested: unknown[] = Array.isArray(payload.chunks)
          ? payload.chunks
          : await pickEvidenceCandidates(docId, 5);
        if (requested.length === 0) throw new Error(`no candidate chunks in ${docId}`);
        const claim = optionalString(payload, "claim");
        for (const chunk_id of requested) {
          // The payload is untrusted JSON: report malformed ids as per-item
          // errors instead of handing them to the detective.
          if (typeof chunk_id !== "string" || chunk_id === "") {
            outputs.push({ chunk_id, error: "invalid chunk_id: expected a non-empty string" });
            continue;
          }
          const task: LocardTask = { job_id: job.job_id, doc_id: docId, chunk_id, claim };
          try {
            const r = await runLocard(task);
            outputs.push({ chunk_id, ...r });
          } catch (e) {
            // One bad chunk must not abort the rest of the chain.
            outputs.push({ chunk_id, error: errorMessage(e) });
          }
        }
        break;
      }
      case "hypothesis_tournament": {
        // Payload: { question, question_pt_br?, doc_id?, lang?, context_chunks? }
        const task: HolmesTask = {
          job_id: job.job_id,
          question: requiredText(job, "question"),
          question_pt_br: optionalString(payload, "question_pt_br", true),
          doc_id: optionalString(payload, "doc_id"),
          lang: resolveLang(payload),
          context_chunks: optionalNumber(payload, "context_chunks"),
        };
        const r = await runHolmes(task);
        if ("skipped" in r) {
          outputs.push({ kind: "hypothesis_tournament", skipped: true, reason: r.reason });
        } else {
          for (const h of r.hypotheses) outputs.push({ kind: "hypothesis", ...h });
        }
        break;
      }
      case "case_report": {
        // Payload: { topic, topic_pt_br?, doc_id?, slug?, lang? }
        const task: CaseWriterTask = {
          job_id: job.job_id,
          topic: requiredText(job, "topic"),
          topic_pt_br: optionalString(payload, "topic_pt_br", true),
          doc_id: optionalString(payload, "doc_id"),
          slug: optionalString(payload, "slug"),
          lang: resolveLang(payload),
        };
        const r = await runCaseWriter(task);
        if ("skipped" in r) {
          outputs.push({ kind: "case_report", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "case_report", ...r });
        }
        break;
      }
      case "calibrate_hypothesis": {
        // Payload: { hypothesis_id, lang? }
        const task: TetlockTask = {
          job_id: job.job_id,
          hypothesis_id: requiredText(job, "hypothesis_id"),
          lang: resolveLang(payload),
        };
        const r = await runTetlock(task);
        if ("skipped" in r) {
          outputs.push({ kind: "calibrate_hypothesis", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "calibration", ...r });
        }
        break;
      }
      case "outlier_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const task: TalebTask = {
          job_id: job.job_id,
          topic: requiredText(job, "topic"),
          doc_id: optionalString(payload, "doc_id"),
          lang: resolveLang(payload),
          context_chunks: optionalNumber(payload, "context_chunks"),
        };
        const r = await runTaleb(task);
        if ("skipped" in r) {
          outputs.push({ kind: "outlier_scan", skipped: true, reason: r.reason });
        } else {
          for (const o of r.outliers) outputs.push({ kind: "outlier", ...o });
        }
        break;
      }
      case "witness_analysis": {
        // Payload: { person_id } OR { person_entity_pk }
        const person_id = optionalString(payload, "person_id", true);
        const person_entity_pk = optionalNumber(payload, "person_entity_pk");
        if (!person_id && !person_entity_pk) {
          throw new Error("witness_analysis requires payload.person_id or person_entity_pk");
        }
        const task: PoirotTask = {
          job_id: job.job_id,
          person_id,
          person_entity_pk,
          lang: resolveLang(payload),
          context_chunks: optionalNumber(payload, "context_chunks"),
        };
        const r = await runPoirot(task);
        if ("skipped" in r) {
          outputs.push({ kind: "witness_analysis", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "witness_analysis", ...r });
        }
        break;
      }
      case "red_team_review": {
        // Payload: { hypothesis_id }
        const task: SchneierTask = {
          job_id: job.job_id,
          hypothesis_id: requiredText(job, "hypothesis_id"),
        };
        const r = await runSchneier(task);
        if ("skipped" in r) {
          outputs.push({ kind: "red_team_review", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "red_team_review", ...r });
        }
        break;
      }
      case "contradiction_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const task: DupinTask = {
          job_id: job.job_id,
          topic: requiredText(job, "topic"),
          doc_id: optionalString(payload, "doc_id"),
          lang: resolveLang(payload),
          context_chunks: optionalNumber(payload, "context_chunks"),
        };
        const r = await runDupin(task);
        if ("skipped" in r) {
          outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason });
        } else {
          for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c });
        }
        break;
      }

      default:
        throw new Error(`unknown_kind: ${job.kind}`);
    }

    // Status reflects reality: if every per-item attempt errored we mark
    // the job failed (so the UI doesn't say "complete" when nothing useful
    // was produced); if at least one succeeded we keep `complete` with the
    // mixed outputs payload.
    const itemErrors = outputs.map((o) => (o as { error?: unknown }).error);
    const allErrors = itemErrors.length > 0 && itemErrors.every((m) => typeof m === "string");
    // First few error messages, surfaced to the user via the jobs table.
    const summary = allErrors
      ? itemErrors
          .filter((m): m is string => typeof m === "string" && m !== "")
          .slice(0, 3)
          .join(" | ")
      : null;
    await query(
      `UPDATE public.investigation_jobs
          SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3
        WHERE job_id = $4`,
      [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id],
    );
    await audit({
      event: allErrors ? "job_failed_all_items" : "job_completed",
      job_id: job.job_id,
      kind: job.kind,
      n_outputs: outputs.length,
      ...(summary ? { summary } : {}),
    });
  } catch (e) {
    // NOTE(review): this handler also covers the success-path UPDATE/audit
    // above, so an audit failure after a successful UPDATE re-marks the job
    // as failed — confirm that is intended.
    const err = errorMessage(e);
    await query(
      `UPDATE public.investigation_jobs
          SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb
        WHERE job_id = $3`,
      [err, JSON.stringify(outputs), job.job_id],
    );
    await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err });
  }
}

/**
 * Pick a small set of chunks that are likely to yield evidence: rows of
 * `public.chunks` for the document that are marked `is_searchable` and whose
 * text (`content_en`, falling back to `content_pt`) is longer than 200
 * characters. Results are ordered by `ufo_anomaly` descending (Sonnet's
 * anomaly flag, per the original author) so the most interesting chunks are
 * extracted first, then by page and position within the page.
 *
 * @param doc_id Document whose chunks are considered.
 * @param limit  Maximum number of chunk ids to return; fractional values are
 *               rounded down.
 * @returns Chunk ids in ranking order. Empty when nothing qualifies or when
 *          `limit` is not a positive finite number.
 */
async function pickEvidenceCandidates(doc_id: string, limit: number): Promise<string[]> {
  // PostgreSQL rejects a negative or non-integer LIMIT; short-circuit instead
  // of surfacing a database error for a degenerate request.
  const cap = Math.floor(limit);
  if (!Number.isFinite(cap) || cap <= 0) return [];

  const rows = await query<{ chunk_id: string }>(
    // NULLS LAST: with plain DESC PostgreSQL sorts NULLs first, which would
    // rank chunks with no anomaly value ahead of the flagged ones.
    `SELECT chunk_id
       FROM public.chunks
      WHERE doc_id = $1
        AND is_searchable
        AND LENGTH(COALESCE(content_en, content_pt, '')) > 200
      ORDER BY ufo_anomaly DESC NULLS LAST, page ASC, order_in_page ASC
      LIMIT $2`,
    [doc_id, cap],
  );
  return rows.map((r) => r.chunk_id);
}
-												W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard

Migrations:
- 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence,
  hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id
  sequences, pg_notify trigger on investigation_jobs, RLS read-only public,
  investigator role with least-privilege grants (no service_role).
- 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE
  policies bound to investigator + service_role + postgres (RLS with only a
  SELECT policy was silently blocking the worker's claim UPDATE).

investigator-runtime/ (new Bun + TS container):
- src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool,
  healthcheck file, graceful SIGTERM shutdown.
- src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard).
  Marks job failed when all per-item outputs error; surfaces first errors.
- src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool +
  dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc),
  claude -p subprocess with quota detection (api_error_status=429).
- src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps),
  resolve chunk_pk via FK, verify verbatim_excerpt actually appears in
  chunk content, INSERT + render case/evidence/E-NNNN.md + audit.
- src/detectives/locard.ts: load chunk → call Claude with locard.md system
  prompt → parse strict JSON → call writeEvidence locally.
- Dockerfile installs `claude` CLI (OAuth) at build time.

Compose:
- new `investigator` service builds from investigator-runtime/, connects
  with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap.

Web:
- /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1).
  POST creates a job, GET polls status. For W3.6 it becomes the chat tool.

End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch →
claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota
exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed
isQuota error, marks job failed with surfaced reason. Architecture proven;
quota reset enables real evidence creation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 22:49:33 +00:00
+								/**
 								 * orchestrator.ts — chief-detective. Decides which detective runs for a job.
 								 *
 								 * For W3.1–W3.4 we only know `evidence_chain` (Locard). Other kinds enter the
 								 * registry as we build each detective in W3.5+. Unknown kinds fail the job
 								 * loudly so we don't quietly drop work.
 								 */
 								import { audit } from "./lib/audit";
 								import { query } from "./lib/pg";
 								import { runLocard, type LocardTask } from "./detectives/locard";
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" prior dropped 60%; both
  rivals climbed. 4 unique chunk citations across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
+								import { runHolmes, type HolmesTask } from "./detectives/holmes";
-												W3.7: Dupin contradiction-scan detective + UI integration

Adds the third AI detective in the Investigation Bureau runtime: C. Auguste
Dupin, who scans a corpus shortlist for pairs (or small groups) of chunks
that cannot both be true under any ordinary reading.

Runtime:
  - prompts/dupin.md — discipline (no contradiction without ≥2 distinct
    chunk_ids; reject same-vocabulary near-misses; FEW high-confidence
    over MANY weak ones; emit `NO_CONTRADICTIONS` when corpus is silent)
  - src/detectives/dupin.ts — hybridSearch with k=18 (more chunks than
    Holmes because contradictions emerge from comparing dispersed
    claims), strict JSON-array parsing, AT MOST 3 contradictions per call
  - src/tools/write_contradiction.ts — validates topic + ≥2 positions
    drawn from ≥2 distinct chunks, resolves chunk_pk via DB lookup
    (rejects positions citing unknown chunks), INSERTs into
    public.contradictions + writes case/contradictions/R-NNNN.md
  - orchestrator: new `contradiction_scan` kind dispatching to runDupin;
    payload { topic, doc_id?, lang?, context_chunks? }

Chat + UI:
  - request_investigation gains kind=contradiction_scan + topic arg;
    triggered detective auto-resolves to dupin
  - chat-bubble inline card renders dupin in orange (#ff8a4d) to
    distinguish from holmes (cyan) and locard (green)
  - /jobs/[id] page swaps title + subtitle + tone per detective;
    "Question" label becomes "Topic" for contradiction_scan
  - /api/jobs/[id] hydrates public.contradictions when outputs[] surfaces
    contradiction_ids
  - job-status-poller renders ContradictionCard: topic + N positions
    (verbatim statements quoted, stance label optional, link to source
    chunk) + optional notes panel, with resolution_status badge
    (open/resolved/irreconcilable)

R-NNNN shares the contradiction_id_seq slot with relation per
CLAUDE.md naming — same conceptual class (a connection between two
pieces of evidence in tension).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:34:04 +00:00
+								import { runDupin, type DupinTask } from "./detectives/dupin";
-												W3.8: Schneier red-team detective + /h/[hypothesisId] dossier page

Adds the fourth AI detective in the Investigation Bureau runtime: Bruce
Schneier, who attacks an existing hypothesis as a red-team operator.

Runtime:
  - prompts/schneier.md — discipline (don't disprove, just attack;
    structured output with hidden_assumptions, failure_modes,
    alternative_explanations, recommended_tests, verdict_one_sentence;
    severity ∈ {low, medium, high}; emit INSUFFICIENT_HYPOTHESIS when
    the input is too thin)
  - src/detectives/schneier.ts — reads the hypothesis row + evidence
    chain (joined via evidence_refs FK), feeds Claude with the
    arguments + verbatim quotes, parses strict JSON object
  - src/tools/write_red_team_review.ts — UPDATEs hypotheses.reviewed_by
    + updated_at; APPENDS (or replaces if re-reviewed) a structured
    "## Red-team review (Schneier · X severity)" section to
    case/hypotheses/H-NNNN.md. Caps each list at 5 entries × 240 chars,
    validates verdict ≤ 280 chars.
  - orchestrator: new `red_team_review` kind dispatching to runSchneier

Chat + UI:
  - request_investigation gains kind=red_team_review + hypothesis_id arg
    (validated against H-NNNN regex); detective auto-resolves to schneier
  - chat-bubble inline card paints Schneier in red (#ff3344)
  - /jobs/[id] page swaps title/subtitle/tone per detective; the
    "Question" label becomes "Hypothesis under attack" for red_team_review

New /h/[hypothesisId] page (hypothesis dossier):
  - Server-rendered from public.hypotheses + public.evidence (joined
    via evidence_refs FK + chunk lookup)
  - Header: ID + creator + reviewer (highlighted when Schneier has
    visited), position as headline, question subtitle, Tetlock band
  - Prior + posterior bars with Δ-delta indicator
  - Argument grid: argument_for (green) vs argument_against (pink)
    side-by-side with [[wiki-link]] auto-linking to source chunks
  - Evidence chain: each E-NNNN with Grade A/B/C badge, verbatim
    blockquote, link to source page
  - Red-team review panel: parses the markdown section in the case
    file (severity badge, verdict, 4 bullet panels for
    hidden_assumptions / failure_modes / alternative_explanations /
    recommended_tests). Empty state when not yet reviewed.

RedTeamRequestButton client component + POST /api/h/[id]/red-team —
authenticated user can trigger Schneier in one click; UI swaps to
"acompanhar" link to /jobs/[id] once queued.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:48:12 +00:00
+								import { runSchneier, type SchneierTask } from "./detectives/schneier";
-												W3.8: Investigation Bureau complete — Poirot, Taleb, Tetlock, Case-Writer

Brings the bureau from 4 → 8 detectives. All eight run as Bun + claude-CLI
subprocesses against the same Supabase + investigation_jobs LISTEN/NOTIFY
queue, sharing search.ts hybridSearch and writer-side validators that
gate writes against schema + FK.

New detectives:

  Poirot (witness_analysis)
    - prompts/poirot.md — credibility / access / bias / corroboration /
      verdict; uses entity_mentions JOIN chunks to pull 12 chunks per
      person; resolves corroboration_refs chunk_ids defensively (accepts
      bare cNNNN even when the model emits pNNN/cNNNN).
    - INSERT into public.witnesses with W-NNNN naming.
    - Tone: purple (#9b5de5).

  Taleb (outlier_scan)
    - prompts/taleb.md — "surprise is relative to a model"; at most 3
      outliers; each requires explicit dominant_model + why_surprising +
      what_it_implies; fan-out into public.gaps with scope.kind="outlier".
    - Same unscoped-fallback as Dupin (Pass 1 with doc_id, Pass 2 widens
      to corpus if hits < 3).
    - Tone: yellow (#ffd23f).

  Tetlock (calibrate_hypothesis)
    - prompts/tetlock.md — honest Bayesian update; emits new_posterior +
      Δ + recommended_action ∈ {keep, downgrade, upgrade, supersede}.
    - write_calibration UPDATEs public.hypotheses + APPENDS a
      "## Calibration history" section to the H-NNNN.md case file
      (calibration is append-only — each datapoint matters). Posterior
      band auto-corrected to match Tetlock thresholds.
    - NO_NEW_EVIDENCE sentinel handled; pure 'keep' with |Δ|<0.005 only
      touches updated_at + reviewed_by.
    - Tone: teal (#26d4cc).

  Case-Writer (case_report)
    - prompts/case-writer.md — Dr. Watson assembles all artefacts
      (E-NNNN, H-NNNN, R-NNNN, W-NNNN, G-NNNN) into a five-act narrative.
      ILIKE filter on topic; doc_id optional scope.
    - Larger budget cap (≥ $0.50) + longer timeout for prose generation.
    - Writes case/reports/<slug>.md with frontmatter (topic + counts);
      no DB table for v0.
    - New page /c/[slug] renders the report via MarkdownBody + stat chips.
    - Tone: gold (#e0c080).

Hardening across the bureau:
  - Sentinel parsing now accepts backticked AND prose-trailing forms
    (Holmes NO_HYPOTHESES, Dupin NO_CONTRADICTIONS, Schneier
    INSUFFICIENT_HYPOTHESIS, Poirot INSUFFICIENT_TESTIMONY, Taleb
    NO_OUTLIERS, Tetlock NO_NEW_EVIDENCE, Case-Writer
    INSUFFICIENT_ARTEFACTS). Avoids the failure mode where the model
    refuses honestly but the runtime treated it as a parse error
    (observed live with Poirot+Hoover identifying the DIRECTOR
    false-positive disambiguation issue in entity_mentions).

Chat tool extensions (web/lib/chat/tools.ts):
  - request_investigation now accepts 7 kinds. Each routes to its
    detective with appropriate validation (hypothesis_id regex,
    person_id kebab-case, topic non-empty, doc_id for evidence_chain).
  - ETA per kind: Holmes/Dupin 60s, Poirot 45s, Schneier/Tetlock 30s,
    Taleb 50s, Case-Writer 180s (longer prose), Locard 30×n_chunks.

UI integration:
  - chat-bubble inline card paints each detective in its tone color.
  - /jobs/[id] page header swaps name/subtitle/tone per detective;
    question label adapts ("Topic" / "Hypothesis under attack" /
    "Witness under analysis" / "Topic to outlier-scan" / "Hypothesis
    under recalibration" / "Case to assemble").
  - job-status-poller renders: case-report link card (gold), outlier
    cards (yellow), witness cards (purple) — alongside existing
    hypothesis, evidence, contradiction cards.
  - /api/jobs/[id] hydrates witnesses (JOIN entities for canonical_name)
    + gaps (with scope JSONB).
  - /c/[slug] page reads /data/ufo/case/reports/<slug>.md and renders
    with MarkdownBody, frontmatter parsed for stat chips.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 01:11:39 +00:00
+								import { runPoirot, type PoirotTask } from "./detectives/poirot";
 								import { runTaleb, type TalebTask } from "./detectives/taleb";
 								import { runTetlock, type TetlockTask } from "./detectives/tetlock";
 								import { runCaseWriter, type CaseWriterTask } from "./detectives/case_writer";
-												W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard

Migrations:
- 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence,
  hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id
  sequences, pg_notify trigger on investigation_jobs, RLS read-only public,
  investigator role with least-privilege grants (no service_role).
- 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE
  policies bound to investigator + service_role + postgres (RLS with only a
  SELECT policy was silently blocking the worker's claim UPDATE).

investigator-runtime/ (new Bun + TS container):
- src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool,
  healthcheck file, graceful SIGTERM shutdown.
- src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard).
  Marks job failed when all per-item outputs error; surfaces first errors.
- src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool +
  dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc),
  claude -p subprocess with quota detection (api_error_status=429).
- src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps),
  resolve chunk_pk via FK, verify verbatim_excerpt actually appears in
  chunk content, INSERT + render case/evidence/E-NNNN.md + audit.
- src/detectives/locard.ts: load chunk → call Claude with locard.md system
  prompt → parse strict JSON → call writeEvidence locally.
- Dockerfile installs `claude` CLI (OAuth) at build time.

Compose:
- new `investigator` service builds from investigator-runtime/, connects
  with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap.

Web:
- /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1).
  POST creates a job, GET polls status. For W3.6 it becomes the chat tool.

End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch →
claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota
exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed
isQuota error, marks job failed with surfaced reason. Architecture proven;
quota reset enables real evidence creation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 22:49:33 +00:00
 								export interface InvestigationJob {
 								  job_id: string;
 								  kind: string;
 								  payload: Record<string, unknown>;
 								  triggered_by: string | null;
 								}
 								export async function dispatch(job: InvestigationJob, workerId: string): Promise<void> {
 								  await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId });
 								  let outputs: unknown[] = [];
 								  try {
 								    switch (job.kind) {
 								      case "evidence_chain": {
 								        // Payload shape: { doc_id, chunks?: [chunk_ids] } — fall back to scanning
 								        // the first 20 substantive chunks of the doc if not provided.
 								        const docId = String(job.payload.doc_id ?? "");
 								        if (!docId) throw new Error("evidence_chain requires payload.doc_id");
 								        const chunkIds = Array.isArray(job.payload.chunks)
 								          ? (job.payload.chunks as string[])
 								          : await pickEvidenceCandidates(docId, 5);
 								        if (chunkIds.length === 0) throw new Error(`no candidate chunks in ${docId}`);
 								        for (const chunk_id of chunkIds) {
 								          const task: LocardTask = {
 								            job_id: job.job_id,
 								            doc_id: docId,
 								            chunk_id,
 								            claim: typeof job.payload.claim === "string" ? job.payload.claim : undefined,
 								          };
 								          try {
 								            const r = await runLocard(task);
 								            outputs.push({ chunk_id, ...r });
 								          } catch (e) {
 								            outputs.push({ chunk_id, error: (e as Error).message });
 								          }
 								        }
 								        break;
 								      }
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" prior dropped 60%; both
  rivals climbed. 4 unique chunk citations across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
+								      case "hypothesis_tournament": {
-												W4: bilingual EN + PT-BR Investigation Bureau (CLAUDE.md §3 contract)

User flagged that the bureau was emitting English-only output, violating
the project's bilingual rule. Every narrative field now ships in both
languages: stored in sibling DB columns + rendered as adjacent markdown
sections per CLAUDE.md §3.

Migration 0007 (apply as supabase_admin):
  - public.hypotheses    +question_pt_br, +position_pt_br,
                         +argument_for_pt_br, +argument_against_pt_br
  - public.contradictions +topic_pt_br, +notes_pt_br
  - public.witnesses     +access_to_event_pt_br, +bias_notes_pt_br,
                         +verdict_pt_br
  - public.gaps          +description_pt_br, +suggested_next_move_pt_br
  - public.evidence: unchanged (verbatim_excerpt stays source-language)
  - JSONB siblings inside contradictions.chunks + gaps.scope handled at
    runtime (statement_pt_br, title_pt_br, dominant_model_pt_br,
    why_surprising_pt_br, what_it_implies_pt_br).

Detective prompts (all 7) rewritten with explicit bilingual JSON contract:
  - Output protocol section names every EN field + its _pt_br sibling
  - "Bilingual is mandatory" warning in the task instruction
  - Sentinel skip-states unchanged (NO_HYPOTHESES, NO_CONTRADICTIONS,
    INSUFFICIENT_TESTIMONY, INSUFFICIENT_HYPOTHESIS, NO_OUTLIERS,
    NO_NEW_EVIDENCE, INSUFFICIENT_ARTEFACTS)
  - Schneier: parallel arrays — hidden_assumptions[i] matches
    hidden_assumptions_pt_br[i], lengths must match
  - Case-Writer: interleaved §1 (EN) / §1 (PT-BR) per act in the body

Writer-side validation (all 7 tools):
  - Reject INSERT if PT-BR sibling missing when EN field is set
  - Persist both languages atomically in one INSERT (no half-updates)
  - Markdown renderers write adjacent EN+PT-BR sections in case files
    (## Argument for (EN) followed by ## Argumento a favor (PT-BR), etc.)

Detective parse layer (all 7 detectives):
  - Coerce both keys from JSON output
  - "incomplete_bilingual_*" skip reason when either side missing
  - Defensive: PT-BR fields trimmed + length-capped same as EN

Orchestrator propagates question_pt_br + topic_pt_br through job payload
to runHolmes / runCaseWriter, mirroring the chat-tool entry point.

Web (UI):
  - /api/jobs/[id] hydrates _pt_br siblings from pg
  - job-status-poller HypothesisCard: PT-BR primary, EN in <details>
    fallback when both exist
  - ContradictionCard: PT-BR statement primary + secondary EN quote
  - WitnessCard: PT-BR verdict primary + secondary EN quote, panels in PT
  - GapCard: PT-BR title/why/implies primary
  - /bureau hub: SELECTs both columns, renders PT-BR primary
  - /h/[id]: ArgumentPanel renders PT-BR primary with collapsible EN
    fallback when both exist
  - BureauSnapshot homepage: position_pt_br / topic_pt_br / verdict_pt_br
    primary
  - DocBureauPanel /d/[doc]: same primary-PT-BR pattern
  - New web/lib/i18n/pick.ts helper (not yet used by chat/agents — kept
    for future locale-driven switching when both languages are equally
    complete; current rule is PT-BR-first since the user is Brazilian)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 15:02:59 +00:00
+								        // Payload: { question, question_pt_br?, doc_id?, lang?, context_chunks? }
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" fell from prior 0.5 to
  posterior 0.2 (a 60% relative drop); both rivals climbed to 0.4.
  4 unique chunk citations across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
+								        const question = String(job.payload.question ?? "").trim();
 								        if (!question) throw new Error("hypothesis_tournament requires payload.question");
 								        const task: HolmesTask = {
 								          job_id: job.job_id,
 								          question,
-												W4: bilingual EN + PT-BR Investigation Bureau (CLAUDE.md §3 contract)

User flagged that the bureau was emitting English-only output, violating
the project's bilingual rule. Every narrative field now ships in both
languages: stored in sibling DB columns + rendered as adjacent markdown
sections per CLAUDE.md §3.

Migration 0007 (apply as supabase_admin):
  - public.hypotheses    +question_pt_br, +position_pt_br,
                         +argument_for_pt_br, +argument_against_pt_br
  - public.contradictions +topic_pt_br, +notes_pt_br
  - public.witnesses     +access_to_event_pt_br, +bias_notes_pt_br,
                         +verdict_pt_br
  - public.gaps          +description_pt_br, +suggested_next_move_pt_br
  - public.evidence: unchanged (verbatim_excerpt stays source-language)
  - JSONB siblings inside contradictions.chunks + gaps.scope handled at
    runtime (statement_pt_br, title_pt_br, dominant_model_pt_br,
    why_surprising_pt_br, what_it_implies_pt_br).

Detective prompts (all 7) rewritten with explicit bilingual JSON contract:
  - Output protocol section names every EN field + its _pt_br sibling
  - "Bilingual is mandatory" warning in the task instruction
  - Sentinel skip-states unchanged (NO_HYPOTHESES, NO_CONTRADICTIONS,
    INSUFFICIENT_TESTIMONY, INSUFFICIENT_HYPOTHESIS, NO_OUTLIERS,
    NO_NEW_EVIDENCE, INSUFFICIENT_ARTEFACTS)
  - Schneier: parallel arrays — hidden_assumptions[i] matches
    hidden_assumptions_pt_br[i], lengths must match
  - Case-Writer: interleaved §1 (EN) / §1 (PT-BR) per act in the body

Writer-side validation (all 7 tools):
  - Reject INSERT if PT-BR sibling missing when EN field is set
  - Persist both languages atomically in one INSERT (no half-updates)
  - Markdown renderers write adjacent EN+PT-BR sections in case files
    (## Argument for (EN) followed by ## Argumento a favor (PT-BR), etc.)

Detective parse layer (all 7 detectives):
  - Coerce both keys from JSON output
  - "incomplete_bilingual_*" skip reason when either side missing
  - Defensive: PT-BR fields trimmed + length-capped same as EN

Orchestrator propagates question_pt_br + topic_pt_br through job payload
to runHolmes / runCaseWriter, mirroring the chat-tool entry point.

Web (UI):
  - /api/jobs/[id] hydrates _pt_br siblings from pg
  - job-status-poller HypothesisCard: PT-BR primary, EN in <details>
    fallback when both exist
  - ContradictionCard: PT-BR statement primary + secondary EN quote
  - WitnessCard: PT-BR verdict primary + secondary EN quote, panels in PT
  - GapCard: PT-BR title/why/implies primary
  - /bureau hub: SELECTs both columns, renders PT-BR primary
  - /h/[id]: ArgumentPanel renders PT-BR primary with collapsible EN
    fallback when both exist
  - BureauSnapshot homepage: position_pt_br / topic_pt_br / verdict_pt_br
    primary
  - DocBureauPanel /d/[doc]: same primary-PT-BR pattern
  - New web/lib/i18n/pick.ts helper (not yet used by chat/agents — kept
    for future locale-driven switching when both languages are equally
    complete; current rule is PT-BR-first since the user is Brazilian)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 15:02:59 +00:00
+								          question_pt_br: typeof job.payload.question_pt_br === "string"
 								            ? job.payload.question_pt_br.trim() : undefined,
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" fell from prior 0.5 to
  posterior 0.2 (a 60% relative drop); both rivals climbed to 0.4.
  4 unique chunk citations across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
+								          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
 								        };
 								        const r = await runHolmes(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "hypothesis_tournament", skipped: true, reason: r.reason });
 								        } else {
 								          for (const h of r.hypotheses) outputs.push({ kind: "hypothesis", ...h });
 								        }
 								        break;
 								      }
-												W3.8: Investigation Bureau complete — Poirot, Taleb, Tetlock, Case-Writer

Brings the bureau from 4 → 8 detectives. All eight run as Bun + claude-CLI
subprocesses against the same Supabase + investigation_jobs LISTEN/NOTIFY
queue, sharing search.ts hybridSearch and writer-side validators that
gate writes against schema + FK.

New detectives:

  Poirot (witness_analysis)
    - prompts/poirot.md — credibility / access / bias / corroboration /
      verdict; uses entity_mentions JOIN chunks to pull 12 chunks per
      person; resolves corroboration_refs chunk_ids defensively (accepts
      bare cNNNN even when the model emits pNNN/cNNNN).
    - INSERT into public.witnesses with W-NNNN naming.
    - Tone: purple (#9b5de5).

  Taleb (outlier_scan)
    - prompts/taleb.md — "surprise is relative to a model"; at most 3
      outliers; each requires explicit dominant_model + why_surprising +
      what_it_implies; fan-out into public.gaps with scope.kind="outlier".
    - Same unscoped-fallback as Dupin (Pass 1 with doc_id, Pass 2 widens
      to corpus if hits < 3).
    - Tone: yellow (#ffd23f).

  Tetlock (calibrate_hypothesis)
    - prompts/tetlock.md — honest Bayesian update; emits new_posterior +
      Δ + recommended_action ∈ {keep, downgrade, upgrade, supersede}.
    - write_calibration UPDATEs public.hypotheses + APPENDS a
      "## Calibration history" section to the H-NNNN.md case file
      (calibration is append-only — each datapoint matters). Posterior
      band auto-corrected to match Tetlock thresholds.
    - NO_NEW_EVIDENCE sentinel handled; pure 'keep' with |Δ|<0.005 only
      touches updated_at + reviewed_by.
    - Tone: teal (#26d4cc).

  Case-Writer (case_report)
    - prompts/case-writer.md — Dr. Watson assembles all artefacts
      (E-NNNN, H-NNNN, R-NNNN, W-NNNN, G-NNNN) into a five-act narrative.
      ILIKE filter on topic; doc_id optional scope.
    - Larger budget cap (≥ $0.50) + longer timeout for prose generation.
    - Writes case/reports/<slug>.md with frontmatter (topic + counts);
      no DB table for v0.
    - New page /c/[slug] renders the report via MarkdownBody + stat chips.
    - Tone: gold (#e0c080).

Hardening across the bureau:
  - Sentinel parsing now accepts backticked AND prose-trailing forms
    (Holmes NO_HYPOTHESES, Dupin NO_CONTRADICTIONS, Schneier
    INSUFFICIENT_HYPOTHESIS, Poirot INSUFFICIENT_TESTIMONY, Taleb
    NO_OUTLIERS, Tetlock NO_NEW_EVIDENCE, Case-Writer
    INSUFFICIENT_ARTEFACTS). Avoids the failure mode where the model
    refuses honestly but the runtime treated it as a parse error
    (observed live with Poirot+Hoover identifying the DIRECTOR
    false-positive disambiguation issue in entity_mentions).

Chat tool extensions (web/lib/chat/tools.ts):
  - request_investigation now accepts 7 kinds. Each routes to its
    detective with appropriate validation (hypothesis_id regex,
    person_id kebab-case, topic non-empty, doc_id for evidence_chain).
  - ETA per kind: Holmes/Dupin 60s, Poirot 45s, Schneier/Tetlock 30s,
    Taleb 50s, Case-Writer 180s (longer prose), Locard 30×n_chunks.

UI integration:
  - chat-bubble inline card paints each detective in its tone color.
  - /jobs/[id] page header swaps name/subtitle/tone per detective;
    question label adapts ("Topic" / "Hypothesis under attack" /
    "Witness under analysis" / "Topic to outlier-scan" / "Hypothesis
    under recalibration" / "Case to assemble").
  - job-status-poller renders: case-report link card (gold), outlier
    cards (yellow), witness cards (purple) — alongside existing
    hypothesis, evidence, contradiction cards.
  - /api/jobs/[id] hydrates witnesses (JOIN entities for canonical_name)
    + gaps (with scope JSONB).
  - /c/[slug] page reads /data/ufo/case/reports/<slug>.md and renders
    with MarkdownBody, frontmatter parsed for stat chips.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 01:11:39 +00:00
+								      case "case_report": {
-												W4: bilingual EN + PT-BR Investigation Bureau (CLAUDE.md §3 contract)

User flagged that the bureau was emitting English-only output, violating
the project's bilingual rule. Every narrative field now ships in both
languages: stored in sibling DB columns + rendered as adjacent markdown
sections per CLAUDE.md §3.

Migration 0007 (apply as supabase_admin):
  - public.hypotheses    +question_pt_br, +position_pt_br,
                         +argument_for_pt_br, +argument_against_pt_br
  - public.contradictions +topic_pt_br, +notes_pt_br
  - public.witnesses     +access_to_event_pt_br, +bias_notes_pt_br,
                         +verdict_pt_br
  - public.gaps          +description_pt_br, +suggested_next_move_pt_br
  - public.evidence: unchanged (verbatim_excerpt stays source-language)
  - JSONB siblings inside contradictions.chunks + gaps.scope handled at
    runtime (statement_pt_br, title_pt_br, dominant_model_pt_br,
    why_surprising_pt_br, what_it_implies_pt_br).

Detective prompts (all 7) rewritten with explicit bilingual JSON contract:
  - Output protocol section names every EN field + its _pt_br sibling
  - "Bilingual is mandatory" warning in the task instruction
  - Sentinel skip-states unchanged (NO_HYPOTHESES, NO_CONTRADICTIONS,
    INSUFFICIENT_TESTIMONY, INSUFFICIENT_HYPOTHESIS, NO_OUTLIERS,
    NO_NEW_EVIDENCE, INSUFFICIENT_ARTEFACTS)
  - Schneier: parallel arrays — hidden_assumptions[i] matches
    hidden_assumptions_pt_br[i], lengths must match
  - Case-Writer: interleaved §1 (EN) / §1 (PT-BR) per act in the body

Writer-side validation (all 7 tools):
  - Reject INSERT if PT-BR sibling missing when EN field is set
  - Persist both languages atomically in one INSERT (no half-updates)
  - Markdown renderers write adjacent EN+PT-BR sections in case files
    (## Argument for (EN) followed by ## Argumento a favor (PT-BR), etc.)

Detective parse layer (all 7 detectives):
  - Coerce both keys from JSON output
  - "incomplete_bilingual_*" skip reason when either side missing
  - Defensive: PT-BR fields trimmed + length-capped same as EN

Orchestrator propagates question_pt_br + topic_pt_br through job payload
to runHolmes / runCaseWriter, mirroring the chat-tool entry point.

Web (UI):
  - /api/jobs/[id] hydrates _pt_br siblings from pg
  - job-status-poller HypothesisCard: PT-BR primary, EN in <details>
    fallback when both exist
  - ContradictionCard: PT-BR statement primary + secondary EN quote
  - WitnessCard: PT-BR verdict primary + secondary EN quote, panels in PT
  - GapCard: PT-BR title/why/implies primary
  - /bureau hub: SELECTs both columns, renders PT-BR primary
  - /h/[id]: ArgumentPanel renders PT-BR primary with collapsible EN
    fallback when both exist
  - BureauSnapshot homepage: position_pt_br / topic_pt_br / verdict_pt_br
    primary
  - DocBureauPanel /d/[doc]: same primary-PT-BR pattern
  - New web/lib/i18n/pick.ts helper (not yet used by chat/agents — kept
    for future locale-driven switching when both languages are equally
    complete; current rule is PT-BR-first since the user is Brazilian)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 15:02:59 +00:00
+								        // Payload: { topic, topic_pt_br?, doc_id?, slug?, lang? }
-												W3.8: Investigation Bureau complete — Poirot, Taleb, Tetlock, Case-Writer

Brings the bureau from 4 → 8 detectives. All eight run as Bun + claude-CLI
subprocesses against the same Supabase + investigation_jobs LISTEN/NOTIFY
queue, sharing search.ts hybridSearch and writer-side validators that
gate writes against schema + FK.

New detectives:

  Poirot (witness_analysis)
    - prompts/poirot.md — credibility / access / bias / corroboration /
      verdict; uses entity_mentions JOIN chunks to pull 12 chunks per
      person; resolves corroboration_refs chunk_ids defensively (accepts
      bare cNNNN even when the model emits pNNN/cNNNN).
    - INSERT into public.witnesses with W-NNNN naming.
    - Tone: purple (#9b5de5).

  Taleb (outlier_scan)
    - prompts/taleb.md — "surprise is relative to a model"; at most 3
      outliers; each requires explicit dominant_model + why_surprising +
      what_it_implies; fan-out into public.gaps with scope.kind="outlier".
    - Same unscoped-fallback as Dupin (Pass 1 with doc_id, Pass 2 widens
      to corpus if hits < 3).
    - Tone: yellow (#ffd23f).

  Tetlock (calibrate_hypothesis)
    - prompts/tetlock.md — honest Bayesian update; emits new_posterior +
      Δ + recommended_action ∈ {keep, downgrade, upgrade, supersede}.
    - write_calibration UPDATEs public.hypotheses + APPENDS a
      "## Calibration history" section to the H-NNNN.md case file
      (calibration is append-only — each datapoint matters). Posterior
      band auto-corrected to match Tetlock thresholds.
    - NO_NEW_EVIDENCE sentinel handled; pure 'keep' with |Δ|<0.005 only
      touches updated_at + reviewed_by.
    - Tone: teal (#26d4cc).

  Case-Writer (case_report)
    - prompts/case-writer.md — Dr. Watson assembles all artefacts
      (E-NNNN, H-NNNN, R-NNNN, W-NNNN, G-NNNN) into a five-act narrative.
      ILIKE filter on topic; doc_id optional scope.
    - Larger budget cap (≥ $0.50) + longer timeout for prose generation.
    - Writes case/reports/<slug>.md with frontmatter (topic + counts);
      no DB table for v0.
    - New page /c/[slug] renders the report via MarkdownBody + stat chips.
    - Tone: gold (#e0c080).

Hardening across the bureau:
  - Sentinel parsing now accepts backticked AND prose-trailing forms
    (Holmes NO_HYPOTHESES, Dupin NO_CONTRADICTIONS, Schneier
    INSUFFICIENT_HYPOTHESIS, Poirot INSUFFICIENT_TESTIMONY, Taleb
    NO_OUTLIERS, Tetlock NO_NEW_EVIDENCE, Case-Writer
    INSUFFICIENT_ARTEFACTS). Avoids the failure mode where the model
    refuses honestly but the runtime treated it as a parse error
    (observed live with Poirot+Hoover identifying the DIRECTOR
    false-positive disambiguation issue in entity_mentions).

Chat tool extensions (web/lib/chat/tools.ts):
  - request_investigation now accepts 7 kinds. Each routes to its
    detective with appropriate validation (hypothesis_id regex,
    person_id kebab-case, topic non-empty, doc_id for evidence_chain).
  - ETA per kind: Holmes/Dupin 60s, Poirot 45s, Schneier/Tetlock 30s,
    Taleb 50s, Case-Writer 180s (longer prose), Locard 30×n_chunks.

UI integration:
  - chat-bubble inline card paints each detective in its tone color.
  - /jobs/[id] page header swaps name/subtitle/tone per detective;
    question label adapts ("Topic" / "Hypothesis under attack" /
    "Witness under analysis" / "Topic to outlier-scan" / "Hypothesis
    under recalibration" / "Case to assemble").
  - job-status-poller renders: case-report link card (gold), outlier
    cards (yellow), witness cards (purple) — alongside existing
    hypothesis, evidence, contradiction cards.
  - /api/jobs/[id] hydrates witnesses (JOIN entities for canonical_name)
    + gaps (with scope JSONB).
  - /c/[slug] page reads /data/ufo/case/reports/<slug>.md and renders
    with MarkdownBody, frontmatter parsed for stat chips.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 01:11:39 +00:00
+								        const topic = String(job.payload.topic ?? "").trim();
 								        if (!topic) throw new Error("case_report requires payload.topic");
 								        const task: CaseWriterTask = {
 								          job_id: job.job_id, topic,
-												W4: bilingual EN + PT-BR Investigation Bureau (CLAUDE.md §3 contract)

User flagged that the bureau was emitting English-only output, violating
the project's bilingual rule. Every narrative field now ships in both
languages: stored in sibling DB columns + rendered as adjacent markdown
sections per CLAUDE.md §3.

Migration 0007 (apply as supabase_admin):
  - public.hypotheses    +question_pt_br, +position_pt_br,
                         +argument_for_pt_br, +argument_against_pt_br
  - public.contradictions +topic_pt_br, +notes_pt_br
  - public.witnesses     +access_to_event_pt_br, +bias_notes_pt_br,
                         +verdict_pt_br
  - public.gaps          +description_pt_br, +suggested_next_move_pt_br
  - public.evidence: unchanged (verbatim_excerpt stays source-language)
  - JSONB siblings inside contradictions.chunks + gaps.scope handled at
    runtime (statement_pt_br, title_pt_br, dominant_model_pt_br,
    why_surprising_pt_br, what_it_implies_pt_br).

Detective prompts (all 7) rewritten with explicit bilingual JSON contract:
  - Output protocol section names every EN field + its _pt_br sibling
  - "Bilingual is mandatory" warning in the task instruction
  - Sentinel skip-states unchanged (NO_HYPOTHESES, NO_CONTRADICTIONS,
    INSUFFICIENT_TESTIMONY, INSUFFICIENT_HYPOTHESIS, NO_OUTLIERS,
    NO_NEW_EVIDENCE, INSUFFICIENT_ARTEFACTS)
  - Schneier: parallel arrays — hidden_assumptions[i] matches
    hidden_assumptions_pt_br[i], lengths must match
  - Case-Writer: interleaved §1 (EN) / §1 (PT-BR) per act in the body

Writer-side validation (all 7 tools):
  - Reject INSERT if PT-BR sibling missing when EN field is set
  - Persist both languages atomically in one INSERT (no half-updates)
  - Markdown renderers write adjacent EN+PT-BR sections in case files
    (## Argument for (EN) followed by ## Argumento a favor (PT-BR), etc.)

Detective parse layer (all 7 detectives):
  - Coerce both keys from JSON output
  - "incomplete_bilingual_*" skip reason when either side missing
  - Defensive: PT-BR fields trimmed + length-capped same as EN

Orchestrator propagates question_pt_br + topic_pt_br through job payload
to runHolmes / runCaseWriter, mirroring the chat-tool entry point.

Web (UI):
  - /api/jobs/[id] hydrates _pt_br siblings from pg
  - job-status-poller HypothesisCard: PT-BR primary, EN in <details>
    fallback when both exist
  - ContradictionCard: PT-BR statement primary + secondary EN quote
  - WitnessCard: PT-BR verdict primary + secondary EN quote, panels in PT
  - GapCard: PT-BR title/why/implies primary
  - /bureau hub: SELECTs both columns, renders PT-BR primary
  - /h/[id]: ArgumentPanel renders PT-BR primary with collapsible EN
    fallback when both exist
  - BureauSnapshot homepage: position_pt_br / topic_pt_br / verdict_pt_br
    primary
  - DocBureauPanel /d/[doc]: same primary-PT-BR pattern
  - New web/lib/i18n/pick.ts helper (not yet used by chat/agents — kept
    for future locale-driven switching when both languages are equally
    complete; current rule is PT-BR-first since the user is Brazilian)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 15:02:59 +00:00
+								          topic_pt_br: typeof job.payload.topic_pt_br === "string"
 								            ? job.payload.topic_pt_br.trim() : undefined,
-												W3.8: Investigation Bureau complete — Poirot, Taleb, Tetlock, Case-Writer

Brings the bureau from 4 → 8 detectives. All eight run as Bun + claude-CLI
subprocesses against the same Supabase + investigation_jobs LISTEN/NOTIFY
queue, sharing search.ts hybridSearch and writer-side validators that
gate writes against schema + FK.

New detectives:

  Poirot (witness_analysis)
    - prompts/poirot.md — credibility / access / bias / corroboration /
      verdict; uses entity_mentions JOIN chunks to pull 12 chunks per
      person; resolves corroboration_refs chunk_ids defensively (accepts
      bare cNNNN even when the model emits pNNN/cNNNN).
    - INSERT into public.witnesses with W-NNNN naming.
    - Tone: purple (#9b5de5).

  Taleb (outlier_scan)
    - prompts/taleb.md — "surprise is relative to a model"; at most 3
      outliers; each requires explicit dominant_model + why_surprising +
      what_it_implies; fan-out into public.gaps with scope.kind="outlier".
    - Same unscoped-fallback as Dupin (Pass 1 with doc_id, Pass 2 widens
      to corpus if hits < 3).
    - Tone: yellow (#ffd23f).

  Tetlock (calibrate_hypothesis)
    - prompts/tetlock.md — honest Bayesian update; emits new_posterior +
      Δ + recommended_action ∈ {keep, downgrade, upgrade, supersede}.
    - write_calibration UPDATEs public.hypotheses + APPENDS a
      "## Calibration history" section to the H-NNNN.md case file
      (calibration is append-only — each datapoint matters). Posterior
      band auto-corrected to match Tetlock thresholds.
    - NO_NEW_EVIDENCE sentinel handled; pure 'keep' with |Δ|<0.005 only
      touches updated_at + reviewed_by.
    - Tone: teal (#26d4cc).

  Case-Writer (case_report)
    - prompts/case-writer.md — Dr. Watson assembles all artefacts
      (E-NNNN, H-NNNN, R-NNNN, W-NNNN, G-NNNN) into a five-act narrative.
      ILIKE filter on topic; doc_id optional scope.
    - Larger budget cap (≥ $0.50) + longer timeout for prose generation.
    - Writes case/reports/<slug>.md with frontmatter (topic + counts);
      no DB table for v0.
    - New page /c/[slug] renders the report via MarkdownBody + stat chips.
    - Tone: gold (#e0c080).

Hardening across the bureau:
  - Sentinel parsing now accepts backticked AND prose-trailing forms
    (Holmes NO_HYPOTHESES, Dupin NO_CONTRADICTIONS, Schneier
    INSUFFICIENT_HYPOTHESIS, Poirot INSUFFICIENT_TESTIMONY, Taleb
    NO_OUTLIERS, Tetlock NO_NEW_EVIDENCE, Case-Writer
    INSUFFICIENT_ARTEFACTS). Avoids the failure mode where the model
    refuses honestly but the runtime treated it as a parse error
    (observed live with Poirot+Hoover identifying the DIRECTOR
    false-positive disambiguation issue in entity_mentions).

Chat tool extensions (web/lib/chat/tools.ts):
  - request_investigation now accepts 7 kinds. Each routes to its
    detective with appropriate validation (hypothesis_id regex,
    person_id kebab-case, topic non-empty, doc_id for evidence_chain).
  - ETA per kind: Holmes/Dupin 60s, Poirot 45s, Schneier/Tetlock 30s,
    Taleb 50s, Case-Writer 180s (longer prose), Locard 30×n_chunks.

UI integration:
  - chat-bubble inline card paints each detective in its tone color.
  - /jobs/[id] page header swaps name/subtitle/tone per detective;
    question label adapts ("Topic" / "Hypothesis under attack" /
    "Witness under analysis" / "Topic to outlier-scan" / "Hypothesis
    under recalibration" / "Case to assemble").
  - job-status-poller renders: case-report link card (gold), outlier
    cards (yellow), witness cards (purple) — alongside existing
    hypothesis, evidence, contradiction cards.
  - /api/jobs/[id] hydrates witnesses (JOIN entities for canonical_name)
    + gaps (with scope JSONB).
  - /c/[slug] page reads /data/ufo/case/reports/<slug>.md and renders
    with MarkdownBody, frontmatter parsed for stat chips.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 01:11:39 +00:00
+								          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
 								          slug: typeof job.payload.slug === "string" ? job.payload.slug : undefined,
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								        };
 								        const r = await runCaseWriter(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "case_report", skipped: true, reason: r.reason });
 								        } else {
 								          outputs.push({ kind: "case_report", ...r });
 								        }
 								        break;
 								      }
 								      case "calibrate_hypothesis": {
 								        // Routes the job to runTetlock for a single hypothesis.
 								        // Payload: { hypothesis_id, lang? }
 								        // hypothesis_id is coerced to a string and trimmed; a missing or
 								        // blank value throws, which the enclosing try is expected to handle.
 								        const hyp = String(job.payload.hypothesis_id ?? "").trim();
 								        if (!hyp) throw new Error("calibrate_hypothesis requires payload.hypothesis_id");
 								        const task: TetlockTask = {
 								          job_id: job.job_id,
 								          hypothesis_id: hyp,
 								          // Only the exact string "en" selects English; anything else falls back to "pt".
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								        };
 								        const r = await runTetlock(task);
 								        // A result carrying a `skipped` key is recorded as a skip with its reason;
 								        // otherwise the whole result is recorded under kind "calibration".
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "calibrate_hypothesis", skipped: true, reason: r.reason });
 								        } else {
 								          // NOTE(review): `...r` is spread after `kind`, so a `kind` field on the
 								          // result would override "calibration" — confirm the result has none.
 								          outputs.push({ kind: "calibration", ...r });
 								        }
 								        break;
 								      }
 								      case "outlier_scan": {
 								        // Routes the job to runTaleb for a topic, optionally scoped by doc_id.
 								        // Payload: { topic, doc_id?, lang?, context_chunks? }
 								        // topic is coerced to a string and trimmed; a missing or blank value throws.
 								        const topic = String(job.payload.topic ?? "").trim();
 								        if (!topic) throw new Error("outlier_scan requires payload.topic");
 								        const task: TalebTask = {
 								          job_id: job.job_id, topic,
 								          // Optional fields are forwarded only when they have the expected
 								          // primitive type; any other value is passed as undefined.
 								          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
 								          // Only the exact string "en" selects English; anything else falls back to "pt".
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
 								        };
 								        const r = await runTaleb(task);
 								        // A result carrying a `skipped` key is recorded as one skip entry;
 								        // otherwise each element of r.outliers becomes its own "outlier" output.
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "outlier_scan", skipped: true, reason: r.reason });
 								        } else {
 								          for (const o of r.outliers) outputs.push({ kind: "outlier", ...o });
 								        }
 								        break;
 								      }
 								      case "witness_analysis": {
 								        // Payload: { person_id } OR { person_entity_pk }
 								        const person_id = typeof job.payload.person_id === "string" ? job.payload.person_id.trim() : undefined;
 								        const person_entity_pk = typeof job.payload.person_entity_pk === "number"
 								          ? job.payload.person_entity_pk : undefined;
 								        if (!person_id && !person_entity_pk) {
 								          throw new Error("witness_analysis requires payload.person_id or person_entity_pk");
 								        }
 								        const task: PoirotTask = {
 								          job_id: job.job_id,
 								          person_id,
 								          person_entity_pk,
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
 								        };
 								        const r = await runPoirot(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "witness_analysis", skipped: true, reason: r.reason });
 								        } else {
 								          outputs.push({ kind: "witness_analysis", ...r });
 								        }
 								        break;
 								      }
-												W3.8: Schneier red-team detective + /h/[hypothesisId] dossier page

Adds the fourth AI detective in the Investigation Bureau runtime: Bruce
Schneier, who attacks an existing hypothesis as a red-team operator.

Runtime:
  - prompts/schneier.md — discipline (don't disprove, just attack;
    structured output with hidden_assumptions, failure_modes,
    alternative_explanations, recommended_tests, verdict_one_sentence;
    severity ∈ {low, medium, high}; emit INSUFFICIENT_HYPOTHESIS when
    the input is too thin)
  - src/detectives/schneier.ts — reads the hypothesis row + evidence
    chain (joined via evidence_refs FK), feeds Claude with the
    arguments + verbatim quotes, parses strict JSON object
  - src/tools/write_red_team_review.ts — UPDATEs hypotheses.reviewed_by
    + updated_at; APPENDS (or replaces if re-reviewed) a structured
    "## Red-team review (Schneier · X severity)" section to
    case/hypotheses/H-NNNN.md. Caps each list at 5 entries × 240 chars,
    validates verdict ≤ 280 chars.
  - orchestrator: new `red_team_review` kind dispatching to runSchneier

Chat + UI:
  - request_investigation gains kind=red_team_review + hypothesis_id arg
    (validated against H-NNNN regex); detective auto-resolves to schneier
  - chat-bubble inline card paints Schneier in red (#ff3344)
  - /jobs/[id] page swaps title/subtitle/tone per detective; the
    "Question" label becomes "Hypothesis under attack" for red_team_review

New /h/[hypothesisId] page (hypothesis dossier):
  - Server-rendered from public.hypotheses + public.evidence (joined
    via evidence_refs FK + chunk lookup)
  - Header: ID + creator + reviewer (highlighted when Schneier has
    visited), position as headline, question subtitle, Tetlock band
  - Prior + posterior bars with a Δ (delta) indicator
  - Argument grid: argument_for (green) vs argument_against (pink)
    side-by-side with [[wiki-link]] auto-linking to source chunks
  - Evidence chain: each E-NNNN with Grade A/B/C badge, verbatim
    blockquote, link to source page
  - Red-team review panel: parses the markdown section in the case
    file (severity badge, verdict, 4 bullet panels for
    hidden_assumptions / failure_modes / alternative_explanations /
    recommended_tests). Empty state when not yet reviewed.

RedTeamRequestButton client component + POST /api/h/[id]/red-team —
an authenticated user can trigger Schneier in one click; the UI swaps
to an "acompanhar" (follow) link to /jobs/[id] once queued.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:48:12 +00:00
+								      case "red_team_review": {
 								        // Payload: { hypothesis_id }
 								        const hyp = String(job.payload.hypothesis_id ?? "").trim();
 								        if (!hyp) throw new Error("red_team_review requires payload.hypothesis_id");
 								        const task: SchneierTask = { job_id: job.job_id, hypothesis_id: hyp };
 								        const r = await runSchneier(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "red_team_review", skipped: true, reason: r.reason });
 								        } else {
 								          outputs.push({ kind: "red_team_review", ...r });
 								        }
 								        break;
 								      }
-												W3.7: Dupin contradiction-scan detective + UI integration

Adds the third AI detective in the Investigation Bureau runtime: C. Auguste
Dupin, who scans a corpus shortlist for pairs (or small groups) of chunks
that cannot both be true under any ordinary reading.

Runtime:
  - prompts/dupin.md — discipline (no contradiction without ≥2 distinct
    chunk_ids; reject same-vocabulary near-misses; FEW high-confidence
    over MANY weak ones; emit `NO_CONTRADICTIONS` when corpus is silent)
  - src/detectives/dupin.ts — hybridSearch with k=18 (more chunks than
    Holmes because contradictions emerge from comparing dispersed
    claims), strict JSON-array parsing, AT MOST 3 contradictions per call
  - src/tools/write_contradiction.ts — validates topic + ≥2 positions
    drawn from ≥2 distinct chunks, resolves chunk_pk via DB lookup
    (rejects positions citing unknown chunks), INSERTs into
    public.contradictions + writes case/contradictions/R-NNNN.md
  - orchestrator: new `contradiction_scan` kind dispatching to runDupin;
    payload { topic, doc_id?, lang?, context_chunks? }

Chat + UI:
  - request_investigation gains kind=contradiction_scan + topic arg;
    triggered detective auto-resolves to dupin
  - chat-bubble inline card renders dupin in orange (#ff8a4d) to
    distinguish from holmes (cyan) and locard (green)
  - /jobs/[id] page swaps title + subtitle + tone per detective;
    "Question" label becomes "Topic" for contradiction_scan
  - /api/jobs/[id] hydrates public.contradictions when outputs[] surfaces
    contradiction_ids
  - job-status-poller renders ContradictionCard: topic + N positions
    (verbatim statements quoted, stance label optional, link to source
    chunk) + optional notes panel, with resolution_status badge
    (open/resolved/irreconcilable)

R-NNNN shares the contradiction_id_seq slot with relation per
CLAUDE.md naming — same conceptual class (a connection between two
pieces of evidence in tension).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:34:04 +00:00
+								      case "contradiction_scan": {
 								        // Payload: { topic, doc_id?, lang?, context_chunks? }
 								        const topic = String(job.payload.topic ?? "").trim();
 								        if (!topic) throw new Error("contradiction_scan requires payload.topic");
 								        const task: DupinTask = {
 								          job_id: job.job_id,
 								          topic,
 								          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
 								        };
 								        const r = await runDupin(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason });
 								        } else {
 								          for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c });
 								        }
 								        break;
 								      }
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" fell 60% relative to its
  prior (0.5 → 0.2); both rivals climbed. 4 unique chunk citations
  across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
-												W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard

Migrations:
- 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence,
  hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id
  sequences, pg_notify trigger on investigation_jobs, RLS read-only public,
  investigator role with least-privilege grants (no service_role).
- 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE
  policies bound to investigator + service_role + postgres (RLS with only a
  SELECT policy was silently blocking the worker's claim UPDATE).

investigator-runtime/ (new Bun + TS container):
- src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool,
  healthcheck file, graceful SIGTERM shutdown.
- src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard).
  Marks job failed when all per-item outputs error; surfaces first errors.
- src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool +
  dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc),
  claude -p subprocess with quota detection (api_error_status=429).
- src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps),
  resolve chunk_pk via FK, verify verbatim_excerpt actually appears in
  chunk content, INSERT + render case/evidence/E-NNNN.md + audit.
- src/detectives/locard.ts: load chunk → call Claude with locard.md system
  prompt → parse strict JSON → call writeEvidence locally.
- Dockerfile installs `claude` CLI (OAuth) at build time.

Compose:
- new `investigator` service builds from investigator-runtime/, connects
  with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap.

Web:
- /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1).
  POST creates a job, GET polls status. For W3.6 it becomes the chat tool.

End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch →
claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota
exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed
isQuota error, marks job failed with surfaced reason. Architecture proven;
quota reset enables real evidence creation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 22:49:33 +00:00
+								      default:
 								        throw new Error(`unknown_kind: ${job.kind}`);
 								    }
 								    // Status reflects reality: if every per-item attempt errored we mark
 								    // the job failed (so the UI doesn't say "complete" when nothing useful
 								    // was produced); if at least one succeeded we keep `complete` with the
 								    // mixed outputs payload.
 								    const allErrors = outputs.length > 0 && outputs.every(
 								      (o): o is { error: string } => typeof (o as { error?: unknown }).error === "string",
 								    );
 								    const summary = (() => {
 								      if (!allErrors) return null;
 								      // First few error messages, surfaced to the user via the jobs table.
 								      return outputs
 								        .map((o) => (o as { error?: string }).error)
 								        .filter((e): e is string => Boolean(e))
 								        .slice(0, 3)
 								        .join(" | ");
 								    })();
 								    await query(
 								      `UPDATE public.investigation_jobs
 								          SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3
 								        WHERE job_id = $4`,
 								      [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id],
 								    );
 								    await audit({
 								      event: allErrors ? "job_failed_all_items" : "job_completed",
 								      job_id: job.job_id,
 								      kind: job.kind,
 								      n_outputs: outputs.length,
 								      ...(summary ? { summary } : {}),
 								    });
 								  } catch (e) {
 								    const err = (e as Error).message;
 								    await query(
 								      `UPDATE public.investigation_jobs
 								          SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb
 								        WHERE job_id = $3`,
 								      [err, JSON.stringify(outputs), job.job_id],
 								    );
 								    await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err });
 								  }
 								}
 								/**
 								 * Select up to `limit` chunk ids from document `doc_id` to use as
 								 * evidence candidates.
 								 *
 								 * A chunk qualifies when it is flagged `is_searchable` and its text
 								 * (`content_en`, falling back to `content_pt` when that is NULL) is longer
 								 * than 200 characters. Rows come back ordered by `ufo_anomaly` descending,
 								 * then by `page` and `order_in_page` ascending.
 								 *
 								 * NOTE(review): Postgres places NULLs first under `DESC`; if `ufo_anomaly`
 								 * is nullable, unflagged rows would outrank flagged ones — confirm the
 								 * column is NOT NULL (or add `NULLS LAST`).
 								 *
 								 * @param doc_id Document whose chunks are scanned.
 								 * @param limit Maximum number of chunk ids to return (bound to `LIMIT $2`).
 								 * @returns The matching `chunk_id` values, in query order.
 								 */
 								async function pickEvidenceCandidates(doc_id: string, limit: number): Promise<string[]> {
 								  const candidateSql = `SELECT chunk_id
 								       FROM public.chunks
 								      WHERE doc_id = $1
 								        AND is_searchable
 								        AND LENGTH(COALESCE(content_en, content_pt, '')) > 200
 								      ORDER BY ufo_anomaly DESC, page ASC, order_in_page ASC
 								      LIMIT $2`;
 								  const candidates = await query<{ chunk_id: string }>(candidateSql, [doc_id, limit]);
 								  return candidates.map(({ chunk_id }) => chunk_id);
 								}