disclosure-bureau/investigator-runtime/src/orchestrator.ts

/**
 * orchestrator.ts — chief-detective. Decides which detective runs for a job.
 *
 * Dispatches on `job.kind`: `evidence_chain` (Locard), `hypothesis_tournament`
 * (Holmes), `red_team_review` (Schneier) and `contradiction_scan` (Dupin).
 * Unknown kinds fail the job loudly so we don't quietly drop work.
 */
import { audit } from "./lib/audit";
import { query } from "./lib/pg";
import { runLocard, type LocardTask } from "./detectives/locard";
import { runHolmes, type HolmesTask } from "./detectives/holmes";
import { runDupin, type DupinTask } from "./detectives/dupin";
import { runSchneier, type SchneierTask } from "./detectives/schneier";

/**
 * One claimed row of work handed to `dispatch`.
 */
export interface InvestigationJob {
  /** Identifier used to locate the row in `public.investigation_jobs` and to tag audit events. */
  job_id: string;
  /** Selects the detective; `dispatch` switches on this and fails the job for unrecognised values. */
  kind: string;
  /** Kind-specific arguments (e.g. `doc_id`, `question`, `topic`, `hypothesis_id`); validated per kind at dispatch time. */
  payload: Record<string, unknown>;
  /** Not read by the dispatcher in this file — presumably identifies who requested the job; TODO confirm against the producer. */
  triggered_by: string | null;
}

/**
 * Turn any thrown value into a string message.
 *
 * `catch` can receive non-Error values (strings, plain objects). Casting those
 * to `Error` and reading `.message` yields `undefined`, which would make a
 * failed item look like a success once serialised, so always produce a string.
 */
function describeError(e: unknown): string {
  if (e instanceof Error) return e.message;
  if (typeof e === "object" && e !== null) {
    const maybeMessage = (e as { message?: unknown }).message;
    if (typeof maybeMessage === "string") return maybeMessage;
  }
  try {
    return String(e);
  } catch {
    // e.g. an object without a usable toString / prototype.
    return "unknown error";
  }
}

/**
 * Run the detective that matches `job.kind` and persist the outcome.
 *
 * Flow: audit `job_claimed` → run the per-kind handler, collecting `outputs`
 * → write the final status, outputs and error summary to
 * `public.investigation_jobs` → audit the result.
 *
 * Status rules:
 *  - a thrown error (bad payload, unknown kind, detective failure outside the
 *    per-chunk loop) marks the job `failed` with that error's message and
 *    whatever outputs were collected so far;
 *  - if every collected output carries a string `error`, the job is `failed`
 *    and the first three messages are joined into the `error` column;
 *  - otherwise the job is `complete` (this includes "skipped" results and
 *    mixed success/error outputs).
 *
 * @param job      The claimed job row.
 * @param workerId Identifier of the claiming worker; only recorded in the audit event.
 * @returns Resolves once status and audit have been written. Rejects only if
 *          the initial audit call, or the failure-path status write/audit, rejects.
 */
export async function dispatch(job: InvestigationJob, workerId: string): Promise<void> {
  await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId });

  let outputs: unknown[] = [];
  try {
    switch (job.kind) {
      case "evidence_chain": {
        // Payload shape: { doc_id, chunks?: [chunk_ids], claim? } — when `chunks`
        // is not an array, fall back to the top 5 candidates chosen by
        // pickEvidenceCandidates.
        const docId = String(job.payload.doc_id ?? "");
        if (!docId) throw new Error("evidence_chain requires payload.doc_id");
        const chunkIds: unknown[] = Array.isArray(job.payload.chunks)
          ? job.payload.chunks
          : await pickEvidenceCandidates(docId, 5);
        if (chunkIds.length === 0) throw new Error(`no candidate chunks in ${docId}`);
        // Same optional claim for every chunk, so resolve it once.
        const claim = typeof job.payload.claim === "string" ? job.payload.claim : undefined;
        for (const chunk_id of chunkIds) {
          // The payload is untrusted JSON: record malformed ids as per-item
          // errors rather than handing a non-string to the detective.
          if (typeof chunk_id !== "string") {
            outputs.push({ chunk_id, error: "invalid chunk_id: expected a string" });
            continue;
          }
          const task: LocardTask = {
            job_id: job.job_id,
            doc_id: docId,
            chunk_id,
            claim,
          };
          try {
            const r = await runLocard(task);
            outputs.push({ chunk_id, ...r });
          } catch (e) {
            // One bad chunk must not abort the rest of the batch.
            outputs.push({ chunk_id, error: describeError(e) });
          }
        }
        break;
      }
      case "hypothesis_tournament": {
        // Payload: { question, doc_id?, lang?, context_chunks? }
        const question = String(job.payload.question ?? "").trim();
        if (!question) throw new Error("hypothesis_tournament requires payload.question");
        const task: HolmesTask = {
          job_id: job.job_id,
          question,
          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
          // Anything other than an explicit "en" is treated as "pt".
          lang: job.payload.lang === "en" ? "en" : "pt",
          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
        };
        const r = await runHolmes(task);
        if ("skipped" in r) {
          outputs.push({ kind: "hypothesis_tournament", skipped: true, reason: r.reason });
        } else {
          for (const h of r.hypotheses) outputs.push({ kind: "hypothesis", ...h });
        }
        break;
      }
      case "red_team_review": {
        // Payload: { hypothesis_id }
        const hyp = String(job.payload.hypothesis_id ?? "").trim();
        if (!hyp) throw new Error("red_team_review requires payload.hypothesis_id");
        const task: SchneierTask = { job_id: job.job_id, hypothesis_id: hyp };
        const r = await runSchneier(task);
        if ("skipped" in r) {
          outputs.push({ kind: "red_team_review", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "red_team_review", ...r });
        }
        break;
      }
      case "contradiction_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const topic = String(job.payload.topic ?? "").trim();
        if (!topic) throw new Error("contradiction_scan requires payload.topic");
        const task: DupinTask = {
          job_id: job.job_id,
          topic,
          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
          // Anything other than an explicit "en" is treated as "pt".
          lang: job.payload.lang === "en" ? "en" : "pt",
          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
        };
        const r = await runDupin(task);
        if ("skipped" in r) {
          outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason });
        } else {
          for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c });
        }
        break;
      }

      default:
        throw new Error(`unknown_kind: ${job.kind}`);
    }

    // Status reflects reality: if every per-item attempt errored we mark
    // the job failed (so the UI doesn't say "complete" when nothing useful
    // was produced); if at least one succeeded we keep `complete` with the
    // mixed outputs payload.
    const itemErrors = outputs.map((o) => (o as { error?: unknown }).error);
    const allErrors = outputs.length > 0 && itemErrors.every((e) => typeof e === "string");
    // First few non-empty error messages, surfaced to the user via the jobs table.
    const summary = allErrors
      ? itemErrors
          .filter((e): e is string => typeof e === "string" && e !== "")
          .slice(0, 3)
          .join(" | ")
      : null;
    await query(
      `UPDATE public.investigation_jobs
          SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3
        WHERE job_id = $4`,
      [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id],
    );
    await audit({
      event: allErrors ? "job_failed_all_items" : "job_completed",
      job_id: job.job_id,
      kind: job.kind,
      n_outputs: outputs.length,
      ...(summary ? { summary } : {}),
    });
  } catch (e) {
    // Whole-job failure: keep any partial outputs alongside the error text.
    const err = describeError(e);
    await query(
      `UPDATE public.investigation_jobs
          SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb
        WHERE job_id = $3`,
      [err, JSON.stringify(outputs), job.job_id],
    );
    await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err });
  }
}

/**
 * Choose up to `limit` chunk ids from a single document for evidence extraction.
 *
 * A chunk qualifies when it is flagged `is_searchable` and its text
 * (`content_en`, falling back to `content_pt`) is longer than 200 characters.
 * Results are ordered by `ufo_anomaly` descending, then by page and by
 * position within the page.
 *
 * NOTE(review): PostgreSQL places NULLs first under DESC; if `ufo_anomaly` is
 * nullable, unflagged rows would outrank flagged ones — confirm against the schema.
 *
 * @param doc_id Document whose chunks are considered.
 * @param limit  Maximum number of chunk ids to return.
 * @returns Chunk ids in the order produced by the query.
 */
async function pickEvidenceCandidates(doc_id: string, limit: number): Promise<string[]> {
  const candidateSql = `SELECT chunk_id
       FROM public.chunks
      WHERE doc_id = $1
        AND is_searchable
        AND LENGTH(COALESCE(content_en, content_pt, '')) > 200
      ORDER BY ufo_anomaly DESC, page ASC, order_in_page ASC
      LIMIT $2`;
  const params = [doc_id, limit];
  const candidates = await query<{ chunk_id: string }>(candidateSql, params);
  return candidates.map(({ chunk_id }) => chunk_id);
}
-												W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard

Migrations:
- 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence,
  hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id
  sequences, pg_notify trigger on investigation_jobs, RLS read-only public,
  investigator role with least-privilege grants (no service_role).
- 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE
  policies bound to investigator + service_role + postgres (RLS with only a
  SELECT policy was silently blocking the worker's claim UPDATE).

investigator-runtime/ (new Bun + TS container):
- src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool,
  healthcheck file, graceful SIGTERM shutdown.
- src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard).
  Marks job failed when all per-item outputs error; surfaces first errors.
- src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool +
  dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc),
  claude -p subprocess with quota detection (api_error_status=429).
- src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps),
  resolve chunk_pk via FK, verify verbatim_excerpt actually appears in
  chunk content, INSERT + render case/evidence/E-NNNN.md + audit.
- src/detectives/locard.ts: load chunk → call Claude with locard.md system
  prompt → parse strict JSON → call writeEvidence locally.
- Dockerfile installs `claude` CLI (OAuth) at build time.

Compose:
- new `investigator` service builds from investigator-runtime/, connects
  with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap.

Web:
- /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1).
  POST creates a job, GET polls status. For W3.6 it becomes the chat tool.

End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch →
claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota
exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed
isQuota error, marks job failed with surfaced reason. Architecture proven;
quota reset enables real evidence creation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 22:49:33 +00:00
+								/**
 								 * orchestrator.ts — chief-detective. Decides which detective runs for a job.
 								 *
 								 * For W3.1–W3.4 we only know `evidence_chain` (Locard). Other kinds enter the
 								 * registry as we build each detective in W3.5+. Unknown kinds fail the job
 								 * loudly so we don't quietly drop work.
 								 */
 								import { audit } from "./lib/audit";
 								import { query } from "./lib/pg";
 								import { runLocard, type LocardTask } from "./detectives/locard";
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" prior dropped 60%; both
  rivals climbed. 4 unique chunk citations across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
+								import { runHolmes, type HolmesTask } from "./detectives/holmes";
-												W3.7: Dupin contradiction-scan detective + UI integration

Adds the third AI detective in the Investigation Bureau runtime: C. Auguste
Dupin, who scans a corpus shortlist for pairs (or small groups) of chunks
that cannot both be true under any ordinary reading.

Runtime:
  - prompts/dupin.md — discipline (no contradiction without ≥2 distinct
    chunk_ids; reject same-vocabulary near-misses; FEW high-confidence
    over MANY weak ones; emit `NO_CONTRADICTIONS` when corpus is silent)
  - src/detectives/dupin.ts — hybridSearch with k=18 (more chunks than
    Holmes because contradictions emerge from comparing dispersed
    claims), strict JSON-array parsing, AT MOST 3 contradictions per call
  - src/tools/write_contradiction.ts — validates topic + ≥2 positions
    drawn from ≥2 distinct chunks, resolves chunk_pk via DB lookup
    (rejects positions citing unknown chunks), INSERTs into
    public.contradictions + writes case/contradictions/R-NNNN.md
  - orchestrator: new `contradiction_scan` kind dispatching to runDupin;
    payload { topic, doc_id?, lang?, context_chunks? }

Chat + UI:
  - request_investigation gains kind=contradiction_scan + topic arg;
    triggered detective auto-resolves to dupin
  - chat-bubble inline card renders dupin in orange (#ff8a4d) to
    distinguish from holmes (cyan) and locard (green)
  - /jobs/[id] page swaps title + subtitle + tone per detective;
    "Question" label becomes "Topic" for contradiction_scan
  - /api/jobs/[id] hydrates public.contradictions when outputs[] surfaces
    contradiction_ids
  - job-status-poller renders ContradictionCard: topic + N positions
    (verbatim statements quoted, stance label optional, link to source
    chunk) + optional notes panel, with resolution_status badge
    (open/resolved/irreconcilable)

R-NNNN shares the contradiction_id_seq slot with relation per
CLAUDE.md naming — same conceptual class (a connection between two
pieces of evidence in tension).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:34:04 +00:00
+								import { runDupin, type DupinTask } from "./detectives/dupin";
-												W3.8: Schneier red-team detective + /h/[hypothesisId] dossier page

Adds the fourth AI detective in the Investigation Bureau runtime: Bruce
Schneier, who attacks an existing hypothesis as a red-team operator.

Runtime:
  - prompts/schneier.md — discipline (don't disprove, just attack;
    structured output with hidden_assumptions, failure_modes,
    alternative_explanations, recommended_tests, verdict_one_sentence;
    severity ∈ {low, medium, high}; emit INSUFFICIENT_HYPOTHESIS when
    the input is too thin)
  - src/detectives/schneier.ts — reads the hypothesis row + evidence
    chain (joined via evidence_refs FK), feeds Claude with the
    arguments + verbatim quotes, parses strict JSON object
  - src/tools/write_red_team_review.ts — UPDATEs hypotheses.reviewed_by
    + updated_at; APPENDS (or replaces if re-reviewed) a structured
    "## Red-team review (Schneier · X severity)" section to
    case/hypotheses/H-NNNN.md. Caps each list at 5 entries × 240 chars,
    validates verdict ≤ 280 chars.
  - orchestrator: new `red_team_review` kind dispatching to runSchneier

Chat + UI:
  - request_investigation gains kind=red_team_review + hypothesis_id arg
    (validated against H-NNNN regex); detective auto-resolves to schneier
  - chat-bubble inline card paints Schneier in red (#ff3344)
  - /jobs/[id] page swaps title/subtitle/tone per detective; the
    "Question" label becomes "Hypothesis under attack" for red_team_review

New /h/[hypothesisId] page (hypothesis dossier):
  - Server-rendered from public.hypotheses + public.evidence (joined
    via evidence_refs FK + chunk lookup)
  - Header: ID + creator + reviewer (highlighted when Schneier has
    visited), position as headline, question subtitle, Tetlock band
  - Prior + posterior bars with Δ-delta indicator
  - Argument grid: argument_for (green) vs argument_against (pink)
    side-by-side with [[wiki-link]] auto-linking to source chunks
  - Evidence chain: each E-NNNN with Grade A/B/C badge, verbatim
    blockquote, link to source page
  - Red-team review panel: parses the markdown section in the case
    file (severity badge, verdict, 4 bullet panels for
    hidden_assumptions / failure_modes / alternative_explanations /
    recommended_tests). Empty state when not yet reviewed.

RedTeamRequestButton client component + POST /api/h/[id]/red-team —
authenticated user can trigger Schneier in one click; UI swaps to
"acompanhar" link to /jobs/[id] once queued.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:48:12 +00:00
+								import { runSchneier, type SchneierTask } from "./detectives/schneier";
-												W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard

Migrations:
- 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence,
  hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id
  sequences, pg_notify trigger on investigation_jobs, RLS read-only public,
  investigator role with least-privilege grants (no service_role).
- 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE
  policies bound to investigator + service_role + postgres (RLS with only a
  SELECT policy was silently blocking the worker's claim UPDATE).

investigator-runtime/ (new Bun + TS container):
- src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool,
  healthcheck file, graceful SIGTERM shutdown.
- src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard).
  Marks job failed when all per-item outputs error; surfaces first errors.
- src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool +
  dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc),
  claude -p subprocess with quota detection (api_error_status=429).
- src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps),
  resolve chunk_pk via FK, verify verbatim_excerpt actually appears in
  chunk content, INSERT + render case/evidence/E-NNNN.md + audit.
- src/detectives/locard.ts: load chunk → call Claude with locard.md system
  prompt → parse strict JSON → call writeEvidence locally.
- Dockerfile installs `claude` CLI (OAuth) at build time.

Compose:
- new `investigator` service builds from investigator-runtime/, connects
  with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap.

Web:
- /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1).
  POST creates a job, GET polls status. For W3.6 it becomes the chat tool.

End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch →
claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota
exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed
isQuota error, marks job failed with surfaced reason. Architecture proven;
quota reset enables real evidence creation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 22:49:33 +00:00
 								export interface InvestigationJob {
 								  job_id: string;
 								  kind: string;
 								  payload: Record<string, unknown>;
 								  triggered_by: string | null;
 								}
 								export async function dispatch(job: InvestigationJob, workerId: string): Promise<void> {
 								  await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId });
 								  let outputs: unknown[] = [];
 								  try {
 								    switch (job.kind) {
 								      case "evidence_chain": {
 								        // Payload shape: { doc_id, chunks?: [chunk_ids] } — fall back to scanning
 								        // the first 20 substantive chunks of the doc if not provided.
 								        const docId = String(job.payload.doc_id ?? "");
 								        if (!docId) throw new Error("evidence_chain requires payload.doc_id");
 								        const chunkIds = Array.isArray(job.payload.chunks)
 								          ? (job.payload.chunks as string[])
 								          : await pickEvidenceCandidates(docId, 5);
 								        if (chunkIds.length === 0) throw new Error(`no candidate chunks in ${docId}`);
 								        for (const chunk_id of chunkIds) {
 								          const task: LocardTask = {
 								            job_id: job.job_id,
 								            doc_id: docId,
 								            chunk_id,
 								            claim: typeof job.payload.claim === "string" ? job.payload.claim : undefined,
 								          };
 								          try {
 								            const r = await runLocard(task);
 								            outputs.push({ chunk_id, ...r });
 								          } catch (e) {
 								            outputs.push({ chunk_id, error: (e as Error).message });
 								          }
 								        }
 								        break;
 								      }
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" prior dropped 60%; both
  rivals climbed. 4 unique chunk citations across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
+								      case "hypothesis_tournament": {
 								        // Payload: { question, doc_id?, lang?, context_chunks? }
 								        const question = String(job.payload.question ?? "").trim();
 								        if (!question) throw new Error("hypothesis_tournament requires payload.question");
 								        const task: HolmesTask = {
 								          job_id: job.job_id,
 								          question,
 								          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
 								        };
 								        const r = await runHolmes(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "hypothesis_tournament", skipped: true, reason: r.reason });
 								        } else {
 								          for (const h of r.hypotheses) outputs.push({ kind: "hypothesis", ...h });
 								        }
 								        break;
 								      }
-												W3.8: Schneier red-team detective + /h/[hypothesisId] dossier page

Adds the fourth AI detective in the Investigation Bureau runtime: Bruce
Schneier, who attacks an existing hypothesis as a red-team operator.

Runtime:
  - prompts/schneier.md — discipline (don't disprove, just attack;
    structured output with hidden_assumptions, failure_modes,
    alternative_explanations, recommended_tests, verdict_one_sentence;
    severity ∈ {low, medium, high}; emit INSUFFICIENT_HYPOTHESIS when
    the input is too thin)
  - src/detectives/schneier.ts — reads the hypothesis row + evidence
    chain (joined via evidence_refs FK), feeds Claude with the
    arguments + verbatim quotes, parses strict JSON object
  - src/tools/write_red_team_review.ts — UPDATEs hypotheses.reviewed_by
    + updated_at; APPENDS (or replaces if re-reviewed) a structured
    "## Red-team review (Schneier · X severity)" section to
    case/hypotheses/H-NNNN.md. Caps each list at 5 entries × 240 chars,
    validates verdict ≤ 280 chars.
  - orchestrator: new `red_team_review` kind dispatching to runSchneier

Chat + UI:
  - request_investigation gains kind=red_team_review + hypothesis_id arg
    (validated against H-NNNN regex); detective auto-resolves to schneier
  - chat-bubble inline card paints Schneier in red (#ff3344)
  - /jobs/[id] page swaps title/subtitle/tone per detective; the
    "Question" label becomes "Hypothesis under attack" for red_team_review

New /h/[hypothesisId] page (hypothesis dossier):
  - Server-rendered from public.hypotheses + public.evidence (joined
    via evidence_refs FK + chunk lookup)
  - Header: ID + creator + reviewer (highlighted when Schneier has
    visited), position as headline, question subtitle, Tetlock band
  - Prior + posterior bars with Δ-delta indicator
  - Argument grid: argument_for (green) vs argument_against (pink)
    side-by-side with [[wiki-link]] auto-linking to source chunks
  - Evidence chain: each E-NNNN with Grade A/B/C badge, verbatim
    blockquote, link to source page
  - Red-team review panel: parses the markdown section in the case
    file (severity badge, verdict, 4 bullet panels for
    hidden_assumptions / failure_modes / alternative_explanations /
    recommended_tests). Empty state when not yet reviewed.

RedTeamRequestButton client component + POST /api/h/[id]/red-team —
authenticated user can trigger Schneier in one click; UI swaps to
"acompanhar" link to /jobs/[id] once queued.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:48:12 +00:00
+								      case "red_team_review": {
 								        // Payload: { hypothesis_id }
 								        const hyp = String(job.payload.hypothesis_id ?? "").trim();
 								        if (!hyp) throw new Error("red_team_review requires payload.hypothesis_id");
 								        const task: SchneierTask = { job_id: job.job_id, hypothesis_id: hyp };
 								        const r = await runSchneier(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "red_team_review", skipped: true, reason: r.reason });
 								        } else {
 								          outputs.push({ kind: "red_team_review", ...r });
 								        }
 								        break;
 								      }
-												W3.7: Dupin contradiction-scan detective + UI integration

Adds the third AI detective in the Investigation Bureau runtime: C. Auguste
Dupin, who scans a corpus shortlist for pairs (or small groups) of chunks
that cannot both be true under any ordinary reading.

Runtime:
  - prompts/dupin.md — discipline (no contradiction without ≥2 distinct
    chunk_ids; reject same-vocabulary near-misses; FEW high-confidence
    over MANY weak ones; emit `NO_CONTRADICTIONS` when corpus is silent)
  - src/detectives/dupin.ts — hybridSearch with k=18 (more chunks than
    Holmes because contradictions emerge from comparing dispersed
    claims), strict JSON-array parsing, AT MOST 3 contradictions per call
  - src/tools/write_contradiction.ts — validates topic + ≥2 positions
    drawn from ≥2 distinct chunks, resolves chunk_pk via DB lookup
    (rejects positions citing unknown chunks), INSERTs into
    public.contradictions + writes case/contradictions/R-NNNN.md
  - orchestrator: new `contradiction_scan` kind dispatching to runDupin;
    payload { topic, doc_id?, lang?, context_chunks? }

Chat + UI:
  - request_investigation gains kind=contradiction_scan + topic arg;
    triggered detective auto-resolves to dupin
  - chat-bubble inline card renders dupin in orange (#ff8a4d) to
    distinguish from holmes (cyan) and locard (green)
  - /jobs/[id] page swaps title + subtitle + tone per detective;
    "Question" label becomes "Topic" for contradiction_scan
  - /api/jobs/[id] hydrates public.contradictions when outputs[] surfaces
    contradiction_ids
  - job-status-poller renders ContradictionCard: topic + N positions
    (verbatim statements quoted, stance label optional, link to source
    chunk) + optional notes panel, with resolution_status badge
    (open/resolved/irreconcilable)

R-NNNN shares the contradiction_id_seq slot with relation per
CLAUDE.md naming — same conceptual class (a connection between two
pieces of evidence in tension).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:34:04 +00:00
+								      case "contradiction_scan": {
 								        // Payload: { topic, doc_id?, lang?, context_chunks? }
 								        const topic = String(job.payload.topic ?? "").trim();
 								        if (!topic) throw new Error("contradiction_scan requires payload.topic");
 								        const task: DupinTask = {
 								          job_id: job.job_id,
 								          topic,
 								          doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined,
 								          lang: job.payload.lang === "en" ? "en" : "pt",
 								          context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined,
 								        };
 								        const r = await runDupin(task);
 								        if ("skipped" in r) {
 								          outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason });
 								        } else {
 								          for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c });
 								        }
 								        break;
 								      }
-												W3.5: Holmes hypothesis tournament detective

Adds the second AI detective in the Investigation Bureau runtime: Sherlock
Holmes, who builds 2-3 rival hypotheses with calibrated priors + posteriors
against a corpus shortlist.

Pipeline:
  1. hybridSearch() grounds Holmes with 8-15 chunks via the same
     hybrid_search_chunks RPC the web uses (BM25 + dense + RRF). Default
     max_dense_dist=0.55 (runtime favors recall over precision; web's
     /api/search/hybrid stays at 0.40 for chat).
  2. claude-sonnet-4-6 emits a strict JSON array with position +
     argument_for + argument_against + prior + posterior + confidence_band
     + evidence_refs. Citations use [[doc-id/pNNN#cNNNN]] wiki-links.
  3. writeHypothesis() validates posterior ∈ [0,1], auto-corrects the
     Tetlock band from the posterior (high ≥0.90, medium 0.60-0.89,
     low 0.30-0.59, speculation <0.30), checks evidence_refs FK against
     public.evidence, INSERTs into public.hypotheses + writes
     case/hypotheses/H-NNNN.md.

Discipline guarantees (prompts/holmes.md):
  - posteriors across rivals sum to ≈1.0
  - no claim without chunk citation
  - prefer lower band when ambiguous (anti-inflation)
  - declarative one-sentence position, no hedging
  - emit `NO_HYPOTHESES` when corpus is silent (refuses to fabricate)

Smoke test (Sandia green fireballs 1948-49):
  - H-0001 prior 0.5 → posterior 0.2 (speculation): natural meteoric
  - H-0002 prior 0.3 → posterior 0.4 (low): classified weapons / tests
  - H-0003 prior 0.2 → posterior 0.4 (low): genuinely unidentified
  Bayesian update visible: "natural meteoric" prior dropped 60%; both
  rivals climbed. 4 unique chunk citations across the 3 hypotheses.

orchestrator dispatches `hypothesis_tournament` kind via runHolmes;
job marked `failed` if all rivals error, `complete` otherwise.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:19:43 +00:00
-												W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard

Migrations:
- 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence,
  hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id
  sequences, pg_notify trigger on investigation_jobs, RLS read-only public,
  investigator role with least-privilege grants (no service_role).
- 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE
  policies bound to investigator + service_role + postgres (RLS with only a
  SELECT policy was silently blocking the worker's claim UPDATE).

investigator-runtime/ (new Bun + TS container):
- src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool,
  healthcheck file, graceful SIGTERM shutdown.
- src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard).
  Marks job failed when all per-item outputs error; surfaces first errors.
- src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool +
  dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc),
  claude -p subprocess with quota detection (api_error_status=429).
- src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps),
  resolve chunk_pk via FK, verify verbatim_excerpt actually appears in
  chunk content, INSERT + render case/evidence/E-NNNN.md + audit.
- src/detectives/locard.ts: load chunk → call Claude with locard.md system
  prompt → parse strict JSON → call writeEvidence locally.
- Dockerfile installs `claude` CLI (OAuth) at build time.

Compose:
- new `investigator` service builds from investigator-runtime/, connects
  with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap.

Web:
- /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1).
  POST creates a job, GET polls status. For W3.6 it becomes the chat tool.

End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch →
claude subprocess invoked. Auth works (CLI v2.1.150). Currently quota
exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed
isQuota error, marks job failed with surfaced reason. Architecture proven;
quota reset enables real evidence creation.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 22:49:33 +00:00
+								      default:
 								        throw new Error(`unknown_kind: ${job.kind}`);
 								    }
 								    // Status reflects reality: if every per-item attempt errored we mark
 								    // the job failed (so the UI doesn't say "complete" when nothing useful
 								    // was produced); if at least one succeeded we keep `complete` with the
 								    // mixed outputs payload.
 								    const allErrors = outputs.length > 0 && outputs.every(
 								      (o): o is { error: string } => typeof (o as { error?: unknown }).error === "string",
 								    );
 								    const summary = (() => {
 								      if (!allErrors) return null;
 								      // First few error messages, surfaced to the user via the jobs table.
 								      return outputs
 								        .map((o) => (o as { error?: string }).error)
 								        .filter((e): e is string => Boolean(e))
 								        .slice(0, 3)
 								        .join(" | ");
 								    })();
 								    await query(
 								      `UPDATE public.investigation_jobs
 								          SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3
 								        WHERE job_id = $4`,
 								      [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id],
 								    );
 								    await audit({
 								      event: allErrors ? "job_failed_all_items" : "job_completed",
 								      job_id: job.job_id,
 								      kind: job.kind,
 								      n_outputs: outputs.length,
 								      ...(summary ? { summary } : {}),
 								    });
 								  } catch (e) {
 								    const err = (e as Error).message;
 								    await query(
 								      `UPDATE public.investigation_jobs
 								          SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb
 								        WHERE job_id = $3`,
 								      [err, JSON.stringify(outputs), job.job_id],
 								    );
 								    await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err });
 								  }
 								}
 								/**
 								 * Select up to `limit` chunk ids from one document to feed the
 								 * `evidence_chain` detective when the job payload names no chunks.
 								 *
 								 * A chunk qualifies when it is flagged `is_searchable` and its text
 								 * (`content_en`, falling back to `content_pt`) is longer than 200 chars.
 								 * Note the query does not filter by chunk type; the length threshold is
 								 * the only proxy for "substantive" content.
 								 *
 								 * Ordering: anomaly-flagged chunks (`ufo_anomaly`) first, then reading
 								 * order (page, then position within the page).
 								 *
 								 * @param doc_id Document whose chunks are scanned.
 								 * @param limit  Maximum number of chunk ids to return.
 								 * @returns Chunk ids in priority order; empty when no chunk qualifies.
 								 */
 								async function pickEvidenceCandidates(doc_id: string, limit: number): Promise<string[]> {
 								  // NULLS LAST is explicit: PostgreSQL puts NULLs first under DESC, which
 								  // would rank chunks with an unset flag ahead of flagged ones. It is a
 								  // no-op if the column turns out to be NOT NULL.
 								  const rows = await query<{ chunk_id: string }>(
 								    `SELECT chunk_id
 								       FROM public.chunks
 								      WHERE doc_id = $1
 								        AND is_searchable
 								        AND LENGTH(COALESCE(content_en, content_pt, '')) > 200
 								      ORDER BY ufo_anomaly DESC NULLS LAST, page ASC, order_in_page ASC
 								      LIMIT $2`,
 								    [doc_id, limit],
 								  );
 								  return rows.map((r) => r.chunk_id);
 								}