disclosure-bureau/investigator-runtime/src/tools/write_calibration.ts

/**
 * write_calibration.ts — Tetlock's primary writer.
 *
 * UPDATEs public.hypotheses (posterior + confidence_band + reviewed_by +
 * updated_at; status = 'superseded' when the action is "supersede") and
 * APPENDS an entry to the "## Calibration history" section of the H-NNNN.md
 * case file, creating the section if it is missing. Each entry includes a
 * timestamp + old/new posterior and band + delta + recommended_action +
 * rationale.
 */
import { readFile, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { query, queryOne } from "../lib/pg";

/**
 * Input payload for writeCalibration: one calibration verdict for a single
 * hypothesis.
 */
export interface WriteCalibrationArgs {
  /** Target hypothesis; writeCalibration rejects anything not shaped like `H-` + four digits. */
  hypothesis_id: string;
  /** Updated posterior; writeCalibration requires a finite value in [0, 1]. */
  new_posterior: number;
  /**
   * Band label for the new posterior. writeCalibration overwrites this field
   * with the band it derives from `new_posterior`, so the supplied value is
   * not trusted.
   */
  new_confidence_band: "high" | "medium" | "low" | "speculation";
  /**
   * Signed change in posterior, rendered to three decimals in the case file.
   * With a "keep" action, |delta| < 0.005 means the stored posterior and band
   * are left untouched.
   * NOTE(review): nothing visible here checks that delta equals
   * new_posterior - old_posterior — confirm the caller guarantees it.
   */
  delta: number;
  /** Free-text justification; must be non-blank and at most 600 characters. */
  rationale: string;
  /** Verdict; "supersede" additionally sets the hypothesis status to 'superseded'. */
  recommended_action: "keep" | "downgrade" | "upgrade" | "supersede";
  /** Must be non-blank when recommended_action is "supersede"; written to the case file in that case. */
  supersede_reason?: string;
  /** Previous posterior captured at call time — used in the case-file row; null renders as "—". */
  old_posterior: number | null;
  /** Previous band captured at call time — used in the case-file row; null renders as "—". */
  old_confidence_band: string | null;
}

/** Identifies who is writing the calibration and on behalf of which job. */
export interface WriteCalibrationContext {
  /** Job identifier; echoed into the case-file entry and the audit event. */
  job_id: string;
  /** Detective name; stored in `reviewed_by` and echoed into the case-file entry and the audit event. */
  detective: string;
}

/** Markdown heading of the case-file section that calibration entries are appended under. */
const SECTION_MARKER = "## Calibration history";

/**
 * Maps a posterior onto its confidence band.
 *
 * Thresholds are inclusive lower bounds: >= 0.90 is "high", >= 0.60 is
 * "medium", >= 0.30 is "low". Anything else (including NaN, which fails every
 * comparison) is "speculation".
 */
function bandFromPosterior(p: number): "high" | "medium" | "low" | "speculation" {
  // Ordered from the highest floor down so the first match wins.
  const ladder: ReadonlyArray<readonly [number, "high" | "medium" | "low"]> = [
    [0.90, "high"],
    [0.60, "medium"],
    [0.30, "low"],
  ];
  for (const [floor, label] of ladder) {
    if (p >= floor) {
      return label;
    }
  }
  return "speculation";
}

/**
 * Renders one calibration entry as markdown.
 *
 * Layout: a `###` heading (current ISO timestamp + action), an attribution
 * line, an old/new table for posterior, band and delta, the rationale, and —
 * only for a "supersede" action with a non-empty reason — the supersede
 * reason. Paragraphs are separated by one blank line and the result ends with
 * a single newline.
 */
function buildSection(args: WriteCalibrationArgs, ctx: WriteCalibrationContext): string {
  const stamp = new Date().toISOString();
  const priorPosterior = args.old_posterior ?? "—";
  const priorBand = args.old_confidence_band ?? "—";
  // Non-negative deltas get an explicit "+"; negatives already carry "-".
  const signedDelta = (args.delta >= 0 ? "+" : "") + args.delta.toFixed(3);

  const table = [
    `| field | old | new |`,
    `|---|---|---|`,
    `| posterior | ${priorPosterior} | **${args.new_posterior}** |`,
    `| band | ${priorBand} | **${args.new_confidence_band}** |`,
    `| delta | — | ${signedDelta} |`,
  ].join("\n");

  const paragraphs = [
    `### ${stamp} — ${args.recommended_action}`,
    `_Calibrated by ${ctx.detective} — job \`${ctx.job_id}\`._`,
    table,
    `**Rationale.** ${args.rationale}`,
  ];

  const isSupersede = args.recommended_action === "supersede";
  if (isSupersede && args.supersede_reason) {
    paragraphs.push(`**Supersede reason.** ${args.supersede_reason}`);
  }

  return paragraphs.join("\n\n") + "\n";
}

/**
 * Returns `existing` with `section` added to the calibration history.
 *
 * Calibration history is APPEND-only (Tetlock can be invoked many times and
 * each datapoint matters), so earlier entries are never rewritten.
 *
 * - No history heading yet: the heading and the entry are added at the end of
 *   the file.
 * - History heading present and it is the last section: the entry is added at
 *   the end of the file.
 * - History heading present but followed by another `#`/`##` heading: the
 *   entry is inserted just before that heading, so it stays inside the
 *   calibration section instead of landing under an unrelated one.
 *
 * The heading is recognised on any line, including the very first, when the
 * line equals the marker (trailing whitespace ignored).
 *
 * @param existing Current contents of the case file.
 * @param section  Rendered calibration entry; expected to end with a newline.
 * @returns The new file contents.
 */
function appendCalibration(existing: string, section: string): string {
  const lines = existing.split("\n");
  const start = lines.findIndex((line) => line.trimEnd() === SECTION_MARKER);
  if (start === -1) {
    return existing.trimEnd() + "\n\n" + SECTION_MARKER + "\n\n" + section;
  }

  // Find the next level-1/2 heading after the marker. Entries use "###", so
  // they never terminate the section. Lines inside fenced code blocks are
  // skipped so a "# comment" in a snippet is not mistaken for a heading.
  let end = -1;
  let inFence = false;
  for (let i = start + 1; i < lines.length; i++) {
    const line = lines[i] ?? "";
    if (/^\s{0,3}(```|~~~)/.test(line)) {
      inFence = !inFence;
      continue;
    }
    if (!inFence && /^#{1,2}\s/.test(line)) {
      end = i;
      break;
    }
  }

  if (end === -1) {
    // The history is the last section: plain append at end of file.
    return existing.trimEnd() + "\n" + section;
  }

  const head = lines.slice(0, end).join("\n").trimEnd();
  const tail = lines.slice(end).join("\n");
  // `section` ends with a newline; the extra "\n" leaves one blank line
  // before the heading that follows.
  return head + "\n" + section + "\n" + tail;
}

/**
 * Persists one calibration verdict for a hypothesis.
 *
 * Steps, in order:
 *  1. Validate the payload (id shape, posterior range, finite delta,
 *     rationale, action, supersede reason).
 *  2. Confirm the hypothesis row exists.
 *  3. Read the case file. This happens BEFORE the DB write so that a missing
 *     case file fails the call without leaving the DB updated and the history
 *     unwritten.
 *  4. UPDATE public.hypotheses:
 *       - "supersede": posterior, band, status = 'superseded', reviewer.
 *       - "keep" with |delta| < 0.005: reviewer + updated_at only.
 *       - anything else: posterior, band, reviewer.
 *  5. Append the rendered entry to the case file and write it back.
 *  6. Emit a "write_calibration" audit event.
 *
 * Side effect on the argument: `body.new_confidence_band` is overwritten with
 * the band derived from `body.new_posterior` (the supplied label is not
 * trusted).
 *
 * The DB update and the file write are not transactional: a failure while
 * writing the file still leaves the DB updated.
 *
 * @param body Calibration payload.
 * @param ctx  Job id and detective name used for attribution.
 * @returns The hypothesis id, case-file path, stored posterior and action.
 * @throws Error on any validation failure, when the hypothesis row is not
 *         found, or when the case file cannot be read.
 */
export async function writeCalibration(
  body: WriteCalibrationArgs,
  ctx: WriteCalibrationContext,
): Promise<{ hypothesis_id: string; case_file: string; new_posterior: number; recommended_action: string }> {
  if (!body.hypothesis_id?.match(/^H-\d{4}$/)) {
    throw new Error(`bad hypothesis_id: ${body.hypothesis_id}`);
  }
  if (!Number.isFinite(body.new_posterior) || body.new_posterior < 0 || body.new_posterior > 1) {
    throw new Error(`new_posterior out of range: ${body.new_posterior}`);
  }
  // delta is formatted with toFixed() when the entry is rendered, which only
  // happens after the DB write. Reject a missing or non-finite value here so
  // it cannot fail (or write "NaN") half-way through.
  if (!Number.isFinite(body.delta)) {
    throw new Error(`delta must be a finite number: ${body.delta}`);
  }
  // Force the band to match the posterior — Tetlock can mis-label.
  body.new_confidence_band = bandFromPosterior(body.new_posterior);
  if (!body.rationale?.trim()) throw new Error("rationale required");
  if (body.rationale.length > 600) throw new Error(`rationale too long`);

  const action = body.recommended_action;
  if (!["keep", "downgrade", "upgrade", "supersede"].includes(action)) {
    throw new Error(`bad recommended_action: ${action}`);
  }
  if (action === "supersede" && !body.supersede_reason?.trim()) {
    throw new Error("supersede_reason required when action == supersede");
  }

  // Verify hypothesis exists.
  const h = await queryOne<{ hypothesis_id: string; status: string }>(
    `SELECT hypothesis_id, status FROM public.hypotheses WHERE hypothesis_id = $1`,
    [body.hypothesis_id],
  );
  if (!h) throw new Error(`hypothesis not found: ${body.hypothesis_id}`);

  // Load the case file up front: if it is missing we must fail before the DB
  // is touched, otherwise the row changes with no matching history entry.
  const file = path.join(env.CASE_ROOT, "hypotheses", `${body.hypothesis_id}.md`);
  let existing: string;
  try {
    existing = await readFile(file, "utf-8");
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`hypothesis case file missing: ${file} (${reason})`);
  }

  // UPDATE DB: posterior + band (unless pure keep), status='superseded' when action=='supersede'.
  if (action === "supersede") {
    await query(
      `UPDATE public.hypotheses
          SET posterior = $1, confidence_band = $2, status = 'superseded',
              reviewed_by = $3, updated_at = NOW()
        WHERE hypothesis_id = $4`,
      [body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],
    );
  } else if (action === "keep" && Math.abs(body.delta) < 0.005) {
    // Pure keep with no movement — only touch updated_at + reviewed_by.
    await query(
      `UPDATE public.hypotheses
          SET reviewed_by = $1, updated_at = NOW()
        WHERE hypothesis_id = $2`,
      [ctx.detective, body.hypothesis_id],
    );
  } else {
    await query(
      `UPDATE public.hypotheses
          SET posterior = $1, confidence_band = $2,
              reviewed_by = $3, updated_at = NOW()
        WHERE hypothesis_id = $4`,
      [body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],
    );
  }

  // Append calibration row to the case file.
  const section = buildSection(body, ctx);
  const next = appendCalibration(existing, section);
  await writeFile(file, next, "utf-8");

  await audit({
    event: "write_calibration",
    job_id: ctx.job_id,
    detective: ctx.detective,
    hypothesis_id: body.hypothesis_id,
    new_posterior: body.new_posterior,
    new_confidence_band: body.new_confidence_band,
    delta: body.delta,
    recommended_action: action,
    file,
  });

  return {
    hypothesis_id: body.hypothesis_id, case_file: file,
    new_posterior: body.new_posterior,
    recommended_action: action,
  };
}
W3.8: Investigation Bureau complete — Poirot, Taleb, Tetlock, Case-Writer Brings the bureau from 4 → 8 detectives. All eight run as Bun + claude-CLI subprocesses against the same Supabase + investigation_jobs LISTEN/NOTIFY queue, sharing search.ts hybridSearch and writer-side validators that gate writes against schema + FK. New detectives: Poirot (witness_analysis) - prompts/poirot.md — credibility / access / bias / corroboration / verdict; uses entity_mentions JOIN chunks to pull 12 chunks per person; resolves corroboration_refs chunk_ids defensively (accepts bare cNNNN even when the model emits pNNN/cNNNN). - INSERT into public.witnesses with W-NNNN naming. - Tone: purple (#9b5de5). Taleb (outlier_scan) - prompts/taleb.md — "surprise is relative to a model"; at most 3 outliers; each requires explicit dominant_model + why_surprising + what_it_implies; fan-out into public.gaps with scope.kind="outlier". - Same unscoped-fallback as Dupin (Pass 1 with doc_id, Pass 2 widens to corpus if hits < 3). - Tone: yellow (#ffd23f). Tetlock (calibrate_hypothesis) - prompts/tetlock.md — honest Bayesian update; emits new_posterior + Δ + recommended_action ∈ {keep, downgrade, upgrade, supersede}. - write_calibration UPDATEs public.hypotheses + APPENDS a "## Calibration history" section to the H-NNNN.md case file (calibration is append-only — each datapoint matters). Posterior band auto-corrected to match Tetlock thresholds. - NO_NEW_EVIDENCE sentinel handled; pure 'keep' with \|Δ\|<0.005 only touches updated_at + reviewed_by. - Tone: teal (#26d4cc). Case-Writer (case_report) - prompts/case-writer.md — Dr. Watson assembles all artefacts (E-NNNN, H-NNNN, R-NNNN, W-NNNN, G-NNNN) into a five-act narrative. ILIKE filter on topic; doc_id optional scope. - Larger budget cap (≥ $0.50) + longer timeout for prose generation. - Writes case/reports/<slug>.md with frontmatter (topic + counts); no DB table for v0. - New page /c/[slug] renders the report via MarkdownBody + stat chips. 
- Tone: gold (#e0c080). Hardening across the bureau: - Sentinel parsing now accepts backticked AND prose-trailing forms (Holmes NO_HYPOTHESES, Dupin NO_CONTRADICTIONS, Schneier INSUFFICIENT_HYPOTHESIS, Poirot INSUFFICIENT_TESTIMONY, Taleb NO_OUTLIERS, Tetlock NO_NEW_EVIDENCE, Case-Writer INSUFFICIENT_ARTEFACTS). Avoids the failure mode where the model refuses honestly but the runtime treated it as a parse error (observed live with Poirot+Hoover identifying the DIRECTOR false-positive disambiguation issue in entity_mentions). Chat tool extensions (web/lib/chat/tools.ts): - request_investigation now accepts 7 kinds. Each routes to its detective with appropriate validation (hypothesis_id regex, person_id kebab-case, topic non-empty, doc_id for evidence_chain). - ETA per kind: Holmes/Dupin 60s, Poirot 45s, Schneier/Tetlock 30s, Taleb 50s, Case-Writer 180s (longer prose), Locard 30×n_chunks. UI integration: - chat-bubble inline card paints each detective in its tone color. - /jobs/[id] page header swaps name/subtitle/tone per detective; question label adapts ("Topic" / "Hypothesis under attack" / "Witness under analysis" / "Topic to outlier-scan" / "Hypothesis under recalibration" / "Case to assemble"). - job-status-poller renders: case-report link card (gold), outlier cards (yellow), witness cards (purple) — alongside existing hypothesis, evidence, contradiction cards. - /api/jobs/[id] hydrates witnesses (JOIN entities for canonical_name) + gaps (with scope JSONB). - /c/[slug] page reads /data/ufo/case/reports/<slug>.md and renders with MarkdownBody, frontmatter parsed for stat chips. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-24 01:11:39 +00:00			`/**`
			`* write_calibration.ts — Tetlock's primary writer.`
			`*`
			`* UPDATEs public.hypotheses (posterior + confidence_band + reviewed_by +`
			`* updated_at) and APPENDS (or replaces) a "## Calibration history" section`
			`* to the H-NNNN.md case file. Each calibration includes a timestamp +`
			`* old/new posterior + recommended_action + rationale.`
			`*/`
			`import { readFile, writeFile } from "node:fs/promises";`
			`import path from "node:path";`
			`import { audit } from "../lib/audit";`
			`import { env } from "../lib/env";`
			`import { query, queryOne } from "../lib/pg";`

			`export interface WriteCalibrationArgs {`
			`hypothesis_id: string;`
			`new_posterior: number;`
			`new_confidence_band: "high" \| "medium" \| "low" \| "speculation";`
			`delta: number;`
			`rationale: string;`
			`recommended_action: "keep" \| "downgrade" \| "upgrade" \| "supersede";`
			`supersede_reason?: string;`
			`/** previous posterior captured at call time — used in the case-file row. */`
			`old_posterior: number \| null;`
			`old_confidence_band: string \| null;`
			`}`

			`export interface WriteCalibrationContext {`
			`job_id: string;`
			`detective: string;`
			`}`

			`const SECTION_MARKER = "## Calibration history";`

			`function bandFromPosterior(p: number): "high" \| "medium" \| "low" \| "speculation" {`
			`if (p >= 0.90) return "high";`
			`if (p >= 0.60) return "medium";`
			`if (p >= 0.30) return "low";`
			`return "speculation";`
			`}`

			`function buildSection(args: WriteCalibrationArgs, ctx: WriteCalibrationContext): string {`
			`const ts = new Date().toISOString();`
			`const rows = [`
			`### ${ts} — ${args.recommended_action}`,
			`"",`
			`_Calibrated by ${ctx.detective} — job \`${ctx.job_id}\`._`,
			`"",`
			`\| field \| old \| new \|`,
			`\|---\|---\|---\|`,
			`\| posterior \| ${args.old_posterior ?? "—"} \| ${args.new_posterior} \|`,
			`\| band \| ${args.old_confidence_band ?? "—"} \| ${args.new_confidence_band} \|`,
			`\| delta \| — \| ${args.delta >= 0 ? "+" : ""}${args.delta.toFixed(3)} \|`,
			`"",`
			`Rationale. ${args.rationale}`,
			`];`
			`if (args.recommended_action === "supersede" && args.supersede_reason) {`
			rows.push("", `Supersede reason. ${args.supersede_reason}`);
			`}`
			`rows.push("");`
			`return rows.join("\n");`
			`}`

			`function appendCalibration(existing: string, section: string): string {`
			`// Calibration history is APPEND-only (Tetlock can be invoked many times`
			`// and each datapoint matters). Find the section, append; create it if`
			`// missing.`
			const idx = existing.indexOf(`\n${SECTION_MARKER}`);
			`if (idx === -1) {`
			`return existing.trimEnd() + "\n\n" + SECTION_MARKER + "\n\n" + section;`
			`}`
			`return existing.trimEnd() + "\n" + section;`
			`}`

			`export async function writeCalibration(`
			`body: WriteCalibrationArgs,`
			`ctx: WriteCalibrationContext,`
			`): Promise<{ hypothesis_id: string; case_file: string; new_posterior: number; recommended_action: string }> {`
			`if (!body.hypothesis_id?.match(/^H-\d{4}$/)) {`
			throw new Error(`bad hypothesis_id: ${body.hypothesis_id}`);
			`}`
			`if (!Number.isFinite(body.new_posterior) \|\| body.new_posterior < 0 \|\| body.new_posterior > 1) {`
			throw new Error(`new_posterior out of range: ${body.new_posterior}`);
			`}`
			`const expectedBand = bandFromPosterior(body.new_posterior);`
			`// Force the band to match the posterior — Tetlock can mis-label.`
			`body.new_confidence_band = expectedBand;`
			`if (!body.rationale?.trim()) throw new Error("rationale required");`
			if (body.rationale.length > 600) throw new Error(`rationale too long`);

			`const action = body.recommended_action;`
			`if (!["keep", "downgrade", "upgrade", "supersede"].includes(action)) {`
			throw new Error(`bad recommended_action: ${action}`);
			`}`
			`if (action === "supersede" && !body.supersede_reason?.trim()) {`
			`throw new Error("supersede_reason required when action == supersede");`
			`}`

			`// Verify hypothesis exists.`
			`const h = await queryOne<{ hypothesis_id: string; status: string }>(`
			`SELECT hypothesis_id, status FROM public.hypotheses WHERE hypothesis_id = $1`,
			`[body.hypothesis_id],`
			`);`
			if (!h) throw new Error(`hypothesis not found: ${body.hypothesis_id}`);

			`// UPDATE DB: posterior + band (always), status='superseded' when action=='supersede'.`
			`if (action === "supersede") {`
			`await query(`
			`UPDATE public.hypotheses
			`SET posterior = $1, confidence_band = $2, status = 'superseded',`
			`reviewed_by = $3, updated_at = NOW()`
			WHERE hypothesis_id = $4`,
			`[body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],`
			`);`
			`} else if (action === "keep" && Math.abs(body.delta) < 0.005) {`
			`// Pure keep with no movement — only touch updated_at + reviewed_by.`
			`await query(`
			`UPDATE public.hypotheses
			`SET reviewed_by = $1, updated_at = NOW()`
			WHERE hypothesis_id = $2`,
			`[ctx.detective, body.hypothesis_id],`
			`);`
			`} else {`
			`await query(`
			`UPDATE public.hypotheses
			`SET posterior = $1, confidence_band = $2,`
			`reviewed_by = $3, updated_at = NOW()`
			WHERE hypothesis_id = $4`,
			`[body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],`
			`);`
			`}`

			`// Append calibration row to the case file.`
			const file = path.join(env.CASE_ROOT, "hypotheses", `${body.hypothesis_id}.md`);
			`let existing: string;`
			`try {`
			`existing = await readFile(file, "utf-8");`
			`} catch (e) {`
			throw new Error(`hypothesis case file missing: ${file} (${(e as Error).message})`);
			`}`
			`const section = buildSection(body, ctx);`
			`const next = appendCalibration(existing, section);`
			`await writeFile(file, next, "utf-8");`

			`await audit({`
			`event: "write_calibration",`
			`job_id: ctx.job_id,`
			`detective: ctx.detective,`
			`hypothesis_id: body.hypothesis_id,`
			`new_posterior: body.new_posterior,`
			`new_confidence_band: body.new_confidence_band,`
			`delta: body.delta,`
			`recommended_action: action,`
			`file,`
			`});`

			`return {`
			`hypothesis_id: body.hypothesis_id, case_file: file,`
			`new_posterior: body.new_posterior,`
			`recommended_action: action,`
			`};`
			`}`