/**
 * write_calibration.ts — Tetlock's primary writer.
 *
 * UPDATEs public.hypotheses (posterior + confidence_band + reviewed_by +
 * updated_at) and APPENDS an entry under the "## Calibration history"
 * section of the H-NNNN.md case file (creating the section when it does not
 * exist yet). Each calibration includes a timestamp + old/new posterior +
 * recommended_action + rationale.
 */

import { readFile, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { query, queryOne } from "../lib/pg";

/** Payload describing one calibration of a hypothesis. */
export interface WriteCalibrationArgs {
  hypothesis_id: string;
  new_posterior: number;
  new_confidence_band: "high" | "medium" | "low" | "speculation";
  delta: number;
  rationale: string;
  recommended_action: "keep" | "downgrade" | "upgrade" | "supersede";
  supersede_reason?: string;
  /** previous posterior captured at call time — used in the case-file row. */
  old_posterior: number | null;
  old_confidence_band: string | null;
}

/** Identifies the job and the detective performing the calibration. */
export interface WriteCalibrationContext {
  job_id: string;
  detective: string;
}

/** Markdown heading under which calibration entries are collected. */
const SECTION_MARKER = "## Calibration history";

/** Shape accepted for hypothesis ids: "H-" followed by exactly four digits. */
const HYPOTHESIS_ID_RE = /^H-\d{4}$/;

/** Hard upper bound on the rationale length, in characters. */
const MAX_RATIONALE_CHARS = 1200;

/** Every value accepted for `recommended_action`. */
const RECOMMENDED_ACTIONS: readonly string[] = ["keep", "downgrade", "upgrade", "supersede"];

/**
 * Maps a posterior probability onto its confidence band.
 *
 * Thresholds: >= 0.90 "high", >= 0.60 "medium", >= 0.30 "low",
 * anything lower "speculation".
 */
function bandFromPosterior(p: number): "high" | "medium" | "low" | "speculation" {
  if (p >= 0.90) return "high";
  if (p >= 0.60) return "medium";
  if (p >= 0.30) return "low";
  return "speculation";
}

/**
 * Renders one calibration entry as markdown: a level-3 heading carrying the
 * current ISO timestamp and the action, an attribution line, an old/new
 * table, the rationale and — for "supersede" with a reason — the reason.
 * The returned text ends with a newline.
 */
function buildSection(args: WriteCalibrationArgs, ctx: WriteCalibrationContext): string {
  const ts = new Date().toISOString();
  const rows = [
    `### ${ts} — ${args.recommended_action}`,
    "",
    `_Calibrated by ${ctx.detective} — job \`${ctx.job_id}\`._`,
    "",
    `| field | old | new |`,
    `|---|---|---|`,
    `| posterior | ${args.old_posterior ?? "—"} | **${args.new_posterior}** |`,
    `| band | ${args.old_confidence_band ?? "—"} | **${args.new_confidence_band}** |`,
    // Non-negative deltas get an explicit "+" so the direction is always visible.
    `| delta | — | ${args.delta >= 0 ? "+" : ""}${args.delta.toFixed(3)} |`,
    "",
    `**Rationale.** ${args.rationale}`,
  ];
  if (args.recommended_action === "supersede" && args.supersede_reason) {
    rows.push("", `**Supersede reason.** ${args.supersede_reason}`);
  }
  rows.push("");
  return rows.join("\n");
}

/**
 * Returns `existing` with `section` appended at the end of the document.
 *
 * Calibration history is APPEND-only (Tetlock can be invoked many times
 * and each datapoint matters). When the document has no calibration heading
 * yet, the heading is created first.
 */
function appendCalibration(existing: string, section: string): string {
  const hasSection = existing.includes(`\n${SECTION_MARKER}`);
  if (!hasSection) {
    return existing.trimEnd() + "\n\n" + SECTION_MARKER + "\n\n" + section;
  }
  // NOTE(review): the entry always lands at the END of the file. That is only
  // "inside" the calibration section while it is the last section of the case
  // file — confirm no other writer adds sections after it.
  return existing.trimEnd() + "\n" + section;
}

/**
 * Records one calibration: validates the payload, updates the hypothesis row
 * in public.hypotheses, appends an entry to the hypothesis case file and
 * emits a "write_calibration" audit event.
 *
 * Side effect on the argument: `body.new_confidence_band` is overwritten with
 * the band derived from `body.new_posterior`.
 *
 * @param body Calibration payload.
 * @param ctx Job id and detective name used for attribution.
 * @returns The hypothesis id, the case-file path, the new posterior and the
 *   recommended action.
 * @throws Error when the payload is invalid, the hypothesis row does not
 *   exist, or the case file cannot be read.
 */
export async function writeCalibration(
  body: WriteCalibrationArgs,
  ctx: WriteCalibrationContext,
): Promise<{ hypothesis_id: string; case_file: string; new_posterior: number; recommended_action: string }> {
  // The typeof guard turns a non-string id into the intended error instead of
  // a TypeError from calling a string method on it.
  if (typeof body.hypothesis_id !== "string" || !HYPOTHESIS_ID_RE.test(body.hypothesis_id)) {
    throw new Error(`bad hypothesis_id: ${body.hypothesis_id}`);
  }
  if (!Number.isFinite(body.new_posterior) || body.new_posterior < 0 || body.new_posterior > 1) {
    throw new Error(`new_posterior out of range: ${body.new_posterior}`);
  }
  // Force the band to match the posterior — Tetlock can mis-label.
  body.new_confidence_band = bandFromPosterior(body.new_posterior);

  if (!body.rationale?.trim()) throw new Error("rationale required");
  // Soft cap: 1200 chars. Tetlock often writes 600-800 of substantive
  // reasoning + chunk citations; the prompt asks for ≤ 600 but a 2× slack
  // beats failing the job on an honest analysis.
  if (body.rationale.length > MAX_RATIONALE_CHARS) {
    throw new Error(`rationale too long (${body.rationale.length} > ${MAX_RATIONALE_CHARS})`);
  }

  const action = body.recommended_action;
  if (!RECOMMENDED_ACTIONS.includes(action)) {
    throw new Error(`bad recommended_action: ${action}`);
  }
  if (action === "supersede" && !body.supersede_reason?.trim()) {
    throw new Error("supersede_reason required when action == supersede");
  }
  // delta is formatted with toFixed() and compared numerically below; reject
  // anything that is not a finite number up front, before any write happens.
  if (!Number.isFinite(body.delta)) {
    throw new Error(`delta must be a finite number: ${body.delta}`);
  }

  // Verify hypothesis exists.
  const h = await queryOne<{ hypothesis_id: string; status: string }>(
    `SELECT hypothesis_id, status
       FROM public.hypotheses
      WHERE hypothesis_id = $1`,
    [body.hypothesis_id],
  );
  if (!h) throw new Error(`hypothesis not found: ${body.hypothesis_id}`);

  // Read the case file and render the new entry BEFORE touching the database,
  // so a missing file or a rendering failure cannot leave the row updated
  // without a matching case-file entry.
  const file = path.join(env.CASE_ROOT, "hypotheses", `${body.hypothesis_id}.md`);
  let existing: string;
  try {
    existing = await readFile(file, "utf-8");
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`hypothesis case file missing: ${file} (${reason})`);
  }
  const section = buildSection(body, ctx);
  const next = appendCalibration(existing, section);

  // UPDATE DB: posterior + band (always), status='superseded' when action=='supersede'.
  if (action === "supersede") {
    await query(
      `UPDATE public.hypotheses
          SET posterior = $1,
              confidence_band = $2,
              status = 'superseded',
              reviewed_by = $3,
              updated_at = NOW()
        WHERE hypothesis_id = $4`,
      [body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],
    );
  } else if (action === "keep" && Math.abs(body.delta) < 0.005) {
    // Pure keep with no movement — only touch updated_at + reviewed_by.
    await query(
      `UPDATE public.hypotheses
          SET reviewed_by = $1,
              updated_at = NOW()
        WHERE hypothesis_id = $2`,
      [ctx.detective, body.hypothesis_id],
    );
  } else {
    await query(
      `UPDATE public.hypotheses
          SET posterior = $1,
              confidence_band = $2,
              reviewed_by = $3,
              updated_at = NOW()
        WHERE hypothesis_id = $4`,
      [body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],
    );
  }

  // Append calibration row to the case file. The DB update and this write are
  // not atomic: if the write fails the row is already updated.
  await writeFile(file, next, "utf-8");

  await audit({
    event: "write_calibration",
    job_id: ctx.job_id,
    detective: ctx.detective,
    hypothesis_id: body.hypothesis_id,
    new_posterior: body.new_posterior,
    new_confidence_band: body.new_confidence_band,
    delta: body.delta,
    recommended_action: action,
    file,
  });

  return {
    hypothesis_id: body.hypothesis_id,
    case_file: file,
    new_posterior: body.new_posterior,
    recommended_action: action,
  };
}