disclosure-bureau/investigator-runtime/src/tools/write_calibration.ts

163 lines
5.8 KiB
TypeScript
Raw Normal View History

W3.8: Investigation Bureau complete — Poirot, Taleb, Tetlock, Case-Writer Brings the bureau from 4 → 8 detectives. All eight run as Bun + claude-CLI subprocesses against the same Supabase + investigation_jobs LISTEN/NOTIFY queue, sharing search.ts hybridSearch and writer-side validators that gate writes against schema + FK. New detectives: Poirot (witness_analysis) - prompts/poirot.md — credibility / access / bias / corroboration / verdict; uses entity_mentions JOIN chunks to pull 12 chunks per person; resolves corroboration_refs chunk_ids defensively (accepts bare cNNNN even when the model emits pNNN/cNNNN). - INSERT into public.witnesses with W-NNNN naming. - Tone: purple (#9b5de5). Taleb (outlier_scan) - prompts/taleb.md — "surprise is relative to a model"; at most 3 outliers; each requires explicit dominant_model + why_surprising + what_it_implies; fan-out into public.gaps with scope.kind="outlier". - Same unscoped-fallback as Dupin (Pass 1 with doc_id, Pass 2 widens to corpus if hits < 3). - Tone: yellow (#ffd23f). Tetlock (calibrate_hypothesis) - prompts/tetlock.md — honest Bayesian update; emits new_posterior + Δ + recommended_action ∈ {keep, downgrade, upgrade, supersede}. - write_calibration UPDATEs public.hypotheses + APPENDS a "## Calibration history" section to the H-NNNN.md case file (calibration is append-only — each datapoint matters). Posterior band auto-corrected to match Tetlock thresholds. - NO_NEW_EVIDENCE sentinel handled; pure 'keep' with |Δ|<0.005 only touches updated_at + reviewed_by. - Tone: teal (#26d4cc). Case-Writer (case_report) - prompts/case-writer.md — Dr. Watson assembles all artefacts (E-NNNN, H-NNNN, R-NNNN, W-NNNN, G-NNNN) into a five-act narrative. ILIKE filter on topic; doc_id optional scope. - Larger budget cap (≥ $0.50) + longer timeout for prose generation. - Writes case/reports/<slug>.md with frontmatter (topic + counts); no DB table for v0. - New page /c/[slug] renders the report via MarkdownBody + stat chips. 
- Tone: gold (#e0c080). Hardening across the bureau: - Sentinel parsing now accepts backticked AND prose-trailing forms (Holmes NO_HYPOTHESES, Dupin NO_CONTRADICTIONS, Schneier INSUFFICIENT_HYPOTHESIS, Poirot INSUFFICIENT_TESTIMONY, Taleb NO_OUTLIERS, Tetlock NO_NEW_EVIDENCE, Case-Writer INSUFFICIENT_ARTEFACTS). Avoids the failure mode where the model refuses honestly but the runtime treated it as a parse error (observed live with Poirot+Hoover identifying the DIRECTOR false-positive disambiguation issue in entity_mentions). Chat tool extensions (web/lib/chat/tools.ts): - request_investigation now accepts 7 kinds. Each routes to its detective with appropriate validation (hypothesis_id regex, person_id kebab-case, topic non-empty, doc_id for evidence_chain). - ETA per kind: Holmes/Dupin 60s, Poirot 45s, Schneier/Tetlock 30s, Taleb 50s, Case-Writer 180s (longer prose), Locard 30×n_chunks. UI integration: - chat-bubble inline card paints each detective in its tone color. - /jobs/[id] page header swaps name/subtitle/tone per detective; question label adapts ("Topic" / "Hypothesis under attack" / "Witness under analysis" / "Topic to outlier-scan" / "Hypothesis under recalibration" / "Case to assemble"). - job-status-poller renders: case-report link card (gold), outlier cards (yellow), witness cards (purple) — alongside existing hypothesis, evidence, contradiction cards. - /api/jobs/[id] hydrates witnesses (JOIN entities for canonical_name) + gaps (with scope JSONB). - /c/[slug] page reads /data/ufo/case/reports/<slug>.md and renders with MarkdownBody, frontmatter parsed for stat chips. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 01:11:39 +00:00
/**
* write_calibration.ts — Tetlock's primary writer.
*
* UPDATEs public.hypotheses (posterior + confidence_band + reviewed_by +
* updated_at) and APPENDS a new entry under the "## Calibration history"
* section of the H-NNNN.md case file, creating that section when it is
* missing. Existing entries are never rewritten. Each entry records a
* timestamp + old/new posterior + old/new band + delta +
* recommended_action + rationale.
*/
import { readFile, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { query, queryOne } from "../lib/pg";
/**
 * Payload for one calibration of a single hypothesis, as consumed by
 * `writeCalibration`.
 */
export interface WriteCalibrationArgs {
  /** Hypothesis identifier; the writer only accepts the `H-NNNN` form. */
  hypothesis_id: string;

  /** Previous posterior captured at call time — used in the case-file row. */
  old_posterior: number | null;
  /** Previous confidence band captured at call time — used in the case-file row. */
  old_confidence_band: string | null;

  /** Updated probability; the writer rejects values outside [0, 1]. */
  new_posterior: number;
  /** Band label for the new posterior; the writer overwrites it from `new_posterior`. */
  new_confidence_band: "high" | "medium" | "low" | "speculation";
  /** Signed movement of the posterior, printed with three decimals in the case file. */
  delta: number;

  /** Free-text justification; required, and capped at 600 characters by the writer. */
  rationale: string;
  /** What should happen to the hypothesis after this calibration. */
  recommended_action: "keep" | "downgrade" | "upgrade" | "supersede";
  /** Required by the writer when `recommended_action` is `"supersede"`. */
  supersede_reason?: string;
}
/**
 * Provenance of a calibration: which job produced it and which detective
 * ran it. Both values are written into the case-file entry and the audit
 * record; `detective` is also stored in the hypothesis row's `reviewed_by`.
 */
export interface WriteCalibrationContext {
  /** Identifier of the investigation job that triggered the calibration. */
  job_id: string;
  /** Name of the detective performing the calibration. */
  detective: string;
}
// Markdown level-2 heading that opens the calibration log inside an
// H-NNNN.md case file; used to find the section or to create it when absent.
const SECTION_MARKER = "## Calibration history";
/**
 * Maps a posterior probability onto its confidence band.
 *
 * Bands are closed on their lower edge: >= 0.90 is "high", >= 0.60 is
 * "medium", >= 0.30 is "low"; anything else (including NaN, which fails
 * every comparison) is "speculation".
 *
 * @param p Posterior probability.
 * @returns The band whose floor `p` reaches, or "speculation".
 */
function bandFromPosterior(p: number): "high" | "medium" | "low" | "speculation" {
  // Ordered from the highest floor down so the first hit is the tightest band.
  const floors: ReadonlyArray<readonly [number, "high" | "medium" | "low"]> = [
    [0.9, "high"],
    [0.6, "medium"],
    [0.3, "low"],
  ];
  for (const [floor, band] of floors) {
    if (p >= floor) return band;
  }
  return "speculation";
}
/**
 * Renders one calibration entry as a Markdown fragment for the case file.
 *
 * The fragment is a level-3 heading (`### <ISO timestamp> — <action>`),
 * an attribution line, an old/new comparison table, the rationale and —
 * only for a "supersede" action that carries a reason — the supersede
 * reason. It always ends with a single trailing newline.
 *
 * @param args Calibration payload; `delta` must be a number (it is
 *   formatted with `toFixed(3)`).
 * @param ctx Detective and job that produced the calibration.
 * @param now Timestamp used in the heading; defaults to the current time.
 *   Exposed so the output can be made deterministic.
 * @returns The Markdown fragment.
 */
function buildSection(
  args: WriteCalibrationArgs,
  ctx: WriteCalibrationContext,
  now: Date = new Date(),
): string {
  const ts = now.toISOString();
  // Non-negative deltas get an explicit "+"; negative ones already carry "-".
  const signedDelta = `${args.delta >= 0 ? "+" : ""}${args.delta.toFixed(3)}`;
  const rows = [
    // Timestamp and action are separated so the heading stays readable
    // (previously they were fused, e.g. "...000Zkeep").
    `### ${ts} — ${args.recommended_action}`,
    "",
    `_Calibrated by ${ctx.detective} — job \`${ctx.job_id}\`._`,
    "",
    `| field | old | new |`,
    `|---|---|---|`,
    `| posterior | ${args.old_posterior ?? "—"} | **${args.new_posterior}** |`,
    `| band | ${args.old_confidence_band ?? "—"} | **${args.new_confidence_band}** |`,
    `| delta | — | ${signedDelta} |`,
    "",
    `**Rationale.** ${args.rationale}`,
  ];
  if (args.recommended_action === "supersede" && args.supersede_reason) {
    rows.push("", `**Supersede reason.** ${args.supersede_reason}`);
  }
  // Empty last row => the joined text ends with "\n".
  rows.push("");
  return rows.join("\n");
}
/**
 * Returns `existing` with `section` added as the newest entry of the
 * calibration-history section. Calibration history is APPEND-only (Tetlock
 * can be invoked many times and each datapoint matters), so existing
 * entries are never modified.
 *
 * - Section missing: it is created at the end of the document.
 * - Section present and last in the document: the entry goes at the end.
 * - Section present but followed by another level-1/2 heading: the entry is
 *   inserted at the end of the calibration section, ahead of that heading,
 *   so it is not filed under an unrelated section.
 *
 * The marker is recognised only at the start of a line, including the very
 * first line of the document. Entries are separated by one blank line and
 * the result ends with a single newline.
 *
 * @param existing Current case-file content.
 * @param section Markdown fragment for the new entry.
 * @param marker Heading line that opens the calibration section.
 * @returns The updated case-file content.
 */
function appendCalibration(
  existing: string,
  section: string,
  marker: string = SECTION_MARKER,
): string {
  const doc = existing.trimEnd();
  const entry = section.trimEnd();

  // Offset of the marker heading, or -1 when the document has no such line.
  let markerStart = -1;
  if (doc.startsWith(marker)) {
    markerStart = 0;
  } else {
    const hit = doc.indexOf(`\n${marker}`);
    if (hit !== -1) markerStart = hit + 1;
  }

  if (markerStart === -1) {
    // No leading blank lines when the document is empty.
    const prefix = doc.length > 0 ? `${doc}\n\n` : "";
    return `${prefix}${marker}\n\n${entry}\n`;
  }

  // The section ends at the next "# " / "## " heading; "### " entry
  // headings inside the section do not match because "#{1,2}" must be
  // followed directly by a space.
  const searchFrom = markerStart + marker.length;
  const nextHeading = /\n#{1,2} /.exec(doc.slice(searchFrom));
  if (nextHeading === null) {
    return `${doc}\n\n${entry}\n`;
  }

  const cut = searchFrom + nextHeading.index;
  const head = doc.slice(0, cut).trimEnd();
  const tail = doc.slice(cut).trimStart();
  return `${head}\n\n${entry}\n\n${tail}\n`;
}
/**
 * Applies one calibration to a hypothesis: validates the payload, updates
 * the row in public.hypotheses, appends an entry to the H-NNNN.md case
 * file and emits an audit record.
 *
 * Everything that can fail without touching the database (payload
 * validation, hypothesis lookup, reading the case file, rendering the new
 * file content) runs BEFORE the UPDATE, so a bad payload or a missing case
 * file cannot leave the row changed without a matching history entry.
 *
 * Note: `body.new_confidence_band` is overwritten in place with the band
 * derived from `body.new_posterior`.
 *
 * @param body Calibration payload.
 * @param ctx Job and detective performing the calibration.
 * @returns The hypothesis id, the case-file path, the new posterior and the
 *   recommended action.
 * @throws Error when the payload is invalid, the hypothesis row does not
 *   exist, or the case file cannot be read. Errors from the database, the
 *   file write or the audit call propagate unchanged.
 */
export async function writeCalibration(
  body: WriteCalibrationArgs,
  ctx: WriteCalibrationContext,
): Promise<{ hypothesis_id: string; case_file: string; new_posterior: number; recommended_action: string }> {
  if (!body.hypothesis_id?.match(/^H-\d{4}$/)) {
    throw new Error(`bad hypothesis_id: ${body.hypothesis_id}`);
  }
  if (!Number.isFinite(body.new_posterior) || body.new_posterior < 0 || body.new_posterior > 1) {
    throw new Error(`new_posterior out of range: ${body.new_posterior}`);
  }
  // delta drives the "pure keep" branch and is formatted with toFixed() in
  // the case file; a missing or NaN value must be rejected up front rather
  // than blowing up after the row has been updated.
  if (!Number.isFinite(body.delta)) {
    throw new Error(`delta must be a finite number: ${body.delta}`);
  }
  // Force the band to match the posterior — Tetlock can mis-label.
  body.new_confidence_band = bandFromPosterior(body.new_posterior);
  if (!body.rationale?.trim()) throw new Error("rationale required");
  if (body.rationale.length > 600) throw new Error(`rationale too long`);
  const action = body.recommended_action;
  if (!["keep", "downgrade", "upgrade", "supersede"].includes(action)) {
    throw new Error(`bad recommended_action: ${action}`);
  }
  if (action === "supersede" && !body.supersede_reason?.trim()) {
    throw new Error("supersede_reason required when action == supersede");
  }

  // Verify hypothesis exists.
  const h = await queryOne<{ hypothesis_id: string; status: string }>(
    `SELECT hypothesis_id, status FROM public.hypotheses WHERE hypothesis_id = $1`,
    [body.hypothesis_id],
  );
  if (!h) throw new Error(`hypothesis not found: ${body.hypothesis_id}`);

  // Load the case file and render its new content before mutating the row.
  const file = path.join(env.CASE_ROOT, "hypotheses", `${body.hypothesis_id}.md`);
  let existing: string;
  try {
    existing = await readFile(file, "utf-8");
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new Error(`hypothesis case file missing: ${file} (${reason})`);
  }
  const next = appendCalibration(existing, buildSection(body, ctx));

  // UPDATE DB: posterior + band (always), status='superseded' when action=='supersede'.
  if (action === "supersede") {
    await query(
      `UPDATE public.hypotheses
       SET posterior = $1, confidence_band = $2, status = 'superseded',
           reviewed_by = $3, updated_at = NOW()
       WHERE hypothesis_id = $4`,
      [body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],
    );
  } else if (action === "keep" && Math.abs(body.delta) < 0.005) {
    // Pure keep with no movement — only touch updated_at + reviewed_by.
    await query(
      `UPDATE public.hypotheses
       SET reviewed_by = $1, updated_at = NOW()
       WHERE hypothesis_id = $2`,
      [ctx.detective, body.hypothesis_id],
    );
  } else {
    await query(
      `UPDATE public.hypotheses
       SET posterior = $1, confidence_band = $2,
           reviewed_by = $3, updated_at = NOW()
       WHERE hypothesis_id = $4`,
      [body.new_posterior, body.new_confidence_band, ctx.detective, body.hypothesis_id],
    );
  }

  // Persist the calibration entry, then record the write in the audit log.
  await writeFile(file, next, "utf-8");
  await audit({
    event: "write_calibration",
    job_id: ctx.job_id,
    detective: ctx.detective,
    hypothesis_id: body.hypothesis_id,
    new_posterior: body.new_posterior,
    new_confidence_band: body.new_confidence_band,
    delta: body.delta,
    recommended_action: action,
    file,
  });
  return {
    hypothesis_id: body.hypothesis_id,
    case_file: file,
    new_posterior: body.new_posterior,
    recommended_action: action,
  };
}