disclosure-bureau/investigator-runtime/src/tools/write_witness_analysis.ts

/**
 * write_witness_analysis.ts — Poirot's primary writer.
 *
 * INSERTs a row into public.witnesses (FK to entities.entity_pk for the
 * person) and writes case/witnesses/W-NNNN.md.
 *
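 * Validates (throws on failure):
 *  - person_entity_pk exists in public.entities and is of class 'person'
 *  - credibility ∈ {high, medium, low, speculation}
 *  - access_to_event + bias_notes are present (non-empty) and ≤ 800 chars each
 *  - verdict is present (non-empty) and ≤ 280 chars
 *
 * Resolves (never throws for these):
 *  - corroboration_refs[].chunk_id is looked up in public.chunks to obtain its
 *    chunk_pk; only the first 8 refs are considered and refs that do not
 *    resolve are silently dropped.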
 */
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { allocate } from "../lib/ids";
import { query, queryOne } from "../lib/pg";

/**
 * A single chunk cited for or against the witness, as supplied by the caller.
 * The chunk is addressed by slug; the writer looks it up in public.chunks.
 */
export interface CorroborationRef {
  /** chunk_id slug (e.g. "c0042"). The writer resolves chunk_pk for storage. */
  chunk_id: string;
  /**
   * Optional doc_id when the chunk_id alone might be ambiguous across docs.
   * When present (non-blank) the lookup is restricted to that document.
   */
  doc_id?: string;
  /** true=supports, false=refutes. */
  supports: boolean;
}

/** Payload accepted by writeWitnessAnalysis: one credibility assessment of one person. */
export interface WriteWitnessAnalysisArgs {
  /** entity_pk of the witness; must exist in public.entities with entity_class 'person'. */
  person_entity_pk: number;
  /** Credibility band; any other value is rejected. */
  credibility: "high" | "medium" | "low" | "speculation";
  /** Required, non-blank, at most 800 characters. Rendered under "## Access to event". */
  access_to_event: string;
  /** Required, non-blank, at most 800 characters. Rendered under "## Bias notes". */
  bias_notes: string;
  /** Cited chunks. Only the first 8 are considered; unresolvable ones are dropped. */
  corroboration_refs: CorroborationRef[];
  /** Required, non-blank, at most 280 characters. */
  verdict: string;
}

/** Provenance of the write: which job and which detective produced the analysis. */
export interface WriteWitnessAnalysisContext {
  /** Recorded in the case file front matter and in the audit event. */
  job_id: string;
  /** Stored as created_by in public.witnesses and in the case file front matter. */
  detective: string;
}

/**
 * Reduces a model-emitted chunk reference to its bare chunk slug.
 *
 * Surrounding whitespace is stripped first, because the pattern is anchored
 * at the end of the string and a trailing space or newline would otherwise
 * prevent the match. If the trimmed text ends in `c` followed by four or more
 * digits (e.g. "p003/c0042"), only that suffix ("c0042") is returned.
 * Otherwise the trimmed text is returned unchanged.
 *
 * @param raw Chunk reference as received from the caller.
 * @returns The bare slug when one is found, else the trimmed input. This is
 *          the empty string for blank input.
 */
function normalizeChunkId(raw: string): string {
  const trimmed = raw.trim();
  const m = trimmed.match(/c\d{4,}$/);
  return m ? m[0] : trimmed;
}

/**
 * A corroboration ref after lookup in public.chunks. This is the shape that is
 * serialized into witnesses.corroboration_refs and rendered in the case file.
 */
interface ResolvedRef {
  /** chunk_pk of the matched public.chunks row. */
  chunk_pk: number;
  /** doc_id of the matched row (not necessarily the hint that was supplied). */
  doc_id: string;
  /** Normalized chunk slug used for the lookup. */
  chunk_id: string;
  /** page of the matched row. */
  page: number;
  /** Carried over from the input ref: true=supports, false=refutes. */
  supports: boolean;
}

/**
 * Looks up the public.chunks row a corroboration ref points at.
 *
 * The chunk slug is normalized via normalizeChunkId. The lookup is scoped to a
 * document when either `ref.doc_id` or `fallbackDocId` is non-blank
 * (`ref.doc_id` wins); otherwise it searches every document.
 *
 * @param ref           Ref as supplied by the caller.
 * @param fallbackDocId Document to scope to when the ref carries no doc_id.
 * @returns The resolved ref, or null when the slug is empty or no row matches.
 */
async function resolveRef(ref: CorroborationRef, fallbackDocId?: string): Promise<ResolvedRef | null> {
  type ChunkRow = { chunk_pk: number; page: number; doc_id: string };

  const cid = normalizeChunkId(ref.chunk_id);
  if (!cid) return null;

  // `||` (not `??`) is deliberate: a blank doc_id must fall through to the
  // fallback, and a blank fallback must fall through to "no scope".
  const docHint = ref.doc_id?.trim() || fallbackDocId?.trim() || null;

  const row = docHint
    ? await queryOne<ChunkRow>(
        `SELECT chunk_pk, page, doc_id FROM public.chunks WHERE doc_id = $1 AND chunk_id = $2`,
        [docHint, cid],
      )
    : // Unscoped: the same slug may exist in several documents. Without an
      // ORDER BY, LIMIT 1 returns an arbitrary row, so order by chunk_pk to
      // make the same ref resolve to the same chunk on every run.
      await queryOne<ChunkRow>(
        `SELECT chunk_pk, page, doc_id FROM public.chunks WHERE chunk_id = $1 ORDER BY chunk_pk LIMIT 1`,
        [cid],
      );
  if (!row) return null;

  return { chunk_pk: row.chunk_pk, doc_id: row.doc_id, chunk_id: cid, page: row.page, supports: ref.supports };
}

/**
 * Builds the Markdown case file for a witness analysis.
 *
 * Layout: a YAML front matter block, an H1 title, the credibility and verdict
 * lines, then the "Access to event", "Bias notes" and "Corroboration chain"
 * sections. Every line is joined with "\n" and the document ends with a
 * trailing newline.
 *
 * @param id             Witness id, used in the front matter and the title.
 * @param canonical_name Display name of the witness (JSON-quoted in front matter).
 * @param body           The validated analysis payload.
 * @param refs           Resolved corroboration refs, one bullet each.
 * @param ctx            Job / detective provenance for the front matter.
 * @returns The complete file contents.
 */
function renderMd(
  id: string,
  canonical_name: string,
  body: WriteWitnessAnalysisArgs,
  refs: ResolvedRef[],
  ctx: WriteWitnessAnalysisContext,
): string {
  // One wiki-link bullet per ref, page zero-padded to three digits.
  let corroboration = "_(no corroboration cited)_";
  if (refs.length !== 0) {
    const bullets: string[] = [];
    for (const ref of refs) {
      const paddedPage = String(ref.page).padStart(3, "0");
      const stance = ref.supports ? "supports" : "refutes";
      bullets.push(`- [[${ref.doc_id}/p${paddedPage}#${ref.chunk_id}]] (${stance})`);
    }
    corroboration = bullets.join("\n");
  }

  const createdAt = new Date().toISOString();

  const lines: string[] = [];

  // Front matter.
  lines.push("---");
  lines.push(`schema_version: "0.1.0"`);
  lines.push(`type: witness_analysis`);
  lines.push(`witness_id: ${id}`);
  lines.push(`subject: ${JSON.stringify(canonical_name)}`);
  lines.push(`credibility: ${body.credibility}`);
  lines.push(`created_by: ${ctx.detective}`);
  lines.push(`job_id: ${ctx.job_id}`);
  lines.push(`created_at: ${createdAt}`);
  lines.push("---");

  // Body: each entry is followed by a blank line.
  const sections = [
    `# Witness analysis ${id} — ${canonical_name}`,
    `**Credibility.** ${body.credibility}`,
    `**Verdict.** ${body.verdict}`,
    "## Access to event",
    body.access_to_event,
    "## Bias notes",
    body.bias_notes,
    "## Corroboration chain",
    corroboration,
  ];
  for (const section of sections) {
    lines.push("", section);
  }
  lines.push(""); // trailing newline after the join

  return lines.join("\n");
}

/**
 * Validates a witness analysis, stores it in public.witnesses, writes the
 * matching Markdown case file and emits an audit event.
 *
 * Order of effects: validation → entity lookup → ref resolution → id
 * allocation → INSERT → file write → audit. The row is inserted before the
 * file is written, and nothing in this function rolls the insert back if a
 * later step throws.
 *
 * Corroboration refs are best-effort: only the first 8 entries are considered,
 * and entries that are malformed or do not resolve to a chunk are dropped
 * rather than failing the call.
 *
 * @param body Analysis payload (see WriteWitnessAnalysisArgs for limits).
 * @param ctx  Job / detective provenance.
 * @param opts Optional `fallback_doc_id` used to scope refs that carry no doc_id.
 * @returns The allocated witness id, the path of the written case file, and
 *          the credibility / person_entity_pk echoed back from the input.
 * @throws Error when a field fails validation, when the entity does not
 *         exist, or when the entity is not of class 'person'.
 */
export async function writeWitnessAnalysis(
  body: WriteWitnessAnalysisArgs,
  ctx: WriteWitnessAnalysisContext,
  opts?: { fallback_doc_id?: string },
): Promise<{ witness_id: string; case_file: string; credibility: string; person_entity_pk: number }> {
  const MAX_VERDICT_CHARS = 280;
  const MAX_NOTE_CHARS = 800;
  const MAX_REFS = 8;

  if (!Number.isFinite(body.person_entity_pk)) throw new Error("person_entity_pk required");
  // A fractional key can never match a row; reject it here with a clear
  // message instead of letting it surface as a database error.
  if (!Number.isInteger(body.person_entity_pk)) {
    throw new Error(`person_entity_pk must be an integer: ${body.person_entity_pk}`);
  }
  const validBand = ["high", "medium", "low", "speculation"].includes(body.credibility);
  if (!validBand) throw new Error(`bad credibility: ${body.credibility}`);
  if (!body.access_to_event?.trim()) throw new Error("access_to_event required");
  if (!body.bias_notes?.trim()) throw new Error("bias_notes required");
  if (!body.verdict?.trim()) throw new Error("verdict required");
  if (body.verdict.length > MAX_VERDICT_CHARS) {
    throw new Error(`verdict too long (${body.verdict.length} > 280)`);
  }
  if (body.access_to_event.length > MAX_NOTE_CHARS) {
    throw new Error(`access_to_event too long (${body.access_to_event.length} > 800)`);
  }
  if (body.bias_notes.length > MAX_NOTE_CHARS) {
    throw new Error(`bias_notes too long (${body.bias_notes.length} > 800)`);
  }

  // Verify entity exists and is a person.
  const ent = await queryOne<{ canonical_name: string; entity_class: string }>(
    `SELECT canonical_name, entity_class FROM public.entities WHERE entity_pk = $1`,
    [body.person_entity_pk],
  );
  if (!ent) throw new Error(`entity not found: pk=${body.person_entity_pk}`);
  if (ent.entity_class !== "person") {
    throw new Error(`entity is not a person: ${ent.entity_class}`);
  }

  // Resolve corroboration refs. Drop unresolvable ones (don't fail the whole call).
  // The payload originates from model output, so tolerate a non-array value
  // and entries whose chunk_id is missing or not a string: those are dropped
  // like any other unresolvable ref instead of raising a TypeError.
  const candidateRefs = Array.isArray(body.corroboration_refs) ? body.corroboration_refs : [];
  const refs: ResolvedRef[] = [];
  for (const r of candidateRefs.slice(0, MAX_REFS)) {
    if (typeof r?.chunk_id !== "string" || !r.chunk_id) continue;
    const resolved = await resolveRef(r, opts?.fallback_doc_id);
    if (resolved) refs.push(resolved);
  }

  const witness_id = await allocate.witnessId();
  await query(
    `INSERT INTO public.witnesses
       (witness_id, person_entity_pk, credibility, access_to_event,
        bias_notes, corroboration_refs, verdict, created_by)
     VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7, $8)`,
    [
      witness_id, body.person_entity_pk, body.credibility,
      body.access_to_event, body.bias_notes,
      // Stored as JSONB; fields are listed explicitly so the stored shape
      // stays fixed even if ResolvedRef grows.
      JSON.stringify(refs.map((r) => ({
        chunk_pk: r.chunk_pk, doc_id: r.doc_id, chunk_id: r.chunk_id,
        page: r.page, supports: r.supports,
      }))),
      body.verdict, ctx.detective,
    ],
  );

  // Case file lives at <CASE_ROOT>/witnesses/<witness_id>.md.
  const dir = path.join(env.CASE_ROOT, "witnesses");
  await mkdir(dir, { recursive: true });
  const file = path.join(dir, `${witness_id}.md`);
  await writeFile(file, renderMd(witness_id, ent.canonical_name, body, refs, ctx), "utf-8");

  await audit({
    event: "write_witness_analysis",
    job_id: ctx.job_id,
    detective: ctx.detective,
    witness_id,
    person_entity_pk: body.person_entity_pk,
    canonical_name: ent.canonical_name,
    credibility: body.credibility,
    n_corroboration: refs.length,
    file,
  });

  return {
    witness_id, case_file: file,
    credibility: body.credibility,
    person_entity_pk: body.person_entity_pk,
  };
}
W3.8: Investigation Bureau complete — Poirot, Taleb, Tetlock, Case-Writer Brings the bureau from 4 → 8 detectives. All eight run as Bun + claude-CLI subprocesses against the same Supabase + investigation_jobs LISTEN/NOTIFY queue, sharing search.ts hybridSearch and writer-side validators that gate writes against schema + FK. New detectives: Poirot (witness_analysis) - prompts/poirot.md — credibility / access / bias / corroboration / verdict; uses entity_mentions JOIN chunks to pull 12 chunks per person; resolves corroboration_refs chunk_ids defensively (accepts bare cNNNN even when the model emits pNNN/cNNNN). - INSERT into public.witnesses with W-NNNN naming. - Tone: purple (#9b5de5). Taleb (outlier_scan) - prompts/taleb.md — "surprise is relative to a model"; at most 3 outliers; each requires explicit dominant_model + why_surprising + what_it_implies; fan-out into public.gaps with scope.kind="outlier". - Same unscoped-fallback as Dupin (Pass 1 with doc_id, Pass 2 widens to corpus if hits < 3). - Tone: yellow (#ffd23f). Tetlock (calibrate_hypothesis) - prompts/tetlock.md — honest Bayesian update; emits new_posterior + Δ + recommended_action ∈ {keep, downgrade, upgrade, supersede}. - write_calibration UPDATEs public.hypotheses + APPENDS a "## Calibration history" section to the H-NNNN.md case file (calibration is append-only — each datapoint matters). Posterior band auto-corrected to match Tetlock thresholds. - NO_NEW_EVIDENCE sentinel handled; pure 'keep' with \|Δ\|<0.005 only touches updated_at + reviewed_by. - Tone: teal (#26d4cc). Case-Writer (case_report) - prompts/case-writer.md — Dr. Watson assembles all artefacts (E-NNNN, H-NNNN, R-NNNN, W-NNNN, G-NNNN) into a five-act narrative. ILIKE filter on topic; doc_id optional scope. - Larger budget cap (≥ $0.50) + longer timeout for prose generation. - Writes case/reports/<slug>.md with frontmatter (topic + counts); no DB table for v0. - New page /c/[slug] renders the report via MarkdownBody + stat chips. 
- Tone: gold (#e0c080). Hardening across the bureau: - Sentinel parsing now accepts backticked AND prose-trailing forms (Holmes NO_HYPOTHESES, Dupin NO_CONTRADICTIONS, Schneier INSUFFICIENT_HYPOTHESIS, Poirot INSUFFICIENT_TESTIMONY, Taleb NO_OUTLIERS, Tetlock NO_NEW_EVIDENCE, Case-Writer INSUFFICIENT_ARTEFACTS). Avoids the failure mode where the model refuses honestly but the runtime treated it as a parse error (observed live with Poirot+Hoover identifying the DIRECTOR false-positive disambiguation issue in entity_mentions). Chat tool extensions (web/lib/chat/tools.ts): - request_investigation now accepts 7 kinds. Each routes to its detective with appropriate validation (hypothesis_id regex, person_id kebab-case, topic non-empty, doc_id for evidence_chain). - ETA per kind: Holmes/Dupin 60s, Poirot 45s, Schneier/Tetlock 30s, Taleb 50s, Case-Writer 180s (longer prose), Locard 30×n_chunks. UI integration: - chat-bubble inline card paints each detective in its tone color. - /jobs/[id] page header swaps name/subtitle/tone per detective; question label adapts ("Topic" / "Hypothesis under attack" / "Witness under analysis" / "Topic to outlier-scan" / "Hypothesis under recalibration" / "Case to assemble"). - job-status-poller renders: case-report link card (gold), outlier cards (yellow), witness cards (purple) — alongside existing hypothesis, evidence, contradiction cards. - /api/jobs/[id] hydrates witnesses (JOIN entities for canonical_name) + gaps (with scope JSONB). - /c/[slug] page reads /data/ufo/case/reports/<slug>.md and renders with MarkdownBody, frontmatter parsed for stat chips. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-24 01:11:39 +00:00			`/**`
			`* write_witness_analysis.ts — Poirot's primary writer.`
			`*`
			`* INSERTs a row into public.witnesses (FK to entities.entity_pk for the`
			`* person) and writes case/witnesses/W-NNNN.md.`
			`*`
			`* Validates:`
			`* - person_entity_pk exists in public.entities and is of class 'person'`
			`* - credibility ∈ {high, medium, low, speculation}`
			`* - access_to_event + bias_notes are present (non-empty)`
			`* - corroboration_refs[].chunk_id resolves to a chunk_pk in public.chunks`
			`* - verdict ≤ 280 chars`
			`*/`
			`import { mkdir, writeFile } from "node:fs/promises";`
			`import path from "node:path";`
			`import { audit } from "../lib/audit";`
			`import { env } from "../lib/env";`
			`import { allocate } from "../lib/ids";`
			`import { query, queryOne } from "../lib/pg";`

			`export interface CorroborationRef {`
			`/** chunk_id slug (e.g. "c0042"). The writer resolves chunk_pk for storage. */`
			`chunk_id: string;`
			`/** Optional doc_id when the chunk_id alone might be ambiguous across docs. */`
			`doc_id?: string;`
			`/** true=supports, false=refutes. */`
			`supports: boolean;`
			`}`

			`export interface WriteWitnessAnalysisArgs {`
			`person_entity_pk: number;`
			`credibility: "high" \| "medium" \| "low" \| "speculation";`
			`access_to_event: string;`
			`bias_notes: string;`
			`corroboration_refs: CorroborationRef[];`
			`verdict: string;`
			`}`

			`export interface WriteWitnessAnalysisContext {`
			`job_id: string;`
			`detective: string;`
			`}`

			`function normalizeChunkId(raw: string): string {`
			`const m = raw.match(/c\d{4,}$/);`
			`return m ? m[0] : raw;`
			`}`

			`interface ResolvedRef {`
			`chunk_pk: number;`
			`doc_id: string;`
			`chunk_id: string;`
			`page: number;`
			`supports: boolean;`
			`}`

			`async function resolveRef(ref: CorroborationRef, fallbackDocId?: string): Promise<ResolvedRef \| null> {`
			`const cid = normalizeChunkId(ref.chunk_id);`
			`if (!cid) return null;`
			`// If doc_id provided, scope; else allow any doc.`
			`const docHint = ref.doc_id?.trim() \|\| fallbackDocId?.trim() \|\| null;`
			`const row = docHint`
			`? await queryOne<{ chunk_pk: number; page: number; doc_id: string }>(`
			`SELECT chunk_pk, page, doc_id FROM public.chunks WHERE doc_id = $1 AND chunk_id = $2`,
			`[docHint, cid],`
			`)`
			`: await queryOne<{ chunk_pk: number; page: number; doc_id: string }>(`
			`SELECT chunk_pk, page, doc_id FROM public.chunks WHERE chunk_id = $1 LIMIT 1`,
			`[cid],`
			`);`
			`if (!row) return null;`
			`return { chunk_pk: row.chunk_pk, doc_id: row.doc_id, chunk_id: cid, page: row.page, supports: ref.supports };`
			`}`

			`function renderMd(`
			`id: string,`
			`canonical_name: string,`
			`body: WriteWitnessAnalysisArgs,`
			`refs: ResolvedRef[],`
			`ctx: WriteWitnessAnalysisContext,`
			`): string {`
			`const refBlocks = refs.length === 0`
			`? "_(no corroboration cited)_"`
			`: refs.map((r) => {`
			`const pageStr = String(r.page).padStart(3, "0");`
			return `- [[${r.doc_id}/p${pageStr}#${r.chunk_id}]] (${r.supports ? "supports" : "refutes"})`;
			`}).join("\n");`

			`const fm = [`
			`"---",`
			`schema_version: "0.1.0"`,
			`type: witness_analysis`,
			`witness_id: ${id}`,
			`subject: ${JSON.stringify(canonical_name)}`,
			`credibility: ${body.credibility}`,
			`created_by: ${ctx.detective}`,
			`job_id: ${ctx.job_id}`,
			`created_at: ${new Date().toISOString()}`,
			`"---",`
			`].join("\n");`

			`return [`
			`fm,`
			`"",`
			`# Witness analysis ${id} — ${canonical_name}`,
			`"",`
			`Credibility. ${body.credibility}`,
			`"",`
			`Verdict. ${body.verdict}`,
			`"",`
			`"## Access to event",`
			`"",`
			`body.access_to_event,`
			`"",`
			`"## Bias notes",`
			`"",`
			`body.bias_notes,`
			`"",`
			`"## Corroboration chain",`
			`"",`
			`refBlocks,`
			`"",`
			`].join("\n");`
			`}`

			`export async function writeWitnessAnalysis(`
			`body: WriteWitnessAnalysisArgs,`
			`ctx: WriteWitnessAnalysisContext,`
			`opts?: { fallback_doc_id?: string },`
			`): Promise<{ witness_id: string; case_file: string; credibility: string; person_entity_pk: number }> {`
			`if (!Number.isFinite(body.person_entity_pk)) throw new Error("person_entity_pk required");`
			`const validBand = ["high", "medium", "low", "speculation"].includes(body.credibility);`
			if (!validBand) throw new Error(`bad credibility: ${body.credibility}`);
			`if (!body.access_to_event?.trim()) throw new Error("access_to_event required");`
			`if (!body.bias_notes?.trim()) throw new Error("bias_notes required");`
			`if (!body.verdict?.trim()) throw new Error("verdict required");`
			if (body.verdict.length > 280) throw new Error(`verdict too long (${body.verdict.length} > 280)`);
			if (body.access_to_event.length > 800) throw new Error(`access_to_event too long (${body.access_to_event.length} > 800)`);
			if (body.bias_notes.length > 800) throw new Error(`bias_notes too long (${body.bias_notes.length} > 800)`);

			`// Verify entity exists and is a person.`
			`const ent = await queryOne<{ canonical_name: string; entity_class: string }>(`
			`SELECT canonical_name, entity_class FROM public.entities WHERE entity_pk = $1`,
			`[body.person_entity_pk],`
			`);`
			if (!ent) throw new Error(`entity not found: pk=${body.person_entity_pk}`);
			`if (ent.entity_class !== "person") {`
			throw new Error(`entity is not a person: ${ent.entity_class}`);
			`}`

			`// Resolve corroboration refs. Drop unresolvable ones (don't fail the whole call).`
			`const refs: ResolvedRef[] = [];`
			`for (const r of (body.corroboration_refs ?? []).slice(0, 8)) {`
			`if (!r?.chunk_id) continue;`
			`const resolved = await resolveRef(r, opts?.fallback_doc_id);`
			`if (resolved) refs.push(resolved);`
			`}`

			`const witness_id = await allocate.witnessId();`
			`await query(`
			`INSERT INTO public.witnesses
			`(witness_id, person_entity_pk, credibility, access_to_event,`
			`bias_notes, corroboration_refs, verdict, created_by)`
			VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7, $8)`,
			`[`
			`witness_id, body.person_entity_pk, body.credibility,`
			`body.access_to_event, body.bias_notes,`
			`JSON.stringify(refs.map((r) => ({`
			`chunk_pk: r.chunk_pk, doc_id: r.doc_id, chunk_id: r.chunk_id,`
			`page: r.page, supports: r.supports,`
			`}))),`
			`body.verdict, ctx.detective,`
			`],`
			`);`

			`const dir = path.join(env.CASE_ROOT, "witnesses");`
			`await mkdir(dir, { recursive: true });`
			const file = path.join(dir, `${witness_id}.md`);
			`await writeFile(file, renderMd(witness_id, ent.canonical_name, body, refs, ctx), "utf-8");`

			`await audit({`
			`event: "write_witness_analysis",`
			`job_id: ctx.job_id,`
			`detective: ctx.detective,`
			`witness_id,`
			`person_entity_pk: body.person_entity_pk,`
			`canonical_name: ent.canonical_name,`
			`credibility: body.credibility,`
			`n_corroboration: refs.length,`
			`file,`
			`});`

			`return {`
			`witness_id, case_file: file,`
			`credibility: body.credibility,`
			`person_entity_pk: body.person_entity_pk,`
			`};`
			`}`