// disclosure-bureau/investigator-runtime/src/tools/write_witness_analysis.ts

/**
 * write_witness_analysis.ts — Poirot's primary writer.
 *
 * INSERTs a row into public.witnesses (FK to entities.entity_pk for the
 * person) and writes case/witnesses/W-NNNN.md.
 *
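 * Validates:
 *  - person_entity_pk exists in public.entities and is of class 'person'
 *  - credibility ∈ {high, medium, low, speculation}
 *  - access_to_event + bias_notes are present (non-empty) and ≤ 800 chars each
 *  - verdict is present (non-empty) and ≤ 280 chars
 *
 * corroboration_refs are resolved best-effort rather than validated: each
 * chunk_id is looked up in public.chunks to obtain its chunk_pk, and refs that
 * do not resolve are dropped instead of failing the call. Only the first 8
 * refs are considered.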
 */
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { allocate } from "../lib/ids";
import { query, queryOne } from "../lib/pg";

/**
 * One piece of evidence cited for or against a witness, as supplied by the
 * caller. The writer looks it up in public.chunks before storing it.
 */
export interface CorroborationRef {
  /**
   * chunk_id slug (e.g. "c0042"). The writer resolves chunk_pk for storage.
   * A longer reference that ends in such a slug is reduced to the slug itself
   * before the lookup.
   */
  chunk_id: string;
  /**
   * Optional doc_id when the chunk_id alone might be ambiguous across docs.
   * When omitted (or blank) the lookup uses the writer's fallback doc id if
   * one was given, otherwise it matches the chunk_id in any doc.
   */
  doc_id?: string;
  /** true=supports, false=refutes. */
  supports: boolean;
}

/** Payload of a witness analysis, as passed to writeWitnessAnalysis. */
export interface WriteWitnessAnalysisArgs {
  /** entity_pk of the subject in public.entities; the row must have entity_class 'person'. */
  person_entity_pk: number;
  /** Credibility band assigned to the witness. */
  credibility: "high" | "medium" | "low" | "speculation";
  /** How the witness had access to the event. Required, non-blank, at most 800 chars. */
  access_to_event: string;
  /** Notes on the witness's possible bias. Required, non-blank, at most 800 chars. */
  bias_notes: string;
  /** Supporting / refuting chunks. Only the first 8 entries are looked at; unresolvable ones are dropped. */
  corroboration_refs: CorroborationRef[];
  /** Short verdict. Required, non-blank, at most 280 chars. */
  verdict: string;
}

/** Provenance of the call; recorded alongside the analysis, not validated here. */
export interface WriteWitnessAnalysisContext {
  /** Written to the case file front matter (`job_id`) and to the audit event. */
  job_id: string;
  /** Stored as `created_by` in public.witnesses and in the front matter; also sent to the audit event. */
  detective: string;
}

/**
 * Reduce a chunk reference to its bare chunk_id slug.
 *
 * Surrounding whitespace is removed first. If the remaining text ends in `c`
 * followed by four or more digits (e.g. "c0042"), only that suffix is
 * returned, so anything in front of the slug is discarded. Otherwise the
 * trimmed text is returned unchanged and the caller's lookup decides whether
 * it is valid.
 *
 * @param raw - chunk reference as supplied by the caller.
 * @returns the trailing `c\d{4,}` slug, or the trimmed input when there is none
 *          (an empty string for blank input).
 */
function normalizeChunkId(raw: string): string {
  // The pattern is anchored to the end of the string, so a trailing space or
  // newline would otherwise hide a perfectly good slug.
  const trimmed = raw.trim();
  const slug = trimmed.match(/c\d{4,}$/);
  return slug ? slug[0] : trimmed;
}

/**
 * A corroboration reference after lookup in public.chunks. This is the shape
 * stored in witnesses.corroboration_refs and rendered into the case file.
 */
interface ResolvedRef {
  /** chunk_pk of the matched public.chunks row. */
  chunk_pk: number;
  /** doc_id of the matched row (taken from the database, not from the caller). */
  doc_id: string;
  /** Normalized chunk_id slug that was used for the lookup. */
  chunk_id: string;
  /** page of the matched row. */
  page: number;
  /** Copied from the caller's ref: true=supports, false=refutes. */
  supports: boolean;
}

/**
 * Look up one corroboration reference in public.chunks.
 *
 * The chunk_id is normalized first. The lookup is scoped to a document when
 * one is known: the ref's own doc_id wins, then `fallbackDocId`. With no
 * document hint the chunk_id is matched across all docs; if several docs
 * contain it, the row with the lowest chunk_pk is chosen so that the same
 * input always resolves to the same citation.
 *
 * @param ref - reference supplied by the caller.
 * @param fallbackDocId - doc_id to use when the ref carries none.
 * @returns the resolved reference, or null when the chunk_id is empty or no
 *          matching row exists.
 */
async function resolveRef(ref: CorroborationRef, fallbackDocId?: string): Promise<ResolvedRef | null> {
  const cid = normalizeChunkId(ref.chunk_id);
  if (!cid) return null;
  // `||` rather than `??` on purpose: a blank doc_id must fall through to the
  // fallback, and a blank fallback must fall through to "no hint".
  const docHint = ref.doc_id?.trim() || fallbackDocId?.trim() || null;
  const row = docHint
    ? await queryOne<{ chunk_pk: number; page: number; doc_id: string }>(
        `SELECT chunk_pk, page, doc_id FROM public.chunks WHERE doc_id = $1 AND chunk_id = $2`,
        [docHint, cid],
      )
    : await queryOne<{ chunk_pk: number; page: number; doc_id: string }>(
        // Without ORDER BY, LIMIT 1 may return any of the matching rows and
        // the pick can change between runs; order by the key to pin it down.
        `SELECT chunk_pk, page, doc_id FROM public.chunks WHERE chunk_id = $1 ORDER BY chunk_pk LIMIT 1`,
        [cid],
      );
  if (!row) return null;
  return { chunk_pk: row.chunk_pk, doc_id: row.doc_id, chunk_id: cid, page: row.page, supports: ref.supports };
}

/**
 * Build the markdown case file for a witness analysis.
 *
 * The document is a YAML front matter block (schema version, type, witness
 * id, JSON-quoted subject name, credibility, author, job id, creation
 * timestamp taken at render time) followed by the headline, the credibility
 * and verdict lines, the "Access to event" and "Bias notes" sections, and the
 * corroboration chain. Each resolved ref becomes one wiki-style link of the
 * form `[[doc/pNNN#chunk]]` with the page left-padded to three digits and a
 * supports/refutes marker; with no refs a placeholder line is emitted.
 *
 * @param id - allocated witness id.
 * @param canonical_name - name of the subject entity.
 * @param body - the analysis being written.
 * @param refs - corroboration refs that resolved to chunks.
 * @param ctx - job and detective recorded in the front matter.
 * @returns the full file content, ending with a newline.
 */
function renderMd(
  id: string,
  canonical_name: string,
  body: WriteWitnessAnalysisArgs,
  refs: ResolvedRef[],
  ctx: WriteWitnessAnalysisContext,
): string {
  // Corroboration chain: one bullet per resolved ref, or a placeholder.
  const bullets: string[] = [];
  for (const ref of refs) {
    const paddedPage = String(ref.page).padStart(3, "0");
    const stance = ref.supports ? "supports" : "refutes";
    bullets.push(`- [[${ref.doc_id}/p${paddedPage}#${ref.chunk_id}]] (${stance})`);
  }
  const chain = bullets.length > 0 ? bullets.join("\n") : "_(no corroboration cited)_";

  const lines: string[] = [];

  // Front matter.
  lines.push("---");
  lines.push(`schema_version: "0.1.0"`);
  lines.push(`type: witness_analysis`);
  lines.push(`witness_id: ${id}`);
  lines.push(`subject: ${JSON.stringify(canonical_name)}`);
  lines.push(`credibility: ${body.credibility}`);
  lines.push(`created_by: ${ctx.detective}`);
  lines.push(`job_id: ${ctx.job_id}`);
  lines.push(`created_at: ${new Date().toISOString()}`);
  lines.push("---");

  // Headline and summary.
  lines.push("", `# Witness analysis ${id} — ${canonical_name}`);
  lines.push("", `**Credibility.** ${body.credibility}`);
  lines.push("", `**Verdict.** ${body.verdict}`);

  // Free-text sections, each a heading, a blank line, then the text.
  lines.push("", "## Access to event", "", body.access_to_event);
  lines.push("", "## Bias notes", "", body.bias_notes);
  lines.push("", "## Corroboration chain", "", chain);

  // Trailing empty element so the joined text ends with a newline.
  lines.push("");
  return lines.join("\n");
}

/**
 * Validate a witness analysis, store it in public.witnesses and write the
 * matching case file `<CASE_ROOT>/witnesses/<witness_id>.md`.
 *
 * Order of work: validate the payload, confirm the subject entity exists and
 * is a person, resolve corroboration refs (best-effort), allocate a witness
 * id, INSERT the row, write the markdown file, emit an audit event.
 *
 * @param body - the analysis. Text fields must be non-blank; verdict is capped
 *               at 280 chars, access_to_event and bias_notes at 800 each.
 * @param ctx - job and detective recorded with the analysis.
 * @param opts - `fallback_doc_id` scopes chunk lookups for refs that carry no
 *               doc_id of their own.
 * @returns the allocated witness id, the path of the written case file, and
 *          the credibility and person_entity_pk echoed from the input.
 * @throws Error when a field fails validation, when the entity does not exist,
 *         or when it is not of class 'person'. Errors from the database,
 *         the id allocator, the filesystem or the audit call propagate.
 */
export async function writeWitnessAnalysis(
  body: WriteWitnessAnalysisArgs,
  ctx: WriteWitnessAnalysisContext,
  opts?: { fallback_doc_id?: string },
): Promise<{ witness_id: string; case_file: string; credibility: string; person_entity_pk: number }> {
  // Reject NaN, infinities, fractions and values beyond the safe integer range
  // here rather than sending them to the database as a key.
  // NOTE(review): assumes entities.entity_pk is an integer key — confirm.
  if (!Number.isSafeInteger(body.person_entity_pk)) throw new Error("person_entity_pk required");
  const validBand = ["high", "medium", "low", "speculation"].includes(body.credibility);
  if (!validBand) throw new Error(`bad credibility: ${body.credibility}`);
  if (!body.access_to_event?.trim()) throw new Error("access_to_event required");
  if (!body.bias_notes?.trim()) throw new Error("bias_notes required");
  if (!body.verdict?.trim()) throw new Error("verdict required");
  if (body.verdict.length > 280) throw new Error(`verdict too long (${body.verdict.length} > 280)`);
  if (body.access_to_event.length > 800) throw new Error(`access_to_event too long (${body.access_to_event.length} > 800)`);
  if (body.bias_notes.length > 800) throw new Error(`bias_notes too long (${body.bias_notes.length} > 800)`);

  // Verify entity exists and is a person.
  const ent = await queryOne<{ canonical_name: string; entity_class: string }>(
    `SELECT canonical_name, entity_class FROM public.entities WHERE entity_pk = $1`,
    [body.person_entity_pk],
  );
  if (!ent) throw new Error(`entity not found: pk=${body.person_entity_pk}`);
  if (ent.entity_class !== "person") {
    throw new Error(`entity is not a person: ${ent.entity_class}`);
  }

  // Resolve corroboration refs. Drop unresolvable ones (don't fail the whole call).
  // A missing or non-array value counts as "no refs", and only the first 8
  // entries are looked at.
  const candidateRefs = Array.isArray(body.corroboration_refs) ? body.corroboration_refs.slice(0, 8) : [];
  const refs: ResolvedRef[] = [];
  for (const r of candidateRefs) {
    // A chunk_id that is absent, empty or not a string cannot be looked up;
    // skip it instead of letting the normalizer throw on it.
    if (typeof r?.chunk_id !== "string" || !r.chunk_id) continue;
    const resolved = await resolveRef(r, opts?.fallback_doc_id);
    if (resolved) refs.push(resolved);
  }

  const witness_id = await allocate.witnessId();
  await query(
    `INSERT INTO public.witnesses
       (witness_id, person_entity_pk, credibility, access_to_event,
        bias_notes, corroboration_refs, verdict, created_by)
     VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7, $8)`,
    [
      witness_id, body.person_entity_pk, body.credibility,
      body.access_to_event, body.bias_notes,
      JSON.stringify(refs.map((r) => ({
        chunk_pk: r.chunk_pk, doc_id: r.doc_id, chunk_id: r.chunk_id,
        page: r.page, supports: r.supports,
      }))),
      body.verdict, ctx.detective,
    ],
  );

  // NOTE(review): the row is already inserted at this point; if mkdir or
  // writeFile fails, nothing here removes it, so the row is left without a
  // case file.
  const dir = path.join(env.CASE_ROOT, "witnesses");
  await mkdir(dir, { recursive: true });
  const file = path.join(dir, `${witness_id}.md`);
  await writeFile(file, renderMd(witness_id, ent.canonical_name, body, refs, ctx), "utf-8");

  await audit({
    event: "write_witness_analysis",
    job_id: ctx.job_id,
    detective: ctx.detective,
    witness_id,
    person_entity_pk: body.person_entity_pk,
    canonical_name: ent.canonical_name,
    credibility: body.credibility,
    n_corroboration: refs.length,
    file,
  });

  return {
    witness_id, case_file: file,
    credibility: body.credibility,
    person_entity_pk: body.person_entity_pk,
  };
}