/**
 * write_evidence.ts — Locard's primary writer.
 *
 * The first of the gated write tools (sa-security gate #2). Builds a row in
 * `public.evidence`, allocates E-NNNN via the sequence, writes the matching
 * `case/evidence/E-NNNN.md` markdown file, and audit-logs the act.
 *
 * Discipline (mirrors the spec):
 *   - verbatim_excerpt MUST appear verbatim inside the source chunk content
 *     (we reject if it doesn't — Sonnet is occasionally creative about quoting).
 *     The whole trimmed excerpt is matched, not just a prefix.
 *   - source_chunk must exist in public.chunks (FK enforced by DB; we also
 *     check up front to give a friendlier error).
 *   - grade A requires ≥ 3 custody steps, B ≥ 2, C ≥ 1.
 *   - related_hypotheses[].hypothesis_id must exist (if provided).
 */
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { allocate } from "../lib/ids";
import { query, queryOne } from "../lib/pg";

/** One link in an evidence item's chain of custody. */
export interface CustodyStep {
  step: string;
  uri?: string;
  /** Only the first 16 characters are shown in the rendered markdown. */
  sha256?: string;
  notes?: string;
}

/** Payload accepted by {@link writeEvidence}. */
export interface WriteEvidenceArgs {
  verbatim_excerpt: string;
  source_doc_id: string;
  source_chunk_id: string;
  /** Page is 1-indexed; we derive source_page_id from doc+page. */
  page?: number;
  bbox?: { x: number; y: number; w: number; h: number };
  grade: "A" | "B" | "C";
  custody_steps: CustodyStep[];
  custody_gaps?: string[];
  confidence_band?: "high" | "medium" | "low" | "speculation";
  related_hypotheses?: Array<{ hypothesis_id: string; supports: boolean }>;
}

/** Who is writing the evidence, and on behalf of which job. */
export interface WriteEvidenceContext {
  job_id: string;
  detective: string;
}

/**
 * Minimum number of custody steps required per grade.
 * Keyed by plain string so the `?? 99` fallback in writeEvidence still guards
 * against out-of-contract grades arriving at runtime.
 */
const GRADE_MIN_STEPS: Record<string, number> = { A: 3, B: 2, C: 1 };

/** True when `p` is a usable page number (a number greater than zero). */
function hasExplicitPage(p: number | undefined): p is number {
  return typeof p === "number" && p > 0;
}

/**
 * Build the `source_page_id` stored on the evidence row.
 *
 * @returns `doc/pNNN#chunk` when a usable page is given, otherwise `doc#chunk`.
 */
function pageId(doc: string, p: number | undefined, chunkId: string): string {
  // Prefer explicit page; otherwise we leave it doc-only and the lint
  // catches the malformed reference later.
  if (hasExplicitPage(p)) {
    return `${doc}/p${String(p).padStart(3, "0")}#${chunkId}`;
  }
  return `${doc}#${chunkId}`;
}

/**
 * Render the markdown case file for one evidence item: YAML front matter,
 * the quoted excerpt, chain of custody (plus gaps), source links and linked
 * hypotheses.
 */
function renderMd(args: {
  evidence_id: string;
  body: WriteEvidenceArgs;
  ctx: WriteEvidenceContext;
}): string {
  const { evidence_id, body, ctx } = args;

  // Use the same page test as pageId() so the markdown and the DB row can
  // never disagree about whether a page reference exists.
  const page = hasExplicitPage(body.page) ? body.page : undefined;

  const fm = [
    "---",
    `schema_version: "0.1.0"`,
    `type: evidence`,
    `evidence_id: ${evidence_id}`,
    `source_doc: ${body.source_doc_id}`,
    `source_chunk: ${body.source_chunk_id}`,
    page !== undefined ? `source_page: ${page}` : null,
    `grade: ${body.grade}`,
    body.confidence_band ? `confidence_band: ${body.confidence_band}` : null,
    `created_by: ${ctx.detective}`,
    `job_id: ${ctx.job_id}`,
    `created_at: ${new Date().toISOString()}`,
    body.bbox
      ? `bbox: { x: ${body.bbox.x}, y: ${body.bbox.y}, w: ${body.bbox.w}, h: ${body.bbox.h} }`
      : null,
    "---",
  ]
    .filter((line): line is string => line !== null)
    .join("\n");

  const custody = body.custody_steps
    .map((s, i) => {
      const uri = s.uri ? ` — ${s.uri}` : "";
      const digest = s.sha256 ? ` (sha256: \`${s.sha256.slice(0, 16)}…\`)` : "";
      const notes = s.notes ? `\n ${s.notes}` : "";
      return ` ${i + 1}. **${s.step}**${uri}${digest}${notes}`;
    })
    .join("\n");

  const gaps = (body.custody_gaps ?? []).map((g) => ` - ${g}`).join("\n");

  const links = (body.related_hypotheses ?? [])
    .map((h) => ` - [[hypothesis/${h.hypothesis_id}]] (${h.supports ? "supports" : "refutes"})`)
    .join("\n");

  return [
    fm,
    "",
    `# Evidence ${evidence_id}`,
    "",
    "## Verbatim excerpt (source language)",
    "",
    // Runs of newlines collapse to one so every line stays inside the blockquote.
    "> " + body.verbatim_excerpt.replace(/\n+/g, "\n> "),
    "",
    "## Chain of custody",
    "",
    custody,
    gaps ? "\n### Gaps\n\n" + gaps : "",
    "",
    "## Source",
    "",
    `- Doc: [[${body.source_doc_id}]]`,
    `- Chunk: [[${body.source_doc_id}#${body.source_chunk_id}]]`,
    page !== undefined
      ? `- Page: [[${body.source_doc_id}/p${String(page).padStart(3, "0")}]]`
      : "",
    "",
    "## Linked hypotheses",
    "",
    links || "_(none yet)_",
    "",
  ].join("\n");
}

/**
 * Validate an evidence payload, insert it into `public.evidence`, write the
 * matching markdown case file and emit an audit event.
 *
 * The caller's `body` is never mutated; hypothesis links with a missing or
 * blank id are dropped on a local copy only.
 *
 * @param body - Evidence payload (typically model-generated, so it is validated here).
 * @param ctx - Job id and detective name, recorded on the row, file and audit event.
 * @returns The allocated evidence id and the path of the markdown file written.
 * @throws Error when the grade's custody-step minimum is not met, the source
 *   chunk does not exist, the excerpt is not a string, is shorter than 8
 *   characters or does not appear verbatim in the chunk, or a linked
 *   hypothesis does not exist.
 */
export async function writeEvidence(
  body: WriteEvidenceArgs,
  ctx: WriteEvidenceContext,
): Promise<{ evidence_id: string; case_file: string }> {
  // 1. Validate grade minimums (sa-security gate #2: schema check before INSERT).
  const minSteps = GRADE_MIN_STEPS[body.grade] ?? 99;
  if (!Array.isArray(body.custody_steps) || body.custody_steps.length < minSteps) {
    throw new Error(
      `grade ${body.grade} requires ≥ ${minSteps} custody steps; got ${body.custody_steps?.length ?? 0}`,
    );
  }

  // 2. Resolve chunk_pk + verify the verbatim excerpt appears in the chunk.
  const chunk = await queryOne<{
    chunk_pk: number;
    content_en: string | null;
    content_pt: string | null;
  }>(
    `SELECT chunk_pk, content_en, content_pt
       FROM public.chunks
      WHERE doc_id = $1 AND chunk_id = $2
      LIMIT 1`,
    [body.source_doc_id, body.source_chunk_id],
  );
  if (!chunk) {
    throw new Error(`chunk_not_found: ${body.source_doc_id}#${body.source_chunk_id}`);
  }

  // The payload is model-generated: fail with a clear message rather than a
  // TypeError from .trim() when the excerpt is missing or not a string.
  if (typeof body.verbatim_excerpt !== "string") {
    throw new Error(`verbatim_excerpt must be a string`);
  }
  const needle = body.verbatim_excerpt.trim();
  if (needle.length < 8) {
    throw new Error(`verbatim_excerpt too short (${needle.length} chars; min 8)`);
  }
  // Match the WHOLE excerpt (a prefix-only check would let a paraphrased tail
  // through), and match each language separately so an excerpt can never
  // "appear" by straddling the seam between the two texts.
  const appearsVerbatim = [chunk.content_en, chunk.content_pt].some(
    (text) => typeof text === "string" && text.includes(needle),
  );
  if (!appearsVerbatim) {
    throw new Error(`verbatim_excerpt not found in source chunk — Sonnet must not paraphrase`);
  }

  // 3. (Optional) validate hypothesis refs exist. Drop empty/null IDs silently
  //    — Locard may emit `related_hypotheses: [{}]` when it knows of no link yet.
  const validRefs = (body.related_hypotheses ?? []).filter(
    (r) => typeof r?.hypothesis_id === "string" && r.hypothesis_id.trim().length > 0,
  );
  for (const ref of validRefs) {
    const h = await queryOne<{ hypothesis_pk: number }>(
      `SELECT hypothesis_pk FROM public.hypotheses WHERE hypothesis_id = $1`,
      [ref.hypothesis_id],
    );
    if (!h) throw new Error(`linked hypothesis not found: ${ref.hypothesis_id}`);
  }
  // Work on a copy so the caller's object is left untouched.
  const record: WriteEvidenceArgs = { ...body, related_hypotheses: validRefs };

  // 4. Allocate id + INSERT.
  const evidence_id = await allocate.evidenceId();
  await query(
    `INSERT INTO public.evidence
       (evidence_id, verbatim_excerpt, source_chunk_pk, source_page_id, bbox,
        grade, custody_steps, custody_gaps, confidence_band, related_hypotheses,
        created_by)
     VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7::jsonb, $8::jsonb, $9, $10::jsonb, $11)`,
    [
      evidence_id,
      record.verbatim_excerpt,
      chunk.chunk_pk,
      pageId(record.source_doc_id, record.page, record.source_chunk_id),
      record.bbox ? JSON.stringify(record.bbox) : null,
      record.grade,
      JSON.stringify(record.custody_steps),
      record.custody_gaps ? JSON.stringify(record.custody_gaps) : null,
      record.confidence_band ?? null,
      JSON.stringify(validRefs),
      ctx.detective,
    ],
  );

  // 5. Write the case markdown file.
  // NOTE(review): the row is inserted before the file is written and no
  // rollback is visible here, so a failed write leaves a row without a file.
  const dir = path.join(env.CASE_ROOT, "evidence");
  await mkdir(dir, { recursive: true });
  const filepath = path.join(dir, `${evidence_id}.md`);
  await writeFile(filepath, renderMd({ evidence_id, body: record, ctx }), "utf-8");

  // 6. Audit.
  await audit({
    event: "write_evidence",
    job_id: ctx.job_id,
    detective: ctx.detective,
    evidence_id,
    source_doc: record.source_doc_id,
    source_chunk: record.source_chunk_id,
    grade: record.grade,
    file: filepath,
  });

  return { evidence_id, case_file: filepath };
}