200 lines
6.9 KiB
TypeScript
200 lines
6.9 KiB
TypeScript
|
|
/**
 * write_evidence.ts — Locard's primary writer.
 *
 * The first of the gated write tools (sa-security gate #2). Builds a row in
 * `public.evidence`, allocates E-NNNN via the sequence, writes the matching
 * `case/evidence/E-NNNN.md` markdown file, and audit-logs the act.
 *
 * Discipline (mirrors the spec):
 *   - verbatim_excerpt MUST appear verbatim inside the source chunk content
 *     (we reject if it doesn't — Sonnet is occasionally creative about quoting).
 *   - source_chunk must exist in public.chunks (FK enforced by DB; we also
 *     check up front to give a friendlier error).
 *   - grade A requires ≥ 3 custody steps, B ≥ 2, C ≥ 1.
 *   - related_hypotheses[].hypothesis_id must exist (if provided).
 */
|
||
|
|
import { mkdir, writeFile } from "node:fs/promises";
|
||
|
|
import path from "node:path";
|
||
|
|
import { audit } from "../lib/audit";
|
||
|
|
import { env } from "../lib/env";
|
||
|
|
import { allocate } from "../lib/ids";
|
||
|
|
import { query, queryOne } from "../lib/pg";
|
||
|
|
|
||
|
|
/** One link in an evidence item's chain of custody. */
export interface CustodyStep {
  /** Label for the step; rendered in bold in the evidence markdown. */
  step: string;
  /** Optional locator for the artefact at this step; rendered after the label. */
  uri?: string;
  /** Optional digest string; the markdown shows only its first 16 characters. */
  sha256?: string;
  /** Optional free-text notes, rendered on the line below the step. */
  notes?: string;
}
|
||
|
|
|
||
|
|
/** Payload accepted by `writeEvidence` describing one evidence item. */
export interface WriteEvidenceArgs {
  /**
   * Quoted text in the source language. `writeEvidence` rejects it when the
   * trimmed value is shorter than 8 characters or is not found in the chunk.
   */
  verbatim_excerpt: string;
  /** Document id; matched against `public.chunks.doc_id`. */
  source_doc_id: string;
  /** Chunk id; matched against `public.chunks.chunk_id`. */
  source_chunk_id: string;
  /**
   * Page is 1-indexed; source_page_id is derived from doc + page + chunk id
   * (doc + chunk id only when no usable page is given).
   */
  page?: number;
  /** Optional bounding box, stored as JSON. Units are not established in this file. */
  bbox?: { x: number; y: number; w: number; h: number };
  /** Evidence grade; A requires ≥ 3 custody steps, B ≥ 2, C ≥ 1. */
  grade: "A" | "B" | "C";
  /** Chain of custody, in order; rendered as a numbered list. */
  custody_steps: CustodyStep[];
  /** Optional known gaps in the chain; rendered under a "Gaps" heading. */
  custody_gaps?: string[];
  /** Optional confidence label, written to the row and the front matter. */
  confidence_band?: "high" | "medium" | "low" | "speculation";
  /**
   * Optional links to hypotheses. Each `hypothesis_id` must already exist;
   * `supports: false` is rendered as "refutes".
   */
  related_hypotheses?: Array<{ hypothesis_id: string; supports: boolean }>;
}
|
||
|
|
|
||
|
|
/** Caller context attached to an evidence write. */
export interface WriteEvidenceContext {
  /** Job identifier; written to the markdown front matter and the audit event. */
  job_id: string;
  /** Author of the write; stored as `created_by` and included in the audit event. */
  detective: string;
}
|
||
|
|
|
||
|
|
/** Minimum number of custody steps an evidence item must carry, keyed by grade. */
const GRADE_MIN_STEPS: Record<string, number> = {
  A: 3,
  B: 2,
  C: 1,
};
|
||
|
|
|
||
|
|
/**
 * Builds the `source_page_id` reference stored with an evidence row.
 *
 * @param doc - Source document id.
 * @param p - 1-indexed page number, if known.
 * @param chunkId - Source chunk id, appended after `#`.
 * @returns `<doc>/pNNN#<chunkId>` (page zero-padded to at least three digits)
 *   when `p` is a positive integer; otherwise the doc-only form
 *   `<doc>#<chunkId>`.
 */
function pageId(doc: string, p: number | undefined, chunkId: string): string {
  // Prefer an explicit page, but only a positive whole number is usable.
  // Anything else (undefined, 0, negatives, fractions, NaN, Infinity) is left
  // doc-only so we never emit ids like "p1.5" or "pInfinity"; the lint is
  // expected to catch the malformed reference later.
  if (typeof p === "number" && Number.isInteger(p) && p > 0) {
    return `${doc}/p${String(p).padStart(3, "0")}#${chunkId}`;
  }
  return `${doc}#${chunkId}`;
}
|
||
|
|
|
||
|
|
/**
 * Renders the markdown case file for one evidence item: front matter, then
 * the verbatim excerpt, chain of custody, source links and linked hypotheses.
 *
 * @param args.evidence_id - Allocated evidence id, used in the front matter and title.
 * @param args.body - The evidence payload being written.
 * @param args.ctx - Caller context (`detective`, `job_id`) recorded in the front matter.
 * @returns The full markdown document. `created_at` is the current time in
 *   ISO-8601 form, so the output is not reproducible across calls.
 */
function renderMd(args: {
  evidence_id: string;
  body: WriteEvidenceArgs;
  ctx: WriteEvidenceContext;
}): string {
  const { evidence_id, body, ctx } = args;

  // A page is rendered only when it is a positive integer. A plain truthiness
  // test would let negative or fractional values through and produce a bogus
  // `source_page` and page link for input that pageId treats as "no page".
  const page =
    typeof body.page === "number" && Number.isInteger(body.page) && body.page > 0
      ? body.page
      : undefined;

  // NOTE(review): values are interpolated into the front matter unescaped;
  // confirm upstream validation rules out newlines/colons in ids and names.
  const fm = [
    "---",
    `schema_version: "0.1.0"`,
    `type: evidence`,
    `evidence_id: ${evidence_id}`,
    `source_doc: ${body.source_doc_id}`,
    `source_chunk: ${body.source_chunk_id}`,
    page !== undefined ? `source_page: ${page}` : null,
    `grade: ${body.grade}`,
    body.confidence_band ? `confidence_band: ${body.confidence_band}` : null,
    `created_by: ${ctx.detective}`,
    `job_id: ${ctx.job_id}`,
    `created_at: ${new Date().toISOString()}`,
    body.bbox
      ? `bbox: { x: ${body.bbox.x}, y: ${body.bbox.y}, w: ${body.bbox.w}, h: ${body.bbox.h} }`
      : null,
    "---",
  ]
    // Drop the optional keys that were not supplied.
    .filter((line): line is string => line !== null)
    .join("\n");

  // Numbered custody list; the digest is truncated to 16 characters for display.
  const custody = body.custody_steps
    .map((s, i) => {
      const uri = s.uri ? ` — ${s.uri}` : "";
      const digest = s.sha256 ? ` (sha256: \`${s.sha256.slice(0, 16)}…\`)` : "";
      const notes = s.notes ? `\n ${s.notes}` : "";
      return ` ${i + 1}. **${s.step}**${uri}${digest}${notes}`;
    })
    .join("\n");

  const gaps = (body.custody_gaps ?? []).map((g) => ` - ${g}`).join("\n");

  const links = (body.related_hypotheses ?? [])
    .map((h) => ` - [[hypothesis/${h.hypothesis_id}]] (${h.supports ? "supports" : "refutes"})`)
    .join("\n");

  return [
    fm,
    "",
    `# Evidence ${evidence_id}`,
    "",
    "## Verbatim excerpt (source language)",
    "",
    // Keep multi-line excerpts inside one blockquote; runs of newlines collapse to one.
    "> " + body.verbatim_excerpt.replace(/\n+/g, "\n> "),
    "",
    "## Chain of custody",
    "",
    custody,
    gaps ? "\n### Gaps\n\n" + gaps : "",
    "",
    "## Source",
    "",
    `- Doc: [[${body.source_doc_id}]]`,
    `- Chunk: [[${body.source_doc_id}#${body.source_chunk_id}]]`,
    page !== undefined
      ? `- Page: [[${body.source_doc_id}/p${String(page).padStart(3, "0")}]]`
      : "",
    "",
    "## Linked hypotheses",
    "",
    links || "_(none yet)_",
    "",
  ].join("\n");
}
|
||
|
|
|
||
|
|
/**
 * Validates and persists one evidence item.
 *
 * Order of operations: grade/custody validation, source chunk lookup and
 * verbatim check, linked-hypothesis existence checks, id allocation and
 * INSERT into `public.evidence`, markdown file write under
 * `<CASE_ROOT>/evidence/`, then the audit event.
 *
 * @param body - The evidence payload.
 * @param ctx - Caller context recorded on the row, the file and the audit event.
 * @returns The allocated evidence id and the path of the markdown file written.
 * @throws Error when the grade is not A/B/C, when there are too few custody
 *   steps for the grade, when the source chunk does not exist, when the
 *   excerpt is shorter than 8 characters or not present verbatim in the
 *   chunk, or when a linked hypothesis does not exist. Errors from the
 *   database, filesystem or audit helpers propagate unchanged.
 */
export async function writeEvidence(
  body: WriteEvidenceArgs,
  ctx: WriteEvidenceContext,
): Promise<{ evidence_id: string; case_file: string }> {
  // 1. Validate grade minimums (sa-security gate #2: schema check before INSERT).
  // Own-property lookup only: an inherited key such as "constructor" would
  // resolve to a non-number and make the length comparison below always pass.
  const minSteps = Object.prototype.hasOwnProperty.call(GRADE_MIN_STEPS, body.grade)
    ? GRADE_MIN_STEPS[body.grade]
    : undefined;
  if (minSteps === undefined) {
    throw new Error(`invalid grade: ${String(body.grade)} (expected A, B or C)`);
  }
  if (!Array.isArray(body.custody_steps) || body.custody_steps.length < minSteps) {
    throw new Error(`grade ${body.grade} requires ≥ ${minSteps} custody steps; got ${body.custody_steps?.length ?? 0}`);
  }

  // 2. Resolve chunk_pk + verify the verbatim excerpt appears in the chunk.
  const chunk = await queryOne<{ chunk_pk: number; content_en: string | null; content_pt: string | null }>(
    `SELECT chunk_pk, content_en, content_pt
       FROM public.chunks
      WHERE doc_id = $1 AND chunk_id = $2
      LIMIT 1`,
    [body.source_doc_id, body.source_chunk_id],
  );
  if (!chunk) {
    throw new Error(`chunk_not_found: ${body.source_doc_id}#${body.source_chunk_id}`);
  }

  // A non-string excerpt is treated as empty so it fails the length check
  // with a normal Error instead of a TypeError.
  const needle = typeof body.verbatim_excerpt === "string" ? body.verbatim_excerpt.trim() : "";
  if (needle.length < 8) {
    throw new Error(`verbatim_excerpt too short (${needle.length} chars; min 8)`);
  }
  // The WHOLE excerpt must occur in one language version of the chunk.
  // Checking only a prefix would accept a genuine opening followed by an
  // invented tail, and searching a concatenation of both versions could match
  // text that straddles the boundary between them.
  const foundVerbatim = [chunk.content_en, chunk.content_pt].some(
    (content) => typeof content === "string" && content.includes(needle),
  );
  if (!foundVerbatim) {
    throw new Error(`verbatim_excerpt not found in source chunk — Sonnet must not paraphrase`);
  }

  // 3. (Optional) validate hypothesis refs exist.
  for (const ref of body.related_hypotheses ?? []) {
    const h = await queryOne<{ hypothesis_pk: number }>(
      `SELECT hypothesis_pk FROM public.hypotheses WHERE hypothesis_id = $1`,
      [ref.hypothesis_id],
    );
    if (!h) throw new Error(`linked hypothesis not found: ${ref.hypothesis_id}`);
  }

  // 4. Allocate id + INSERT.
  const evidence_id = await allocate.evidenceId();
  await query(
    `INSERT INTO public.evidence
       (evidence_id, verbatim_excerpt, source_chunk_pk, source_page_id, bbox,
        grade, custody_steps, custody_gaps, confidence_band, related_hypotheses, created_by)
     VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7::jsonb, $8::jsonb, $9, $10::jsonb, $11)`,
    [
      evidence_id,
      body.verbatim_excerpt,
      chunk.chunk_pk,
      pageId(body.source_doc_id, body.page, body.source_chunk_id),
      body.bbox ? JSON.stringify(body.bbox) : null,
      body.grade,
      JSON.stringify(body.custody_steps),
      body.custody_gaps ? JSON.stringify(body.custody_gaps) : null,
      body.confidence_band ?? null,
      JSON.stringify(body.related_hypotheses ?? []),
      ctx.detective,
    ],
  );

  // 5. Write the case markdown file.
  // NOTE(review): nothing here rolls back the INSERT above, so a failure in
  // the file write or the audit call leaves a row without its file/audit
  // entry — confirm whether a transaction or cleanup is wanted.
  const dir = path.join(env.CASE_ROOT, "evidence");
  await mkdir(dir, { recursive: true });
  const filepath = path.join(dir, `${evidence_id}.md`);
  await writeFile(filepath, renderMd({ evidence_id, body, ctx }), "utf-8");

  // 6. Audit.
  await audit({
    event: "write_evidence",
    job_id: ctx.job_id,
    detective: ctx.detective,
    evidence_id,
    source_doc: body.source_doc_id,
    source_chunk: body.source_chunk_id,
    grade: body.grade,
    file: filepath,
  });

  return { evidence_id, case_file: filepath };
}
|