/**
 * write_contradiction.ts — Dupin's primary writer.
 *
 * Inserts a row into public.contradictions and renders
 * case/contradictions/R-NNNN.md.
 *
 * Validates:
 *   - topic + at least 2 positions
 *   - each position has a chunk_pk (resolved from doc_id + chunk_id via the DB)
 *   - position.statement is non-empty
 *   - notes ≤ 4000 chars
 *   - bilingual contract: topic_pt_br, statement_pt_br, and notes_pt_br
 *     (whenever notes is set) must be present
 *   - positions must reference at least 2 distinct chunks
 *
 * Naming uses the R-NNNN slot from contradiction_id_seq. Note: CLAUDE.md
 * historically reserved R-NNNN for `relation` artefacts (chief-detective);
 * contradictions are the same conceptual class (a connection between two
 * pieces of evidence in tension), so they share the slot.
 */
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { allocate } from "../lib/ids";
import { query, queryOne } from "../lib/pg";

export interface ContradictionPosition {
  doc_id: string;
  chunk_id: string;
  /** The verbatim or paraphrased claim that puts this chunk on this side. */
  statement: string;
  statement_pt_br?: string;
  /** Optional weight or stance label (e.g. "asserts", "denies"). */
  stance?: string;
}

export interface WriteContradictionArgs {
  topic: string;
  topic_pt_br?: string;
  positions: ContradictionPosition[];
  notes?: string;
  notes_pt_br?: string;
  resolution_status?: "open" | "resolved" | "irreconcilable";
}

export interface WriteContradictionContext {
  job_id: string;
  detective: string;
}

/** A position after validation: trimmed text plus the chunk row it resolved to. */
interface ResolvedPosition extends ContradictionPosition {
  chunk_pk: number;
  page: number;
  statement_pt_br: string;
}

/**
 * Strip page-prefix idioms detectives sometimes emit. Canonical chunk_id is
 * just `c0042`. Forms accepted: "c0042", "p007#c0042", "p007/c0042".
 *
 * Surrounding whitespace is ignored. Input that does not end in `c` followed
 * by at least four digits is returned (trimmed) unchanged.
 */
function normalizeChunkId(raw: string): string {
  const trimmed = raw.trim();
  const m = trimmed.match(/c\d{4,}$/);
  return m ? m[0] : trimmed;
}

/**
 * Look up a chunk row by document id and (normalized) chunk id.
 *
 * @returns the chunk's primary key and page, or `null` when no row matches.
 */
async function resolveChunk(
  doc_id: string,
  chunk_id: string,
): Promise<{ chunk_pk: number; page: number } | null> {
  const cid = normalizeChunkId(chunk_id);
  const row = await queryOne<{ chunk_pk: number; page: number }>(
    `SELECT chunk_pk, page FROM public.chunks WHERE doc_id = $1 AND chunk_id = $2`,
    [doc_id, cid],
  );
  return row ?? null;
}

/**
 * Render the bilingual markdown case file for a contradiction: a front-matter
 * header, one section per position with a wikilink back to its source chunk,
 * and the EN / PT-BR notes (or a placeholder when none were given).
 */
function renderMd(
  id: string,
  body: WriteContradictionArgs,
  positions: ResolvedPosition[],
  ctx: WriteContradictionContext,
): string {
  const fm = [
    "---",
    `schema_version: "0.1.0"`,
    `type: contradiction`,
    `contradiction_id: ${id}`,
    // JSON.stringify yields a double-quoted, escaped scalar for the topic.
    `topic: ${JSON.stringify(body.topic)}`,
    `resolution_status: ${body.resolution_status ?? "open"}`,
    `detected_by: ${ctx.detective}`,
    `job_id: ${ctx.job_id}`,
    `created_at: ${new Date().toISOString()}`,
    "---",
  ].join("\n");

  const positionBlocks = positions.map((p, i) => {
    // Pages are zero-padded to three digits in the wikilink (e.g. p007).
    const pageStr = String(p.page).padStart(3, "0");
    return [
      `### Position ${i + 1}${p.stance ? ` — ${p.stance}` : ""}`,
      "",
      `**(EN)** > ${p.statement}`,
      "",
      `**(PT-BR)** > ${p.statement_pt_br}`,
      "",
      `Source: [[${p.doc_id}/p${pageStr}#${p.chunk_id}]]`,
    ].join("\n");
  });

  return [
    fm,
    "",
    `# Contradiction ${id}`,
    "",
    `**Topic (EN).** ${body.topic}`,
    `**Tópico (PT-BR).** ${body.topic_pt_br ?? body.topic}`,
    "",
    "## Positions in tension",
    "",
    positionBlocks.join("\n\n"),
    "",
    "## Notes (EN)",
    "",
    body.notes || "_(no commentary recorded)_",
    "",
    "## Notas (PT-BR)",
    "",
    body.notes_pt_br || "_(sem comentário registrado)_",
    "",
  ].join("\n");
}

/**
 * Validate a contradiction, insert it into public.contradictions, render its
 * markdown case file under `<CASE_ROOT>/contradictions/`, and emit an audit
 * event.
 *
 * Identifiers are canonicalized before they are stored or rendered: doc_id is
 * trimmed and chunk_id is reduced to its canonical `cNNNN` form, so the DB
 * payload and the wikilinks in the case file match the chunk row that was
 * actually resolved.
 *
 * @param body - topic, positions and optional notes (EN + PT-BR).
 * @param ctx - job and detective identifiers recorded with the contradiction.
 * @returns the allocated contradiction id and the path of the rendered file.
 * @throws Error when validation fails, when a referenced chunk is not found,
 *   or when the positions do not span at least 2 distinct chunks.
 */
export async function writeContradiction(
  body: WriteContradictionArgs,
  ctx: WriteContradictionContext,
): Promise<{ contradiction_id: string; case_file: string }> {
  const topic = body.topic?.trim();
  if (!topic) throw new Error("topic required");
  const topicPtBr = body.topic_pt_br?.trim();
  if (!topicPtBr) throw new Error("topic_pt_br required (bilingual contract)");
  if (!Array.isArray(body.positions) || body.positions.length < 2) {
    throw new Error("at least 2 positions required");
  }
  if (body.notes && body.notes.length > 4000) {
    throw new Error(`notes too long (${body.notes.length} > 4000)`);
  }
  if (body.notes && !body.notes_pt_br?.trim()) {
    throw new Error("notes_pt_br required when notes set (bilingual contract)");
  }

  const resolved: ResolvedPosition[] = [];
  // Resolved one at a time so the first invalid position (in input order) is
  // the one reported.
  for (const p of body.positions) {
    const docId = p?.doc_id?.trim();
    const rawChunkId = p?.chunk_id?.trim();
    if (!docId || !rawChunkId) {
      throw new Error("position requires doc_id + chunk_id");
    }
    const statement = p.statement?.trim();
    if (!statement) {
      throw new Error(`position ${p.doc_id}/${p.chunk_id} missing statement`);
    }
    const statementPtBr = p.statement_pt_br?.trim();
    if (!statementPtBr) {
      throw new Error(`position ${p.doc_id}/${p.chunk_id} missing statement_pt_br (bilingual contract)`);
    }

    // Canonicalize once and keep the result: storing the raw id (e.g.
    // "p007#c0042") would produce a broken wikilink such as
    // [[doc/p007#p007#c0042]] and a payload that disagrees with the chunk row.
    const chunkId = normalizeChunkId(rawChunkId);
    const chunk = await resolveChunk(docId, chunkId);
    if (!chunk) {
      throw new Error(`chunk ${p.doc_id}/${p.chunk_id} not found`);
    }
    resolved.push({
      ...p,
      doc_id: docId,
      chunk_id: chunkId,
      statement,
      statement_pt_br: statementPtBr,
      chunk_pk: chunk.chunk_pk,
      page: chunk.page,
    });
  }

  // Reject pairs that point at the same chunk on every side — that's not a
  // contradiction, that's a single statement.
  const uniqueChunks = new Set(resolved.map((p) => p.chunk_pk));
  if (uniqueChunks.size < 2) {
    throw new Error("contradiction requires positions from at least 2 distinct chunks");
  }

  // The id is allocated only after all validation has passed.
  const contradiction_id = await allocate.contradictionId();

  const chunkPayload = resolved.map((p) => ({
    chunk_pk: p.chunk_pk,
    doc_id: p.doc_id,
    chunk_id: p.chunk_id,
    page: p.page,
    statement: p.statement,
    statement_pt_br: p.statement_pt_br,
    stance: p.stance ?? null,
  }));

  await query(
    `INSERT INTO public.contradictions (contradiction_id, topic, topic_pt_br, chunks, detected_by, resolution_status, notes, notes_pt_br) VALUES ($1, $2, $3, $4::jsonb, $5, $6, $7, $8)`,
    [
      contradiction_id,
      topic,
      topicPtBr,
      JSON.stringify(chunkPayload),
      ctx.detective,
      body.resolution_status ?? "open",
      body.notes ?? null,
      body.notes_pt_br ?? null,
    ],
  );

  // NOTE(review): if mkdir/writeFile throws, the row inserted above is not
  // rolled back by this function.
  const dir = path.join(env.CASE_ROOT, "contradictions");
  await mkdir(dir, { recursive: true });
  const file = path.join(dir, `${contradiction_id}.md`);
  // Render with the same trimmed topic values that were written to the DB so
  // the case file and the row agree.
  const rendered = renderMd(
    contradiction_id,
    { ...body, topic, topic_pt_br: topicPtBr },
    resolved,
    ctx,
  );
  await writeFile(file, rendered, "utf-8");

  await audit({
    event: "write_contradiction",
    job_id: ctx.job_id,
    detective: ctx.detective,
    contradiction_id,
    n_positions: resolved.length,
    distinct_chunks: uniqueChunks.size,
    file,
  });

  return { contradiction_id, case_file: file };
}