From 0a5c03c29aa948c6433abac7a59cd8bd02afae29 Mon Sep 17 00:00:00 2001 From: Luiz Gustavo Date: Sun, 24 May 2026 12:11:35 -0300 Subject: [PATCH] W4 followup: Poirot soft-truncate at sentence boundary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live PT-BR smoke on j-edgar-hoover produced verdict_pt_br at 304 chars (prompt says ≤ 280). The writer correctly rejected it ("verdict too long (304 > 280)") but the job failed instead of trimming. Fix: detective now trims each language field at the nearest sentence boundary (period or semicolon) above 60% of the cap; falls back to a hard cut at the cap. Applied to verdict / verdict_pt_br (≤280), and to access_to_event*, bias_notes* (≤800) for defense in depth. The contract with the writer stays strict; the detective just becomes forgiving about the model going 5-10% over. Co-Authored-By: Claude Opus 4.7 (1M context) --- investigator-runtime/src/detectives/poirot.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/investigator-runtime/src/detectives/poirot.ts b/investigator-runtime/src/detectives/poirot.ts index 39edef1..36c53fc 100644 --- a/investigator-runtime/src/detectives/poirot.ts +++ b/investigator-runtime/src/detectives/poirot.ts @@ -244,6 +244,23 @@ export async function runPoirot(task: PoirotTask): Promise< return { skipped: true, reason: "incomplete_bilingual_analysis" }; } + // Soft-truncate before sending to the writer: the prompt asks ≤ 280 chars + // per language but the model occasionally goes slightly over (304 chars + // observed live with j-edgar-hoover PT-BR). Truncate at sentence boundary + // when possible, else at the cap. + const trimTo = (s: string, max: number): string => { + if (s.length <= max) return s; + const cut = s.slice(0, max); + const lastPeriod = Math.max(cut.lastIndexOf(". "), cut.lastIndexOf("; ")); + return (lastPeriod > max * 0.6 ? cut.slice(0, lastPeriod + 1) : cut).trim(); + }; + args.verdict = trimTo(args.verdict, 280); + args.verdict_pt_br = trimTo(args.verdict_pt_br, 280); + args.access_to_event = trimTo(args.access_to_event, 800); + args.access_to_event_pt_br = trimTo(args.access_to_event_pt_br, 800); + args.bias_notes = trimTo(args.bias_notes, 800); + args.bias_notes_pt_br = trimTo(args.bias_notes_pt_br, 800); + // Pass the shortlist's most-represented doc_id as a fallback for chunk_id // resolution in case the model emits a bare "c0042" without doc_id. const docCount = new Map();