W3.7 followup: harden Dupin scoping + chunk_id parsing
Two regressions surfaced in the smoke test; fixing them took Dupin from
0/3 contradictions written → 3/3 in the next run.
1. Single-doc scope was too narrow for Dupin's task.
Holmes's question about Sandia returned 4 chunks scoped to one doc,
but Dupin's terser "topic" form yielded only 1. Solution: Pass-1
searches within the requested doc_id; if that returns fewer than 2 chunks,
Pass-2 retries across the whole corpus and is kept only if it returns more
chunks than Pass-1. The audit event carries `scope_widened`
so the case-writer can later flag cross-doc contradictions
distinctly. The unscoped retry hit 9 chunks and produced 3
contradictions across 3 different docs.
2. Chunk-block header was ambiguous to the model.
`--- doc-id/p007#c0042 ---` led Claude to parse `chunk_id` as
"p007#c0042" or "p007/c0042" in the JSON output. write_contradiction
then refused the FK lookup with "chunk not found". Fix:
- Explicit `doc_id:` / `chunk_id:` / `page:` lines per chunk
in the rendered block (no slashes/hashes the model can fold).
- Defensive normalizeChunkId() in write_contradiction.ts strips
any pNNN prefix and keeps only the trailing cNNNN — so the
writer is forgiving without losing strictness on the topic +
statement validation.
Smoke now produces (job 6deddf4b):
R-0001 (3 chunks) — Color of the fireball(s) in incident summaries
R-0002 (2 chunks) — Geographic confinement of green-fireball sightings
R-0003 (3 chunks) — Whether the phenomenon was exclusively green or
also red/multicolored
R-0003 connects 3 different declassified documents: the Los Alamos
conference (exclusively-green category), a retrospective document
(red OR green), and Incident 229 (red, blue, yellow — no green).
Real cross-doc contradiction, fully cited.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5ac53cb3e2
commit
25f19aee63
2 changed files with 38 additions and 5 deletions
|
|
@ -35,11 +35,13 @@ export interface DupinTask {
|
||||||
}
|
}
|
||||||
|
|
||||||
function renderChunkBlock(hits: SearchHit[], lang: "pt" | "en"): string {
|
function renderChunkBlock(hits: SearchHit[], lang: "pt" | "en"): string {
|
||||||
const blocks = hits.map((h) => {
|
const blocks = hits.map((h, i) => {
|
||||||
const text = (lang === "en" ? h.content_en : h.content_pt) || h.content_en || h.content_pt || "";
|
const text = (lang === "en" ? h.content_en : h.content_pt) || h.content_en || h.content_pt || "";
|
||||||
const pageStr = String(h.page).padStart(3, "0");
|
|
||||||
return [
|
return [
|
||||||
`--- ${h.doc_id}/p${pageStr}#${h.chunk_id} ---`,
|
`--- chunk ${i + 1} ---`,
|
||||||
|
`doc_id: ${h.doc_id}`,
|
||||||
|
`chunk_id: ${h.chunk_id}`,
|
||||||
|
`page: ${h.page}`,
|
||||||
`type: ${h.type}`,
|
`type: ${h.type}`,
|
||||||
h.classification ? `classification: ${h.classification}` : null,
|
h.classification ? `classification: ${h.classification}` : null,
|
||||||
"",
|
"",
|
||||||
|
|
@ -108,13 +110,33 @@ export async function runDupin(task: DupinTask): Promise<
|
||||||
const lang: "pt" | "en" = task.lang ?? "pt";
|
const lang: "pt" | "en" = task.lang ?? "pt";
|
||||||
const k = task.context_chunks ?? 18;
|
const k = task.context_chunks ?? 18;
|
||||||
|
|
||||||
const hits = await hybridSearch({
|
// Pass 1: scoped to the requested doc, if any.
|
||||||
|
let hits = await hybridSearch({
|
||||||
query: task.topic,
|
query: task.topic,
|
||||||
lang,
|
lang,
|
||||||
doc_id: task.doc_id ?? null,
|
doc_id: task.doc_id ?? null,
|
||||||
top_k: k,
|
top_k: k,
|
||||||
recall_k: 80,
|
recall_k: 80,
|
||||||
});
|
});
|
||||||
|
let scope_widened = false;
|
||||||
|
|
||||||
|
// Pass 2: if a doc_id was set and the head is too thin for a tournament,
|
||||||
|
// widen the scope to the whole corpus. Cross-doc contradictions are
|
||||||
|
// valuable too (one doc says X, another says Y).
|
||||||
|
if (task.doc_id && hits.length < 2) {
|
||||||
|
const widened = await hybridSearch({
|
||||||
|
query: task.topic,
|
||||||
|
lang,
|
||||||
|
doc_id: null,
|
||||||
|
top_k: k,
|
||||||
|
recall_k: 80,
|
||||||
|
});
|
||||||
|
if (widened.length > hits.length) {
|
||||||
|
hits = widened;
|
||||||
|
scope_widened = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
await audit({
|
await audit({
|
||||||
event: "dupin_grounded",
|
event: "dupin_grounded",
|
||||||
job_id: task.job_id,
|
job_id: task.job_id,
|
||||||
|
|
@ -122,6 +144,7 @@ export async function runDupin(task: DupinTask): Promise<
|
||||||
topic: task.topic,
|
topic: task.topic,
|
||||||
n_chunks: hits.length,
|
n_chunks: hits.length,
|
||||||
doc_id: task.doc_id ?? null,
|
doc_id: task.doc_id ?? null,
|
||||||
|
scope_widened,
|
||||||
});
|
});
|
||||||
if (hits.length < 2) {
|
if (hits.length < 2) {
|
||||||
return { skipped: true, reason: "insufficient_corpus" };
|
return { skipped: true, reason: "insufficient_corpus" };
|
||||||
|
|
|
||||||
|
|
@ -48,10 +48,20 @@ interface ResolvedPosition extends ContradictionPosition {
|
||||||
page: number;
|
page: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Strip page-prefix idioms detectives sometimes emit. Canonical chunk_id is
 * just `c0042`. Forms accepted: "c0042", "p007#c0042", "p007/c0042", each
 * optionally surrounded by whitespace (e.g. a trailing newline).
 *
 * @param raw - chunk id as received from the caller.
 * @returns the trailing `c` + 4-or-more-digit token when the trimmed input
 *   ends with one; otherwise `raw` unchanged, so input that does not end in
 *   that pattern is passed through as-is.
 */
function normalizeChunkId(raw: string): string {
  // Trim first: the pattern is anchored at end-of-string, so a trailing
  // space or newline would otherwise defeat the match.
  const m = raw.trim().match(/c\d{4,}$/);
  return m ? m[0] : raw;
}
|
||||||
|
|
||||||
async function resolveChunk(doc_id: string, chunk_id: string): Promise<{ chunk_pk: number; page: number } | null> {
|
async function resolveChunk(doc_id: string, chunk_id: string): Promise<{ chunk_pk: number; page: number } | null> {
|
||||||
|
const cid = normalizeChunkId(chunk_id);
|
||||||
const row = await queryOne<{ chunk_pk: number; page: number }>(
|
const row = await queryOne<{ chunk_pk: number; page: number }>(
|
||||||
`SELECT chunk_pk, page FROM public.chunks WHERE doc_id = $1 AND chunk_id = $2`,
|
`SELECT chunk_pk, page FROM public.chunks WHERE doc_id = $1 AND chunk_id = $2`,
|
||||||
[doc_id, chunk_id],
|
[doc_id, cid],
|
||||||
);
|
);
|
||||||
return row ?? null;
|
return row ?? null;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue