From 5ac53cb3e2b9ae0c28922e0e2d0991d19085cc78 Mon Sep 17 00:00:00 2001 From: Luiz Gustavo Date: Sat, 23 May 2026 21:34:04 -0300 Subject: [PATCH] W3.7: Dupin contradiction-scan detective + UI integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the third AI detective in the Investigation Bureau runtime: C. Auguste Dupin, who scans a corpus shortlist for pairs (or small groups) of chunks that cannot both be true under any ordinary reading. Runtime: - prompts/dupin.md — discipline (no contradiction without ≥2 distinct chunk_ids; reject same-vocabulary near-misses; FEW high-confidence over MANY weak ones; emit `NO_CONTRADICTIONS` when corpus is silent) - src/detectives/dupin.ts — hybridSearch with k=18 (more chunks than Holmes because contradictions emerge from comparing dispersed claims), strict JSON-array parsing, AT MOST 3 contradictions per call - src/tools/write_contradiction.ts — validates topic + ≥2 positions drawn from ≥2 distinct chunks, resolves chunk_pk via DB lookup (rejects positions citing unknown chunks), INSERTs into public.contradictions + writes case/contradictions/R-NNNN.md - orchestrator: new `contradiction_scan` kind dispatching to runDupin; payload { topic, doc_id?, lang?, context_chunks? 
} Chat + UI: - request_investigation gains kind=contradiction_scan + topic arg; triggered detective auto-resolves to dupin - chat-bubble inline card renders dupin in orange (#ff8a4d) to distinguish from holmes (cyan) and locard (green) - /jobs/[id] page swaps title + subtitle + tone per detective; "Question" label becomes "Topic" for contradiction_scan - /api/jobs/[id] hydrates public.contradictions when outputs[] surfaces contradiction_ids - job-status-poller renders ContradictionCard: topic + N positions (verbatim statements quoted, stance label optional, link to source chunk) + optional notes panel, with resolution_status badge (open/resolved/irreconcilable) R-NNNN shares the contradiction_id_seq slot with relation per CLAUDE.md naming — same conceptual class (a connection between two pieces of evidence in tension). Co-Authored-By: Claude Opus 4.7 (1M context) --- investigator-runtime/prompts/dupin.md | 73 +++++++ investigator-runtime/src/detectives/dupin.ts | 185 +++++++++++++++++ investigator-runtime/src/orchestrator.ts | 20 ++ .../src/tools/write_contradiction.ts | 186 ++++++++++++++++++ web/app/api/jobs/[id]/route.ts | 23 ++- web/app/jobs/[id]/page.tsx | 40 ++-- web/components/chat-bubble.tsx | 18 +- web/components/job-status-poller.tsx | 83 +++++++- web/lib/chat/tools.ts | 40 ++-- 9 files changed, 634 insertions(+), 34 deletions(-) create mode 100644 investigator-runtime/prompts/dupin.md create mode 100644 investigator-runtime/src/detectives/dupin.ts create mode 100644 investigator-runtime/src/tools/write_contradiction.ts diff --git a/investigator-runtime/prompts/dupin.md b/investigator-runtime/prompts/dupin.md new file mode 100644 index 0000000..436dcf6 --- /dev/null +++ b/investigator-runtime/prompts/dupin.md @@ -0,0 +1,73 @@ +# You are Auguste Dupin + +You are C. Auguste Dupin, originator of analytical ratiocination. Your method +is to read a body of testimony and locate the **incompatibilities** that +ordinary readers gloss over. 
You do not adjudicate which side is correct — +you isolate the tension itself, name the topic, and quote the conflicting +chunks verbatim so the case-writer can follow up. + +## Discipline (non-negotiable) + +1. Given a **topic** and a corpus shortlist of chunks, you scan for pairs (or + small groups) of chunks that cannot both be true under any ordinary + reading. Examples of tension: + - Two statements that fix the same event at different dates / places / + times of day. + - One chunk says a person was present, another says they were not. + - One chunk gives a count (witnesses, craft, fragments) that disagrees + with another by more than rounding. + - One chunk asserts the cause of a phenomenon was X, another asserts Y. + - One chunk says a document was destroyed, another references its + existence later. +2. You do NOT count the following as contradictions: + - Two chunks describing different events that merely share a vocabulary. + - A summary chunk paraphrasing an earlier detail-chunk (those agree). + - Redactions vs. unredacted versions — that's not a contradiction, it's + a redaction gap; emit nothing. + - Speculation chunks contradicting fact chunks — that's normal; only + emit when BOTH sides are presented as fact. +3. Each contradiction you emit must contain at least **2 distinct chunks** + (no chunk in tension with itself). Three or more positions are allowed + when a true rashomon exists. +4. Each position cites its chunk via `chunk_id` + `doc_id` and includes a + **one-sentence `statement`** describing the position in your own words + (the runtime resolves the chunk_pk and verbatim text from the DB). +5. You prefer FEW high-confidence contradictions over MANY weak ones. If + the corpus contains nothing irreconcilable, emit `NO_CONTRADICTIONS`. + +## Output protocol + +Emit a strict JSON array. No prose. No code fence. Just the array. 
+ +```json +[ + { + "topic": "Short noun-phrase summarizing the disputed point", + "notes": "Optional one-paragraph commentary (≤ 400 chars). Why this matters; what would resolve it.", + "positions": [ + { + "doc_id": "dow-uap-d017-...", + "chunk_id": "c0042", + "statement": "One-sentence summary of what THIS chunk asserts.", + "stance": "asserts" + }, + { + "doc_id": "dow-uap-d017-...", + "chunk_id": "c0087", + "statement": "One-sentence summary of what THAT chunk asserts.", + "stance": "denies" + } + ] + } +] +``` + +Constraints: +- ≥ 2 positions per contradiction, drawn from ≥ 2 distinct `chunk_id`s. +- `stance` is optional free-form ("asserts" / "denies" / "dates-as-A" / + "dates-as-B" / etc.); useful for the case-writer but not required. +- `notes` may be empty; if present, keep it tight. +- Emit AT MOST 3 contradictions per call — the strongest you can find. + +If the corpus contains no genuine contradiction relative to the topic, +emit the literal single word `NO_CONTRADICTIONS` and stop. diff --git a/investigator-runtime/src/detectives/dupin.ts b/investigator-runtime/src/detectives/dupin.ts new file mode 100644 index 0000000..6c28ea0 --- /dev/null +++ b/investigator-runtime/src/detectives/dupin.ts @@ -0,0 +1,185 @@ +/** + * dupin.ts — contradiction-scan detective. + * + * Workflow: + * 1. The runtime grounds Dupin with a small corpus shortlist via + * hybridSearch. Default k=18 — Dupin needs MORE chunks than Holmes + * because contradictions emerge from comparing dispersed claims. + * 2. Claude Sonnet 4.6 reads the topic + chunks, emits a JSON array of + * 0-3 contradictions with topic/notes/positions[]. + * 3. The runtime parses + calls writeContradiction() per item. 
+ */ +import { readFile } from "node:fs/promises"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import { audit } from "../lib/audit"; +import { callClaude } from "../lib/claude"; +import { env } from "../lib/env"; +import { hybridSearch, type SearchHit } from "../lib/search"; +import { + writeContradiction, + type WriteContradictionArgs, + type ContradictionPosition, +} from "../tools/write_contradiction"; + +const HERE = path.dirname(fileURLToPath(import.meta.url)); +const PROMPT_PATH = path.resolve(HERE, "..", "..", "prompts", "dupin.md"); + +export interface DupinTask { + job_id: string; + topic: string; + doc_id?: string; + lang?: "pt" | "en"; + context_chunks?: number; + budget_cap_usd?: number; +} + +function renderChunkBlock(hits: SearchHit[], lang: "pt" | "en"): string { + const blocks = hits.map((h) => { + const text = (lang === "en" ? h.content_en : h.content_pt) || h.content_en || h.content_pt || ""; + const pageStr = String(h.page).padStart(3, "0"); + return [ + `--- ${h.doc_id}/p${pageStr}#${h.chunk_id} ---`, + `type: ${h.type}`, + h.classification ? `classification: ${h.classification}` : null, + "", + text.slice(0, 1100), + ].filter(Boolean).join("\n"); + }); + return blocks.join("\n\n"); +} + +function buildPrompt(task: DupinTask, hits: SearchHit[], lang: "pt" | "en"): string { + return [ + `# Topic to scan for contradictions`, + "", + task.topic, + "", + `## Corpus shortlist (${hits.length} chunks${task.doc_id ? `, scoped to ${task.doc_id}` : ""})`, + "", + renderChunkBlock(hits, lang), + "", + "## Your task", + "", + "Inspect the chunks for pairs (or small groups) that cannot both be true.", + "Emit at most 3 contradictions. Each must cite ≥ 2 distinct chunk_ids.", + "Emit the JSON array exactly as specified by the system prompt — no prose,", + "no code fence, no preamble. 
If no genuine contradiction exists,", + "emit the literal word `NO_CONTRADICTIONS`.", + ].join("\n"); +} + +function extractJsonArray(text: string): unknown[] | null { + const t = text.trim(); + if (t === "NO_CONTRADICTIONS") return null; + const stripped = t.replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, ""); + const first = stripped.indexOf("["); + const last = stripped.lastIndexOf("]"); + if (first === -1 || last === -1) { + throw new Error(`dupin returned no JSON array: ${t.slice(0, 200)}`); + } + const parsed = JSON.parse(stripped.slice(first, last + 1)); + if (!Array.isArray(parsed)) throw new Error("dupin JSON is not an array"); + return parsed; +} + +function coercePositions(raw: unknown): ContradictionPosition[] { + if (!Array.isArray(raw)) return []; + const out: ContradictionPosition[] = []; + for (const p of raw) { + if (!p || typeof p !== "object") continue; + const o = p as Record; + const doc_id = typeof o.doc_id === "string" ? o.doc_id.trim() : ""; + const chunk_id = typeof o.chunk_id === "string" ? o.chunk_id.trim() : ""; + const statement = typeof o.statement === "string" ? o.statement.trim() : ""; + if (!doc_id || !chunk_id || !statement) continue; + out.push({ + doc_id, chunk_id, statement, + stance: typeof o.stance === "string" ? o.stance.trim() : undefined, + }); + } + return out; +} + +export async function runDupin(task: DupinTask): Promise< + | { contradictions: Array<{ contradiction_id: string; case_file: string }> } + | { skipped: true; reason: string } +> { + const lang: "pt" | "en" = task.lang ?? "pt"; + const k = task.context_chunks ?? 18; + + const hits = await hybridSearch({ + query: task.topic, + lang, + doc_id: task.doc_id ?? null, + top_k: k, + recall_k: 80, + }); + await audit({ + event: "dupin_grounded", + job_id: task.job_id, + detective: "dupin@detective", + topic: task.topic, + n_chunks: hits.length, + doc_id: task.doc_id ?? 
null, + }); + if (hits.length < 2) { + return { skipped: true, reason: "insufficient_corpus" }; + } + + const systemPrompt = await readFile(PROMPT_PATH, "utf-8"); + const prompt = buildPrompt(task, hits, lang); + const llm = await callClaude({ + prompt, + systemPrompt, + model: env.CLAUDE_MODEL, + allowedTools: [], + timeoutMs: env.JOB_TIMEOUT_SECONDS * 1000, + budgetCapUsd: task.budget_cap_usd ?? env.BUDGET_CAP_USD_PER_JOB, + }); + await audit({ + event: "detective_completed", + job_id: task.job_id, + detective: "dupin@detective", + cost_usd: llm.costUsd, + tokens_in: llm.tokensIn, + tokens_out: llm.tokensOut, + duration_ms: llm.durationMs, + }); + console.error(`[dupin] response (${llm.text.length} chars): ${llm.text.slice(0, 800)}`); + + const arr = extractJsonArray(llm.text); + if (arr === null) return { skipped: true, reason: "NO_CONTRADICTIONS" }; + + const out: Array<{ contradiction_id: string; case_file: string }> = []; + for (const raw of arr.slice(0, 3)) { + if (!raw || typeof raw !== "object") continue; + const o = raw as Record; + const topic = typeof o.topic === "string" ? o.topic.trim() : ""; + const positions = coercePositions(o.positions); + if (!topic || positions.length < 2) continue; + const args: WriteContradictionArgs = { + topic, + positions, + notes: typeof o.notes === "string" ? o.notes.trim() : undefined, + resolution_status: o.resolution_status === "resolved" + ? "resolved" + : o.resolution_status === "irreconcilable" + ? 
"irreconcilable" + : "open", + }; + try { + const r = await writeContradiction(args, { job_id: task.job_id, detective: "dupin@detective" }); + out.push(r); + } catch (e) { + await audit({ + event: "write_contradiction_failed", + job_id: task.job_id, + detective: "dupin@detective", + error: (e as Error).message, + topic: args.topic.slice(0, 200), + }); + } + } + return { contradictions: out }; +} diff --git a/investigator-runtime/src/orchestrator.ts b/investigator-runtime/src/orchestrator.ts index 8659981..6dd41da 100644 --- a/investigator-runtime/src/orchestrator.ts +++ b/investigator-runtime/src/orchestrator.ts @@ -9,6 +9,7 @@ import { audit } from "./lib/audit"; import { query } from "./lib/pg"; import { runLocard, type LocardTask } from "./detectives/locard"; import { runHolmes, type HolmesTask } from "./detectives/holmes"; +import { runDupin, type DupinTask } from "./detectives/dupin"; export interface InvestigationJob { job_id: string; @@ -67,6 +68,25 @@ export async function dispatch(job: InvestigationJob, workerId: string): Promise } break; } + case "contradiction_scan": { + // Payload: { topic, doc_id?, lang?, context_chunks? } + const topic = String(job.payload.topic ?? "").trim(); + if (!topic) throw new Error("contradiction_scan requires payload.topic"); + const task: DupinTask = { + job_id: job.job_id, + topic, + doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined, + lang: job.payload.lang === "en" ? "en" : "pt", + context_chunks: typeof job.payload.context_chunks === "number" ? 
job.payload.context_chunks : undefined, + }; + const r = await runDupin(task); + if ("skipped" in r) { + outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason }); + } else { + for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c }); + } + break; + } default: throw new Error(`unknown_kind: ${job.kind}`); diff --git a/investigator-runtime/src/tools/write_contradiction.ts b/investigator-runtime/src/tools/write_contradiction.ts new file mode 100644 index 0000000..56ba7a6 --- /dev/null +++ b/investigator-runtime/src/tools/write_contradiction.ts @@ -0,0 +1,186 @@ +/** + * write_contradiction.ts — Dupin's primary writer. + * + * Inserts a row into public.contradictions and renders + * case/contradictions/R-NNNN.md. + * + * Validates: + * - topic + at least 2 positions + * - each position has a chunk_pk (resolved from doc_id + chunk_id via the DB) + * - position.statement is non-empty + * - notes ≤ 4000 chars + * + * Naming uses the R-NNNN slot from contradiction_id_seq. Note: this CLAUDE.md + * historically reserved R-NNNN for `relation` artefacts (chief-detective); + * contradictions are the same conceptual class (a connection between two + * pieces of evidence in tension), so they share the slot. + */ +import { mkdir, writeFile } from "node:fs/promises"; +import path from "node:path"; +import { audit } from "../lib/audit"; +import { env } from "../lib/env"; +import { allocate } from "../lib/ids"; +import { query, queryOne } from "../lib/pg"; + +export interface ContradictionPosition { + doc_id: string; + chunk_id: string; + /** The verbatim or paraphrased claim that puts this chunk on this side. */ + statement: string; + /** Optional weight or stance label (e.g. "asserts", "denies"). 
*/ + stance?: string; +} + +export interface WriteContradictionArgs { + topic: string; + positions: ContradictionPosition[]; + notes?: string; + resolution_status?: "open" | "resolved" | "irreconcilable"; +} + +export interface WriteContradictionContext { + job_id: string; + detective: string; +} + +interface ResolvedPosition extends ContradictionPosition { + chunk_pk: number; + page: number; +} + +async function resolveChunk(doc_id: string, chunk_id: string): Promise<{ chunk_pk: number; page: number } | null> { + const row = await queryOne<{ chunk_pk: number; page: number }>( + `SELECT chunk_pk, page FROM public.chunks WHERE doc_id = $1 AND chunk_id = $2`, + [doc_id, chunk_id], + ); + return row ?? null; +} + +function renderMd( + id: string, + body: WriteContradictionArgs, + positions: ResolvedPosition[], + ctx: WriteContradictionContext, +): string { + const fm = [ + "---", + `schema_version: "0.1.0"`, + `type: contradiction`, + `contradiction_id: ${id}`, + `topic: ${JSON.stringify(body.topic)}`, + `resolution_status: ${body.resolution_status ?? "open"}`, + `detected_by: ${ctx.detective}`, + `job_id: ${ctx.job_id}`, + `created_at: ${new Date().toISOString()}`, + "---", + ].join("\n"); + + const positionBlocks = positions.map((p, i) => { + const pageStr = String(p.page).padStart(3, "0"); + return [ + `### Position ${i + 1}${p.stance ? 
` — ${p.stance}` : ""}`, + "", + `> ${p.statement}`, + "", + `Source: [[${p.doc_id}/p${pageStr}#${p.chunk_id}]]`, + ].join("\n"); + }); + + return [ + fm, + "", + `# Contradiction ${id}`, + "", + `**Topic.** ${body.topic}`, + "", + "## Positions in tension", + "", + positionBlocks.join("\n\n"), + "", + "## Notes", + "", + body.notes || "_(no commentary recorded)_", + "", + ].join("\n"); +} + +export async function writeContradiction( + body: WriteContradictionArgs, + ctx: WriteContradictionContext, +): Promise<{ contradiction_id: string; case_file: string }> { + if (!body.topic?.trim()) throw new Error("topic required"); + if (!Array.isArray(body.positions) || body.positions.length < 2) { + throw new Error("at least 2 positions required"); + } + if (body.notes && body.notes.length > 4000) { + throw new Error(`notes too long (${body.notes.length} > 4000)`); + } + + const resolved: ResolvedPosition[] = []; + for (const p of body.positions) { + if (!p?.doc_id?.trim() || !p?.chunk_id?.trim()) { + throw new Error("position requires doc_id + chunk_id"); + } + if (!p?.statement?.trim()) { + throw new Error(`position ${p.doc_id}/${p.chunk_id} missing statement`); + } + const chunk = await resolveChunk(p.doc_id, p.chunk_id); + if (!chunk) { + throw new Error(`chunk ${p.doc_id}/${p.chunk_id} not found`); + } + resolved.push({ + ...p, + statement: p.statement.trim(), + chunk_pk: chunk.chunk_pk, + page: chunk.page, + }); + } + + // Reject pairs that point at the same chunk on every side — that's not a + // contradiction, that's a single statement. + const uniqueChunks = new Set(resolved.map((p) => p.chunk_pk)); + if (uniqueChunks.size < 2) { + throw new Error("contradiction requires positions from at least 2 distinct chunks"); + } + + const contradiction_id = await allocate.contradictionId(); + const chunkPayload = resolved.map((p) => ({ + chunk_pk: p.chunk_pk, + doc_id: p.doc_id, + chunk_id: p.chunk_id, + page: p.page, + statement: p.statement, + stance: p.stance ?? 
null, + })); + + await query( + `INSERT INTO public.contradictions + (contradiction_id, topic, chunks, detected_by, resolution_status, notes) + VALUES ($1, $2, $3::jsonb, $4, $5, $6)`, + [ + contradiction_id, + body.topic.trim(), + JSON.stringify(chunkPayload), + ctx.detective, + body.resolution_status ?? "open", + body.notes ?? null, + ], + ); + + const dir = path.join(env.CASE_ROOT, "contradictions"); + await mkdir(dir, { recursive: true }); + const file = path.join(dir, `${contradiction_id}.md`); + await writeFile(file, renderMd(contradiction_id, body, resolved, ctx), "utf-8"); + + await audit({ + event: "write_contradiction", + job_id: ctx.job_id, + detective: ctx.detective, + contradiction_id, + n_positions: resolved.length, + distinct_chunks: uniqueChunks.size, + file, + }); + + return { contradiction_id, case_file: file }; +} diff --git a/web/app/api/jobs/[id]/route.ts b/web/app/api/jobs/[id]/route.ts index b61e197..c6092c5 100644 --- a/web/app/api/jobs/[id]/route.ts +++ b/web/app/api/jobs/[id]/route.ts @@ -56,6 +56,15 @@ interface HypothesisRow { evidence_refs: unknown; } +interface ContradictionRow { + contradiction_id: string; + topic: string; + chunks: unknown; + resolution_status: string | null; + notes: string | null; + detected_by: string | null; +} + function durationMs(started: string | null, finished: string | null, created: string): number | null { const a = started ? new Date(started).getTime() : null; const b = finished ? new Date(finished).getTime() : null; @@ -85,14 +94,16 @@ export async function GET( // Collect IDs surfaced in outputs[] for hydration. 
const evidenceIds: string[] = []; const hypothesisIds: string[] = []; + const contradictionIds: string[] = []; if (Array.isArray(job.outputs)) { for (const o of job.outputs as Array>) { if (typeof o.evidence_id === "string") evidenceIds.push(o.evidence_id); if (typeof o.hypothesis_id === "string") hypothesisIds.push(o.hypothesis_id); + if (typeof o.contradiction_id === "string") contradictionIds.push(o.contradiction_id); } } - const [evidence, hypotheses] = await Promise.all([ + const [evidence, hypotheses, contradictions] = await Promise.all([ evidenceIds.length > 0 ? pgQuery( `SELECT e.evidence_id, e.grade, e.source_page_id, @@ -117,6 +128,15 @@ export async function GET( [hypothesisIds], ) : Promise.resolve([] as HypothesisRow[]), + contradictionIds.length > 0 + ? pgQuery( + `SELECT contradiction_id, topic, chunks, resolution_status, notes, detected_by + FROM public.contradictions + WHERE contradiction_id = ANY($1::text[]) + ORDER BY contradiction_id`, + [contradictionIds], + ) + : Promise.resolve([] as ContradictionRow[]), ]); return NextResponse.json({ @@ -133,6 +153,7 @@ export async function GET( outputs: Array.isArray(job.outputs) ? job.outputs : [], evidence, hypotheses, + contradictions, }); } catch (e) { return NextResponse.json({ error: "db_unavailable", message: (e as Error).message }, { status: 503 }); diff --git a/web/app/jobs/[id]/page.tsx b/web/app/jobs/[id]/page.tsx index 23d84fc..23ce599 100644 --- a/web/app/jobs/[id]/page.tsx +++ b/web/app/jobs/[id]/page.tsx @@ -51,13 +51,29 @@ export default async function JobPage({ const job = rows[0]; if (!job) notFound(); - const isHolmes = job.kind === "hypothesis_tournament"; - const detectiveName = isHolmes ? "Sherlock Holmes" : "Edmond Locard"; - const detectiveSlug = isHolmes ? "holmes" : "locard"; - const detectiveTone = isHolmes ? "text-[#7fdbff]" : "text-[#06d6a0]"; - const detectiveBg = isHolmes ? 
"from-[rgba(127,219,255,0.08)]" : "from-[rgba(6,214,160,0.08)]"; - const question = (job.payload as Record)?.question as string | undefined; - const docId = (job.payload as Record)?.doc_id as string | undefined; + const detective = job.kind === "hypothesis_tournament" ? "holmes" + : job.kind === "contradiction_scan" ? "dupin" + : "locard"; + const detectiveName = + detective === "holmes" ? "Sherlock Holmes" : + detective === "dupin" ? "C. Auguste Dupin" : + "Edmond Locard"; + const detectiveSubtitle = + detective === "holmes" ? "Hypothesis tournament · rival hypotheses with Bayesian update" : + detective === "dupin" ? "Contradiction scan · pairs of chunks in irreconcilable tension" : + "Evidence chain · verbatim quotes with chain of custody (Locard)"; + const detectiveTone = + detective === "holmes" ? "text-[#7fdbff]" : + detective === "dupin" ? "text-[#ff8a4d]" : + "text-[#06d6a0]"; + const detectiveBg = + detective === "holmes" ? "from-[rgba(127,219,255,0.08)]" : + detective === "dupin" ? "from-[rgba(255,138,77,0.08)]" : + "from-[rgba(6,214,160,0.08)]"; + const payload = (job.payload ?? {}) as Record; + const question = (payload.question ?? payload.topic) as string | undefined; + const questionLabel = job.kind === "contradiction_scan" ? "Topic" : "Question"; + const docId = payload.doc_id as string | undefined; return (
@@ -77,20 +93,16 @@ export default async function JobPage({

{detectiveName}

-

- {isHolmes - ? "Hypothesis tournament · rival hypotheses with Bayesian update" - : "Evidence chain · verbatim quotes with chain of custody (Locard)"} -

+

{detectiveSubtitle}

- {detectiveSlug} + {detective} {question && (
-
Question
+
{questionLabel}
{question}
)} diff --git a/web/components/chat-bubble.tsx b/web/components/chat-bubble.tsx index 564c7a1..cc875de 100644 --- a/web/components/chat-bubble.tsx +++ b/web/components/chat-bubble.tsx @@ -684,13 +684,19 @@ function ToolTrace({ t }: { t: ToolBlock }) { ); } - const isHolmes = r.kind === "hypothesis_tournament"; - const tone = isHolmes ? "text-[#7fdbff] border-[#7fdbff]" : "text-[#06d6a0] border-[#06d6a0]"; + const detective = r.detective ?? ( + r.kind === "hypothesis_tournament" ? "holmes" : + r.kind === "contradiction_scan" ? "dupin" : "locard" + ); + const tone = + detective === "holmes" ? { text: "text-[#7fdbff]", border: "border-[#7fdbff]", label: "Holmes" } : + detective === "dupin" ? { text: "text-[#ff8a4d]", border: "border-[#ff8a4d]", label: "Dupin" } : + { text: "text-[#06d6a0]", border: "border-[#06d6a0]", label: "Locard" }; return ( -
+
-
- 🔎 {isHolmes ? "Holmes" : "Locard"} · {r.kind} +
+ 🔎 {tone.label} · {r.kind}
{r.status}
@@ -702,7 +708,7 @@ function ToolTrace({ t }: { t: ToolBlock }) { acompanhar a investigação diff --git a/web/components/job-status-poller.tsx b/web/components/job-status-poller.tsx index 4cf41d2..57270d0 100644 --- a/web/components/job-status-poller.tsx +++ b/web/components/job-status-poller.tsx @@ -61,9 +61,27 @@ interface HypothesisItem { status: string | null; } +interface ContradictionPositionItem { + doc_id: string; + chunk_id: string; + page: number; + statement: string; + stance?: string | null; +} + +interface ContradictionItem { + contradiction_id: string; + topic: string; + chunks: ContradictionPositionItem[]; + resolution_status: string | null; + notes: string | null; + detected_by: string | null; +} + interface FetchedJob extends InitialJob { evidence: EvidenceItem[]; hypotheses: HypothesisItem[]; + contradictions: ContradictionItem[]; duration_ms: number | null; } @@ -111,6 +129,7 @@ export function JobStatusPoller(props: { jobId: string; initialJob: InitialJob } ...props.initialJob, evidence: [], hypotheses: [], + contradictions: [], duration_ms: null, }); const [error, setError] = useState(null); @@ -239,6 +258,16 @@ export function JobStatusPoller(props: { jobId: string; initialJob: InitialJob }
)} + {/* Contradiction cards */} + {job.contradictions.length > 0 && ( +
+
+ Contradições detectadas ({job.contradictions.length}) +
+ {job.contradictions.map((c) => )} +
+ )} + {/* Evidence cards */} {job.evidence.length > 0 && (
@@ -250,13 +279,14 @@ export function JobStatusPoller(props: { jobId: string; initialJob: InitialJob } )} {/* Empty / in-flight state */} - {!isTerminal(job.status) && job.hypotheses.length === 0 && job.evidence.length === 0 && ( + {!isTerminal(job.status) && job.hypotheses.length === 0 && job.evidence.length === 0 && job.contradictions.length === 0 && (
🔎 Os detetives estão lendo o corpus…
Holmes constrói hipóteses rivais com priors + posteriors em ~60 s.
+ Dupin localiza pares de chunks em tensão irreconciliável em ~60 s.
Locard documenta evidências verbatim com cadeia de custódia em ~30 s por chunk.
@@ -405,6 +435,57 @@ function EvidenceCard({ e }: { e: EvidenceItem }) { ); } +function ContradictionCard({ c }: { c: ContradictionItem }) { + const statusTone = + c.resolution_status === "resolved" ? "text-[#06d6a0] border-[#06d6a0]" : + c.resolution_status === "irreconcilable" ? "text-[#ff3344] border-[#ff3344]" : + "text-[#ff8a4d] border-[#ff8a4d]"; + return ( +
+
+
{c.contradiction_id}
+ {c.resolution_status && ( + + {c.resolution_status} + + )} +
+
+ {c.topic} +
+ +
+ {c.chunks.map((p, i) => { + const pageStr = String(p.page).padStart(3, "0"); + return ( +
+
+ Position {i + 1}{p.stance ? ` — ${p.stance}` : ""} +
+
+ “{p.statement}” +
+ + {p.doc_id}/p{pageStr}#{p.chunk_id} + +
+ ); + })} +
+ + {c.notes && ( +
+
Notes
+
{c.notes}
+
+ )} +
+ ); +} + function ProbabilityBar({ label, value, color }: { label: string; value: number | null; color: string }) { const pct = value !== null ? Math.round(value * 100) : 0; return ( diff --git a/web/lib/chat/tools.ts b/web/lib/chat/tools.ts index e86614d..42b1542 100644 --- a/web/lib/chat/tools.ts +++ b/web/lib/chat/tools.ts @@ -356,13 +356,14 @@ const request_investigation_tool: ToolDefinition = { function: { name: "request_investigation", description: - "Queue a deeper investigation by the 8-detective Investigation Bureau. " + + "Queue a deeper investigation by the Investigation Bureau detectives. " + "Use ONLY when the user asks for analysis that requires structured reasoning " + "across multiple chunks — e.g. 'build rival hypotheses about X', " + - "'audit this doc for contradictions', 'trace the chain of custody for claim Y'. " + + "'find contradictions about Y', 'trace the chain of custody for claim Z'. " + "Do NOT use for plain lookups; hybrid_search is faster. " + "kinds: hypothesis_tournament (Holmes — 2-3 rival hypotheses with priors/posteriors) | " + - "evidence_chain (Locard — verbatim evidence with chain_of_custody on N chunks of one doc). " + + "evidence_chain (Locard — verbatim evidence with chain_of_custody on N chunks of one doc) | " + + "contradiction_scan (Dupin — pairs of chunks in irreconcilable tension on a topic). " + "Returns { job_id, kind, status_url, eta_seconds }. The UI renders a status card " + "with a link to /jobs/; the worker takes ~30-120 seconds.", parameters: { @@ -370,20 +371,27 @@ const request_investigation_tool: ToolDefinition = { properties: { kind: { type: "string", - enum: ["hypothesis_tournament", "evidence_chain"], + enum: ["hypothesis_tournament", "evidence_chain", "contradiction_scan"], description: "Detective task kind.", }, question: { type: "string", description: "For hypothesis_tournament: the investigative question (one sentence, declarative). 
" + - "Required for hypothesis_tournament; ignored for evidence_chain.", + "Required for hypothesis_tournament; ignored for the other kinds.", + }, + topic: { + type: "string", + description: + "For contradiction_scan: short noun-phrase naming the disputed point " + + "(e.g. 'date of the Roswell wreckage recovery'). Required for contradiction_scan; " + + "ignored for other kinds.", }, doc_id: { type: "string", description: - "Optional scope. For hypothesis_tournament: narrows the corpus shortlist. " + - "For evidence_chain: REQUIRED — the doc Locard scans.", + "Optional scope. hypothesis_tournament / contradiction_scan: narrows the corpus " + + "shortlist. evidence_chain: REQUIRED — the doc Locard scans.", }, chunks: { type: "array", @@ -778,8 +786,8 @@ async function handleRequestInvestigation( ctx: ToolHandlerContext, ): Promise { const kind = String(args.kind ?? "").trim(); - if (kind !== "hypothesis_tournament" && kind !== "evidence_chain") { - return { error: "bad_kind", message: "kind must be hypothesis_tournament or evidence_chain" }; + if (kind !== "hypothesis_tournament" && kind !== "evidence_chain" && kind !== "contradiction_scan") { + return { error: "bad_kind", message: "kind must be hypothesis_tournament, evidence_chain or contradiction_scan" }; } const docArg = typeof args.doc_id === "string" && args.doc_id.trim() ? args.doc_id.trim() : ctx.doc_id || null; @@ -792,6 +800,12 @@ async function handleRequestInvestigation( payload.question = question; payload.lang = lang; if (docArg) payload.doc_id = docArg; + } else if (kind === "contradiction_scan") { + const topic = String(args.topic ?? 
"").trim(); + if (!topic) return { error: "topic_required", message: "contradiction_scan needs a topic" }; + payload.topic = topic; + payload.lang = lang; + if (docArg) payload.doc_id = docArg; } else { if (!docArg) return { error: "doc_id_required", message: "evidence_chain needs a doc_id" }; payload.doc_id = docArg; @@ -802,8 +816,8 @@ async function handleRequestInvestigation( } const triggered_by = ctx.user_email ? `user:${ctx.user_email}` : "user:anonymous"; - // Investigation Bureau expected duration: Holmes ~60s, Locard ~30s × n_chunks. - const eta = kind === "hypothesis_tournament" ? 60 : 30 * 5; + // Investigation Bureau expected duration: Holmes ~60s, Dupin ~60s, Locard ~30s × n_chunks. + const eta = kind === "evidence_chain" ? 30 * 5 : 60; try { const rows = await pgQuery<{ job_id: string; created_at: string }>( @@ -821,7 +835,9 @@ async function handleRequestInvestigation( eta_seconds: eta, status_url: `/jobs/${row.job_id}`, payload_summary: payload, - detective: kind === "hypothesis_tournament" ? "holmes" : "locard", + detective: kind === "hypothesis_tournament" ? "holmes" + : kind === "contradiction_scan" ? "dupin" + : "locard", }; } catch (e) { return { error: "db_unavailable", message: (e as Error).message };