User flagged that the bureau was emitting English-only output, violating
the project's bilingual rule. Every narrative field now ships in both
languages: stored in sibling DB columns + rendered as adjacent markdown
sections per CLAUDE.md §3.
Migration 0007 (apply as supabase_admin):
- public.hypotheses +question_pt_br, +position_pt_br,
+argument_for_pt_br, +argument_against_pt_br
- public.contradictions +topic_pt_br, +notes_pt_br
- public.witnesses +access_to_event_pt_br, +bias_notes_pt_br,
+verdict_pt_br
- public.gaps +description_pt_br, +suggested_next_move_pt_br
- public.evidence: unchanged (verbatim_excerpt stays source-language)
- JSONB siblings inside contradictions.chunks + gaps.scope handled at
runtime (statement_pt_br, title_pt_br, dominant_model_pt_br,
why_surprising_pt_br, what_it_implies_pt_br).
Detective prompts (all 7) rewritten with explicit bilingual JSON contract:
- Output protocol section names every EN field + its _pt_br sibling
- "Bilingual is mandatory" warning in the task instruction
- Sentinel skip-states unchanged (NO_HYPOTHESES, NO_CONTRADICTIONS,
INSUFFICIENT_TESTIMONY, INSUFFICIENT_HYPOTHESIS, NO_OUTLIERS,
NO_NEW_EVIDENCE, INSUFFICIENT_ARTEFACTS)
- Schneier: parallel arrays — hidden_assumptions[i] matches
hidden_assumptions_pt_br[i], lengths must match
- Case-Writer: interleaved §1 (EN) / §1 (PT-BR) per act in the body
Writer-side validation (all 7 tools):
- Reject INSERT if PT-BR sibling missing when EN field is set
- Persist both languages atomically in one INSERT (no half-updates)
- Markdown renderers write adjacent EN+PT-BR sections in case files
(## Argument for (EN) followed by ## Argumento a favor (PT-BR), etc.)
Detective parse layer (all 7 detectives):
- Coerce both keys from JSON output
- "incomplete_bilingual_*" skip reason when either side missing
- Defensive: PT-BR fields trimmed + length-capped same as EN
Orchestrator propagates question_pt_br + topic_pt_br through job payload
to runHolmes / runCaseWriter, mirroring the chat-tool entry point.
Web (UI):
- /api/jobs/[id] hydrates _pt_br siblings from pg
- job-status-poller HypothesisCard: PT-BR primary, EN in <details>
fallback when both exist
- ContradictionCard: PT-BR statement primary + secondary EN quote
- WitnessCard: PT-BR verdict primary + secondary EN quote, panels in PT
- GapCard: PT-BR title/why/implies primary
- /bureau hub: SELECTs both columns, renders PT-BR primary
- /h/[id]: ArgumentPanel renders PT-BR primary with collapsible EN
fallback when both exist
- BureauSnapshot homepage: position_pt_br / topic_pt_br / verdict_pt_br
primary
- DocBureauPanel /d/[doc]: same primary-PT-BR pattern
- New web/lib/i18n/pick.ts helper (not yet used by chat/agents — kept
for future locale-driven switching once both languages are equally
complete; the current rule is PT-BR-first because the user is Brazilian)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
190 lines
7.1 KiB
TypeScript
190 lines
7.1 KiB
TypeScript
/**
 * holmes.ts — hypothesis tournament detective.
 *
 * Workflow (matches agentic-layer-spec sec 7):
 *   1. The runtime grounds Holmes with a small corpus shortlist via
 *      hybridSearch — Holmes never gets the whole DB, just the relevant
 *      chunks (typically 8-15; 12 by default, see HolmesTask.context_chunks).
 *   2. Claude (model selected by env.CLAUDE_MODEL; Sonnet 4.6 at the time of
 *      writing) reads the question + chunks and emits a JSON array of
 *      2-3 rival hypotheses with priors/posteriors/citations.
 *   3. The runtime parses the array and calls writeHypothesis() for each.
 *      The writer enforces posterior bounds + Tetlock band + FK to evidence.
 *
 * Holmes does NOT get tool calls. All grounding is pre-fed; all writes are
 * applied by the runtime after validation (sa-security gate #2).
 */
|
|
import { readFile } from "node:fs/promises";
|
|
import path from "node:path";
|
|
import { fileURLToPath } from "node:url";
|
|
import { audit } from "../lib/audit";
|
|
import { callClaude } from "../lib/claude";
|
|
import { env } from "../lib/env";
|
|
import { hybridSearch, type SearchHit } from "../lib/search";
|
|
import { writeHypothesis, type WriteHypothesisArgs } from "../tools/write_hypothesis";
|
|
|
|
// Directory that contains this module, derived from the module URL.
const HERE = path.dirname(fileURLToPath(import.meta.url));
// Location of the Holmes system prompt: two directory levels above this
// module, inside `prompts/`.
const PROMPT_PATH = path.join(HERE, "..", "..", "prompts", "holmes.md");
|
|
|
|
/** Input describing one Holmes run. */
export interface HolmesTask {
  /** Job identifier; attached to every audit event and hypothesis write. */
  job_id: string;
  /** Question to investigate (EN side). Also used as the search query. */
  question: string;
  /**
   * PT-BR mirror of the question. When it is not supplied, the prompt shows
   * only the EN question and the EN text is passed on as the PT-BR question.
   */
  question_pt_br?: string;
  /** Optional scope narrowing — limits the search to a single document. */
  doc_id?: string;
  /** Language used for the search and for picking chunk text. Default "pt". */
  lang?: "pt" | "en";
  /** Number of chunks fed to Holmes. Default 12. */
  context_chunks?: number;
  /** Spend cap in USD for the model call; falls back to the env default. */
  budget_cap_usd?: number;
}
|
|
|
|
/**
 * Renders search hits as the plain-text corpus shortlist shown to Holmes.
 *
 * Each hit becomes a small header (1-based chunk number, citation id in the
 * `[[doc/pNNN#chunk]]` form, type, optional classification) followed by a
 * blank line and the chunk text, capped at 1200 characters. Hits are
 * separated from each other by a blank line.
 *
 * @param hits Search hits to render, in the order they should be numbered.
 * @param lang Preferred language of the chunk text; the other language is
 *   used as a fallback when the preferred one is empty.
 * @returns The rendered shortlist, or an empty string when `hits` is empty.
 */
function renderChunkBlock(hits: SearchHit[], lang: "pt" | "en"): string {
  return hits
    .map((hit, index) => {
      const preferred = lang === "en" ? hit.content_en : hit.content_pt;
      const text = preferred || hit.content_en || hit.content_pt || "";
      // Page is zero-padded to three digits so ids read as pNNN.
      const page = String(hit.page).padStart(3, "0");

      const header = [
        `--- chunk ${index + 1} ---`,
        `id: [[${hit.doc_id}/p${page}#${hit.chunk_id}]]`,
        `type: ${hit.type}`,
        ...(hit.classification ? [`classification: ${hit.classification}`] : []),
      ].join("\n");

      // The previous implementation ran the lines through filter(Boolean),
      // which also discarded the intended "" separator. Emit the blank line
      // explicitly, and only when there is text to separate from the header.
      const excerpt = text.slice(0, 1200);
      return excerpt ? `${header}\n\n${excerpt}` : header;
    })
    .join("\n\n");
}
|
|
|
|
/**
 * Assembles the user prompt for Holmes: the question (EN, plus PT-BR when a
 * non-blank one is supplied), the rendered corpus shortlist, and the task
 * instruction that restates the bilingual JSON contract.
 *
 * @param task The Holmes task; `question`, `question_pt_br` and `doc_id` are read.
 * @param hits Search hits to render into the shortlist section.
 * @param lang Preferred language for the chunk text in the shortlist.
 * @returns The prompt as markdown-style text with blank lines between sections.
 */
function buildPrompt(task: HolmesTask, hits: SearchHit[], lang: "pt" | "en"): string {
  const shortlist = renderChunkBlock(hits, lang);
  const questionPt = task.question_pt_br?.trim();
  const scopeNote = task.doc_id ? `, scoped to ${task.doc_id}` : "";

  const lines: Array<string | null> = [
    `# Question to investigate`,
    "",
    `**EN.** ${task.question}`,
    // null marks "no PT-BR line"; it is the only entry removed below.
    questionPt ? `**PT-BR.** ${questionPt}` : null,
    "",
    `## Corpus shortlist (${hits.length} chunks${scopeNote})`,
    "",
    shortlist,
    "",
    "## Your task",
    "",
    "Build 2-3 rival hypotheses about the question above. Each must cite at",
    "least one chunk via [[doc-id/pNNN#cNNNN]] in both argument_for and",
    "argument_against (EN) and in argument_for_pt_br and",
    "argument_against_pt_br (PT-BR). Assign priors + posteriors summing",
    "roughly to 1.0. Emit the JSON array exactly as specified by the system",
    "prompt — no prose, no code fence, no preamble. **Bilingual is mandatory:",
    "every narrative field appears in both EN and PT-BR.**",
  ];

  // Drop only the null placeholder. The previous filter(Boolean) also removed
  // every "" entry, so headings and sections ran together with no blank lines.
  return lines.filter((line): line is string => line !== null).join("\n");
}
|
|
|
|
/**
 * Extracts the JSON array from a Holmes response.
 *
 * Accepts the array bare, wrapped in a ``` / ```json code fence, or
 * surrounded by stray text: everything from the first "[" to the last "]"
 * is parsed.
 *
 * @param text Raw model output.
 * @returns The parsed array, or `null` when the response starts with the
 *   `NO_HYPOTHESES` sentinel (optionally wrapped in backticks, any case).
 * @throws {Error} When no "[" ... "]" span exists, or the parsed value is not
 *   an array.
 * @throws {SyntaxError} When the bracketed span is not valid JSON.
 */
function extractJsonArray(text: string): unknown[] | null {
  const trimmed = text.trim();
  if (/^`?NO_HYPOTHESES`?\b/i.test(trimmed)) return null;

  const unfenced = trimmed
    .replace(/^```(?:json)?\s*\n?/i, "")
    .replace(/\n?```\s*$/i, "");
  const start = unfenced.indexOf("[");
  const end = unfenced.lastIndexOf("]");
  // `end < start` also covers end === -1, and catches a "]" that appears
  // before the first "[", which previously fell through to JSON.parse("").
  if (start === -1 || end < start) {
    throw new Error(`holmes returned no JSON array: ${trimmed.slice(0, 200)}`);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(unfenced.slice(start, end + 1));
  } catch (err) {
    // Keep the SyntaxError type, but add a response excerpt so the failure
    // can be diagnosed from the error message alone.
    const detail = err instanceof Error ? err.message : String(err);
    throw new SyntaxError(
      `holmes returned malformed JSON (${detail}): ${trimmed.slice(0, 200)}`,
    );
  }
  if (!Array.isArray(parsed)) throw new Error("holmes JSON is not an array");
  return parsed;
}
|
|
|
|
/** Returns the trimmed string when `value` is a non-blank string, else undefined. */
function trimmedOrUndefined(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}

/**
 * Coerces one element of the model's JSON array into writer arguments.
 * Returns null when the element is not an object or has no usable `position`.
 */
function toHypothesisArgs(raw: unknown, task: HolmesTask): WriteHypothesisArgs | null {
  // A null / primitive element used to throw on property access and abort the
  // whole run; skip it like any other entry without a position.
  if (raw === null || typeof raw !== "object") return null;
  const fields = raw as Record<string, unknown>;

  const position = String(fields.position ?? "").trim();
  if (!position) return null;

  return {
    question: task.question,
    // Treat a blank PT-BR question as absent, matching how the prompt is built.
    question_pt_br: trimmedOrUndefined(task.question_pt_br) ?? task.question,
    position,
    position_pt_br: trimmedOrUndefined(fields.position_pt_br),
    argument_for: trimmedOrUndefined(fields.argument_for),
    argument_for_pt_br: trimmedOrUndefined(fields.argument_for_pt_br),
    argument_against: trimmedOrUndefined(fields.argument_against),
    argument_against_pt_br: trimmedOrUndefined(fields.argument_against_pt_br),
    // Numeric and band values are passed through; range checks are left to
    // writeHypothesis.
    prior: Number(fields.prior),
    posterior: Number(fields.posterior),
    confidence_band: fields.confidence_band as WriteHypothesisArgs["confidence_band"],
    // Keep only references that carry a non-empty string evidence_id.
    evidence_refs: Array.isArray(fields.evidence_refs)
      ? (fields.evidence_refs as Array<{ evidence_id?: string; supports?: boolean; weight?: number }>)
          .filter((ref): ref is { evidence_id: string; supports?: boolean; weight?: number } =>
            typeof ref?.evidence_id === "string" && ref.evidence_id.length > 0)
      : [],
  };
}

/**
 * Runs one Holmes pass: ground the question with hybridSearch, ask the model
 * for rival hypotheses, then hand each parsed hypothesis to writeHypothesis.
 *
 * Audit events emitted: `holmes_grounded` after the search,
 * `detective_completed` after the model call, and `write_hypothesis_failed`
 * for each hypothesis the writer rejects (the loop continues with the next).
 *
 * @param task Question, optional scope/language/size/budget overrides, and the
 *   job id used for auditing.
 * @returns `{ hypotheses }` with one entry per successful write (possibly
 *   empty), or `{ skipped: true, reason }` when the search finds nothing
 *   (`"no_corpus_match"`) or the model answers with the `NO_HYPOTHESES`
 *   sentinel.
 * @throws Whatever hybridSearch, readFile, callClaude, audit or
 *   extractJsonArray throw; only writeHypothesis failures are caught here.
 */
export async function runHolmes(task: HolmesTask): Promise<
  | { hypotheses: Array<{ hypothesis_id: string; case_file: string }> }
  | { skipped: true; reason: string }
> {
  const lang: "pt" | "en" = task.lang ?? "pt";
  const topK = task.context_chunks ?? 12;
  const detective = "holmes@detective";

  // 1. Ground with hybrid_search.
  const hits = await hybridSearch({
    query: task.question,
    lang,
    doc_id: task.doc_id ?? null,
    top_k: topK,
    recall_k: 60,
  });
  await audit({
    event: "holmes_grounded",
    job_id: task.job_id,
    detective,
    question: task.question,
    n_chunks: hits.length,
    doc_id: task.doc_id ?? null,
  });
  if (hits.length === 0) {
    return { skipped: true, reason: "no_corpus_match" };
  }

  // 2. Call Claude. No tools are allowed: all grounding is in the prompt.
  const systemPrompt = await readFile(PROMPT_PATH, "utf-8");
  const prompt = buildPrompt(task, hits, lang);
  const llm = await callClaude({
    prompt,
    systemPrompt,
    model: env.CLAUDE_MODEL,
    allowedTools: [],
    timeoutMs: env.JOB_TIMEOUT_SECONDS * 1000,
    budgetCapUsd: task.budget_cap_usd ?? env.BUDGET_CAP_USD_PER_JOB,
  });
  await audit({
    event: "detective_completed",
    job_id: task.job_id,
    detective,
    cost_usd: llm.costUsd,
    tokens_in: llm.tokensIn,
    tokens_out: llm.tokensOut,
    duration_ms: llm.durationMs,
  });

  console.error(`[holmes] response (${llm.text.length} chars): ${llm.text.slice(0, 800)}`);

  // 3. Parse + write.
  const parsed = extractJsonArray(llm.text);
  if (parsed === null) return { skipped: true, reason: "NO_HYPOTHESES" };

  const written: Array<{ hypothesis_id: string; case_file: string }> = [];
  // At most three hypotheses are considered, written one at a time in order.
  for (const raw of parsed.slice(0, 3)) {
    const args = toHypothesisArgs(raw, task);
    if (args === null) continue;
    try {
      const result = await writeHypothesis(args, { job_id: task.job_id, detective });
      written.push(result);
    } catch (e) {
      // A rejected hypothesis is audited and skipped; it does not fail the run.
      await audit({
        event: "write_hypothesis_failed",
        job_id: task.job_id,
        detective,
        error: e instanceof Error ? e.message : String(e),
        position: args.position.slice(0, 200),
      });
    }
  }
  return { hypotheses: written };
}
|