/**
 * holmes.ts — hypothesis tournament detective.
 *
 * Workflow (matches agentic-layer-spec sec 7):
 *   1. The runtime grounds Holmes with a small corpus shortlist via
 *      hybridSearch — Holmes never gets the whole DB, just the relevant 8-15
 *      chunks.
 *   2. Claude Sonnet 4.6 reads the question + chunks, emits a JSON array of
 *      2-3 rival hypotheses with priors/posteriors/citations.
 *   3. The runtime parses the array and calls writeHypothesis() for each.
 *      The writer enforces posterior bounds + Tetlock band + FK to evidence.
 *
 * Holmes does NOT get tool calls. All grounding is pre-fed; all writes are
 * applied by the runtime after validation (sa-security gate #2).
 */
import { readFile } from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";

import { audit } from "../lib/audit";
import { callClaude } from "../lib/claude";
import { env } from "../lib/env";
import { hybridSearch, type SearchHit } from "../lib/search";
import { writeHypothesis, type WriteHypothesisArgs } from "../tools/write_hypothesis";

const HERE = path.dirname(fileURLToPath(import.meta.url));
const PROMPT_PATH = path.resolve(HERE, "..", "..", "prompts", "holmes.md");

/** Identity recorded on every audit event and hypothesis write made here. */
const DETECTIVE = "holmes@detective";
/** Upper bound on hypotheses persisted per run (the prompt asks for 2-3). */
const MAX_HYPOTHESES = 3;
/** Maximum number of characters of each chunk's text included in the prompt. */
const MAX_CHUNK_CHARS = 1200;

export interface HolmesTask {
  job_id: string;
  question: string;
  /** Optional PT-BR mirror of the question. If omitted, the EN one is used
   *  for both sides until the model emits PT-BR output. */
  question_pt_br?: string;
  /** Optional scope narrowing — restrict the search to one doc / entity. */
  doc_id?: string;
  lang?: "pt" | "en";
  /** How many chunks to feed Holmes. Default 12. */
  context_chunks?: number;
  budget_cap_usd?: number;
}

/**
 * Renders the search hits as a plain-text block, one numbered section per
 * chunk, with sections separated by a blank line.
 *
 * Each section carries the citation id (`[[doc/pNNN#chunk]]`), the chunk
 * type, the classification when present, and — after a blank line — up to
 * MAX_CHUNK_CHARS characters of text. The text prefers the requested
 * language and falls back to EN, then PT, then empty.
 *
 * @param hits Search hits to render, in the order they should be numbered.
 * @param lang Preferred language for the chunk text.
 * @returns The rendered block; an empty string when `hits` is empty.
 */
function renderChunkBlock(hits: SearchHit[], lang: "pt" | "en"): string {
  const sections = hits.map((hit, index) => {
    const preferred = lang === "en" ? hit.content_en : hit.content_pt;
    const text = preferred || hit.content_en || hit.content_pt || "";
    const page = String(hit.page).padStart(3, "0");

    const lines: string[] = [
      `--- chunk ${index + 1} ---`,
      `id: [[${hit.doc_id}/p${page}#${hit.chunk_id}]]`,
      `type: ${hit.type}`,
    ];
    if (hit.classification) {
      lines.push(`classification: ${hit.classification}`);
    }
    // Keep the blank separator between metadata and text. The previous
    // `.filter(Boolean)` removed it together with the null placeholder.
    const excerpt = text.slice(0, MAX_CHUNK_CHARS);
    if (excerpt) {
      lines.push("", excerpt);
    }
    return lines.join("\n");
  });
  return sections.join("\n\n");
}

/**
 * Builds the user prompt: the question (EN, plus PT-BR when supplied), the
 * rendered corpus shortlist and the task instructions.
 *
 * @param task Task providing the question(s) and the optional doc scope.
 * @param hits Search hits that ground the answer.
 * @param lang Preferred language for the chunk text.
 * @returns The prompt as markdown, with blank lines between sections.
 */
function buildPrompt(task: HolmesTask, hits: SearchHit[], lang: "pt" | "en"): string {
  const block = renderChunkBlock(hits, lang);
  const ptQ = task.question_pt_br?.trim();
  const scope = task.doc_id ? `, scoped to ${task.doc_id}` : "";

  const lines: Array<string | null> = [
    `# Question to investigate`,
    "",
    `**EN.** ${task.question}`,
    ptQ ? `**PT-BR.** ${ptQ}` : null,
    "",
    `## Corpus shortlist (${hits.length} chunks${scope})`,
    "",
    block,
    "",
    "## Your task",
    "",
    "Build 2-3 rival hypotheses about the question above. Each must cite at",
    "least one chunk via [[doc-id/pNNN#cNNNN]] in both argument_for and",
    "argument_against (EN) and in argument_for_pt_br and",
    "argument_against_pt_br (PT-BR). Assign priors + posteriors summing",
    "roughly to 1.0. Emit the JSON array exactly as specified by the system",
    "prompt — no prose, no code fence, no preamble. **Bilingual is mandatory:",
    "every narrative field appears in both EN and PT-BR.**",
  ];
  // Drop only the null placeholder; the "" entries are intentional blank
  // lines and must survive (filtering on truthiness would remove them).
  return lines.filter((line): line is string => line !== null).join("\n");
}

/**
 * Extracts the JSON array from the model response.
 *
 * A response starting with `NO_HYPOTHESES` (optionally wrapped in backticks)
 * yields `null`. Otherwise a leading/trailing code fence is stripped and the
 * text between the first `[` and the last `]` is parsed.
 *
 * @param text Raw model response.
 * @returns The parsed array, or `null` for the NO_HYPOTHESES sentinel.
 * @throws Error when no bracketed span exists, the span is not valid JSON,
 *   or the parsed value is not an array.
 */
function extractJsonArray(text: string): unknown[] | null {
  const t = text.trim();
  if (/^`?NO_HYPOTHESES`?\b/i.test(t)) return null;

  const stripped = t.replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "");
  const first = stripped.indexOf("[");
  const last = stripped.lastIndexOf("]");
  // `last < first` covers a "]" that only appears before the first "[",
  // which would otherwise hand an empty slice to JSON.parse.
  if (first === -1 || last === -1 || last < first) {
    throw new Error(`holmes returned no JSON array: ${t.slice(0, 200)}`);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(stripped.slice(first, last + 1));
  } catch (e: unknown) {
    const detail = e instanceof Error ? e.message : String(e);
    throw new Error(`holmes returned invalid JSON (${detail}): ${t.slice(0, 200)}`);
  }
  if (!Array.isArray(parsed)) throw new Error("holmes JSON is not an array");
  return parsed;
}

/** Returns the trimmed string when `value` is a non-blank string, else undefined. */
function nonEmptyString(value: unknown): string | undefined {
  if (typeof value !== "string") return undefined;
  const trimmed = value.trim();
  return trimmed.length > 0 ? trimmed : undefined;
}

/**
 * Maps one element of the model's JSON array to writeHypothesis() arguments.
 *
 * @param raw One element of the parsed array; may be any JSON value.
 * @param task Task supplying the question text for both languages.
 * @returns The arguments, or `null` when the element is not a JSON object or
 *   has no non-blank string `position` (such entries are skipped).
 */
function toWriteArgs(raw: unknown, task: HolmesTask): WriteHypothesisArgs | null {
  // JSON arrays may contain null / primitives / nested arrays; reading
  // properties off null would throw and abort the whole run.
  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) return null;
  const rec = raw as Record<string, unknown>;

  // Only a real string is accepted: String() on an object would yield
  // "[object Object]" and be persisted as a position.
  const position = nonEmptyString(rec.position);
  if (position === undefined) return null;

  return {
    question: task.question,
    question_pt_br: task.question_pt_br ?? task.question,
    position,
    position_pt_br: nonEmptyString(rec.position_pt_br),
    argument_for: nonEmptyString(rec.argument_for),
    argument_for_pt_br: nonEmptyString(rec.argument_for_pt_br),
    argument_against: nonEmptyString(rec.argument_against),
    argument_against_pt_br: nonEmptyString(rec.argument_against_pt_br),
    // Numeric coercion only; a missing or non-numeric value becomes NaN and
    // is passed through to writeHypothesis unchanged.
    prior: Number(rec.prior),
    posterior: Number(rec.posterior),
    confidence_band: rec.confidence_band as WriteHypothesisArgs["confidence_band"],
    evidence_refs: Array.isArray(rec.evidence_refs)
      ? (rec.evidence_refs as Array<{ evidence_id?: string; supports?: boolean; weight?: number }>).filter(
          (x): x is { evidence_id: string; supports?: boolean; weight?: number } =>
            typeof x?.evidence_id === "string" && x.evidence_id.length > 0,
        )
      : [],
  };
}

/**
 * Runs one Holmes investigation: ground via hybridSearch, ask Claude for
 * rival hypotheses, then persist each valid one through writeHypothesis().
 *
 * Audit events emitted: `holmes_grounded` after the search,
 * `detective_completed` after the model call, and `write_hypothesis_failed`
 * for each hypothesis whose write throws (the loop then continues).
 *
 * @param task The investigation request. `lang` defaults to "pt" and
 *   `context_chunks` to 12.
 * @returns `{ skipped: true, reason }` when the search returns no hits
 *   ("no_corpus_match") or the model answers NO_HYPOTHESES; otherwise the
 *   results of the successful writes (at most MAX_HYPOTHESES).
 * @throws Error when the model response contains no parseable JSON array;
 *   errors from the search, prompt read, model call or audit also propagate.
 */
export async function runHolmes(task: HolmesTask): Promise<
  | { hypotheses: Array<{ hypothesis_id: string; case_file: string }> }
  | { skipped: true; reason: string }
> {
  const lang: "pt" | "en" = task.lang ?? "pt";
  const topK = task.context_chunks ?? 12;

  // 1. Ground with hybrid_search.
  const hits = await hybridSearch({
    query: task.question,
    lang,
    doc_id: task.doc_id ?? null,
    top_k: topK,
    recall_k: 60,
  });
  await audit({
    event: "holmes_grounded",
    job_id: task.job_id,
    detective: DETECTIVE,
    question: task.question,
    n_chunks: hits.length,
    doc_id: task.doc_id ?? null,
  });
  if (hits.length === 0) {
    return { skipped: true, reason: "no_corpus_match" };
  }

  // 2. Call Claude. No tools are allowed: all grounding is in the prompt.
  const systemPrompt = await readFile(PROMPT_PATH, "utf-8");
  const prompt = buildPrompt(task, hits, lang);
  const llm = await callClaude({
    prompt,
    systemPrompt,
    model: env.CLAUDE_MODEL,
    allowedTools: [],
    timeoutMs: env.JOB_TIMEOUT_SECONDS * 1000,
    budgetCapUsd: task.budget_cap_usd ?? env.BUDGET_CAP_USD_PER_JOB,
  });
  await audit({
    event: "detective_completed",
    job_id: task.job_id,
    detective: DETECTIVE,
    cost_usd: llm.costUsd,
    tokens_in: llm.tokensIn,
    tokens_out: llm.tokensOut,
    duration_ms: llm.durationMs,
  });
  console.error(`[holmes] response (${llm.text.length} chars): ${llm.text.slice(0, 800)}`);

  // 3. Parse + write.
  const arr = extractJsonArray(llm.text);
  if (arr === null) return { skipped: true, reason: "NO_HYPOTHESES" };

  const out: Array<{ hypothesis_id: string; case_file: string }> = [];
  // Writes stay sequential so results and audit events keep the model's order.
  for (const raw of arr.slice(0, MAX_HYPOTHESES)) {
    const args = toWriteArgs(raw, task);
    if (args === null) continue;
    try {
      const written = await writeHypothesis(args, { job_id: task.job_id, detective: DETECTIVE });
      out.push(written);
    } catch (e: unknown) {
      // One rejected hypothesis must not discard the others: record and move on.
      await audit({
        event: "write_hypothesis_failed",
        job_id: task.job_id,
        detective: DETECTIVE,
        error: e instanceof Error ? e.message : String(e),
        position: args.position.slice(0, 200),
      });
    }
  }
  return { hypotheses: out };
}