/**
 * holmes.ts — hypothesis tournament detective.
 *
 * Workflow (matches agentic-layer-spec sec 7):
 *   1. The runtime grounds Holmes with a small corpus shortlist via
 *      hybridSearch — Holmes never gets the whole DB, just the relevant 8-15
 *      chunks.
 *   2. Claude (model taken from env.CLAUDE_MODEL; Sonnet 4.6 per the original
 *      design note) reads the question + chunks, emits a JSON array of
 *      2-3 rival hypotheses with priors/posteriors/citations.
 *   3. The runtime parses the array and calls writeHypothesis() for each.
 *      The writer enforces posterior bounds + Tetlock band + FK to evidence.
 *
 * Holmes does NOT get tool calls. All grounding is pre-fed; all writes are
 * applied by the runtime after validation (sa-security gate #2).
 */
import { readFile } from "node:fs/promises";
import path from "node:path";
import { fileURLToPath } from "node:url";

import { audit } from "../lib/audit";
import { callClaude } from "../lib/claude";
import { env } from "../lib/env";
import { hybridSearch, type SearchHit } from "../lib/search";
import { writeHypothesis, type WriteHypothesisArgs } from "../tools/write_hypothesis";

const HERE = path.dirname(fileURLToPath(import.meta.url));
const PROMPT_PATH = path.resolve(HERE, "..", "..", "prompts", "holmes.md");

/** Identity recorded on every audit event and hypothesis write made here. */
const DETECTIVE = "holmes@detective";
/** Number of chunks fed to the model when the task does not say otherwise. */
const DEFAULT_CONTEXT_CHUNKS = 12;
/** Each chunk's text is cut to this many UTF-16 code units in the prompt. */
const CHUNK_TEXT_LIMIT = 1200;
/** At most this many array entries from the model reply are considered. */
const MAX_HYPOTHESES = 3;

export interface HolmesTask {
  job_id: string;
  question: string;
  /** Optional scope narrowing — restrict the search to one doc / entity. */
  doc_id?: string;
  lang?: "pt" | "en";
  /** How many chunks to feed Holmes. Default 12. */
  context_chunks?: number;
  budget_cap_usd?: number;
}

/** Loosely-typed view of one element of the model's JSON array. */
interface RawHypothesis {
  position?: unknown;
  argument_for?: unknown;
  argument_against?: unknown;
  prior?: unknown;
  posterior?: unknown;
  confidence_band?: unknown;
  evidence_refs?: unknown;
}

/** Shape of an evidence reference once its `evidence_id` has been checked. */
type EvidenceRef = { evidence_id: string; supports?: boolean; weight?: number };

/**
 * Render the search hits as the text block embedded in the user prompt.
 *
 * Each chunk becomes a small header (`--- chunk N ---`, citation id, type and,
 * when present, classification), one blank line, then the chunk text in the
 * requested language (falling back to whichever language has content),
 * truncated to CHUNK_TEXT_LIMIT. Chunks are separated by a blank line.
 *
 * @param hits Search hits to render, in the order they should appear.
 * @param lang Preferred language for the chunk text.
 * @returns The rendered block; an empty string when `hits` is empty.
 */
function renderChunkBlock(hits: SearchHit[], lang: "pt" | "en"): string {
  const blocks = hits.map((h, i) => {
    const text =
      (lang === "en" ? h.content_en : h.content_pt) || h.content_en || h.content_pt || "";
    const pageStr = String(h.page).padStart(3, "0");
    const header = [
      `--- chunk ${i + 1} ---`,
      `id: [[${h.doc_id}/p${pageStr}#${h.chunk_id}]]`,
      `type: ${h.type}`,
    ];
    if (h.classification) header.push(`classification: ${h.classification}`);
    // The blank separator is appended explicitly: filtering the line list with
    // `Boolean` (as this used to do) also removes "" and silently glued the
    // chunk text onto the metadata lines.
    return [...header, "", text.slice(0, CHUNK_TEXT_LIMIT)].join("\n");
  });
  return blocks.join("\n\n");
}

/**
 * Build the user prompt: the question, the rendered corpus shortlist and the
 * fixed task instructions.
 *
 * @param task Task being run; supplies the question and the optional doc scope.
 * @param hits Grounding chunks to include.
 * @param lang Language used when rendering chunk text.
 * @returns The complete prompt text.
 */
function buildPrompt(task: HolmesTask, hits: SearchHit[], lang: "pt" | "en"): string {
  const block = renderChunkBlock(hits, lang);
  return [
    `# Question to investigate`,
    "",
    task.question,
    "",
    `## Corpus shortlist (${hits.length} chunks${task.doc_id ? `, scoped to ${task.doc_id}` : ""})`,
    "",
    block,
    "",
    "## Your task",
    "",
    "Build 2-3 rival hypotheses about the question above. Each must cite at",
    "least one chunk via [[doc-id/pNNN#cNNNN]] in argument_for and",
    "argument_against. Assign priors + posteriors summing roughly to 1.0.",
    "Emit the JSON array exactly as specified by the system prompt — no prose,",
    "no code fence, no preamble.",
  ].join("\n");
}

/**
 * Pull the JSON array out of the model's reply.
 *
 * A leading/trailing Markdown code fence is tolerated, as is stray text around
 * the outermost `[` … `]` pair.
 *
 * @param text Raw model output.
 * @returns The parsed array, or `null` when the reply is the `NO_HYPOTHESES`
 *   sentinel (bare or inside a code fence).
 * @throws Error when no `[` … `]` span exists, when the span is not valid
 *   JSON, or when the parsed value is not an array. Messages include the first
 *   200 characters of the reply for diagnosis.
 */
function extractJsonArray(text: string): unknown[] | null {
  const trimmed = text.trim();
  const unfenced = trimmed.replace(/^```(?:json)?\s*\n?/i, "").replace(/\n?```\s*$/i, "");
  if (unfenced.trim() === "NO_HYPOTHESES") return null;

  const first = unfenced.indexOf("[");
  const last = unfenced.lastIndexOf("]");
  // `last < first` also covers `last === -1` and the "]…[" ordering, which
  // would otherwise produce an empty slice and an opaque parse error.
  if (first === -1 || last < first) {
    throw new Error(`holmes returned no JSON array: ${trimmed.slice(0, 200)}`);
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(unfenced.slice(first, last + 1));
  } catch (e) {
    const detail = e instanceof Error ? e.message : String(e);
    throw new Error(`holmes returned malformed JSON (${detail}): ${trimmed.slice(0, 200)}`);
  }
  if (!Array.isArray(parsed)) throw new Error("holmes JSON is not an array");
  return parsed;
}

/**
 * Convert one element of the model's array into writeHypothesis arguments.
 *
 * Only shallow shape checks happen here; numeric fields are coerced with
 * `Number()` and passed on as-is, so range validation is left to the writer.
 *
 * @param raw Element taken from the parsed JSON array.
 * @param question The task question, copied onto the hypothesis.
 * @returns The arguments, or `null` when `raw` is not a plain object or has no
 *   non-empty string `position`.
 */
function toWriteArgs(raw: unknown, question: string): WriteHypothesisArgs | null {
  // JSON arrays may legally contain null, numbers, strings or nested arrays;
  // none of those can be a hypothesis.
  if (typeof raw !== "object" || raw === null || Array.isArray(raw)) return null;
  const rec = raw as RawHypothesis;

  // Only a real string counts: String() on an object would yield
  // "[object Object]" and be written as if it were a position.
  const position = typeof rec.position === "string" ? rec.position.trim() : "";
  if (!position) return null;

  const evidenceRefs = Array.isArray(rec.evidence_refs)
    ? (rec.evidence_refs as Array<Partial<EvidenceRef> | null | undefined>).filter(
        (r): r is EvidenceRef => typeof r?.evidence_id === "string" && r.evidence_id.length > 0,
      )
    : [];

  return {
    question,
    position,
    argument_for: typeof rec.argument_for === "string" ? rec.argument_for : undefined,
    argument_against: typeof rec.argument_against === "string" ? rec.argument_against : undefined,
    prior: Number(rec.prior),
    posterior: Number(rec.posterior),
    confidence_band: rec.confidence_band as WriteHypothesisArgs["confidence_band"],
    evidence_refs: evidenceRefs,
  };
}

/**
 * Run one Holmes investigation: ground the question with hybridSearch, ask the
 * model for rival hypotheses, then write each usable one via writeHypothesis.
 *
 * Audit events emitted: `holmes_grounded` after the search,
 * `detective_completed` after the model call, and `write_hypothesis_failed`
 * for every write that throws (the loop continues with the next entry).
 *
 * @param task The job to run. `lang` defaults to "pt", `context_chunks` to 12
 *   and `budget_cap_usd` to env.BUDGET_CAP_USD_PER_JOB.
 * @returns `{ skipped: true, reason }` when the search returns nothing
 *   ("no_corpus_match") or the model answers with the sentinel
 *   ("NO_HYPOTHESES"); otherwise the results of the successful writes, which
 *   may be an empty list.
 * @throws Whatever hybridSearch, audit, readFile or callClaude throw, and the
 *   errors documented on extractJsonArray when the reply cannot be parsed.
 */
export async function runHolmes(task: HolmesTask): Promise<
  | { hypotheses: Array<{ hypothesis_id: string; case_file: string }> }
  | { skipped: true; reason: string }
> {
  const lang: "pt" | "en" = task.lang ?? "pt";
  const k = task.context_chunks ?? DEFAULT_CONTEXT_CHUNKS;

  // 1. Ground with hybrid_search.
  const hits = await hybridSearch({
    query: task.question,
    lang,
    doc_id: task.doc_id ?? null,
    top_k: k,
    recall_k: 60,
  });
  await audit({
    event: "holmes_grounded",
    job_id: task.job_id,
    detective: DETECTIVE,
    question: task.question,
    n_chunks: hits.length,
    doc_id: task.doc_id ?? null,
  });
  if (hits.length === 0) {
    return { skipped: true, reason: "no_corpus_match" };
  }

  // 2. Call Claude. No tools are allowed: grounding is entirely pre-fed.
  const systemPrompt = await readFile(PROMPT_PATH, "utf-8");
  const prompt = buildPrompt(task, hits, lang);
  const llm = await callClaude({
    prompt,
    systemPrompt,
    model: env.CLAUDE_MODEL,
    allowedTools: [],
    timeoutMs: env.JOB_TIMEOUT_SECONDS * 1000,
    budgetCapUsd: task.budget_cap_usd ?? env.BUDGET_CAP_USD_PER_JOB,
  });
  await audit({
    event: "detective_completed",
    job_id: task.job_id,
    detective: DETECTIVE,
    cost_usd: llm.costUsd,
    tokens_in: llm.tokensIn,
    tokens_out: llm.tokensOut,
    duration_ms: llm.durationMs,
  });
  console.error(`[holmes] response (${llm.text.length} chars): ${llm.text.slice(0, 800)}`);

  // 3. Parse + write.
  const arr = extractJsonArray(llm.text);
  if (arr === null) return { skipped: true, reason: "NO_HYPOTHESES" };

  const out: Array<{ hypothesis_id: string; case_file: string }> = [];
  // The cap is applied before validation, so an unusable entry among the first
  // MAX_HYPOTHESES is not replaced by a later one.
  for (const [index, raw] of arr.slice(0, MAX_HYPOTHESES).entries()) {
    const args = toWriteArgs(raw, task.question);
    if (args === null) {
      console.error(`[holmes] skipping entry ${index}: not an object with a non-empty string position`);
      continue;
    }
    try {
      // Writes stay sequential so one failure is audited before the next
      // attempt and results keep the model's ordering.
      const written = await writeHypothesis(args, { job_id: task.job_id, detective: DETECTIVE });
      out.push(written);
    } catch (e) {
      await audit({
        event: "write_hypothesis_failed",
        job_id: task.job_id,
        detective: DETECTIVE,
        error: e instanceof Error ? e.message : String(e),
        position: args.position.slice(0, 200),
      });
    }
  }
  return { hypotheses: out };
}