/**
 * orchestrator.ts — chief-detective. Decides which detective runs for a job.
 *
 * Kinds currently registered in `dispatch`:
 *   - `evidence_chain`        → Locard
 *   - `hypothesis_tournament` → Holmes
 *   - `contradiction_scan`    → Dupin
 *
 * Other kinds enter the registry as we build each detective. Unknown kinds
 * fail the job loudly so we don't quietly drop work.
 */
import { audit } from "./lib/audit";
import { query } from "./lib/pg";
import { runLocard, type LocardTask } from "./detectives/locard";
import { runHolmes, type HolmesTask } from "./detectives/holmes";
import { runDupin, type DupinTask } from "./detectives/dupin";

/** A claimed row of work handed to `dispatch`. */
export interface InvestigationJob {
  /** Key used in the `WHERE job_id = …` clause when the result is written back. */
  job_id: string;
  /** Selects the detective; see the `switch` in `dispatch` for accepted values. */
  kind: string;
  /** Kind-specific arguments. Every field is validated at the point of use. */
  payload: Record<string, unknown>;
  /** Not read by this module. */
  triggered_by: string | null;
}

/** How many chunks `evidence_chain` scans when the payload lists none. */
const DEFAULT_EVIDENCE_CANDIDATES = 5;

/** Extracts a printable message from anything that can be thrown. */
function errorMessage(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}

/** True for output entries that record a failure (`{ error: "<message>" }`). */
function isErrorOutput(o: unknown): o is { error: string } {
  return (
    typeof o === "object" &&
    o !== null &&
    typeof (o as { error?: unknown }).error === "string"
  );
}

/**
 * Reads the optional fields shared by the Holmes and Dupin payloads.
 * `lang` falls back to "pt" for anything other than the exact string "en".
 */
function readScanOptions(payload: Record<string, unknown>): {
  doc_id: string | undefined;
  lang: "en" | "pt";
  context_chunks: number | undefined;
} {
  return {
    doc_id: typeof payload.doc_id === "string" ? payload.doc_id : undefined,
    lang: payload.lang === "en" ? "en" : "pt",
    context_chunks:
      typeof payload.context_chunks === "number" ? payload.context_chunks : undefined,
  };
}

/**
 * Runs the detective matching `job.kind` and writes the outcome to
 * `public.investigation_jobs`.
 *
 * Failures inside the detective run are not rethrown: they are recorded on
 * the job row (`status = 'failed'`, `error` set) and audited as `job_failed`.
 * The returned promise rejects only if the initial `job_claimed` audit or the
 * failure bookkeeping in the `catch` block itself throws.
 *
 * @param job      The job to run.
 * @param workerId Identifier of the claiming worker; only written to the audit log.
 */
export async function dispatch(job: InvestigationJob, workerId: string): Promise<void> {
  await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId });

  const outputs: unknown[] = [];
  try {
    switch (job.kind) {
      case "evidence_chain": {
        // Payload shape: { doc_id, chunks?: [chunk_ids], claim? } — when `chunks`
        // is not an array we fall back to the top DEFAULT_EVIDENCE_CANDIDATES
        // candidate chunks of the document.
        const docId = String(job.payload.doc_id ?? "");
        if (!docId) throw new Error("evidence_chain requires payload.doc_id");

        // NOTE(review): array elements are not validated here; a non-string
        // entry is passed through to Locard as-is.
        const chunkIds = Array.isArray(job.payload.chunks)
          ? (job.payload.chunks as string[])
          : await pickEvidenceCandidates(docId, DEFAULT_EVIDENCE_CANDIDATES);
        if (chunkIds.length === 0) throw new Error(`no candidate chunks in ${docId}`);

        const claim = typeof job.payload.claim === "string" ? job.payload.claim : undefined;

        // Chunks are processed one at a time; a failure on one chunk is recorded
        // in `outputs` and does not stop the remaining chunks.
        for (const chunk_id of chunkIds) {
          const task: LocardTask = { job_id: job.job_id, doc_id: docId, chunk_id, claim };
          try {
            const r = await runLocard(task);
            outputs.push({ chunk_id, ...r });
          } catch (e: unknown) {
            outputs.push({ chunk_id, error: errorMessage(e) });
          }
        }
        break;
      }

      case "hypothesis_tournament": {
        // Payload: { question, doc_id?, lang?, context_chunks? }
        const question = String(job.payload.question ?? "").trim();
        if (!question) throw new Error("hypothesis_tournament requires payload.question");

        const task: HolmesTask = {
          job_id: job.job_id,
          question,
          ...readScanOptions(job.payload),
        };
        const r = await runHolmes(task);
        if ("skipped" in r) {
          outputs.push({ kind: "hypothesis_tournament", skipped: true, reason: r.reason });
        } else {
          for (const h of r.hypotheses) outputs.push({ kind: "hypothesis", ...h });
        }
        break;
      }

      case "contradiction_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const topic = String(job.payload.topic ?? "").trim();
        if (!topic) throw new Error("contradiction_scan requires payload.topic");

        const task: DupinTask = {
          job_id: job.job_id,
          topic,
          ...readScanOptions(job.payload),
        };
        const r = await runDupin(task);
        if ("skipped" in r) {
          outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason });
        } else {
          for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c });
        }
        break;
      }

      default:
        throw new Error(`unknown_kind: ${job.kind}`);
    }

    // Status reflects reality: if every per-item attempt errored we mark
    // the job failed (so the UI doesn't say "complete" when nothing useful
    // was produced); if at least one succeeded we keep `complete` with the
    // mixed outputs payload. An empty `outputs` counts as `complete`.
    const allErrors = outputs.length > 0 && outputs.every(isErrorOutput);

    // First few error messages, surfaced to the user via the jobs table.
    const summary = allErrors
      ? outputs
          .filter(isErrorOutput)
          .map((o) => o.error)
          .filter((msg) => msg.length > 0)
          .slice(0, 3)
          .join(" | ")
      : null;

    await query(
      "UPDATE public.investigation_jobs " +
        "SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3 " +
        "WHERE job_id = $4",
      [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id],
    );

    // NOTE(review): this audit call sits inside the `try`, so if it throws
    // after the UPDATE above succeeded, the `catch` below rewrites the row
    // to `failed`. Confirm that is the intended outcome.
    await audit({
      event: allErrors ? "job_failed_all_items" : "job_completed",
      job_id: job.job_id,
      kind: job.kind,
      n_outputs: outputs.length,
      ...(summary ? { summary } : {}),
    });
  } catch (e: unknown) {
    // Whatever was collected before the failure is persisted with the error.
    const err = errorMessage(e);
    await query(
      "UPDATE public.investigation_jobs " +
        "SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb " +
        "WHERE job_id = $3",
      [err, JSON.stringify(outputs), job.job_id],
    );
    await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err });
  }
}

/**
 * Pick a small set of chunks that are likely to yield evidence: rows of
 * `public.chunks` for the document that are `is_searchable` and whose text
 * (`content_en`, falling back to `content_pt`) is longer than 200 characters.
 * Ordered by `ufo_anomaly` descending first, so the most interesting chunks
 * are extracted first, then by page and position within the page.
 *
 * NOTE(review): Postgres sorts NULLs first under `DESC`; if `ufo_anomaly` is
 * nullable, unflagged rows would lead the list — confirm the column is NOT NULL.
 *
 * @param doc_id Document whose chunks are considered.
 * @param limit  Maximum number of chunk ids to return.
 * @returns Chunk ids in priority order; empty when nothing qualifies.
 */
async function pickEvidenceCandidates(doc_id: string, limit: number): Promise<string[]> {
  const rows = await query<{ chunk_id: string }>(
    "SELECT chunk_id FROM public.chunks " +
      "WHERE doc_id = $1 AND is_searchable " +
      "AND LENGTH(COALESCE(content_en, content_pt, '')) > 200 " +
      "ORDER BY ufo_anomaly DESC, page ASC, order_in_page ASC " +
      "LIMIT $2",
    [doc_id, limit],
  );
  return rows.map((r) => r.chunk_id);
}