User flagged that the bureau was emitting English-only output, violating
the project's bilingual rule. Every narrative field now ships in both
languages: stored in sibling DB columns + rendered as adjacent markdown
sections per CLAUDE.md §3.
Migration 0007 (apply as supabase_admin):
- public.hypotheses +question_pt_br, +position_pt_br,
+argument_for_pt_br, +argument_against_pt_br
- public.contradictions +topic_pt_br, +notes_pt_br
- public.witnesses +access_to_event_pt_br, +bias_notes_pt_br,
+verdict_pt_br
- public.gaps +description_pt_br, +suggested_next_move_pt_br
- public.evidence: unchanged (verbatim_excerpt stays source-language)
- JSONB siblings inside contradictions.chunks + gaps.scope handled at
runtime (statement_pt_br, title_pt_br, dominant_model_pt_br,
why_surprising_pt_br, what_it_implies_pt_br).
Detective prompts (all 7) rewritten with explicit bilingual JSON contract:
- Output protocol section names every EN field + its _pt_br sibling
- "Bilingual is mandatory" warning in the task instruction
- Sentinel skip-states unchanged (NO_HYPOTHESES, NO_CONTRADICTIONS,
INSUFFICIENT_TESTIMONY, INSUFFICIENT_HYPOTHESIS, NO_OUTLIERS,
NO_NEW_EVIDENCE, INSUFFICIENT_ARTEFACTS)
- Schneier: parallel arrays — hidden_assumptions[i] matches
hidden_assumptions_pt_br[i], lengths must match
- Case-Writer: interleaved §1 (EN) / §1 (PT-BR) per act in the body
Writer-side validation (all 7 tools):
- Reject INSERT if PT-BR sibling missing when EN field is set
- Persist both languages atomically in one INSERT (no half-updates)
- Markdown renderers write adjacent EN+PT-BR sections in case files
(## Argument for (EN) followed by ## Argumento a favor (PT-BR), etc.)
Detective parse layer (all 7 detectives):
- Coerce both keys from JSON output
- "incomplete_bilingual_*" skip reason when either side missing
- Defensive: PT-BR fields trimmed + length-capped same as EN
Orchestrator propagates question_pt_br + topic_pt_br through job payload
to runHolmes / runCaseWriter, mirroring the chat-tool entry point.
Web (UI):
- /api/jobs/[id] hydrates _pt_br siblings from pg
- job-status-poller HypothesisCard: PT-BR primary, EN in <details>
fallback when both exist
- ContradictionCard: PT-BR statement primary + secondary EN quote
- WitnessCard: PT-BR verdict primary + secondary EN quote, panels in PT
- GapCard: PT-BR title/why/implies primary
- /bureau hub: SELECTs both columns, renders PT-BR primary
- /h/[id]: ArgumentPanel renders PT-BR primary with collapsible EN
fallback when both exist
- BureauSnapshot homepage: position_pt_br / topic_pt_br / verdict_pt_br
primary
- DocBureauPanel /d/[doc]: same primary-PT-BR pattern
- New web/lib/i18n/pick.ts helper (not yet used by chat/agents — kept
for future locale-driven switching once both languages are equally
complete; the current rule is PT-BR-first since the user is Brazilian)
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
251 lines
10 KiB
TypeScript
251 lines
10 KiB
TypeScript
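/**
 * orchestrator.ts — chief-detective. Decides which detective runs for a job.
 *
 * In W3.1–W3.4 the only known kind was `evidence_chain` (Locard). Other kinds
 * entered the registry as each detective was built in W3.5+; the dispatcher
 * now also routes `hypothesis_tournament` (Holmes), `case_report`
 * (Case-Writer), `calibrate_hypothesis` (Tetlock), `outlier_scan` (Taleb),
 * `witness_analysis` (Poirot), `red_team_review` (Schneier) and
 * `contradiction_scan` (Dupin). Unknown kinds fail the job loudly so we
 * don't quietly drop work.
 */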
|
||
import { audit } from "./lib/audit";
|
||
import { query } from "./lib/pg";
|
||
import { runLocard, type LocardTask } from "./detectives/locard";
|
||
import { runHolmes, type HolmesTask } from "./detectives/holmes";
|
||
import { runDupin, type DupinTask } from "./detectives/dupin";
|
||
import { runSchneier, type SchneierTask } from "./detectives/schneier";
|
||
import { runPoirot, type PoirotTask } from "./detectives/poirot";
|
||
import { runTaleb, type TalebTask } from "./detectives/taleb";
|
||
import { runTetlock, type TetlockTask } from "./detectives/tetlock";
|
||
import { runCaseWriter, type CaseWriterTask } from "./detectives/case_writer";
|
||
|
||
/**
 * A unit of investigation work handed to `dispatch`.
 */
export interface InvestigationJob {
  /** Identifier of the job; used as the key when results are persisted. */
  job_id: string;
  /** Job type that selects which detective runs (e.g. `evidence_chain`). */
  kind: string;
  /** Kind-specific arguments. Untyped, so every handler validates its own fields. */
  payload: Record<string, unknown>;
  /** Presumably the actor that requested the job, `null` when unknown — TODO confirm. */
  triggered_by: string | null;
}
|
||
|
||
/**
 * Runs one claimed investigation job and records its outcome.
 *
 * Emits a `job_claimed` audit event, routes the job to the detective that
 * matches `job.kind`, then writes status, outputs and error summary back to
 * `public.investigation_jobs` and emits a closing audit event.
 *
 * Outcome rules:
 * - every output carries a string `error`  -> status `failed`, with the first
 *   three messages joined by " | " as the job error;
 * - otherwise                              -> status `complete` (skipped and
 *   mixed results included);
 * - anything thrown while routing or persisting the success path (bad
 *   payload, unknown kind, detective failure) -> status `failed` with that
 *   message.
 *
 * The initial `job_claimed` audit call and the writes performed on the failure
 * path are not guarded, so a rejection there propagates to the caller.
 *
 * @param job - The job to run; `payload` is untrusted and validated per kind.
 * @param workerId - Identifier of the worker, recorded in the audit trail.
 */
export async function dispatch(job: InvestigationJob, workerId: string): Promise<void> {
  await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId });

  const { payload } = job;
  let outputs: unknown[] = [];
  try {
    switch (job.kind) {
      case "evidence_chain": {
        // Payload shape: { doc_id, chunks?: [chunk_ids], claim? } — when no
        // chunk list is provided, fall back to the top 5 candidates chosen by
        // pickEvidenceCandidates.
        const docId = String(payload.doc_id ?? "");
        if (!docId) throw new Error("evidence_chain requires payload.doc_id");
        // The payload is untrusted: keep only real string ids instead of
        // blindly casting the array.
        const chunkIds = Array.isArray(payload.chunks)
          ? payload.chunks.filter((c): c is string => typeof c === "string")
          : await pickEvidenceCandidates(docId, 5);
        if (chunkIds.length === 0) throw new Error(`no candidate chunks in ${docId}`);
        for (const chunk_id of chunkIds) {
          const task: LocardTask = {
            job_id: job.job_id,
            doc_id: docId,
            chunk_id,
            claim: payloadString(payload.claim),
          };
          // One bad chunk must not abort the rest: record the failure as an
          // output and move on.
          try {
            const r = await runLocard(task);
            outputs.push({ chunk_id, ...r });
          } catch (e) {
            outputs.push({ chunk_id, error: describeError(e) });
          }
        }
        break;
      }
      case "hypothesis_tournament": {
        // Payload: { question, question_pt_br?, doc_id?, lang?, context_chunks? }
        const question = String(payload.question ?? "").trim();
        if (!question) throw new Error("hypothesis_tournament requires payload.question");
        const task: HolmesTask = {
          job_id: job.job_id,
          question,
          question_pt_br: payloadString(payload.question_pt_br)?.trim(),
          doc_id: payloadString(payload.doc_id),
          lang: payloadLang(payload.lang),
          context_chunks: payloadNumber(payload.context_chunks),
        };
        const r = await runHolmes(task);
        if ("skipped" in r) {
          outputs.push({ kind: "hypothesis_tournament", skipped: true, reason: r.reason });
        } else {
          for (const h of r.hypotheses) outputs.push({ kind: "hypothesis", ...h });
        }
        break;
      }
      case "case_report": {
        // Payload: { topic, topic_pt_br?, doc_id?, slug?, lang? }
        const topic = String(payload.topic ?? "").trim();
        if (!topic) throw new Error("case_report requires payload.topic");
        const task: CaseWriterTask = {
          job_id: job.job_id,
          topic,
          topic_pt_br: payloadString(payload.topic_pt_br)?.trim(),
          doc_id: payloadString(payload.doc_id),
          slug: payloadString(payload.slug),
          lang: payloadLang(payload.lang),
        };
        const r = await runCaseWriter(task);
        if ("skipped" in r) {
          outputs.push({ kind: "case_report", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "case_report", ...r });
        }
        break;
      }
      case "calibrate_hypothesis": {
        // Payload: { hypothesis_id, lang? }
        const hyp = String(payload.hypothesis_id ?? "").trim();
        if (!hyp) throw new Error("calibrate_hypothesis requires payload.hypothesis_id");
        const task: TetlockTask = {
          job_id: job.job_id,
          hypothesis_id: hyp,
          lang: payloadLang(payload.lang),
        };
        const r = await runTetlock(task);
        if ("skipped" in r) {
          outputs.push({ kind: "calibrate_hypothesis", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "calibration", ...r });
        }
        break;
      }
      case "outlier_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const topic = String(payload.topic ?? "").trim();
        if (!topic) throw new Error("outlier_scan requires payload.topic");
        const task: TalebTask = {
          job_id: job.job_id,
          topic,
          doc_id: payloadString(payload.doc_id),
          lang: payloadLang(payload.lang),
          context_chunks: payloadNumber(payload.context_chunks),
        };
        const r = await runTaleb(task);
        if ("skipped" in r) {
          outputs.push({ kind: "outlier_scan", skipped: true, reason: r.reason });
        } else {
          for (const o of r.outliers) outputs.push({ kind: "outlier", ...o });
        }
        break;
      }
      case "witness_analysis": {
        // Payload: { person_id } OR { person_entity_pk }, plus lang?, context_chunks?
        const person_id = payloadString(payload.person_id)?.trim();
        const person_entity_pk = payloadNumber(payload.person_entity_pk);
        if (!person_id && !person_entity_pk) {
          throw new Error("witness_analysis requires payload.person_id or person_entity_pk");
        }
        const task: PoirotTask = {
          job_id: job.job_id,
          person_id,
          person_entity_pk,
          lang: payloadLang(payload.lang),
          context_chunks: payloadNumber(payload.context_chunks),
        };
        const r = await runPoirot(task);
        if ("skipped" in r) {
          outputs.push({ kind: "witness_analysis", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "witness_analysis", ...r });
        }
        break;
      }
      case "red_team_review": {
        // Payload: { hypothesis_id }
        const hyp = String(payload.hypothesis_id ?? "").trim();
        if (!hyp) throw new Error("red_team_review requires payload.hypothesis_id");
        const task: SchneierTask = { job_id: job.job_id, hypothesis_id: hyp };
        const r = await runSchneier(task);
        if ("skipped" in r) {
          outputs.push({ kind: "red_team_review", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "red_team_review", ...r });
        }
        break;
      }
      case "contradiction_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const topic = String(payload.topic ?? "").trim();
        if (!topic) throw new Error("contradiction_scan requires payload.topic");
        const task: DupinTask = {
          job_id: job.job_id,
          topic,
          doc_id: payloadString(payload.doc_id),
          lang: payloadLang(payload.lang),
          context_chunks: payloadNumber(payload.context_chunks),
        };
        const r = await runDupin(task);
        if ("skipped" in r) {
          outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason });
        } else {
          for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c });
        }
        break;
      }

      default:
        // Caught below, so the job row ends up `failed` with this message.
        throw new Error(`unknown_kind: ${job.kind}`);
    }

    // Status reflects reality: if every per-item attempt errored we mark
    // the job failed (so the UI doesn't say "complete" when nothing useful
    // was produced); if at least one succeeded we keep `complete` with the
    // mixed outputs payload.
    const allErrors = outputs.length > 0 && outputs.every(
      (o): o is { error: string } => typeof (o as { error?: unknown }).error === "string",
    );
    // First few error messages, surfaced to the user via the jobs table.
    const summary = allErrors
      ? outputs
          .map((o) => (o as { error?: string }).error)
          .filter((e): e is string => Boolean(e))
          .slice(0, 3)
          .join(" | ")
      : null;
    await query(
      `UPDATE public.investigation_jobs
          SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3
        WHERE job_id = $4`,
      [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id],
    );
    await audit({
      event: allErrors ? "job_failed_all_items" : "job_completed",
      job_id: job.job_id,
      kind: job.kind,
      n_outputs: outputs.length,
      ...(summary ? { summary } : {}),
    });
  } catch (e) {
    const err = describeError(e);
    // Persist whatever outputs were collected before the failure.
    await query(
      `UPDATE public.investigation_jobs
          SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb
        WHERE job_id = $3`,
      [err, JSON.stringify(outputs), job.job_id],
    );
    await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err });
  }
}

/**
 * Turns anything a `throw` may have produced into a message string.
 * JavaScript allows throwing non-Error values, for which `.message` is
 * undefined; those are stringified so a failure never loses its error text.
 */
function describeError(e: unknown): string {
  if (typeof e === "object" && e !== null) {
    const message = (e as { message?: unknown }).message;
    if (typeof message === "string") return message;
  }
  return String(e);
}

/** Returns the payload value when it is a string, otherwise `undefined`. */
function payloadString(value: unknown): string | undefined {
  return typeof value === "string" ? value : undefined;
}

/** Returns the payload value when it is a number, otherwise `undefined`. */
function payloadNumber(value: unknown): number | undefined {
  return typeof value === "number" ? value : undefined;
}

/** Resolves the job language: `"en"` only when explicitly requested, else `"pt"`. */
function payloadLang(value: unknown): "en" | "pt" {
  return value === "en" ? "en" : "pt";
}
|
||
|
||
/**
 * Pick a small set of chunks that are likely to yield evidence: chunks of the
 * document marked `is_searchable` whose text (English when present, otherwise
 * Portuguese) is longer than 200 chars. Ordered by Sonnet's anomaly flag
 * (`ufo_anomaly`) first so we extract the most interesting first, then by
 * reading order (page, position in page).
 *
 * @param doc_id - Document whose chunks are scanned.
 * @param limit - Maximum number of chunk ids to return.
 * @returns Candidate chunk ids, best first; empty when no chunk qualifies.
 */
async function pickEvidenceCandidates(doc_id: string, limit: number): Promise<string[]> {
  const rows = await query<{ chunk_id: string }>(
    // NULLS LAST: PostgreSQL sorts NULLs first under DESC, which would put
    // unflagged chunks ahead of flagged ones if `ufo_anomaly` is nullable
    // (nullability not visible here — harmless when the column is NOT NULL).
    `SELECT chunk_id
       FROM public.chunks
      WHERE doc_id = $1
        AND is_searchable
        AND LENGTH(COALESCE(content_en, content_pt, '')) > 200
      ORDER BY ufo_anomaly DESC NULLS LAST, page ASC, order_in_page ASC
      LIMIT $2`,
    [doc_id, limit],
  );
  return rows.map((r) => r.chunk_id);
}
|