disclosure-bureau/investigator-runtime/src/orchestrator.ts

/**
 * orchestrator.ts — chief-detective. Decides which detective runs for a job.
 *
 * W3.1–W3.4 started with only `evidence_chain` (Locard); the registry has
 * since grown as each detective was built in W3.5+, and `dispatch` now routes
 * every kind listed in its switch to the matching detective. Unknown kinds
 * fail the job loudly so we don't quietly drop work.
 */
import { audit } from "./lib/audit";
import { query } from "./lib/pg";
import { runLocard, type LocardTask } from "./detectives/locard";
import { runHolmes, type HolmesTask } from "./detectives/holmes";
import { runDupin, type DupinTask } from "./detectives/dupin";
import { runSchneier, type SchneierTask } from "./detectives/schneier";
import { runPoirot, type PoirotTask } from "./detectives/poirot";
import { runTaleb, type TalebTask } from "./detectives/taleb";
import { runTetlock, type TetlockTask } from "./detectives/tetlock";
import { runCaseWriter, type CaseWriterTask } from "./detectives/case_writer";

/**
 * A unit of work handed to `dispatch`.
 */
export interface InvestigationJob {
  /** Identifier of the `public.investigation_jobs` row that `dispatch` updates. */
  job_id: string;
  /** Selects which detective runs (e.g. `evidence_chain`); unrecognised values fail the job. */
  kind: string;
  /** Kind-specific arguments; `dispatch` checks the fields it needs at runtime. */
  payload: Record<string, unknown>;
  /** Not read in this module — presumably who or what enqueued the job (TODO confirm). */
  triggered_by: string | null;
}

/** Number of chunks scanned for `evidence_chain` when the payload lists none. */
const DEFAULT_EVIDENCE_CHUNKS = 5;

/**
 * Message for anything that was thrown. Values that carry a string `message`
 * (including `Error` instances) yield that message; everything else is
 * stringified, so a thrown string or object never collapses to `undefined`.
 */
function thrownMessage(thrown: unknown): string {
  const message = (thrown as { message?: unknown } | null | undefined)?.message;
  return typeof message === "string" ? message : String(thrown);
}

/** Returns `payload[key]` when it is a string, otherwise `undefined`. */
function payloadString(payload: Record<string, unknown>, key: string): string | undefined {
  const value = payload[key];
  return typeof value === "string" ? value : undefined;
}

/** Returns `payload[key]` when it is a number, otherwise `undefined`. */
function payloadNumber(payload: Record<string, unknown>, key: string): number | undefined {
  const value = payload[key];
  return typeof value === "number" ? value : undefined;
}

/** Output language: `"en"` only when explicitly requested, `"pt"` otherwise. */
function payloadLang(payload: Record<string, unknown>): "en" | "pt" {
  return payload.lang === "en" ? "en" : "pt";
}

/** Reads a mandatory payload field as trimmed text; throws `<kind> requires payload.<field>` if blank. */
function requirePayloadText(job: InvestigationJob, field: string): string {
  const text = String(job.payload[field] ?? "").trim();
  if (!text) throw new Error(`${job.kind} requires payload.${field}`);
  return text;
}

/**
 * Runs the detective that matches `job.kind` and records the outcome on the
 * job's row in `public.investigation_jobs`.
 *
 * Flow: audit `job_claimed`, build the detective task from `job.payload`,
 * collect the detective's results in `outputs`, then write status, outputs
 * and error summary to the row and emit a closing audit event
 * (`job_completed`, `job_failed_all_items` or `job_failed`).
 *
 * Anything thrown inside the `try` (validation, detective, or the success-path
 * bookkeeping) is recorded as a `failed` job instead of being rethrown. The
 * initial audit call and the bookkeeping inside the `catch` are not guarded,
 * so their failures propagate to the caller.
 *
 * @param job      The claimed job to execute.
 * @param workerId Identifier of the worker, recorded in the `job_claimed` audit event.
 */
export async function dispatch(job: InvestigationJob, workerId: string): Promise<void> {
  await audit({ event: "job_claimed", job_id: job.job_id, kind: job.kind, worker_id: workerId });

  const outputs: unknown[] = [];
  try {
    switch (job.kind) {
      case "evidence_chain": {
        // Payload: { doc_id, chunks?: [chunk_ids], claim? }. Without `chunks` we
        // fall back to the top DEFAULT_EVIDENCE_CHUNKS candidates of the doc.
        // doc_id is deliberately not trimmed (unlike the text fields below).
        const docId = String(job.payload.doc_id ?? "");
        if (!docId) throw new Error("evidence_chain requires payload.doc_id");
        // NOTE(review): the element types of payload.chunks are not validated.
        const chunkIds = Array.isArray(job.payload.chunks)
          ? (job.payload.chunks as string[])
          : await pickEvidenceCandidates(docId, DEFAULT_EVIDENCE_CHUNKS);
        if (chunkIds.length === 0) throw new Error(`no candidate chunks in ${docId}`);
        const claim = payloadString(job.payload, "claim");
        for (const chunk_id of chunkIds) {
          const task: LocardTask = { job_id: job.job_id, doc_id: docId, chunk_id, claim };
          try {
            const r = await runLocard(task);
            outputs.push({ chunk_id, ...r });
          } catch (e) {
            // One bad chunk must not abort the rest. The message is always a
            // string so the all-items-failed check below can see it.
            outputs.push({ chunk_id, error: thrownMessage(e) });
          }
        }
        break;
      }
      case "hypothesis_tournament": {
        // Payload: { question, question_pt_br?, doc_id?, lang?, context_chunks? }
        const task: HolmesTask = {
          job_id: job.job_id,
          question: requirePayloadText(job, "question"),
          question_pt_br: payloadString(job.payload, "question_pt_br")?.trim(),
          doc_id: payloadString(job.payload, "doc_id"),
          lang: payloadLang(job.payload),
          context_chunks: payloadNumber(job.payload, "context_chunks"),
        };
        const r = await runHolmes(task);
        if ("skipped" in r) {
          outputs.push({ kind: "hypothesis_tournament", skipped: true, reason: r.reason });
        } else {
          for (const h of r.hypotheses) outputs.push({ kind: "hypothesis", ...h });
        }
        break;
      }
      case "case_report": {
        // Payload: { topic, topic_pt_br?, doc_id?, slug?, lang? }
        const task: CaseWriterTask = {
          job_id: job.job_id,
          topic: requirePayloadText(job, "topic"),
          topic_pt_br: payloadString(job.payload, "topic_pt_br")?.trim(),
          doc_id: payloadString(job.payload, "doc_id"),
          slug: payloadString(job.payload, "slug"),
          lang: payloadLang(job.payload),
        };
        const r = await runCaseWriter(task);
        if ("skipped" in r) {
          outputs.push({ kind: "case_report", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "case_report", ...r });
        }
        break;
      }
      case "calibrate_hypothesis": {
        // Payload: { hypothesis_id, lang? }
        const task: TetlockTask = {
          job_id: job.job_id,
          hypothesis_id: requirePayloadText(job, "hypothesis_id"),
          lang: payloadLang(job.payload),
        };
        const r = await runTetlock(task);
        if ("skipped" in r) {
          outputs.push({ kind: "calibrate_hypothesis", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "calibration", ...r });
        }
        break;
      }
      case "outlier_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const task: TalebTask = {
          job_id: job.job_id,
          topic: requirePayloadText(job, "topic"),
          doc_id: payloadString(job.payload, "doc_id"),
          lang: payloadLang(job.payload),
          context_chunks: payloadNumber(job.payload, "context_chunks"),
        };
        const r = await runTaleb(task);
        if ("skipped" in r) {
          outputs.push({ kind: "outlier_scan", skipped: true, reason: r.reason });
        } else {
          for (const o of r.outliers) outputs.push({ kind: "outlier", ...o });
        }
        break;
      }
      case "witness_analysis": {
        // Payload: { person_id } OR { person_entity_pk }, plus lang?, context_chunks?
        const person_id = payloadString(job.payload, "person_id")?.trim();
        const person_entity_pk = payloadNumber(job.payload, "person_entity_pk");
        if (!person_id && !person_entity_pk) {
          throw new Error("witness_analysis requires payload.person_id or person_entity_pk");
        }
        const task: PoirotTask = {
          job_id: job.job_id,
          person_id,
          person_entity_pk,
          lang: payloadLang(job.payload),
          context_chunks: payloadNumber(job.payload, "context_chunks"),
        };
        const r = await runPoirot(task);
        if ("skipped" in r) {
          outputs.push({ kind: "witness_analysis", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "witness_analysis", ...r });
        }
        break;
      }
      case "red_team_review": {
        // Payload: { hypothesis_id }
        const task: SchneierTask = {
          job_id: job.job_id,
          hypothesis_id: requirePayloadText(job, "hypothesis_id"),
        };
        const r = await runSchneier(task);
        if ("skipped" in r) {
          outputs.push({ kind: "red_team_review", skipped: true, reason: r.reason });
        } else {
          outputs.push({ kind: "red_team_review", ...r });
        }
        break;
      }
      case "contradiction_scan": {
        // Payload: { topic, doc_id?, lang?, context_chunks? }
        const task: DupinTask = {
          job_id: job.job_id,
          topic: requirePayloadText(job, "topic"),
          doc_id: payloadString(job.payload, "doc_id"),
          lang: payloadLang(job.payload),
          context_chunks: payloadNumber(job.payload, "context_chunks"),
        };
        const r = await runDupin(task);
        if ("skipped" in r) {
          outputs.push({ kind: "contradiction_scan", skipped: true, reason: r.reason });
        } else {
          for (const c of r.contradictions) outputs.push({ kind: "contradiction", ...c });
        }
        break;
      }

      default:
        throw new Error(`unknown_kind: ${job.kind}`);
    }

    // Status reflects reality: if every per-item attempt errored we mark
    // the job failed (so the UI doesn't say "complete" when nothing useful
    // was produced); if at least one succeeded we keep `complete` with the
    // mixed outputs payload. An empty `outputs` still counts as complete.
    const allErrors =
      outputs.length > 0 &&
      outputs.every((o) => typeof (o as { error?: unknown }).error === "string");
    // First few non-empty error messages, surfaced to the user via the jobs table.
    const summary = allErrors
      ? outputs
          .map((o) => (o as { error: string }).error)
          .filter(Boolean)
          .slice(0, 3)
          .join(" | ")
      : null;
    await query(
      `UPDATE public.investigation_jobs
          SET status = $1, finished_at = NOW(), outputs = $2::jsonb, error = $3
        WHERE job_id = $4`,
      [allErrors ? "failed" : "complete", JSON.stringify(outputs), summary, job.job_id],
    );
    await audit({
      event: allErrors ? "job_failed_all_items" : "job_completed",
      job_id: job.job_id,
      kind: job.kind,
      n_outputs: outputs.length,
      ...(summary ? { summary } : {}),
    });
  } catch (e) {
    // Whatever was thrown, persist a readable message together with any
    // outputs gathered before the failure.
    const err = thrownMessage(e);
    await query(
      `UPDATE public.investigation_jobs
          SET status = 'failed', finished_at = NOW(), error = $1, outputs = $2::jsonb
        WHERE job_id = $3`,
      [err, JSON.stringify(outputs), job.job_id],
    );
    await audit({ event: "job_failed", job_id: job.job_id, kind: job.kind, error: err });
  }
}

/**
 * Selects up to `limit` chunk ids from `public.chunks` for the given document
 * as candidates for evidence extraction.
 *
 * Only rows flagged `is_searchable` whose text (`content_en`, falling back to
 * `content_pt`) is longer than 200 characters qualify. Results are ordered by
 * `ufo_anomaly` descending, then by `page` and `order_in_page` ascending.
 *
 * @param doc_id Document whose chunks are considered.
 * @param limit  Maximum number of chunk ids to return.
 * @returns The matching chunk ids in query order (possibly empty).
 */
async function pickEvidenceCandidates(doc_id: string, limit: number): Promise<string[]> {
  const candidateSql =
    `SELECT chunk_id
       FROM public.chunks
      WHERE doc_id = $1
        AND is_searchable
        AND LENGTH(COALESCE(content_en, content_pt, '')) > 200
      ORDER BY ufo_anomaly DESC, page ASC, order_in_page ASC
      LIMIT $2`;
  const candidates = await query<{ chunk_id: string }>(candidateSql, [doc_id, limit]);

  const ids: string[] = [];
  for (const { chunk_id } of candidates) {
    ids.push(chunk_id);
  }
  return ids;
}