disclosure-bureau/web/lib/chat/tools.ts

/**
 * Sherlock's tool kit — OpenAI-style function-calling schema + local handlers.
 *
 * Each tool has:
 *   - definition: JSON Schema sent to the model
 *   - handler:    Node function that runs locally and returns a JSON-serializable result
 *
 * Tools called by the model trigger AG-UI events streamed to the frontend
 * (tool_start, tool_result, navigate). The frontend renders these inline in
 * the message AND, for `navigate_to`, can offer a clickable button to scroll
 * the UI to a target page.
 *
 * Retrieval stack (chunks-aware):
 *   - hybrid_search   → BM25 + dense (BGE-M3) + RRF + BGE-Reranker rerank
 *   - read_chunk      → fetch a single chunk by chunk_id (cite-then-quote)
 *   - list_anomalies  → all UFO/cryptid-flagged chunks (cheap, no LLM)
 *   - get_page_chunks → assemble one page from chunks
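 * Entity graph (co-mention edges between entities):
 *   - entity_neighbors, entity_path, co_mention_chunks
 * Wiki-aware fallbacks (when DB not available or richer entity data needed):
 *   - read_page, read_document, read_entity, search_corpus (legacy grep)
 * Other:
 *   - analyze_image_region  → vision Q&A over a cropped page region
 *   - request_investigation → queue an Investigation Bureau job
 *   - navigate_to           → emit clickable button to scroll UI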
 */
import fs from "node:fs/promises";
import path from "node:path";
import {
  WIKI,
  readDocument,
  readPage,
  readEntity,
  listDocuments,
  listPages,
  classKeyToFolder,
} from "../wiki";
import {
  hybridSearch,
  getChunk,
  listAnomalies,
  getPageChunks,
  type ChunkHit,
} from "../retrieval/hybrid";
import {
  findEntity,
  getNeighbors,
  findPaths,
  getCoMentionChunks,
} from "../retrieval/graph";
import { pgQuery } from "../retrieval/db";

/**
 * One entry of the function-calling tool list sent to the model
 * (OpenAI-style: a `function` wrapper holding the tool name, its description
 * and a JSON Schema for the arguments).
 */
export interface ToolDefinition {
  /** Always the literal "function". */
  type: "function";
  function: {
    /** Tool name; the same string is used as the key in TOOL_HANDLERS. */
    name: string;
    /** Natural-language guidance telling the model when and how to call the tool. */
    description: string;
    /** JSON Schema object describing the tool's arguments. */
    parameters: Record<string, unknown>;
  };
}

/**
 * Per-call context handed to every tool handler alongside the model-supplied
 * arguments. Every field is optional.
 */
export interface ToolHandlerContext {
  /** Currently-viewed location, if any, to bias search. Several handlers in
   *  this file use `doc_id` as the fallback document scope when the model
   *  does not pass one. */
  doc_id?: string | null;
  /** Currently-viewed page, if any. No handler visible in this file reads it. */
  page_id?: string | null;
  /** UI language preference (pt | en). */
  lang?: "pt" | "en";
  /** Authenticated user's email — populated by /api/sessions/[id]/messages so
   *  tools that audit (e.g. request_investigation) can label `triggered_by`. */
  user_email?: string | null;
  /** Optional sink for inline AG-UI artifacts (citations, crops, entity cards).
   *  When provided, tools may push typed artifacts that the UI renders inline
   *  alongside the tool block. Safe to leave undefined for non-streaming callers. */
  emitArtifact?: (artifact: import("./agui").Artifact) => void;
}

/**
 * Signature shared by every local tool handler: receives the raw (unvalidated)
 * argument object produced by the model plus the call context, and resolves to
 * a JSON-serializable result. Handlers in this file report failures as
 * `{ error: ... }` objects rather than by throwing.
 */
export interface ToolHandler {
  (args: Record<string, unknown>, ctx: ToolHandlerContext): Promise<unknown>;
}

/* ─── Tool defs ─────────────────────────────────────────────────────────── */

/**
 * Schema for the `hybrid_search` tool (dispatched to handleHybridSearch).
 * Only `query` is required. When the model omits `lang` or `doc_id`, the
 * handler falls back to the values carried by the handler context.
 */
const hybrid_search_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "hybrid_search",
    description:
      "PRIMARY semantic search over the entire UAP/UFO corpus chunks. " +
      "Combines BM25 keyword recall + BGE-M3 dense embeddings + cross-encoder rerank. " +
      "Returns up to top_k chunks with chunk_id, doc_id, page, bbox, text snippets, " +
      "classification, and relevance score. Use this for any question about content. " +
      "Filter with doc_id to scope to one document; type to restrict chunk type " +
      "(paragraph, heading, stamp, etc.); ufo_only=true to retrieve only anomaly-flagged chunks.",
    parameters: {
      type: "object",
      properties: {
        query: { type: "string", description: "Natural language query, PT or EN." },
        lang: { type: "string", enum: ["pt", "en"], description: "Search language (default pt)." },
        doc_id: { type: "string", description: "Optional: restrict to one document." },
        type: {
          type: "string",
          description:
            "Optional chunk-type filter: paragraph, heading, table_marker, image, stamp, signature, " +
            "address_block, classification_marking, redaction, footer, marginalia, form_field.",
        },
        classification: {
          type: "string",
          description: "Optional: SECRET, CONFIDENTIAL, RESTRICTED, NOFORN.",
        },
        ufo_only: { type: "boolean", description: "Only chunks flagged with UFO anomaly." },
        top_k: { type: "integer", description: "Number of final results (default 20, max 50)." },
      },
      required: ["query"],
    },
  },
};

/**
 * Schema for the `read_chunk` tool (dispatched to handleReadChunk): fetch one
 * chunk, addressed by `doc_id` + `chunk_id`, both required.
 */
const read_chunk_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_chunk",
    description:
      "Read ONE chunk in full (verbatim text EN+PT, full bbox, metadata, anomaly flags). " +
      "Use AFTER hybrid_search to expand a citation before quoting the user.",
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        chunk_id: { type: "string", description: "e.g. 'c0042'" },
      },
      required: ["doc_id", "chunk_id"],
    },
  },
};

/**
 * Schema for the `get_page_chunks` tool (dispatched to handleGetPageChunks).
 * The handler returns each chunk's EN/PT text truncated to 500 characters.
 */
const get_page_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "get_page_chunks",
    description:
      "Get all chunks of one page in reading order. Use to reconstruct a page or to " +
      "answer 'what's on page N of doc X' questions with full structure.",
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: { type: "integer", description: "Page number (1-indexed)." },
      },
      required: ["doc_id", "page"],
    },
  },
};

/**
 * Schema for the `list_anomalies` tool (dispatched to handleListAnomalies).
 * The handler treats any `kind` other than "cryptid" as "ufo" and caps
 * `limit` at 200.
 */
const list_anomalies_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "list_anomalies",
    description:
      "List all chunks flagged with a UFO or cryptid anomaly. Cheap query (no embedding). " +
      "Use for 'show me all sightings', 'all spherical objects', 'cryptid encounters'.",
    parameters: {
      type: "object",
      properties: {
        kind: { type: "string", enum: ["ufo", "cryptid"] },
        doc_id: { type: "string", description: "Optional: restrict to one doc." },
        limit: { type: "integer", description: "Max results (default 50)." },
      },
      required: ["kind"],
    },
  },
};

/**
 * Schema for the `read_page` tool (dispatched to handleReadPage). `page` is a
 * string here; the handler normalizes a plain number to the zero-padded
 * 'pNNN' form before reading the wiki page.
 */
const read_page_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_page",
    description:
      "Read the legacy wiki page record for context (vision_description, " +
      "entities_extracted, content_classification). Useful WHEN the doc isn't in the new " +
      "chunk index yet OR you need page-level vision metadata. Prefer hybrid_search + " +
      "read_chunk for content questions.",
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: { type: "string", description: "e.g. 'p007' or '7'." },
      },
      required: ["doc_id", "page"],
    },
  },
};

/**
 * Schema for the `read_document` tool (dispatched to handleReadDocument).
 * The handler returns front-matter fields, the first 20 entries of the page
 * index and the first 2000 characters of the document body.
 */
const read_document_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_document",
    description:
      "Get the consolidated overview of a document — summary, page index, " +
      "content_classification, key entities.",
    parameters: {
      type: "object",
      properties: { doc_id: { type: "string" } },
      required: ["doc_id"],
    },
  },
};

/**
 * Schema for the `read_entity` tool (dispatched to handleReadEntity). `class`
 * is one of the eight singular class keys; the handler maps it to a wiki
 * folder via classKeyToFolder.
 */
const read_entity_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_entity",
    description:
      "Read the detail of an entity (person, organization, location, event, " +
      "uap_object, vehicle, operation, concept) including enrichment from WebSearch.",
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: [
            "person",
            "organization",
            "location",
            "event",
            "uap_object",
            "vehicle",
            "operation",
            "concept",
          ],
        },
        id: { type: "string", description: "kebab-case id, e.g. 'j-edgar-hoover'." },
      },
      required: ["class", "id"],
    },
  },
};

/**
 * Schema for the `search_corpus` tool (dispatched to handleSearch): legacy
 * substring search over the wiki files. The handler returns at most 8 hits.
 */
const search_corpus_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "search_corpus",
    description:
      "Legacy keyword-only search over document IDs, titles, and entity IDs. " +
      "Prefer hybrid_search for content questions. Use this only to find entities/docs by name.",
    parameters: {
      type: "object",
      properties: {
        query: { type: "string" },
        scope: { type: "string", enum: ["all", "documents", "entities"] },
      },
      required: ["query"],
    },
  },
};

/**
 * Schema for the `entity_neighbors` tool (dispatched to handleEntityNeighbors).
 * `class` + `id` identify the entity; `filter_classes` and `limit` are optional.
 */
const entity_neighbors_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_neighbors",
    description:
      "List entities co-mentioned with a given entity in the corpus chunks. " +
      "Use to answer 'who/what is connected to X' questions. Returns up to " +
      "limit neighbors sorted by edge weight (number of shared chunks).",
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: ["person", "organization", "location", "event", "uap_object", "vehicle", "operation", "concept"],
        },
        id: { type: "string", description: "kebab-case id or canonical name." },
        filter_classes: {
          type: "array",
          items: { type: "string" },
          description: "Optional: restrict neighbors to these entity classes.",
        },
        limit: { type: "integer", description: "Max neighbors (default 30, max 100)." },
      },
      required: ["class", "id"],
    },
  },
};

/**
 * Schema for the `entity_path` tool (dispatched to handleEntityPath).
 *
 * NOTE(review): `from_class` / `to_class` are unconstrained strings with no
 * description, whereas `entity_neighbors` and `read_entity` declare an enum of
 * the eight class keys — confirm whether the same enum should apply here.
 */
const entity_path_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_path",
    description:
      "Find paths between two entities via shared chunks (multi-hop). Useful for " +
      "'how is X connected to Y' or 'show the trail between Hoover and Project Sign'.",
    parameters: {
      type: "object",
      properties: {
        from_class: { type: "string" },
        from_id: { type: "string" },
        to_class: { type: "string" },
        to_id: { type: "string" },
        max_hops: { type: "integer", description: "1-4 (default 3)." },
      },
      required: ["from_class", "from_id", "to_class", "to_id"],
    },
  },
};

/**
 * Schema for the `co_mention_chunks` tool (dispatched to handleCoMentionChunks).
 *
 * NOTE(review): `a_class` / `b_class` are unconstrained strings with no
 * description, whereas `entity_neighbors` and `read_entity` declare an enum of
 * the eight class keys — confirm whether the same enum should apply here.
 */
const co_mention_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "co_mention_chunks",
    description:
      "Return chunks where two specific entities both appear. Use after entity_neighbors " +
      "to inspect the actual passages connecting them.",
    parameters: {
      type: "object",
      properties: {
        a_class: { type: "string" },
        a_id: { type: "string" },
        b_class: { type: "string" },
        b_id: { type: "string" },
        limit: { type: "integer", description: "Default 20, max 100." },
      },
      required: ["a_class", "a_id", "b_class", "b_id"],
    },
  },
};

/**
 * Schema for the `analyze_image_region` tool (dispatched to
 * handleAnalyzeImageRegion). `bbox` is an object of four numbers documented as
 * normalized to 0..1; `context` is the only optional argument.
 */
const analyze_image_region_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "analyze_image_region",
    description:
      "Vision tool — answer a question about a cropped region of a document page. " +
      "Use this when the user asks about a photograph, diagram, sketch, signature, " +
      "stamp, redaction, or any visual element where the chunk's text description " +
      "isn't enough. The model reads the actual pixels via Sonnet vision. " +
      "Get the bbox + page from a prior hybrid_search hit (each chunk carries bbox). " +
      "Cost: ~$0.005–$0.02 per call. Use sparingly; prefer hybrid_search first.",
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: { type: "integer", description: "1-indexed page number" },
        bbox: {
          type: "object",
          description: "Normalized bbox (0..1) of the region to analyze.",
          properties: {
            x: { type: "number" }, y: { type: "number" },
            w: { type: "number" }, h: { type: "number" },
          },
          required: ["x", "y", "w", "h"],
        },
        question: { type: "string", description: "What you want to know about the image." },
        context: { type: "string", description: "Optional: prose context that grounds the model." },
      },
      required: ["doc_id", "page", "bbox", "question"],
    },
  },
};

/**
 * Schema for the `request_investigation` tool (dispatched to
 * handleRequestInvestigation), which queues an Investigation Bureau job.
 *
 * The per-parameter descriptions state which `kind` requires which argument
 * and must stay in step with the handler's validation: `hypothesis_id` is
 * required for red_team_review and calibrate_hypothesis; `topic` for
 * contradiction_scan, outlier_scan and case_report; `question` for
 * hypothesis_tournament; `person_id` for witness_analysis; `doc_id` for
 * evidence_chain.
 */
const request_investigation_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "request_investigation",
    description:
      "Queue a deeper investigation by the Investigation Bureau detectives. " +
      "Use ONLY when the user asks for analysis that requires structured reasoning " +
      "across multiple chunks — e.g. 'build rival hypotheses about X', " +
      "'find contradictions about Y', 'trace the chain of custody for claim Z'. " +
      "Do NOT use for plain lookups; hybrid_search is faster. " +
      "kinds: hypothesis_tournament (Holmes — 2-3 rival hypotheses with priors/posteriors) | " +
      "evidence_chain (Locard — verbatim evidence with chain_of_custody on N chunks of one doc) | " +
      "contradiction_scan (Dupin — pairs of chunks in irreconcilable tension on a topic) | " +
      "red_team_review (Schneier — attacks an existing hypothesis: hidden assumptions, failure modes, alt explanations) | " +
      "witness_analysis (Poirot — credibility / access / bias / corroboration for one named person) | " +
      "outlier_scan (Taleb — locates AT MOST 3 chunks that violate the dominant model for a topic) | " +
      "calibrate_hypothesis (Tetlock — recomputes posterior + band of an existing hypothesis vs fresh corpus) | " +
      "case_report (case-writer — five-act Watson narrative assembling all artefacts on a topic into one document). " +
      "Returns { job_id, kind, status_url, eta_seconds }. The UI renders a status card " +
      "with a link to /jobs/<job_id>; the worker takes ~30-180 seconds.",
    parameters: {
      type: "object",
      properties: {
        kind: {
          type: "string",
          enum: ["hypothesis_tournament", "evidence_chain", "contradiction_scan", "red_team_review", "witness_analysis", "outlier_scan", "calibrate_hypothesis", "case_report"],
          description: "Detective task kind.",
        },
        hypothesis_id: {
          type: "string",
          description:
            "For red_team_review and calibrate_hypothesis: REQUIRED. The H-NNNN id of an " +
            "existing hypothesis to attack or recalibrate. Ignored for the other kinds.",
        },
        person_id: {
          type: "string",
          description:
            "For witness_analysis: REQUIRED. kebab-case entity_id of a person " +
            "(e.g. 'j-edgar-hoover'). Ignored for the other kinds.",
        },
        question: {
          type: "string",
          description:
            "For hypothesis_tournament: the investigative question (one sentence, declarative). " +
            "Required for hypothesis_tournament; ignored for the other kinds.",
        },
        topic: {
          type: "string",
          description:
            "For contradiction_scan, outlier_scan and case_report: REQUIRED. Short noun-phrase " +
            "naming the disputed point or subject (e.g. 'date of the Roswell wreckage recovery'). " +
            "Ignored for the other kinds.",
        },
        doc_id: {
          type: "string",
          description:
            "Optional scope. hypothesis_tournament / contradiction_scan / outlier_scan / " +
            "case_report: narrows the corpus shortlist. evidence_chain: REQUIRED — the doc " +
            "Locard scans. Defaults to the currently-viewed document when omitted.",
        },
        chunks: {
          type: "array",
          items: { type: "string" },
          description:
            "Optional for evidence_chain: list of chunk_ids to inspect. Defaults to the " +
            "top 5 anomaly-flagged chunks in the doc.",
        },
        claim: {
          type: "string",
          description:
            "Optional for evidence_chain: a specific claim Locard should look for support of.",
        },
      },
      required: ["kind"],
    },
  },
};

/**
 * Schema for the `navigate_to` tool (dispatched to handleNavigate). The
 * handler rejects targets that do not start with "/" and truncates `label`
 * to 40 characters.
 */
const navigate_to_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "navigate_to",
    description:
      "Offer the user a clickable button to navigate the main UI to a specific " +
      "doc, page, or chunk anchor. Target examples: '/d/<doc-id>', '/d/<doc-id>/p007', " +
      "'/d/<doc-id>/p007#c0042'. Frontend renders the button — does NOT auto-redirect.",
    parameters: {
      type: "object",
      properties: {
        target: { type: "string" },
        label: { type: "string", description: "Short button text (max 40 chars)." },
      },
      required: ["target", "label"],
    },
  },
};

/**
 * Every tool schema exposed by this module, in the order they are listed to
 * the model: chunk retrieval first, then entity-graph tools, wiki fallbacks,
 * vision, investigations and navigation. Each entry's `function.name` has a
 * matching key in TOOL_HANDLERS.
 */
export const TOOL_DEFINITIONS: ToolDefinition[] = [
  hybrid_search_tool,
  read_chunk_tool,
  get_page_chunks_tool,
  list_anomalies_tool,
  entity_neighbors_tool,
  entity_path_tool,
  co_mention_chunks_tool,
  read_page_tool,
  read_document_tool,
  read_entity_tool,
  search_corpus_tool,
  analyze_image_region_tool,
  request_investigation_tool,
  navigate_to_tool,
];

/* ─── Helpers ───────────────────────────────────────────────────────────── */

/**
 * Resolve the language a tool call should use.
 *
 * @param ctx      Handler context; its `lang` is the fallback preference.
 * @param override Raw value supplied by the model; honoured only when it is
 *                 exactly "pt" or "en".
 * @returns The override when valid, else "en" if the context asks for English,
 *          else "pt".
 */
function pickLang(ctx: ToolHandlerContext, override?: unknown): "pt" | "en" {
  const explicit = override === "pt" || override === "en" ? override : undefined;
  if (explicit !== undefined) return explicit;
  if (ctx.lang === "en") return "en";
  return "pt";
}

/**
 * Shrink a retrieval hit to the compact shape returned to the model.
 *
 * @param h    Hit returned by the retrieval layer.
 * @param lang Which language's text feeds the snippet.
 * @returns Identifying fields, a snippet of at most 300 characters, the score
 *          (rerank score when present, otherwise the base score) rounded to
 *          four decimals, and an in-app link to the chunk.
 */
function compactHit(h: ChunkHit, lang: "pt" | "en") {
  const { chunk_id, doc_id, page, type, classification, bbox } = h;
  const body = (lang === "en" ? h.content_en : h.content_pt) || "";
  const rawScore = h.rerank_score ?? h.score;
  return {
    chunk_id,
    doc_id,
    page,
    type,
    classification,
    bbox,
    snippet: body.slice(0, 300),
    score: Number(rawScore.toFixed(4)),
    href: `/d/${doc_id}#${chunk_id}`,
  };
}

/**
 * Cut a short excerpt out of `text`, positioned near the first query word.
 *
 * The anchor is the first whitespace-separated query word of at least three
 * characters, matched case-insensitively. When it is found, the excerpt starts
 * up to 60 characters before the match; otherwise it starts at the beginning.
 * Whitespace runs are collapsed after slicing, so the result can be shorter
 * than `len`.
 *
 * @param text  Source text.
 * @param query User query used to pick the anchor word.
 * @param len   Number of characters to slice before whitespace collapsing.
 */
function snippet(text: string, query: string, len = 200): string {
  const needle = query.toLowerCase().split(/\s+/).find((word) => word.length >= 3) ?? "";
  let from = 0;
  if (needle) {
    const at = text.toLowerCase().indexOf(needle);
    if (at >= 0) from = Math.max(0, at - 60);
  }
  const excerpt = text.slice(from, from + len);
  return excerpt.replace(/\s+/g, " ").trim();
}

/* ─── Tool handlers ─────────────────────────────────────────────────────── */

/**
 * Handler for `hybrid_search`: runs the hybrid retrieval and returns compact hits.
 *
 * Argument handling:
 *   - `query` is required; an empty query returns `{ error: "empty_query" }`.
 *   - `lang` falls back to the context language, then "pt".
 *   - `doc_id` falls back to the currently-viewed document in `ctx`.
 *   - `top_k` is coerced to an integer in [1, 50]; missing, zero or
 *     non-numeric values use the default of 20.
 *   - `ufo_only` is on only for boolean `true` or the string "true".
 *
 * Side effect: when `ctx.emitArtifact` is set, emits a citation artifact (and
 * a crop_image artifact when the hit has a non-empty bbox) for the first six hits.
 *
 * @returns `{ query, lang, count, hits }`, or an `{ error }` object when the
 *          query is empty or retrieval throws.
 */
async function handleHybridSearch(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const query = String(args.query ?? "").trim();
  if (!query) return { error: "empty_query", hits: [] };
  const lang = pickLang(ctx, args.lang);

  // Model-supplied values are untyped: accept only non-blank strings.
  const optionalString = (value: unknown): string | null =>
    typeof value === "string" && value.trim() ? value.trim() : null;

  // Clamp to an integer in [1, 50]. Previously a negative or fractional value
  // was forwarded to retrieval unchanged.
  const top_k = Math.max(1, Math.min(Math.trunc(Number(args.top_k)) || 20, 50));

  try {
    const hits = await hybridSearch({
      query,
      lang,
      doc_id: optionalString(args.doc_id) ?? (ctx.doc_id || null),
      type: optionalString(args.type),
      classification: optionalString(args.classification),
      // Boolean("false") is true, so compare explicitly.
      ufo_only: args.ufo_only === true || args.ufo_only === "true",
      top_k,
      // W2-TD#8: chat is latency-sensitive — skip rerank when ≤10 candidates.
      // The model only cites the first few hits anyway and BGE-Reranker
      // adds 5-8s on CPU. RRF order from the RPC is plenty for the head.
      rerank_strategy: "when_top_k_gt",
      rerank_threshold: 10,
    });
    // Emit one citation (+ optional crop_image) artifact per hit so the UI can
    // render inline cards next to the assistant text. Limit to top 6 to avoid
    // flooding the chat with crops when top_k is large.
    if (ctx.emitArtifact) {
      for (const h of hits.slice(0, 6)) {
        ctx.emitArtifact({
          kind: "citation",
          chunk_id: h.chunk_id,
          doc_id: h.doc_id,
          page: h.page,
          type: h.type,
          classification: h.classification,
          bbox: h.bbox ?? null,
          snippet: ((lang === "en" ? h.content_en : h.content_pt) || "").slice(0, 300),
          score: Number((h.rerank_score ?? h.score).toFixed(4)),
        });
        // Only offer a crop when the bbox has a real area.
        if (h.bbox && h.bbox.w > 0 && h.bbox.h > 0) {
          const bb = h.bbox;
          const src =
            `/api/crop?doc=${encodeURIComponent(h.doc_id)}` +
            `&page=${h.page}&x=${bb.x}&y=${bb.y}&w=${bb.w}&h=${bb.h}&w_px=640`;
          ctx.emitArtifact({
            kind: "crop_image",
            src,
            doc_id: h.doc_id,
            page: h.page,
            chunk_id: h.chunk_id,
            alt_en: (h.content_en || h.chunk_id).slice(0, 120),
            alt_pt: (h.content_pt || h.chunk_id).slice(0, 120),
          });
        }
      }
    }
    return { query, lang, count: hits.length, hits: hits.map((h) => compactHit(h, lang)) };
  } catch (e) {
    return {
      error: "retrieval_unavailable",
      message: e instanceof Error ? e.message : String(e),
      fallback: "use search_corpus (legacy keyword)",
    };
  }
}

/**
 * Handler for `read_chunk`: fetch a single chunk by document and chunk id.
 *
 * @returns The chunk's fields plus an in-app `href`;
 *          `{ error: "missing_args" }` when either id is blank;
 *          `{ error: "not_found" }` when the lookup yields nothing;
 *          `{ error: "retrieval_unavailable" }` when the lookup throws.
 */
async function handleReadChunk(args: Record<string, unknown>): Promise<unknown> {
  const clean = (value: unknown): string => String(value ?? "").trim();
  const doc_id = clean(args.doc_id);
  const chunk_id = clean(args.chunk_id);
  if (doc_id.length === 0 || chunk_id.length === 0) return { error: "missing_args" };

  try {
    const found = await getChunk(doc_id, chunk_id);
    if (!found) return { error: "not_found", doc_id, chunk_id };
    const { page, type, bbox, classification, content_en, content_pt } = found;
    return {
      chunk_id: found.chunk_id,
      doc_id: found.doc_id,
      page,
      type,
      bbox,
      classification,
      content_en,
      content_pt,
      href: `/d/${found.doc_id}#${found.chunk_id}`,
    };
  } catch (err) {
    return { error: "retrieval_unavailable", message: (err as Error).message };
  }
}

/**
 * Handler for `get_page_chunks`: return all chunks of one page.
 *
 * `page` must be a positive integer, as the tool schema declares. Fractional
 * values such as 1.5 used to pass validation and reach the lookup; they are
 * now rejected as `bad_args`.
 *
 * @returns `{ doc_id, page, count, chunks }` with each chunk's EN/PT text
 *          truncated to 500 characters; `{ error: "bad_args" }` on invalid
 *          input; `{ error: "retrieval_unavailable" }` when the lookup throws.
 */
async function handleGetPageChunks(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  if (!doc_id || !Number.isInteger(page) || page < 1) return { error: "bad_args" };
  try {
    const chunks = await getPageChunks(doc_id, page);
    return {
      doc_id,
      page,
      count: chunks.length,
      chunks: chunks.map((c) => ({
        chunk_id: c.chunk_id,
        type: c.type,
        bbox: c.bbox,
        classification: c.classification,
        // Truncated to keep the tool result small for the model.
        content_en: (c.content_en || "").slice(0, 500),
        content_pt: (c.content_pt || "").slice(0, 500),
      })),
    };
  } catch (e) {
    return { error: "retrieval_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Handler for `list_anomalies`: list chunks flagged as UFO or cryptid anomalies.
 *
 * Argument handling:
 *   - `kind`: anything other than "cryptid" is treated as "ufo".
 *   - `doc_id`: a non-blank string, else the currently-viewed document in
 *     `ctx`, else no document filter.
 *   - `limit`: coerced to an integer in [1, 200]; missing, zero or
 *     non-numeric values use the default of 50. Negative values used to be
 *     forwarded to the query unchanged.
 *
 * @returns `{ kind, doc_id, count, anomalies }` or
 *          `{ error: "retrieval_unavailable" }` when the lookup throws.
 */
async function handleListAnomalies(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const kind = args.kind === "cryptid" ? "cryptid" : "ufo";
  const doc_id =
    typeof args.doc_id === "string" && args.doc_id.trim()
      ? args.doc_id.trim()
      : ctx.doc_id || null;
  const limit = Math.max(1, Math.min(Math.trunc(Number(args.limit)) || 50, 200));
  try {
    const rows = await listAnomalies({ kind, doc_id, limit });
    return { kind, doc_id, count: rows.length, anomalies: rows };
  } catch (e) {
    return { error: "retrieval_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Handler for `search_corpus`: legacy case-insensitive substring search over
 * the wiki files.
 *
 * Documents match when the query occurs in the id, the canonical title or the
 * first 2000 characters of the body. Entities match on their file id only.
 * At most 8 hits are returned in total, documents first.
 *
 * Entity hits report `type` as the singular class key used by the other
 * entity tools ("person", "uap_object", ...). The previous `/s$/` stripping
 * produced "people" and "uap-object", which are not valid class keys.
 *
 * A missing or unrecognised `scope` is treated as "all".
 *
 * @returns `{ query, scope, hits }`, or `{ error: "empty_query", hits: [] }`.
 */
async function handleSearch(args: Record<string, unknown>): Promise<unknown> {
  const MAX_HITS = 8;
  const query = String(args.query ?? "").trim();
  if (!query) return { error: "empty_query", hits: [] };
  const scope =
    args.scope === "documents" || args.scope === "entities" ? args.scope : "all";

  const ql = query.toLowerCase();
  const hits: Array<{ type: string; id: string; title: string; snippet: string; href: string }> = [];

  if (scope === "all" || scope === "documents") {
    const ids = await listDocuments();
    for (const id of ids) {
      const f = await readDocument(id);
      if (!f) continue;
      const title = String(f.fm.canonical_title ?? id);
      const hay = `${id} ${title} ${f.body.slice(0, 2000)}`.toLowerCase();
      if (hay.includes(ql)) {
        hits.push({
          type: "document",
          id,
          title,
          snippet: snippet(f.body, query),
          href: `/d/${id}`,
        });
      }
      if (hits.length >= MAX_HITS) break;
    }
  }

  if ((scope === "all" || scope === "entities") && hits.length < MAX_HITS) {
    // Wiki folder name → singular class key reported in the hit.
    const entityFolders: Array<[string, string]> = [
      ["people", "person"],
      ["organizations", "organization"],
      ["locations", "location"],
      ["events", "event"],
      ["uap-objects", "uap_object"],
      ["vehicles", "vehicle"],
      ["operations", "operation"],
      ["concepts", "concept"],
    ];
    for (const [folder, classKey] of entityFolders) {
      try {
        const entries = await fs.readdir(path.join(WIKI, "entities", folder));
        for (const file of entries) {
          if (!file.endsWith(".md")) continue;
          const id = file.replace(/\.md$/, "");
          if (id.toLowerCase().includes(ql)) {
            const content = await fs.readFile(path.join(WIKI, "entities", folder, file), "utf-8");
            const cname = content.match(/canonical_name:\s*([^\n]+)/)?.[1]?.trim() ?? id;
            hits.push({
              type: classKey,
              id,
              title: cname,
              snippet: id,
              href: `/e/${folder}/${id}`,
            });
            if (hits.length >= MAX_HITS) break;
          }
        }
      } catch {
        /* Best-effort: a missing or unreadable folder just contributes no hits. */
      }
      if (hits.length >= MAX_HITS) break;
    }
  }

  return { query, scope, hits };
}

/**
 * Handler for `read_page`: read the legacy wiki record of one page.
 *
 * `page` may be a plain number ("7", 7) or a 'p'-prefixed number of any width
 * ("p7", "p007", "P007"); it is normalised to 'p' + the number zero-padded to
 * three digits. Anything else returns `bad_page`. Previously a negative
 * number built a malformed id such as "p-03", "p7" was rejected, and trailing
 * junk ("7abc") was accepted.
 *
 * @returns Selected front-matter fields plus the first 2000 characters of the
 *          body; `{ error: "missing_args" }` when `doc_id` is blank;
 *          `{ error: "bad_page" }` for an unparseable page;
 *          `{ error: "not_found" }` when the page record does not exist.
 */
async function handleReadPage(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  if (!doc_id) return { error: "missing_args" };

  const parsed = /^p?(\d+)$/i.exec(String(args.page ?? "").trim());
  if (!parsed || parsed[1] === undefined) return { error: "bad_page" };
  const n = parseInt(parsed[1], 10);
  if (!Number.isFinite(n)) return { error: "bad_page" };
  const page = `p${String(n).padStart(3, "0")}`;

  const md = await readPage(doc_id, page);
  if (!md) return { error: "not_found", doc_id, page };
  return {
    doc_id,
    page,
    page_type: md.fm.page_type,
    language: md.fm.language_detected,
    content_classification: md.fm.content_classification,
    redactions_count: Array.isArray(md.fm.redactions) ? (md.fm.redactions as never[]).length : 0,
    vision_description: md.fm.vision_description,
    vision_description_pt_br: md.fm.vision_description_pt_br,
    entities_extracted: md.fm.entities_extracted,
    body_excerpt: md.body.slice(0, 2000),
  };
}

/**
 * Handler for `read_document`: overview of one document from the wiki.
 *
 * @returns Front-matter fields, the page count, the first 20 entries of the
 *          page index and the first 2000 characters of the body as
 *          `executive_summary`; `{ error: "not_found" }` when the document
 *          record does not exist.
 */
async function handleReadDocument(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();

  const record = await readDocument(doc_id);
  if (!record) return { error: "not_found", doc_id };

  // The page list is fetched only once the document is known to exist.
  const pageList = await listPages(doc_id);
  const { fm, body } = record;
  return {
    doc_id,
    canonical_title: fm.canonical_title,
    collection: fm.collection,
    document_class: fm.document_class,
    page_count: pageList.length,
    pages_index: pageList.slice(0, 20),
    content_classification: fm.content_classification,
    languages_detected: fm.languages_detected,
    key_entities: fm.key_entities,
    executive_summary: body.slice(0, 2000),
  };
}

/**
 * Handler for `read_entity`: detail of one wiki entity.
 *
 * @returns Front-matter fields plus the first 2000 characters of the body;
 *          `{ error: "bad_class" }` when the class key maps to no folder;
 *          `{ error: "not_found" }` when the entity record does not exist.
 */
async function handleReadEntity(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();

  const folder = classKeyToFolder(cls);
  if (!folder) {
    return { error: "bad_class", cls };
  }

  const record = await readEntity(folder, id);
  if (!record) {
    return { error: "not_found", cls, id };
  }

  const { fm, body } = record;
  return {
    class: folder,
    id,
    canonical_name: fm.canonical_name,
    aliases: fm.aliases,
    total_mentions: fm.total_mentions,
    enrichment_status: fm.enrichment_status,
    external_sources: fm.external_sources,
    disambiguation_note: fm.disambiguation_note,
    body_excerpt: body.slice(0, 2000),
  };
}

/**
 * Handler for `entity_neighbors`: list entities co-mentioned with one entity.
 *
 * Argument handling:
 *   - `class` and `id` are required.
 *   - `filter_classes` is used only when it is an array; non-string and blank
 *     entries are dropped. A non-array value used to throw inside the `try`
 *     and was reported as `graph_unavailable`; it is now ignored.
 *   - `limit` is coerced to an integer in [1, 100], default 30.
 *
 * @returns `{ entity, count, neighbors }`; `{ error: "missing_args" }`;
 *          `{ error: "entity_not_found" }`; or `{ error: "graph_unavailable" }`
 *          when a graph lookup throws.
 */
async function handleEntityNeighbors(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();
  if (!cls || !id) return { error: "missing_args" };

  const filterClasses = Array.isArray(args.filter_classes)
    ? args.filter_classes.filter((c): c is string => typeof c === "string" && c.length > 0)
    : undefined;
  const limit = Math.max(1, Math.min(Math.trunc(Number(args.limit)) || 30, 100));

  try {
    const ent = await findEntity(cls, id);
    if (!ent) return { error: "entity_not_found", class: cls, id };
    const neighbors = await getNeighbors(ent.entity_pk, { limit, classes: filterClasses });
    return { entity: ent, count: neighbors.length, neighbors };
  } catch (e) {
    return { error: "graph_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Handler for `entity_path`: find paths between two entities.
 *
 * `max_hops` is coerced to an integer in [1, 4], default 3. Negative or
 * fractional values used to be forwarded to the path search unchanged.
 *
 * @returns `{ from, to, max_hops, paths }`; `{ error: "missing_args" }`;
 *          `{ error: "from_not_found" }` / `{ error: "to_not_found" }`; or
 *          `{ error: "graph_unavailable" }` when a graph lookup throws.
 */
async function handleEntityPath(args: Record<string, unknown>): Promise<unknown> {
  const fromCls = String(args.from_class ?? "").trim();
  const fromId = String(args.from_id ?? "").trim();
  const toCls = String(args.to_class ?? "").trim();
  const toId = String(args.to_id ?? "").trim();
  const maxHops = Math.max(1, Math.min(Math.trunc(Number(args.max_hops)) || 3, 4));
  if (!fromCls || !fromId || !toCls || !toId) return { error: "missing_args" };
  try {
    // The two endpoint lookups are independent, so resolve them in parallel.
    const [a, b] = await Promise.all([findEntity(fromCls, fromId), findEntity(toCls, toId)]);
    if (!a) return { error: "from_not_found", class: fromCls, id: fromId };
    if (!b) return { error: "to_not_found", class: toCls, id: toId };
    const paths = await findPaths(a.entity_pk, b.entity_pk, maxHops);
    return { from: a, to: b, max_hops: maxHops, paths };
  } catch (e) {
    return { error: "graph_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Handler for `co_mention_chunks`: chunks in which two entities both appear.
 *
 * `limit` is coerced to an integer in [1, 100], default 20. Negative or
 * fractional values used to be forwarded to the lookup unchanged.
 *
 * @returns `{ a, b, count, chunks }`; `{ error: "missing_args" }`;
 *          `{ error: "entity_not_found", a, b, missing }` where `missing`
 *          lists which side ("a", "b") could not be resolved; or
 *          `{ error: "graph_unavailable" }` when a graph lookup throws.
 */
async function handleCoMentionChunks(args: Record<string, unknown>): Promise<unknown> {
  const aCls = String(args.a_class ?? "").trim();
  const aId = String(args.a_id ?? "").trim();
  const bCls = String(args.b_class ?? "").trim();
  const bId = String(args.b_id ?? "").trim();
  const limit = Math.max(1, Math.min(Math.trunc(Number(args.limit)) || 20, 100));
  if (!aCls || !aId || !bCls || !bId) return { error: "missing_args" };
  try {
    const [a, b] = await Promise.all([findEntity(aCls, aId), findEntity(bCls, bId)]);
    if (!a || !b) {
      // Tell the model which lookup failed so it can correct just that id.
      const missing = [!a ? "a" : null, !b ? "b" : null].filter((s): s is string => s !== null);
      return { error: "entity_not_found", a: aId, b: bId, missing };
    }
    const chunks = await getCoMentionChunks(a.entity_pk, b.entity_pk, limit);
    return { a, b, count: chunks.length, chunks };
  } catch (e) {
    return { error: "graph_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Handler for `request_investigation`: validate the request, build the job
 * payload for the chosen kind and insert a queued row into
 * `public.investigation_jobs`.
 *
 * Required argument per kind:
 *   - hypothesis_tournament: `question`
 *   - contradiction_scan / outlier_scan / case_report: `topic`
 *   - red_team_review / calibrate_hypothesis: `hypothesis_id` matching H-NNNN
 *   - witness_analysis: `person_id` in kebab-case
 *   - evidence_chain: `doc_id` (argument or currently-viewed document);
 *     optional `chunks` and `claim`
 *
 * `eta_seconds` is a fixed estimate per kind. For evidence_chain it is 30 s
 * per chunk, using the number of chunk ids supplied and assuming 5 when none
 * are given. The previous code always reported 30 × 5.
 *
 * @returns `{ job_id, kind, status, eta_seconds, status_url, payload_summary,
 *          detective }` on success; a validation `{ error, message }` object;
 *          `{ error: "insert_failed" }` when the insert returns no row; or
 *          `{ error: "db_unavailable" }` when the query throws.
 */
async function handleRequestInvestigation(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  // One row per supported kind: the detective label returned to the UI and the
  // expected duration in seconds (per chunk for evidence_chain). Key order is
  // the order in which kinds are listed in the bad_kind message.
  const JOB_KINDS: Record<string, { detective: string; eta: number }> = {
    hypothesis_tournament: { detective: "holmes", eta: 60 },
    evidence_chain: { detective: "locard", eta: 30 },
    contradiction_scan: { detective: "dupin", eta: 60 },
    red_team_review: { detective: "schneier", eta: 30 },
    witness_analysis: { detective: "poirot", eta: 45 },
    outlier_scan: { detective: "taleb", eta: 50 },
    calibrate_hypothesis: { detective: "tetlock", eta: 30 },
    case_report: { detective: "case-writer", eta: 180 },
  };
  const DEFAULT_EVIDENCE_CHUNKS = 5;
  const HYPOTHESIS_ID = /^H-\d{4}$/;

  const kind = String(args.kind ?? "").trim();
  // Own-property check so names such as "constructor" are not accepted.
  const spec = Object.prototype.hasOwnProperty.call(JOB_KINDS, kind) ? JOB_KINDS[kind] : undefined;
  if (!spec) {
    return {
      error: "bad_kind",
      message: `kind must be one of: ${Object.keys(JOB_KINDS).join(", ")}`,
    };
  }

  const docArg = typeof args.doc_id === "string" && args.doc_id.trim()
    ? args.doc_id.trim() : ctx.doc_id || null;
  const lang = pickLang(ctx);

  const payload: Record<string, unknown> = {};
  let eta = spec.eta;

  switch (kind) {
    case "hypothesis_tournament": {
      const question = String(args.question ?? "").trim();
      if (!question) return { error: "question_required", message: "hypothesis_tournament needs a question" };
      payload.question = question;
      payload.lang = lang;
      if (docArg) payload.doc_id = docArg;
      break;
    }
    case "contradiction_scan":
    case "outlier_scan":
    case "case_report": {
      const topic = String(args.topic ?? "").trim();
      if (!topic) return { error: "topic_required", message: `${kind} needs a topic` };
      payload.topic = topic;
      payload.lang = lang;
      if (docArg) payload.doc_id = docArg;
      break;
    }
    case "red_team_review":
    case "calibrate_hypothesis": {
      const hyp = String(args.hypothesis_id ?? "").trim();
      if (!HYPOTHESIS_ID.test(hyp)) {
        return { error: "hypothesis_id_required", message: `${kind} needs hypothesis_id like H-0003` };
      }
      payload.hypothesis_id = hyp;
      // Only the calibration job carries a language; red_team_review never did.
      if (kind === "calibrate_hypothesis") payload.lang = lang;
      break;
    }
    case "witness_analysis": {
      const pid = String(args.person_id ?? "").trim();
      if (!/^[a-z0-9][a-z0-9-]*$/.test(pid)) {
        return { error: "person_id_required", message: "witness_analysis needs a kebab-case person_id like 'j-edgar-hoover'" };
      }
      payload.person_id = pid;
      payload.lang = lang;
      break;
    }
    default: {
      // evidence_chain — the only kind left after the lookup above.
      if (!docArg) return { error: "doc_id_required", message: "evidence_chain needs a doc_id" };
      payload.doc_id = docArg;
      let chunkCount = DEFAULT_EVIDENCE_CHUNKS;
      if (Array.isArray(args.chunks)) {
        const ids = args.chunks.filter((c): c is string => typeof c === "string");
        payload.chunks = ids;
        if (ids.length > 0) chunkCount = ids.length;
      }
      if (typeof args.claim === "string" && args.claim.trim()) payload.claim = args.claim.trim();
      eta = spec.eta * chunkCount;
      break;
    }
  }

  const triggered_by = ctx.user_email ? `user:${ctx.user_email}` : "user:anonymous";

  try {
    // Parameterized insert; the payload is stored as jsonb.
    const rows = await pgQuery<{ job_id: string; created_at: string }>(
      `INSERT INTO public.investigation_jobs (kind, payload, triggered_by, status)
       VALUES ($1, $2::jsonb, $3, 'queued')
       RETURNING job_id, created_at`,
      [kind, JSON.stringify(payload), triggered_by],
    );
    const row = rows[0];
    if (!row) return { error: "insert_failed" };
    return {
      job_id: row.job_id,
      kind,
      status: "queued",
      eta_seconds: eta,
      status_url: `/jobs/${row.job_id}`,
      payload_summary: payload,
      detective: spec.detective,
    };
  } catch (e) {
    return { error: "db_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Handler for `navigate_to`: validate a navigation target proposed by the
 * model and echo it back for the frontend to render as a button.
 *
 * Only same-origin, path-absolute targets are accepted. A target must start
 * with "/" but must not start with "//" or "/\": browsers resolve those as
 * scheme-relative URLs pointing at another host, so the old "starts with /"
 * check alone let the model produce off-site links.
 *
 * @returns `{ ok: true, target, label }` with `label` trimmed and cut to 40
 *          characters; `{ error: "target_must_start_with_slash", target }`;
 *          or `{ error: "target_must_be_same_origin", target }`.
 */
async function handleNavigate(args: Record<string, unknown>): Promise<unknown> {
  const target = String(args.target ?? "").trim();
  const label = String(args.label ?? "").trim().slice(0, 40);
  if (!target.startsWith("/")) return { error: "target_must_start_with_slash", target };
  if (/^\/[/\\]/.test(target)) return { error: "target_must_be_same_origin", target };
  return { ok: true, target, label };
}

/**
 * Handler for `analyze_image_region`: ask the vision helper a question about a
 * cropped region of one page.
 *
 * The model-supplied arguments are validated before the (paid) vision call:
 *   - `doc_id`, `page`, `bbox` and `question` must all be present;
 *   - `page` must be a positive integer;
 *   - `bbox` must be an object whose x, y, w, h are finite numbers with
 *     w > 0 and h > 0. It used to be cast from `unknown` without any check.
 *
 * Side effect: when `ctx.emitArtifact` is set, emits a crop_image artifact
 * pointing at the crop URL returned by the vision helper.
 *
 * @returns The vision helper's result; `{ error: "missing_args" }`;
 *          `{ error: "bad_page" }`; `{ error: "bad_bbox" }`; or
 *          `{ error: "vision_failed" }` when the helper throws.
 */
async function handleAnalyzeImageRegion(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  const rawBox = args.bbox;
  const question = String(args.question ?? "").trim();
  if (!doc_id || !page || !rawBox || !question) return { error: "missing_args" };
  if (!Number.isInteger(page) || page < 1) return { error: "bad_page" };

  if (typeof rawBox !== "object") return { error: "bad_bbox" };
  const { x, y, w, h } = rawBox as Record<string, unknown>;
  if (
    typeof x !== "number" || typeof y !== "number" ||
    typeof w !== "number" || typeof h !== "number" ||
    ![x, y, w, h].every(Number.isFinite) ||
    w <= 0 || h <= 0
  ) {
    return { error: "bad_bbox" };
  }
  // Forward a clean object so stray properties from the model are dropped.
  const bbox = { x, y, w, h };

  try {
    // Loaded on demand; presumably to keep the vision code off the module's
    // load path — TODO confirm.
    const { analyzeImageRegion } = await import("./vision");
    const out = await analyzeImageRegion({
      doc_id, page, bbox, question,
      context: typeof args.context === "string" ? args.context : undefined,
      lang: ctx.lang === "en" ? "en" : "pt",
    });
    if (ctx.emitArtifact) {
      ctx.emitArtifact({
        kind: "crop_image",
        src: out.crop_url,
        doc_id, page,
        alt_en: question.slice(0, 120),
        alt_pt: question.slice(0, 120),
      });
    }
    return out;
  } catch (e) {
    return { error: "vision_failed", message: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * Dispatch table from tool name (the `function.name` of each entry in
 * TOOL_DEFINITIONS) to the local handler that executes it. Handlers that take
 * only `args` are still assignable here because a ToolHandler may ignore the
 * context parameter.
 */
export const TOOL_HANDLERS: Record<string, ToolHandler> = {
  hybrid_search: handleHybridSearch,
  read_chunk: handleReadChunk,
  get_page_chunks: handleGetPageChunks,
  list_anomalies: handleListAnomalies,
  entity_neighbors: handleEntityNeighbors,
  entity_path: handleEntityPath,
  co_mention_chunks: handleCoMentionChunks,
  read_page: handleReadPage,
  read_document: handleReadDocument,
  read_entity: handleReadEntity,
  search_corpus: handleSearch,
  analyze_image_region: handleAnalyzeImageRegion,
  request_investigation: handleRequestInvestigation,
  navigate_to: handleNavigate,
};