disclosure-bureau/web/lib/chat/tools.ts
Luiz Gustavo b76e81e4b3
Some checks failed
CI / Web — typecheck + lint + build (push) Failing after 44s
CI / Scripts — Python smoke (push) Failing after 3s
CI / Web — npm audit (push) Failing after 43s
CI / Retrieval — golden set (Recall@5 + MRR) (push) Failing after 7s
W3.6: chat request_investigation tool + /jobs/[id] case-file viewer
Closes the loop between the chat UI and the Investigation Bureau runtime.

Chat tool (web/lib/chat/tools.ts):
  - request_investigation { kind, question, doc_id?, chunks?, claim? }
    INSERTs a row in public.investigation_jobs and returns
    { job_id, kind, status, eta_seconds, status_url, detective }.
  - kind=hypothesis_tournament → Holmes (1 question → 2-3 rival hypotheses)
  - kind=evidence_chain → Locard (1 doc → grade-A/B/C evidence with chain
    of custody, default top-5 anomaly chunks)
  - Plumbed user.email through ToolHandlerContext so triggered_by audits
    the requesting user.

Public job viewer:
  - GET /api/jobs/[id] joins investigation_jobs → public.evidence +
    public.hypotheses for the IDs surfaced in outputs[]. Returns one
    payload the page can render without n+1 round-trips. Strips
    triggered_by from the response (it carries the user's email).
  - app/jobs/[id]/page.tsx server-renders the case-file shell:
    detective lore header (Holmes blue or Locard green), question chip,
    scope chip with link back to the document.
  - components/job-status-poller.tsx client island that polls every 3 s
    while non-terminal, then once on terminal to hydrate evidence +
    hypotheses. Renders:
      · Phase tracker (queued → running → complete | failed)
      · Hypothesis cards w/ prior + posterior bars + Δ delta indicator
        + Tetlock band badge (high/medium/low/speculation)
      · Argument-for / argument-against with [[wiki-link]] auto-linking
        to /d/<doc>/p<NNN>#<cNNNN>
      · Evidence cards w/ Grade A/B/C badge + verbatim blockquote +
        bbox crop preview via /api/crop + custody-steps disclosure
      · Empty/in-flight panel ("os detetives estão lendo o corpus")
      · Failure panel surfacing error + partial outputs

Inline chat-bubble card (components/chat-bubble.tsx):
  - ToolTrace.richRender recognises request_investigation results and
    renders a detective banner with status + ETA + link to /jobs/[id]
    (target=_blank). Error case renders a red strip with the message.

UX flow now: user asks Sherlock a question → request_investigation
queues the job → chat card shows "🔎 Holmes · hypothesis_tournament ·
ETA ~60s" → user clicks → /jobs/<id> live-updates → 60 s later, 2-3
rival hypotheses + their arguments + chunk citations are rendered with
Bayesian update visible.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:26:18 -03:00

884 lines
31 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Sherlock's tool kit — OpenAI-style function-calling schema + local handlers.
*
* Each tool has:
* - definition: JSON Schema sent to the model
* - handler: Node function that runs locally and returns a JSON-serializable result
*
* Tools called by the model trigger AG-UI events streamed to the frontend
* (tool_start, tool_result, navigate). The frontend renders these inline in
* the message AND, for `navigate_to`, can offer a clickable button to scroll
* the UI to a target page.
*
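* Retrieval stack (chunks-aware):
* - hybrid_search → BM25 + dense (BGE-M3) + RRF + BGE-Reranker rerank
* - read_chunk → fetch a single chunk by chunk_id (cite-then-quote)
* - list_anomalies → all UFO/cryptid-flagged chunks (cheap, no LLM)
* - get_page_chunks → assemble one page from chunks
* Entity graph:
* - entity_neighbors, entity_path, co_mention_chunks → co-mention lookups
* Wiki-aware fallbacks (when the DB is not available or richer entity data is needed):
* - read_page, read_document, read_entity, search_corpus (legacy grep)
* Other tools:
* - analyze_image_region → vision question about a cropped page region
* - request_investigation → queue an Investigation Bureau job
* - navigate_to → emit clickable button to scroll UI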
*/
import fs from "node:fs/promises";
import path from "node:path";
import {
WIKI,
readDocument,
readPage,
readEntity,
listDocuments,
listPages,
classKeyToFolder,
} from "../wiki";
import {
hybridSearch,
getChunk,
listAnomalies,
getPageChunks,
type ChunkHit,
} from "../retrieval/hybrid";
import {
findEntity,
getNeighbors,
findPaths,
getCoMentionChunks,
} from "../retrieval/graph";
import { pgQuery } from "../retrieval/db";
/**
 * OpenAI-style function-calling descriptor for one tool; this is the shape
 * of every entry in TOOL_DEFINITIONS.
 */
export interface ToolDefinition {
/** Discriminator; always the literal "function". */
type: "function";
function: {
/** Tool name. */
name: string;
/** Natural-language guidance on what the tool does and when to use it. */
description: string;
/** Schema object describing the tool's arguments. */
parameters: Record<string, unknown>;
};
}
/**
 * Per-call context handed to every tool handler alongside the model-supplied
 * arguments. All fields are optional.
 */
export interface ToolHandlerContext {
/** Currently-viewed location, if any, to bias search. */
doc_id?: string | null;
page_id?: string | null;
/** UI language preference (pt | en). */
lang?: "pt" | "en";
/** Authenticated user's email — populated by /api/sessions/[id]/messages so
* tools that audit (e.g. request_investigation) can label `triggered_by`. */
user_email?: string | null;
/** Optional sink for inline AG-UI artifacts (citations, crops, entity cards).
* When provided, tools may push typed artifacts that the UI renders inline
* alongside the tool block. Safe to leave undefined for non-streaming callers. */
emitArtifact?: (artifact: import("./agui").Artifact) => void;
}
/**
 * Local implementation of a tool: receives the raw argument object and the
 * call context, and resolves to the tool's result.
 */
export interface ToolHandler {
(args: Record<string, unknown>, ctx: ToolHandlerContext): Promise<unknown>;
}
/* ─── Tool defs ─────────────────────────────────────────────────────────── */
/** Schema for `hybrid_search`, the primary chunk-level retrieval tool. */
const hybrid_search_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "hybrid_search",
    description: [
      "PRIMARY semantic search over the entire UAP/UFO corpus chunks. ",
      "Combines BM25 keyword recall + BGE-M3 dense embeddings + cross-encoder rerank. ",
      "Returns up to top_k chunks with chunk_id, doc_id, page, bbox, text snippets, ",
      "classification, and relevance score. Use this for any question about content. ",
      "Filter with doc_id to scope to one document; type to restrict chunk type ",
      "(paragraph, heading, stamp, etc.); ufo_only=true to retrieve only anomaly-flagged chunks.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        query: {
          type: "string",
          description: "Natural language query, PT or EN.",
        },
        lang: {
          type: "string",
          enum: ["pt", "en"],
          description: "Search language (default pt).",
        },
        doc_id: {
          type: "string",
          description: "Optional: restrict to one document.",
        },
        type: {
          type: "string",
          description: [
            "Optional chunk-type filter: paragraph, heading, table_marker, image, stamp, signature, ",
            "address_block, classification_marking, redaction, footer, marginalia, form_field.",
          ].join(""),
        },
        classification: {
          type: "string",
          description: "Optional: SECRET, CONFIDENTIAL, RESTRICTED, NOFORN.",
        },
        ufo_only: {
          type: "boolean",
          description: "Only chunks flagged with UFO anomaly.",
        },
        top_k: {
          type: "integer",
          description: "Number of final results (default 20, max 50).",
        },
      },
      required: ["query"],
    },
  },
};
/** Schema for `read_chunk`: expand a single chunk identified by doc_id + chunk_id. */
const read_chunk_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_chunk",
    description: [
      "Read ONE chunk in full (verbatim text EN+PT, full bbox, metadata, anomaly flags). ",
      "Use AFTER hybrid_search to expand a citation before quoting the user.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        chunk_id: {
          type: "string",
          description: "e.g. 'c0042'",
        },
      },
      required: ["doc_id", "chunk_id"],
    },
  },
};
/** Schema for `get_page_chunks`: every chunk of one page, in reading order. */
const get_page_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "get_page_chunks",
    description: [
      "Get all chunks of one page in reading order. Use to reconstruct a page or to ",
      "answer 'what's on page N of doc X' questions with full structure.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: {
          type: "integer",
          description: "Page number (1-indexed).",
        },
      },
      required: ["doc_id", "page"],
    },
  },
};
/** Schema for `list_anomalies`: enumerate UFO- or cryptid-flagged chunks. */
const list_anomalies_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "list_anomalies",
    description: [
      "List all chunks flagged with a UFO or cryptid anomaly. Cheap query (no embedding). ",
      "Use for 'show me all sightings', 'all spherical objects', 'cryptid encounters'.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        kind: {
          type: "string",
          enum: ["ufo", "cryptid"],
        },
        doc_id: {
          type: "string",
          description: "Optional: restrict to one doc.",
        },
        limit: {
          type: "integer",
          description: "Max results (default 50).",
        },
      },
      required: ["kind"],
    },
  },
};
/** Schema for `read_page`: the legacy wiki page record for one page. */
const read_page_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_page",
    description: [
      "Read the legacy wiki page record for context (vision_description, ",
      "entities_extracted, content_classification). Useful WHEN the doc isn't in the new ",
      "chunk index yet OR you need page-level vision metadata. Prefer hybrid_search + ",
      "read_chunk for content questions.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: {
          type: "string",
          description: "e.g. 'p007' or '7'.",
        },
      },
      required: ["doc_id", "page"],
    },
  },
};
/** Schema for `read_document`: consolidated overview of one document. */
const read_document_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_document",
    description: [
      "Get the consolidated overview of a document — summary, page index, ",
      "content_classification, key entities.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
      },
      required: ["doc_id"],
    },
  },
};
/** Schema for `read_entity`: detail record of one entity, addressed by class + id. */
const read_entity_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_entity",
    description: [
      "Read the detail of an entity (person, organization, location, event, ",
      "uap_object, vehicle, operation, concept) including enrichment from WebSearch.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: ["person", "organization", "location", "event", "uap_object", "vehicle", "operation", "concept"],
        },
        id: {
          type: "string",
          description: "kebab-case id, e.g. 'j-edgar-hoover'.",
        },
      },
      required: ["class", "id"],
    },
  },
};
/** Schema for `search_corpus`: legacy keyword lookup of documents and entities. */
const search_corpus_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "search_corpus",
    description: [
      "Legacy keyword-only search over document IDs, titles, and entity IDs. ",
      "Prefer hybrid_search for content questions. Use this only to find entities/docs by name.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        query: { type: "string" },
        scope: {
          type: "string",
          enum: ["all", "documents", "entities"],
        },
      },
      required: ["query"],
    },
  },
};
/** Schema for `entity_neighbors`: entities co-mentioned with a given entity. */
const entity_neighbors_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_neighbors",
    description: [
      "List entities co-mentioned with a given entity in the corpus chunks. ",
      "Use to answer 'who/what is connected to X' questions. Returns up to ",
      "limit neighbors sorted by edge weight (number of shared chunks).",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: [
            "person",
            "organization",
            "location",
            "event",
            "uap_object",
            "vehicle",
            "operation",
            "concept",
          ],
        },
        id: {
          type: "string",
          description: "kebab-case id or canonical name.",
        },
        filter_classes: {
          type: "array",
          items: { type: "string" },
          description: "Optional: restrict neighbors to these entity classes.",
        },
        limit: {
          type: "integer",
          description: "Max neighbors (default 30, max 100).",
        },
      },
      required: ["class", "id"],
    },
  },
};
/** Schema for `entity_path`: multi-hop paths between two entities. */
const entity_path_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_path",
    description: [
      "Find paths between two entities via shared chunks (multi-hop). Useful for ",
      "'how is X connected to Y' or 'show the trail between Hoover and Project Sign'.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        from_class: { type: "string" },
        from_id: { type: "string" },
        to_class: { type: "string" },
        to_id: { type: "string" },
        max_hops: {
          type: "integer",
          description: "1-4 (default 3).",
        },
      },
      required: ["from_class", "from_id", "to_class", "to_id"],
    },
  },
};
/** Schema for `co_mention_chunks`: chunks in which two given entities both appear. */
const co_mention_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "co_mention_chunks",
    description: [
      "Return chunks where two specific entities both appear. Use after entity_neighbors ",
      "to inspect the actual passages connecting them.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        a_class: { type: "string" },
        a_id: { type: "string" },
        b_class: { type: "string" },
        b_id: { type: "string" },
        limit: {
          type: "integer",
          description: "Default 20, max 100.",
        },
      },
      required: ["a_class", "a_id", "b_class", "b_id"],
    },
  },
};
/**
 * Schema for `analyze_image_region`: a vision question about a cropped
 * region of one document page.
 */
const analyze_image_region_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "analyze_image_region",
    description:
      "Vision tool — answer a question about a cropped region of a document page. " +
      "Use this when the user asks about a photograph, diagram, sketch, signature, " +
      "stamp, redaction, or any visual element where the chunk's text description " +
      "isn't enough. The model reads the actual pixels via Sonnet vision. " +
      "Get the bbox + page from a prior hybrid_search hit (each chunk carries bbox). " +
      // The range separator was missing ("~$0.005$0.02"), which made the
      // cost guidance unreadable for the model.
      "Cost: ~$0.005-$0.02 per call. Use sparingly; prefer hybrid_search first.",
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: { type: "integer", description: "1-indexed page number" },
        bbox: {
          type: "object",
          description: "Normalized bbox (0..1) of the region to analyze.",
          properties: {
            x: { type: "number" },
            y: { type: "number" },
            w: { type: "number" },
            h: { type: "number" },
          },
          required: ["x", "y", "w", "h"],
        },
        question: { type: "string", description: "What you want to know about the image." },
        context: { type: "string", description: "Optional: prose context that grounds the model." },
      },
      required: ["doc_id", "page", "bbox", "question"],
    },
  },
};
/** Schema for `request_investigation`: queue an Investigation Bureau job. */
const request_investigation_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "request_investigation",
    description: [
      "Queue a deeper investigation by the 8-detective Investigation Bureau. ",
      "Use ONLY when the user asks for analysis that requires structured reasoning ",
      "across multiple chunks — e.g. 'build rival hypotheses about X', ",
      "'audit this doc for contradictions', 'trace the chain of custody for claim Y'. ",
      "Do NOT use for plain lookups; hybrid_search is faster. ",
      "kinds: hypothesis_tournament (Holmes — 2-3 rival hypotheses with priors/posteriors) | ",
      "evidence_chain (Locard — verbatim evidence with chain_of_custody on N chunks of one doc). ",
      "Returns { job_id, kind, status_url, eta_seconds }. The UI renders a status card ",
      "with a link to /jobs/<job_id>; the worker takes ~30-120 seconds.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        kind: {
          type: "string",
          enum: ["hypothesis_tournament", "evidence_chain"],
          description: "Detective task kind.",
        },
        question: {
          type: "string",
          description: [
            "For hypothesis_tournament: the investigative question (one sentence, declarative). ",
            "Required for hypothesis_tournament; ignored for evidence_chain.",
          ].join(""),
        },
        doc_id: {
          type: "string",
          description: [
            "Optional scope. For hypothesis_tournament: narrows the corpus shortlist. ",
            "For evidence_chain: REQUIRED — the doc Locard scans.",
          ].join(""),
        },
        chunks: {
          type: "array",
          items: { type: "string" },
          description: [
            "Optional for evidence_chain: list of chunk_ids to inspect. Defaults to the ",
            "top 5 anomaly-flagged chunks in the doc.",
          ].join(""),
        },
        claim: {
          type: "string",
          description: "Optional for evidence_chain: a specific claim Locard should look for support of.",
        },
      },
      required: ["kind"],
    },
  },
};
/** Schema for `navigate_to`: offer the user a navigation button (no auto-redirect). */
const navigate_to_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "navigate_to",
    description: [
      "Offer the user a clickable button to navigate the main UI to a specific ",
      "doc, page, or chunk anchor. Target examples: '/d/<doc-id>', '/d/<doc-id>/p007', ",
      "'/d/<doc-id>/p007#c0042'. Frontend renders the button — does NOT auto-redirect.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        target: { type: "string" },
        label: {
          type: "string",
          description: "Short button text (max 40 chars).",
        },
      },
      required: ["target", "label"],
    },
  },
};
/**
 * Every tool schema exported by this module, in a fixed order. Each entry's
 * `function.name` has a same-named key in TOOL_HANDLERS.
 */
export const TOOL_DEFINITIONS: ToolDefinition[] = [
// Chunk retrieval
hybrid_search_tool,
read_chunk_tool,
get_page_chunks_tool,
list_anomalies_tool,
// Entity graph
entity_neighbors_tool,
entity_path_tool,
co_mention_chunks_tool,
// Wiki-backed reads and legacy keyword search
read_page_tool,
read_document_tool,
read_entity_tool,
search_corpus_tool,
// Vision, investigation jobs, UI navigation
analyze_image_region_tool,
request_investigation_tool,
navigate_to_tool,
];
/* ─── Helpers ───────────────────────────────────────────────────────────── */
/**
 * Resolve the language a tool should answer in.
 *
 * @param ctx - Call context; its `lang` is used when no valid override is given.
 * @param override - Candidate language from the tool arguments; honoured only
 *   when it is exactly "en" or "pt".
 * @returns The override if valid, else "en" when the context says so, else "pt".
 */
function pickLang(ctx: ToolHandlerContext, override?: unknown): "pt" | "en" {
  const explicit = override === "en" || override === "pt" ? override : undefined;
  if (explicit !== undefined) {
    return explicit;
  }
  if (ctx.lang === "en") {
    return "en";
  }
  return "pt";
}
/**
 * Reduce a retrieval hit to the compact record returned to the model.
 *
 * @param h - Retrieval hit.
 * @param lang - Selects which text field feeds the snippet.
 * @returns Identifiers, location, a snippet of at most 300 characters, the
 *   score rounded to four decimals (rerank score when present, otherwise the
 *   base score), and a link to the chunk anchor.
 */
function compactHit(h: ChunkHit, lang: "pt" | "en") {
  const body = (lang === "en" ? h.content_en : h.content_pt) || "";
  const rawScore = h.rerank_score ?? h.score;
  const { chunk_id, doc_id, page, type, classification, bbox } = h;
  return {
    chunk_id,
    doc_id,
    page,
    type,
    classification,
    bbox,
    snippet: body.slice(0, 300),
    score: Number(rawScore.toFixed(4)),
    href: `/d/${doc_id}#${chunk_id}`,
  };
}
/**
 * Cut a short, whitespace-normalised excerpt out of `text`.
 *
 * Only the first query word of three or more characters is considered. When
 * it occurs in the text (case-insensitively) the excerpt starts up to 60
 * characters before that occurrence; otherwise it starts at the beginning.
 *
 * @param text - Source text.
 * @param query - Search query used to position the excerpt.
 * @param len - Excerpt length in characters before whitespace is collapsed.
 * @returns The excerpt with whitespace runs collapsed to single spaces, trimmed.
 */
function snippet(text: string, query: string, len = 200): string {
  let start = 0;
  for (const word of query.toLowerCase().split(/\s+/)) {
    if (word.length < 3) continue;
    const at = text.toLowerCase().indexOf(word);
    if (at >= 0) start = Math.max(0, at - 60);
    // Only the first sufficiently long word is ever used.
    break;
  }
  const excerpt = text.slice(start, start + len);
  return excerpt.replace(/\s+/g, " ").trim();
}
/* ─── Tool handlers ─────────────────────────────────────────────────────── */
/**
 * Handler for `hybrid_search`.
 *
 * Runs the hybrid retrieval call, optionally emits citation and crop
 * artifacts for the first six hits, and returns the hits in compact form.
 *
 * @param args - Model-supplied arguments (`query` required; `lang`, `doc_id`,
 *   `type`, `classification`, `ufo_only`, `top_k` optional).
 * @param ctx - Call context; supplies the fallback language and doc scope
 *   and the optional artifact sink.
 * @returns `{ query, lang, count, hits }`, or `{ error: "empty_query" }` /
 *   `{ error: "retrieval_unavailable" }`. Never rejects on retrieval failure.
 */
async function handleHybridSearch(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const query = String(args.query ?? "").trim();
  if (!query) return { error: "empty_query", hits: [] };
  const lang = pickLang(ctx, args.lang);

  // Arguments come from the model, so only real, non-blank strings are
  // accepted as filters instead of asserting the type.
  const stringArg = (v: unknown): string | null =>
    typeof v === "string" && v.trim() ? v.trim() : null;

  // Whole number in [1, 50]; anything missing, non-numeric, or below 1
  // falls back to the documented default of 20.
  const requestedTopK = Math.trunc(Number(args.top_k));
  const top_k = requestedTopK >= 1 ? Math.min(requestedTopK, 50) : 20;

  try {
    const hits = await hybridSearch({
      query,
      lang,
      doc_id: stringArg(args.doc_id) || ctx.doc_id || null,
      type: stringArg(args.type),
      classification: stringArg(args.classification),
      // Boolean("false") is true, so accept only an explicit true.
      ufo_only: args.ufo_only === true || args.ufo_only === "true",
      top_k,
      // W2-TD#8: chat is latency-sensitive — skip rerank when ≤10 candidates.
      // The model only cites the first few hits anyway and BGE-Reranker
      // adds 5-8s on CPU. RRF order from the RPC is plenty for the head.
      rerank_strategy: "when_top_k_gt",
      rerank_threshold: 10,
    });
    // Emit one citation (+ optional crop_image) artifact per hit so the UI can
    // render inline cards next to the assistant text. Limit to top 6 to avoid
    // flooding the chat with crops when top_k is large.
    if (ctx.emitArtifact) {
      for (const h of hits.slice(0, 6)) {
        // Reuse the compact form so the citation's snippet and score always
        // match what the model sees in the tool result.
        const summary = compactHit(h, lang);
        ctx.emitArtifact({
          kind: "citation",
          chunk_id: h.chunk_id,
          doc_id: h.doc_id,
          page: h.page,
          type: h.type,
          classification: h.classification,
          bbox: h.bbox ?? null,
          snippet: summary.snippet,
          score: summary.score,
        });
        if (h.bbox && h.bbox.w > 0 && h.bbox.h > 0) {
          const bb = h.bbox;
          const src =
            `/api/crop?doc=${encodeURIComponent(h.doc_id)}` +
            `&page=${h.page}&x=${bb.x}&y=${bb.y}&w=${bb.w}&h=${bb.h}&w_px=640`;
          ctx.emitArtifact({
            kind: "crop_image",
            src,
            doc_id: h.doc_id,
            page: h.page,
            chunk_id: h.chunk_id,
            alt_en: (h.content_en || h.chunk_id).slice(0, 120),
            alt_pt: (h.content_pt || h.chunk_id).slice(0, 120),
          });
        }
      }
    }
    return { query, lang, count: hits.length, hits: hits.map((h) => compactHit(h, lang)) };
  } catch (e) {
    return {
      error: "retrieval_unavailable",
      message: e instanceof Error ? e.message : String(e),
      fallback: "use search_corpus (legacy keyword)",
    };
  }
}
/**
 * Handler for `read_chunk`: fetch one chunk and return its full text and metadata.
 *
 * @param args - Expects `doc_id` and `chunk_id`.
 * @returns The chunk record with an `href` to its anchor, or an object with
 *   `error` set to "missing_args", "not_found" or "retrieval_unavailable".
 */
async function handleReadChunk(args: Record<string, unknown>): Promise<unknown> {
  const clean = (v: unknown): string => String(v ?? "").trim();
  const doc_id = clean(args.doc_id);
  const chunk_id = clean(args.chunk_id);
  if (doc_id === "" || chunk_id === "") {
    return { error: "missing_args" };
  }
  try {
    const found = await getChunk(doc_id, chunk_id);
    if (!found) {
      return { error: "not_found", doc_id, chunk_id };
    }
    const { page, type, bbox, classification, content_en, content_pt } = found;
    return {
      chunk_id: found.chunk_id,
      doc_id: found.doc_id,
      page,
      type,
      bbox,
      classification,
      content_en,
      content_pt,
      href: `/d/${found.doc_id}#${found.chunk_id}`,
    };
  } catch (e) {
    return { error: "retrieval_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `get_page_chunks`: list the chunks of one page.
 *
 * @param args - Expects `doc_id` and a numeric `page` of at least 1.
 * @returns `{ doc_id, page, count, chunks }` with each chunk's text capped
 *   at 500 characters per language, or an object with `error` set to
 *   "bad_args" or "retrieval_unavailable".
 */
async function handleGetPageChunks(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  const pageIsValid = Number.isFinite(page) && page >= 1;
  if (doc_id === "" || !pageIsValid) {
    return { error: "bad_args" };
  }
  const cap = (s: string | null | undefined): string => (s || "").slice(0, 500);
  try {
    const rows = await getPageChunks(doc_id, page);
    const chunks = rows.map(({ chunk_id, type, bbox, classification, content_en, content_pt }) => ({
      chunk_id,
      type,
      bbox,
      classification,
      content_en: cap(content_en),
      content_pt: cap(content_pt),
    }));
    return { doc_id, page, count: rows.length, chunks };
  } catch (e) {
    return { error: "retrieval_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `list_anomalies`.
 *
 * @param args - `kind` ("cryptid", anything else is treated as "ufo"),
 *   optional `doc_id`, optional `limit`.
 * @param ctx - Call context; `ctx.doc_id` scopes the listing when the
 *   arguments carry no doc_id.
 * @returns `{ kind, doc_id, count, anomalies }` or
 *   `{ error: "retrieval_unavailable", message }`.
 */
async function handleListAnomalies(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const kind = args.kind === "cryptid" ? "cryptid" : "ufo";
  // Only a real, non-blank string counts as an explicit doc scope.
  const doc_id =
    (typeof args.doc_id === "string" && args.doc_id.trim()) || ctx.doc_id || null;
  // Whole number in [1, 200]; a missing, non-numeric, or non-positive value
  // falls back to the default of 50 instead of being forwarded to the query.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit >= 1 ? Math.min(requestedLimit, 200) : 50;
  try {
    const rows = await listAnomalies({ kind, doc_id, limit });
    return { kind, doc_id, count: rows.length, anomalies: rows };
  } catch (e) {
    return {
      error: "retrieval_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Wiki entity folders searched by `handleSearch`, each paired with the
 * singular class key that `read_entity` accepts.
 */
const SEARCH_ENTITY_FOLDERS: ReadonlyArray<readonly [string, string]> = [
  ["people", "person"],
  ["organizations", "organization"],
  ["locations", "location"],
  ["events", "event"],
  ["uap-objects", "uap_object"],
  ["vehicles", "vehicle"],
  ["operations", "operation"],
  ["concepts", "concept"],
];

/**
 * Handler for `search_corpus`: legacy keyword lookup over documents and entities.
 *
 * Documents match when the lower-cased query occurs in the id, the title, or
 * the first 2000 characters of the body. Entities match on their file id.
 * At most eight hits are returned overall, documents first.
 *
 * @param args - `query` (required) and optional `scope`
 *   ("all" | "documents" | "entities", default "all").
 * @returns `{ query, scope, hits }` or `{ error: "empty_query", hits: [] }`.
 */
async function handleSearch(args: Record<string, unknown>): Promise<unknown> {
  const MAX_HITS = 8;
  const query = String(args.query ?? "").trim();
  const scope = typeof args.scope === "string" ? args.scope : "all";
  if (!query) return { error: "empty_query", hits: [] };
  const ql = query.toLowerCase();
  // Entity ids are kebab-case, so "edgar hoover" must also be tried as
  // "edgar-hoover"; otherwise any multi-word name can never match.
  const slug = ql.replace(/[\s_]+/g, "-");
  const hits: Array<{ type: string; id: string; title: string; snippet: string; href: string }> = [];

  if (scope === "all" || scope === "documents") {
    const ids = await listDocuments();
    for (const id of ids) {
      const f = await readDocument(id);
      if (!f) continue;
      const title = String(f.fm.canonical_title ?? id);
      const hay = `${id} ${title} ${f.body.slice(0, 2000)}`.toLowerCase();
      if (hay.includes(ql)) {
        hits.push({
          type: "document",
          id,
          title,
          snippet: snippet(f.body, query),
          href: `/d/${id}`,
        });
      }
      if (hits.length >= MAX_HITS) break;
    }
  }

  if ((scope === "all" || scope === "entities") && hits.length < MAX_HITS) {
    for (const [folder, classKey] of SEARCH_ENTITY_FOLDERS) {
      try {
        const dir = path.join(WIKI, "entities", folder);
        const entries = await fs.readdir(dir);
        for (const file of entries) {
          if (!file.endsWith(".md")) continue;
          const id = file.replace(/\.md$/, "");
          const idLc = id.toLowerCase();
          if (!idLc.includes(ql) && !idLc.includes(slug)) continue;
          const content = await fs.readFile(path.join(dir, file), "utf-8");
          const cname = content.match(/canonical_name:\s*([^\n]+)/)?.[1]?.trim() ?? id;
          hits.push({
            // Report the class key read_entity expects ("person",
            // "uap_object"). Stripping a trailing "s" from the folder name
            // produced "people" and "uap-object" here.
            type: classKey,
            id,
            title: cname,
            snippet: id,
            href: `/e/${folder}/${id}`,
          });
          if (hits.length >= MAX_HITS) break;
        }
      } catch {
        /* dir missing — fine */
      }
      if (hits.length >= MAX_HITS) break;
    }
  }
  return { query, scope, hits };
}
/**
 * Handler for `read_page`: return the legacy wiki record of one page.
 *
 * The page argument may be a bare number ("7", 7) or carry a "p" prefix in
 * any zero-padding or case ("p7", "p007", "P1000"). It is normalised to "p"
 * followed by the number zero-padded to at least three digits.
 *
 * @param args - Expects `doc_id` and `page`.
 * @returns Page metadata and a body excerpt of at most 2000 characters, or
 *   an object with `error` set to "missing_args", "bad_page" or "not_found".
 */
async function handleReadPage(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  if (!doc_id) return { error: "missing_args" };

  // Require the whole input to be an optional "p" plus digits. The previous
  // prefix-only parseInt rejected "p7" and "p1000", yet let "-3" through as
  // the page id "p-3".
  const match = /^p?(\d+)$/i.exec(String(args.page ?? "").trim());
  if (!match) return { error: "bad_page" };
  const n = Number.parseInt(match[1] ?? "", 10);
  // Pages are 1-indexed; the safe-integer check keeps absurdly long digit
  // strings from turning into an exponent-notation id.
  if (!Number.isSafeInteger(n) || n < 1) return { error: "bad_page" };
  const page = `p${String(n).padStart(3, "0")}`;

  const md = await readPage(doc_id, page);
  if (!md) return { error: "not_found", doc_id, page };
  return {
    doc_id,
    page,
    page_type: md.fm.page_type,
    language: md.fm.language_detected,
    content_classification: md.fm.content_classification,
    redactions_count: Array.isArray(md.fm.redactions) ? (md.fm.redactions as never[]).length : 0,
    vision_description: md.fm.vision_description,
    vision_description_pt_br: md.fm.vision_description_pt_br,
    entities_extracted: md.fm.entities_extracted,
    body_excerpt: md.body.slice(0, 2000),
  };
}
/**
 * Handler for `read_document`: consolidated overview of one document.
 *
 * @param args - Expects `doc_id`.
 * @returns Front-matter fields, the page count, the first 20 entries of the
 *   page index and the first 2000 body characters as `executive_summary`,
 *   or `{ error: "not_found", doc_id }`.
 */
async function handleReadDocument(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const doc = await readDocument(doc_id);
  if (!doc) {
    return { error: "not_found", doc_id };
  }
  const pageList = await listPages(doc_id);
  const { fm, body } = doc;
  return {
    doc_id,
    canonical_title: fm.canonical_title,
    collection: fm.collection,
    document_class: fm.document_class,
    page_count: pageList.length,
    pages_index: pageList.slice(0, 20),
    content_classification: fm.content_classification,
    languages_detected: fm.languages_detected,
    key_entities: fm.key_entities,
    executive_summary: body.slice(0, 2000),
  };
}
/**
 * Handler for `read_entity`: detail record of one entity.
 *
 * @param args - Expects `class` (entity class key) and `id`.
 * @returns Selected front-matter fields plus the first 2000 body characters,
 *   or an object with `error` set to "bad_class" or "not_found".
 */
async function handleReadEntity(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();

  const folder = classKeyToFolder(cls);
  if (!folder) {
    return { error: "bad_class", cls };
  }

  const entity = await readEntity(folder, id);
  if (!entity) {
    return { error: "not_found", cls, id };
  }

  const { fm, body } = entity;
  return {
    class: folder,
    id,
    canonical_name: fm.canonical_name,
    aliases: fm.aliases,
    total_mentions: fm.total_mentions,
    enrichment_status: fm.enrichment_status,
    external_sources: fm.external_sources,
    disambiguation_note: fm.disambiguation_note,
    body_excerpt: body.slice(0, 2000),
  };
}
/**
 * Handler for `entity_neighbors`: entities co-mentioned with a given entity.
 *
 * @param args - `class` and `id` (required), optional `filter_classes`
 *   (array of class names; a lone string is treated as a one-element list)
 *   and optional `limit`.
 * @returns `{ entity, count, neighbors }`, or an object with `error` set to
 *   "missing_args", "entity_not_found" or "graph_unavailable".
 */
async function handleEntityNeighbors(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();
  if (!cls || !id) return { error: "missing_args" };

  // Validate before use: calling .filter on a non-array used to throw
  // inside the try block and was then misreported as "graph_unavailable".
  const rawFilter = args.filter_classes;
  let filterClasses: string[] | undefined;
  if (Array.isArray(rawFilter)) {
    filterClasses = rawFilter.filter(
      (c): c is string => typeof c === "string" && c.length > 0,
    );
  } else if (typeof rawFilter === "string" && rawFilter) {
    filterClasses = [rawFilter];
  }

  // Whole number in [1, 100]; anything else falls back to the default of 30.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit >= 1 ? Math.min(requestedLimit, 100) : 30;

  try {
    const ent = await findEntity(cls, id);
    if (!ent) return { error: "entity_not_found", class: cls, id };
    const neighbors = await getNeighbors(ent.entity_pk, { limit, classes: filterClasses });
    return { entity: ent, count: neighbors.length, neighbors };
  } catch (e) {
    return {
      error: "graph_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `entity_path`: paths between two entities.
 *
 * @param args - `from_class`, `from_id`, `to_class`, `to_id` (required) and
 *   optional `max_hops`.
 * @returns `{ from, to, max_hops, paths }`, or an object with `error` set to
 *   "missing_args", "from_not_found", "to_not_found" or "graph_unavailable".
 */
async function handleEntityPath(args: Record<string, unknown>): Promise<unknown> {
  const fromCls = String(args.from_class ?? "").trim();
  const fromId = String(args.from_id ?? "").trim();
  const toCls = String(args.to_class ?? "").trim();
  const toId = String(args.to_id ?? "").trim();
  // The schema documents 1-4 hops. Only the upper bound used to be enforced,
  // so a negative or fractional value reached the path query unchanged.
  const requestedHops = Math.trunc(Number(args.max_hops));
  const maxHops = requestedHops >= 1 ? Math.min(requestedHops, 4) : 3;
  if (!fromCls || !fromId || !toCls || !toId) return { error: "missing_args" };
  try {
    const [a, b] = await Promise.all([findEntity(fromCls, fromId), findEntity(toCls, toId)]);
    if (!a) return { error: "from_not_found", class: fromCls, id: fromId };
    if (!b) return { error: "to_not_found", class: toCls, id: toId };
    const paths = await findPaths(a.entity_pk, b.entity_pk, maxHops);
    return { from: a, to: b, max_hops: maxHops, paths };
  } catch (e) {
    return {
      error: "graph_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `co_mention_chunks`: chunks in which two entities both appear.
 *
 * @param args - `a_class`, `a_id`, `b_class`, `b_id` (required) and optional `limit`.
 * @returns `{ a, b, count, chunks }`, or an object with `error` set to
 *   "missing_args", "entity_not_found" (with `missing` naming the side or
 *   sides that could not be resolved) or "graph_unavailable".
 */
async function handleCoMentionChunks(args: Record<string, unknown>): Promise<unknown> {
  const aCls = String(args.a_class ?? "").trim();
  const aId = String(args.a_id ?? "").trim();
  const bCls = String(args.b_class ?? "").trim();
  const bId = String(args.b_id ?? "").trim();
  // Whole number in [1, 100]; anything else falls back to the default of 20
  // rather than passing a negative or fractional limit downstream.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit >= 1 ? Math.min(requestedLimit, 100) : 20;
  if (!aCls || !aId || !bCls || !bId) return { error: "missing_args" };
  try {
    const [a, b] = await Promise.all([findEntity(aCls, aId), findEntity(bCls, bId)]);
    if (!a || !b) {
      // Say which lookup failed so the caller can correct the right argument.
      const missing = [...(a ? [] : ["a"]), ...(b ? [] : ["b"])];
      return { error: "entity_not_found", a: aId, b: bId, missing };
    }
    const chunks = await getCoMentionChunks(a.entity_pk, b.entity_pk, limit);
    return { a, b, count: chunks.length, chunks };
  } catch (e) {
    return {
      error: "graph_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `request_investigation`: queue a job in
 * `public.investigation_jobs` and return a status descriptor.
 *
 * Payload by kind:
 * - hypothesis_tournament: `{ question, lang, doc_id? }` (question required)
 * - evidence_chain: `{ doc_id, chunks?, claim? }` (doc_id required, taken
 *   from the arguments or from the currently viewed document)
 *
 * @param args - Model-supplied arguments (`kind`, `question`, `doc_id`,
 *   `chunks`, `claim`).
 * @param ctx - Call context; supplies the fallback doc_id, the language and
 *   the user email recorded in `triggered_by`.
 * @returns `{ job_id, kind, status, eta_seconds, status_url, payload_summary,
 *   detective }`, or an object with `error` set to "bad_kind",
 *   "question_required", "doc_id_required", "insert_failed" or "db_unavailable".
 */
async function handleRequestInvestigation(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  // Rough per-unit durations from the Investigation Bureau:
  // Holmes ~60s per question, Locard ~30s per inspected chunk.
  const HOLMES_ETA_SECONDS = 60;
  const LOCARD_SECONDS_PER_CHUNK = 30;
  // Chunk count the tool schema documents as the default when none are listed.
  const LOCARD_DEFAULT_CHUNKS = 5;

  const kind = String(args.kind ?? "").trim();
  if (kind !== "hypothesis_tournament" && kind !== "evidence_chain") {
    return { error: "bad_kind", message: "kind must be hypothesis_tournament or evidence_chain" };
  }
  const docArg =
    typeof args.doc_id === "string" && args.doc_id.trim()
      ? args.doc_id.trim()
      : ctx.doc_id || null;
  const payload: Record<string, unknown> = {};
  let eta: number;

  if (kind === "hypothesis_tournament") {
    const question = String(args.question ?? "").trim();
    if (!question) return { error: "question_required", message: "hypothesis_tournament needs a question" };
    payload.question = question;
    payload.lang = pickLang(ctx);
    if (docArg) payload.doc_id = docArg;
    eta = HOLMES_ETA_SECONDS;
  } else {
    if (!docArg) return { error: "doc_id_required", message: "evidence_chain needs a doc_id" };
    payload.doc_id = docArg;
    const chunkIds = Array.isArray(args.chunks)
      ? Array.from(
          new Set(
            args.chunks
              .filter((c): c is string => typeof c === "string")
              .map((c) => c.trim())
              .filter((c) => c.length > 0),
          ),
        )
      : [];
    // Omit the key when nothing usable was supplied so the documented
    // top-5 default applies, rather than sending an empty list.
    if (chunkIds.length > 0) payload.chunks = chunkIds;
    if (typeof args.claim === "string" && args.claim.trim()) payload.claim = args.claim.trim();
    // Scale the estimate with the real chunk count; it was fixed at 30 * 5
    // no matter how many chunks were requested.
    eta = LOCARD_SECONDS_PER_CHUNK * (chunkIds.length || LOCARD_DEFAULT_CHUNKS);
  }

  const triggered_by = ctx.user_email ? `user:${ctx.user_email}` : "user:anonymous";
  try {
    const rows = await pgQuery<{ job_id: string; created_at: string }>(
      `INSERT INTO public.investigation_jobs (kind, payload, triggered_by, status)
VALUES ($1, $2::jsonb, $3, 'queued')
RETURNING job_id, created_at`,
      [kind, JSON.stringify(payload), triggered_by],
    );
    const row = rows[0];
    if (!row) return { error: "insert_failed" };
    return {
      job_id: row.job_id,
      kind,
      status: "queued",
      eta_seconds: eta,
      status_url: `/jobs/${row.job_id}`,
      payload_summary: payload,
      detective: kind === "hypothesis_tournament" ? "holmes" : "locard",
    };
  } catch (e) {
    return {
      error: "db_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `navigate_to`: validate a navigation target and echo it back
 * for the caller to render as a button.
 *
 * Only same-origin, path-absolute targets are accepted. The target is
 * produced by the model, which reads untrusted corpus text, so anything that
 * a browser could resolve to another origin is rejected.
 *
 * @param args - `target` (path starting with a single "/") and `label`
 *   (truncated to 40 characters).
 * @returns `{ ok: true, target, label }`, or an object with `error` set to
 *   "target_must_start_with_slash" or "target_must_be_same_origin".
 */
async function handleNavigate(args: Record<string, unknown>): Promise<unknown> {
  const target = String(args.target ?? "").trim();
  const label = String(args.label ?? "").slice(0, 40);
  if (!target.startsWith("/")) return { error: "target_must_start_with_slash", target };
  // "//host" and "/\host" begin with a slash but are parsed by browsers as
  // scheme-relative URLs, i.e. another origin. URL parsers also strip tabs
  // and newlines, so "/<TAB>/host" collapses to "//host"; control characters
  // are therefore rejected as well.
  if (/^\/[/\\]/.test(target) || /[\u0000-\u001f\u007f]/.test(target)) {
    return { error: "target_must_be_same_origin", target };
  }
  return { ok: true, target, label };
}
/**
 * Handler for `analyze_image_region`: ask the vision helper a question about
 * a cropped region of one page.
 *
 * @param args - `doc_id`, `page` (positive integer), `bbox` (`{ x, y, w, h }`
 *   as finite numbers with positive width and height), `question`, and an
 *   optional `context` string.
 * @param ctx - Call context; supplies the language and the optional artifact sink.
 * @returns Whatever the vision helper returns, or an object with `error` set
 *   to "missing_args", "bad_args" or "vision_failed".
 */
async function handleAnalyzeImageRegion(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  const rawBox = args.bbox;
  const question = String(args.question ?? "").trim();
  if (!doc_id || !page || !rawBox || !question) return { error: "missing_args" };

  if (!Number.isInteger(page) || page < 1) {
    return { error: "bad_args", message: "page must be a positive integer" };
  }
  // The bbox comes from the model and used to be trusted via a type
  // assertion. Check the shape before it is used to crop the page.
  if (typeof rawBox !== "object") {
    return { error: "bad_args", message: "bbox must be an object with numeric x, y, w, h" };
  }
  const box = rawBox as Record<string, unknown>;
  const { x, y, w, h } = box;
  if (
    typeof x !== "number" || typeof y !== "number" ||
    typeof w !== "number" || typeof h !== "number" ||
    ![x, y, w, h].every(Number.isFinite) ||
    w <= 0 || h <= 0
  ) {
    return { error: "bad_args", message: "bbox needs finite x, y and positive w, h" };
  }
  // Pass on only the four validated coordinates.
  const bbox = { x, y, w, h };

  try {
    const { analyzeImageRegion } = await import("./vision");
    const out = await analyzeImageRegion({
      doc_id, page, bbox, question,
      context: typeof args.context === "string" ? args.context : undefined,
      lang: ctx.lang === "en" ? "en" : "pt",
    });
    if (ctx.emitArtifact) {
      ctx.emitArtifact({
        kind: "crop_image",
        src: out.crop_url,
        doc_id, page,
        alt_en: question.slice(0, 120),
        alt_pt: question.slice(0, 120),
      });
    }
    return out;
  } catch (e) {
    return {
      error: "vision_failed",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Lookup table from tool name to its local handler. The keys are the same
 * names that appear as `function.name` in the entries of TOOL_DEFINITIONS.
 */
export const TOOL_HANDLERS: Record<string, ToolHandler> = {
// Chunk retrieval
hybrid_search: handleHybridSearch,
read_chunk: handleReadChunk,
get_page_chunks: handleGetPageChunks,
list_anomalies: handleListAnomalies,
// Entity graph
entity_neighbors: handleEntityNeighbors,
entity_path: handleEntityPath,
co_mention_chunks: handleCoMentionChunks,
// Wiki-backed reads and legacy keyword search
read_page: handleReadPage,
read_document: handleReadDocument,
read_entity: handleReadEntity,
search_corpus: handleSearch,
// Vision, investigation jobs, UI navigation
analyze_image_region: handleAnalyzeImageRegion,
request_investigation: handleRequestInvestigation,
navigate_to: handleNavigate,
};