disclosure-bureau/web/lib/chat/tools.ts
Luiz Gustavo 5ac53cb3e2
Some checks failed
CI / Web — typecheck + lint + build (push) Failing after 39s
CI / Scripts — Python smoke (push) Failing after 4s
CI / Web — npm audit (push) Failing after 37s
CI / Retrieval — golden set (Recall@5 + MRR) (push) Failing after 4s
W3.7: Dupin contradiction-scan detective + UI integration
Adds the third AI detective in the Investigation Bureau runtime: C. Auguste
Dupin, who scans a corpus shortlist for pairs (or small groups) of chunks
that cannot both be true under any ordinary reading.

Runtime:
  - prompts/dupin.md — discipline (no contradiction without ≥2 distinct
    chunk_ids; reject same-vocabulary near-misses; FEW high-confidence
    over MANY weak ones; emit `NO_CONTRADICTIONS` when corpus is silent)
  - src/detectives/dupin.ts — hybridSearch with k=18 (more chunks than
    Holmes because contradictions emerge from comparing dispersed
    claims), strict JSON-array parsing, AT MOST 3 contradictions per call
  - src/tools/write_contradiction.ts — validates topic + ≥2 positions
    drawn from ≥2 distinct chunks, resolves chunk_pk via DB lookup
    (rejects positions citing unknown chunks), INSERTs into
    public.contradictions + writes case/contradictions/R-NNNN.md
  - orchestrator: new `contradiction_scan` kind dispatching to runDupin;
    payload { topic, doc_id?, lang?, context_chunks? }

Chat + UI:
  - request_investigation gains kind=contradiction_scan + topic arg;
    triggered detective auto-resolves to dupin
  - chat-bubble inline card renders dupin in orange (#ff8a4d) to
    distinguish from holmes (cyan) and locard (green)
  - /jobs/[id] page swaps title + subtitle + tone per detective;
    "Question" label becomes "Topic" for contradiction_scan
  - /api/jobs/[id] hydrates public.contradictions when outputs[] surfaces
    contradiction_ids
  - job-status-poller renders ContradictionCard: topic + N positions
    (verbatim statements quoted, stance label optional, link to source
    chunk) + optional notes panel, with resolution_status badge
    (open/resolved/irreconcilable)

R-NNNN shares the contradiction_id_seq slot with relation per
CLAUDE.md naming — same conceptual class (a connection between two
pieces of evidence in tension).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:34:04 -03:00

900 lines
32 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Sherlock's tool kit — OpenAI-style function-calling schema + local handlers.
*
* Each tool has:
* - definition: JSON Schema sent to the model
* - handler: Node function that runs locally and returns a JSON-serializable result
*
* Tools called by the model trigger AG-UI events streamed to the frontend
* (tool_start, tool_result, navigate). The frontend renders these inline in
* the message AND, for `navigate_to`, can offer a clickable button to scroll
* the UI to a target page.
*
* Retrieval stack (chunks-aware):
* - hybrid_search → BM25 + dense (BGE-M3) + RRF + BGE-Reranker rerank
* - read_chunk → fetch a single chunk by chunk_id (cite-then-quote)
* - list_anomalies → all UFO/cryptid-flagged chunks (cheap, no LLM)
* - get_page_chunks → assemble one page from chunks
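* Wiki-aware fallbacks (when DB not available or richer entity data needed):
* - read_page, read_document, read_entity, search_corpus (legacy grep)
* Entity graph (co-mention based):
* - entity_neighbors, entity_path, co_mention_chunks
* Vision + Investigation Bureau:
* - analyze_image_region → answer a question about a cropped page region
* - request_investigation → queue a detective job (Holmes / Locard / Dupin)
* UI:
* - navigate_to → emit clickable button to scroll UI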
*/
import fs from "node:fs/promises";
import path from "node:path";
import {
WIKI,
readDocument,
readPage,
readEntity,
listDocuments,
listPages,
classKeyToFolder,
} from "../wiki";
import {
hybridSearch,
getChunk,
listAnomalies,
getPageChunks,
type ChunkHit,
} from "../retrieval/hybrid";
import {
findEntity,
getNeighbors,
findPaths,
getCoMentionChunks,
} from "../retrieval/graph";
import { pgQuery } from "../retrieval/db";
/**
 * OpenAI-style function-calling schema for one tool: the definition object
 * that is sent to the model.
 */
export interface ToolDefinition {
/** Literal tag; every definition in this file uses "function". */
type: "function";
function: {
/** Tool name; TOOL_HANDLERS is keyed by the same names. */
name: string;
/** Natural-language usage guidance written for the model. */
description: string;
/** JSON Schema describing the tool's arguments. */
parameters: Record<string, unknown>;
};
}
/**
 * Per-call context passed to every tool handler next to the model-supplied
 * arguments. Every field is optional.
 */
export interface ToolHandlerContext {
/** Currently-viewed location, if any, to bias search. Handlers such as
 * hybrid_search and list_anomalies fall back to this when the model does not
 * pass its own doc_id. */
doc_id?: string | null;
/** Currently-viewed page, if any. Not read by any handler in this file. */
page_id?: string | null;
/** UI language preference (pt | en). Handlers treat anything but "en" as "pt". */
lang?: "pt" | "en";
/** Authenticated user's email — populated by /api/sessions/[id]/messages so
 * tools that audit (e.g. request_investigation) can label `triggered_by`. */
user_email?: string | null;
/** Optional sink for inline AG-UI artifacts (citations, crops, entity cards).
 * When provided, tools may push typed artifacts that the UI renders inline
 * alongside the tool block. Safe to leave undefined for non-streaming callers. */
emitArtifact?: (artifact: import("./agui").Artifact) => void;
}
/**
 * Signature shared by all local tool handlers.
 *
 * `args` is the raw argument object produced by the model (untyped, so each
 * handler validates it itself); `ctx` carries the caller's context. The
 * resolved value is described in the file header as JSON-serializable.
 */
export interface ToolHandler {
(args: Record<string, unknown>, ctx: ToolHandlerContext): Promise<unknown>;
}
/* ─── Tool defs ─────────────────────────────────────────────────────────── */
/**
 * Schema for `hybrid_search`, the primary retrieval tool. Only `query` is
 * required; the remaining arguments narrow or size the result set.
 */
const hybrid_search_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "hybrid_search",
    description: [
      "PRIMARY semantic search over the entire UAP/UFO corpus chunks. ",
      "Combines BM25 keyword recall + BGE-M3 dense embeddings + cross-encoder rerank. ",
      "Returns up to top_k chunks with chunk_id, doc_id, page, bbox, text snippets, ",
      "classification, and relevance score. Use this for any question about content. ",
      "Filter with doc_id to scope to one document; type to restrict chunk type ",
      "(paragraph, heading, stamp, etc.); ufo_only=true to retrieve only anomaly-flagged chunks.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        query: {
          type: "string",
          description: "Natural language query, PT or EN.",
        },
        lang: {
          type: "string",
          enum: ["pt", "en"],
          description: "Search language (default pt).",
        },
        doc_id: {
          type: "string",
          description: "Optional: restrict to one document.",
        },
        type: {
          type: "string",
          description: [
            "Optional chunk-type filter: paragraph, heading, table_marker, image, stamp, signature, ",
            "address_block, classification_marking, redaction, footer, marginalia, form_field.",
          ].join(""),
        },
        classification: {
          type: "string",
          description: "Optional: SECRET, CONFIDENTIAL, RESTRICTED, NOFORN.",
        },
        ufo_only: {
          type: "boolean",
          description: "Only chunks flagged with UFO anomaly.",
        },
        top_k: {
          type: "integer",
          description: "Number of final results (default 20, max 50).",
        },
      },
      required: ["query"],
    },
  },
};
/**
 * Schema for `read_chunk`: fetch a single chunk in full, addressed by
 * document id plus chunk id.
 */
const read_chunk_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_chunk",
    description: [
      "Read ONE chunk in full (verbatim text EN+PT, full bbox, metadata, anomaly flags). ",
      "Use AFTER hybrid_search to expand a citation before quoting the user.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        chunk_id: {
          type: "string",
          description: "e.g. 'c0042'",
        },
      },
      required: ["doc_id", "chunk_id"],
    },
  },
};
/**
 * Schema for `get_page_chunks`: list every chunk of one page, addressed by
 * document id and 1-indexed page number.
 */
const get_page_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "get_page_chunks",
    description: [
      "Get all chunks of one page in reading order. Use to reconstruct a page or to ",
      "answer 'what's on page N of doc X' questions with full structure.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: {
          type: "integer",
          description: "Page number (1-indexed).",
        },
      },
      required: ["doc_id", "page"],
    },
  },
};
/**
 * Schema for `list_anomalies`: enumerate chunks flagged as UFO or cryptid
 * anomalies, optionally scoped to one document.
 *
 * The `limit` description states the 200 cap that the handler enforces, in
 * line with the other tools in this file that advertise their maximum.
 */
const list_anomalies_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "list_anomalies",
    description:
      "List all chunks flagged with a UFO or cryptid anomaly. Cheap query (no embedding). " +
      "Use for 'show me all sightings', 'all spherical objects', 'cryptid encounters'.",
    parameters: {
      type: "object",
      properties: {
        kind: { type: "string", enum: ["ufo", "cryptid"] },
        doc_id: { type: "string", description: "Optional: restrict to one doc." },
        limit: { type: "integer", description: "Max results (default 50, max 200)." },
      },
      required: ["kind"],
    },
  },
};
/**
 * Schema for `read_page`: read the legacy wiki record of one page. The page
 * argument is a string and may be given as 'p007' or '7'.
 */
const read_page_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_page",
    description: [
      "Read the legacy wiki page record for context (vision_description, ",
      "entities_extracted, content_classification). Useful WHEN the doc isn't in the new ",
      "chunk index yet OR you need page-level vision metadata. Prefer hybrid_search + ",
      "read_chunk for content questions.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: {
          type: "string",
          description: "e.g. 'p007' or '7'.",
        },
      },
      required: ["doc_id", "page"],
    },
  },
};
/**
 * Schema for `read_document`: fetch the consolidated overview of a single
 * document, addressed by its id.
 */
const read_document_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_document",
    description: [
      "Get the consolidated overview of a document — summary, page index, ",
      "content_classification, key entities.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
      },
      required: ["doc_id"],
    },
  },
};
/**
 * Schema for `read_entity`: read one entity record, addressed by entity class
 * plus kebab-case id.
 */
const read_entity_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_entity",
    description: [
      "Read the detail of an entity (person, organization, location, event, ",
      "uap_object, vehicle, operation, concept) including enrichment from WebSearch.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: ["person", "organization", "location", "event", "uap_object", "vehicle", "operation", "concept"],
        },
        id: {
          type: "string",
          description: "kebab-case id, e.g. 'j-edgar-hoover'.",
        },
      },
      required: ["class", "id"],
    },
  },
};
/**
 * Schema for `search_corpus`, the legacy keyword lookup. `scope` optionally
 * limits the search to documents or to entities.
 */
const search_corpus_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "search_corpus",
    description: [
      "Legacy keyword-only search over document IDs, titles, and entity IDs. ",
      "Prefer hybrid_search for content questions. Use this only to find entities/docs by name.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        query: { type: "string" },
        scope: {
          type: "string",
          enum: ["all", "documents", "entities"],
        },
      },
      required: ["query"],
    },
  },
};
/**
 * Schema for `entity_neighbors`: list the entities co-mentioned with a given
 * entity, optionally restricted to certain classes and capped by `limit`.
 */
const entity_neighbors_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_neighbors",
    description: [
      "List entities co-mentioned with a given entity in the corpus chunks. ",
      "Use to answer 'who/what is connected to X' questions. Returns up to ",
      "limit neighbors sorted by edge weight (number of shared chunks).",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: [
            "person",
            "organization",
            "location",
            "event",
            "uap_object",
            "vehicle",
            "operation",
            "concept",
          ],
        },
        id: {
          type: "string",
          description: "kebab-case id or canonical name.",
        },
        filter_classes: {
          type: "array",
          items: { type: "string" },
          description: "Optional: restrict neighbors to these entity classes.",
        },
        limit: {
          type: "integer",
          description: "Max neighbors (default 30, max 100).",
        },
      },
      required: ["class", "id"],
    },
  },
};
/**
 * Schema for `entity_path`: multi-hop path search between two entities, each
 * addressed by a class/id pair. `max_hops` is optional.
 */
const entity_path_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_path",
    description: [
      "Find paths between two entities via shared chunks (multi-hop). Useful for ",
      "'how is X connected to Y' or 'show the trail between Hoover and Project Sign'.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        from_class: { type: "string" },
        from_id: { type: "string" },
        to_class: { type: "string" },
        to_id: { type: "string" },
        max_hops: {
          type: "integer",
          description: "1-4 (default 3).",
        },
      },
      required: ["from_class", "from_id", "to_class", "to_id"],
    },
  },
};
/**
 * Schema for `co_mention_chunks`: return the chunks in which two given
 * entities (each a class/id pair) both appear.
 */
const co_mention_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "co_mention_chunks",
    description: [
      "Return chunks where two specific entities both appear. Use after entity_neighbors ",
      "to inspect the actual passages connecting them.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        a_class: { type: "string" },
        a_id: { type: "string" },
        b_class: { type: "string" },
        b_id: { type: "string" },
        limit: {
          type: "integer",
          description: "Default 20, max 100.",
        },
      },
      required: ["a_class", "a_id", "b_class", "b_id"],
    },
  },
};
/**
 * Schema for `analyze_image_region`: ask a vision model a question about a
 * cropped region of one document page.
 *
 * The cost hint in the description had lost its range separator
 * ("~$0.005$0.02"), which reads as a single nonsensical price to the model.
 * It is restored as a plain ASCII range.
 */
const analyze_image_region_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "analyze_image_region",
    description:
      "Vision tool — answer a question about a cropped region of a document page. " +
      "Use this when the user asks about a photograph, diagram, sketch, signature, " +
      "stamp, redaction, or any visual element where the chunk's text description " +
      "isn't enough. The model reads the actual pixels via Sonnet vision. " +
      "Get the bbox + page from a prior hybrid_search hit (each chunk carries bbox). " +
      "Cost: ~$0.005-$0.02 per call. Use sparingly; prefer hybrid_search first.",
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: { type: "integer", description: "1-indexed page number" },
        bbox: {
          type: "object",
          description: "Normalized bbox (0..1) of the region to analyze.",
          properties: {
            x: { type: "number" },
            y: { type: "number" },
            w: { type: "number" },
            h: { type: "number" },
          },
          required: ["x", "y", "w", "h"],
        },
        question: { type: "string", description: "What you want to know about the image." },
        context: { type: "string", description: "Optional: prose context that grounds the model." },
      },
      required: ["doc_id", "page", "bbox", "question"],
    },
  },
};
/**
 * Schema for `request_investigation`: queue a detective job. Only `kind` is
 * required by the schema; the per-kind requirements (question, topic, doc_id)
 * are spelled out in the argument descriptions.
 */
const request_investigation_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "request_investigation",
    description: [
      "Queue a deeper investigation by the Investigation Bureau detectives. ",
      "Use ONLY when the user asks for analysis that requires structured reasoning ",
      "across multiple chunks — e.g. 'build rival hypotheses about X', ",
      "'find contradictions about Y', 'trace the chain of custody for claim Z'. ",
      "Do NOT use for plain lookups; hybrid_search is faster. ",
      "kinds: hypothesis_tournament (Holmes — 2-3 rival hypotheses with priors/posteriors) | ",
      "evidence_chain (Locard — verbatim evidence with chain_of_custody on N chunks of one doc) | ",
      "contradiction_scan (Dupin — pairs of chunks in irreconcilable tension on a topic). ",
      "Returns { job_id, kind, status_url, eta_seconds }. The UI renders a status card ",
      "with a link to /jobs/<job_id>; the worker takes ~30-120 seconds.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        kind: {
          type: "string",
          enum: ["hypothesis_tournament", "evidence_chain", "contradiction_scan"],
          description: "Detective task kind.",
        },
        question: {
          type: "string",
          description: [
            "For hypothesis_tournament: the investigative question (one sentence, declarative). ",
            "Required for hypothesis_tournament; ignored for the other kinds.",
          ].join(""),
        },
        topic: {
          type: "string",
          description: [
            "For contradiction_scan: short noun-phrase naming the disputed point ",
            "(e.g. 'date of the Roswell wreckage recovery'). Required for contradiction_scan; ",
            "ignored for other kinds.",
          ].join(""),
        },
        doc_id: {
          type: "string",
          description: [
            "Optional scope. hypothesis_tournament / contradiction_scan: narrows the corpus ",
            "shortlist. evidence_chain: REQUIRED — the doc Locard scans.",
          ].join(""),
        },
        chunks: {
          type: "array",
          items: { type: "string" },
          description: [
            "Optional for evidence_chain: list of chunk_ids to inspect. Defaults to the ",
            "top 5 anomaly-flagged chunks in the doc.",
          ].join(""),
        },
        claim: {
          type: "string",
          description: "Optional for evidence_chain: a specific claim Locard should look for support of.",
        },
      },
      required: ["kind"],
    },
  },
};
/**
 * Schema for `navigate_to`: offer the user a button that navigates the main
 * UI. Both the target path and the button label are required.
 */
const navigate_to_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "navigate_to",
    description: [
      "Offer the user a clickable button to navigate the main UI to a specific ",
      "doc, page, or chunk anchor. Target examples: '/d/<doc-id>', '/d/<doc-id>/p007', ",
      "'/d/<doc-id>/p007#c0042'. Frontend renders the button — does NOT auto-redirect.",
    ].join(""),
    parameters: {
      type: "object",
      properties: {
        target: { type: "string" },
        label: {
          type: "string",
          description: "Short button text (max 40 chars).",
        },
      },
      required: ["target", "label"],
    },
  },
};
/**
 * Every tool schema defined in this file, in one exported list. The names
 * match the keys of TOOL_HANDLERS one-to-one.
 * Order: chunk retrieval, entity graph, wiki fallbacks, vision,
 * investigation, navigation.
 */
export const TOOL_DEFINITIONS: ToolDefinition[] = [
hybrid_search_tool,
read_chunk_tool,
get_page_chunks_tool,
list_anomalies_tool,
entity_neighbors_tool,
entity_path_tool,
co_mention_chunks_tool,
read_page_tool,
read_document_tool,
read_entity_tool,
search_corpus_tool,
analyze_image_region_tool,
request_investigation_tool,
navigate_to_tool,
];
/* ─── Helpers ───────────────────────────────────────────────────────────── */
/**
 * Resolve the language a tool should answer in.
 *
 * An explicit `override` of exactly "en" or "pt" wins; any other override is
 * ignored. Without one, the context language is used, and anything other
 * than "en" resolves to "pt".
 */
function pickLang(ctx: ToolHandlerContext, override?: unknown): "pt" | "en" {
  if (override !== "en" && override !== "pt") {
    return ctx.lang === "en" ? "en" : "pt";
  }
  return override;
}
/**
 * Reduce a retrieval hit to the compact shape returned to the model.
 *
 * The snippet is the first 300 characters of the hit's text in `lang`. The
 * score prefers `rerank_score` over `score` and is rounded to 4 decimals.
 * The href points at the chunk anchor under the document route.
 */
function compactHit(h: ChunkHit, lang: "pt" | "en") {
  const { chunk_id, doc_id, page, type, classification, bbox } = h;
  const localized = (lang === "en" ? h.content_en : h.content_pt) || "";
  const relevance = h.rerank_score ?? h.score;
  return {
    chunk_id,
    doc_id,
    page,
    type,
    classification,
    bbox,
    snippet: localized.slice(0, 300),
    score: Number(relevance.toFixed(4)),
    href: `/d/${doc_id}#${chunk_id}`,
  };
}
/**
 * Cut a short, whitespace-collapsed excerpt out of `text`.
 *
 * The excerpt is anchored on the first query word of at least 3 characters
 * (case-insensitive), starting up to 60 characters before the match. When
 * there is no such word, or it does not occur, the excerpt starts at the
 * beginning of `text`.
 *
 * @param text  Source text to excerpt.
 * @param query Search query; only its first word of length >= 3 is used.
 * @param len   Number of characters taken before whitespace is collapsed.
 */
function snippet(text: string, query: string, len = 200): string {
  const keyword = query
    .toLowerCase()
    .split(/\s+/)
    .find((word) => word.length >= 3);
  const matchAt = keyword ? text.toLowerCase().indexOf(keyword) : -1;
  const from = matchAt < 0 ? 0 : Math.max(0, matchAt - 60);
  const excerpt = text.slice(from, from + len);
  return excerpt.replace(/\s+/g, " ").trim();
}
/* ─── Tool handlers ─────────────────────────────────────────────────────── */
/**
 * Handler for `hybrid_search`.
 *
 * Runs the hybrid retrieval call, optionally emits inline artifacts for the
 * first 6 hits, and returns the hits in compact form.
 *
 * Argument handling:
 * - `query` is required; a blank query returns `{ error: "empty_query" }`.
 * - `top_k` defaults to 20 and is capped at 50. Fractions are truncated, and
 *   non-numeric or non-positive values fall back to the default, so the
 *   retrieval layer never receives a negative or fractional limit.
 * - `doc_id`, `type` and `classification` are honoured only when they are
 *   non-empty strings, because the model's arguments are untyped JSON.
 *   `doc_id` falls back to the currently-viewed document from `ctx`.
 *
 * Any exception from retrieval is reported as `retrieval_unavailable` with a
 * hint to fall back to `search_corpus`.
 */
async function handleHybridSearch(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const query = String(args.query ?? "").trim();
  if (!query) return { error: "empty_query", hits: [] };
  const lang = pickLang(ctx, args.lang);

  const optionalString = (value: unknown): string | null =>
    typeof value === "string" && value !== "" ? value : null;

  // NaN and values <= 0 fail the `> 0` test and take the default.
  const requestedTopK = Math.trunc(Number(args.top_k));
  const top_k = requestedTopK > 0 ? Math.min(requestedTopK, 50) : 20;

  try {
    const hits = await hybridSearch({
      query,
      lang,
      doc_id: optionalString(args.doc_id) || ctx.doc_id || null,
      type: optionalString(args.type),
      classification: optionalString(args.classification),
      ufo_only: Boolean(args.ufo_only),
      top_k,
      // W2-TD#8: chat is latency-sensitive — skip rerank when ≤10 candidates.
      // The model only cites the first few hits anyway and BGE-Reranker
      // adds 5-8s on CPU. RRF order from the RPC is plenty for the head.
      rerank_strategy: "when_top_k_gt",
      rerank_threshold: 10,
    });
    // Emit one citation (+ optional crop_image) artifact per hit so the UI can
    // render inline cards next to the assistant text. Limit to top 6 to avoid
    // flooding the chat with crops when top_k is large.
    if (ctx.emitArtifact) {
      for (const h of hits.slice(0, 6)) {
        ctx.emitArtifact({
          kind: "citation",
          chunk_id: h.chunk_id,
          doc_id: h.doc_id,
          page: h.page,
          type: h.type,
          classification: h.classification,
          bbox: h.bbox ?? null,
          snippet: ((lang === "en" ? h.content_en : h.content_pt) || "").slice(0, 300),
          score: Number((h.rerank_score ?? h.score).toFixed(4)),
        });
        // Only chunks with a non-degenerate bbox can be cropped.
        if (h.bbox && h.bbox.w > 0 && h.bbox.h > 0) {
          const bb = h.bbox;
          const src =
            `/api/crop?doc=${encodeURIComponent(h.doc_id)}` +
            `&page=${h.page}&x=${bb.x}&y=${bb.y}&w=${bb.w}&h=${bb.h}&w_px=640`;
          ctx.emitArtifact({
            kind: "crop_image",
            src,
            doc_id: h.doc_id,
            page: h.page,
            chunk_id: h.chunk_id,
            alt_en: (h.content_en || h.chunk_id).slice(0, 120),
            alt_pt: (h.content_pt || h.chunk_id).slice(0, 120),
          });
        }
      }
    }
    return { query, lang, count: hits.length, hits: hits.map((h) => compactHit(h, lang)) };
  } catch (e) {
    return {
      error: "retrieval_unavailable",
      message: (e as Error).message,
      fallback: "use search_corpus (legacy keyword)",
    };
  }
}
/**
 * Handler for `read_chunk`: look up one chunk and return its full bilingual
 * text plus metadata and an href to its anchor.
 *
 * Returns `missing_args` when either id is blank, `not_found` when the lookup
 * yields nothing, and `retrieval_unavailable` when the lookup throws.
 */
async function handleReadChunk(args: Record<string, unknown>): Promise<unknown> {
  const clean = (value: unknown): string => String(value ?? "").trim();
  const doc_id = clean(args.doc_id);
  const chunk_id = clean(args.chunk_id);
  if (doc_id === "" || chunk_id === "") return { error: "missing_args" };

  try {
    const found = await getChunk(doc_id, chunk_id);
    if (found) {
      const { page, type, bbox, classification, content_en, content_pt } = found;
      return {
        chunk_id: found.chunk_id,
        doc_id: found.doc_id,
        page,
        type,
        bbox,
        classification,
        content_en,
        content_pt,
        href: `/d/${found.doc_id}#${found.chunk_id}`,
      };
    }
    return { error: "not_found", doc_id, chunk_id };
  } catch (e) {
    return { error: "retrieval_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `get_page_chunks`: return the chunks of one page, with each
 * chunk's EN/PT text truncated to 500 characters.
 *
 * Returns `bad_args` when `doc_id` is blank or `page` is not a finite number
 * of at least 1, and `retrieval_unavailable` when the lookup throws.
 */
async function handleGetPageChunks(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  const pageIsValid = Number.isFinite(page) && page >= 1;
  if (doc_id === "" || !pageIsValid) return { error: "bad_args" };

  try {
    const rows = await getPageChunks(doc_id, page);
    const chunks = rows.map((row) => {
      const { chunk_id, type, bbox, classification } = row;
      return {
        chunk_id,
        type,
        bbox,
        classification,
        content_en: (row.content_en || "").slice(0, 500),
        content_pt: (row.content_pt || "").slice(0, 500),
      };
    });
    return { doc_id, page, count: rows.length, chunks };
  } catch (e) {
    return { error: "retrieval_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `list_anomalies`.
 *
 * - `kind`: "cryptid" selects cryptid anomalies; anything else means "ufo".
 * - `doc_id`: used only when it is a non-empty string, otherwise the
 *   currently-viewed document from `ctx` (or no scope at all).
 * - `limit`: default 50, capped at 200. Fractions are truncated, and
 *   non-numeric or non-positive values fall back to the default, so the
 *   query never receives a negative or fractional limit.
 *
 * Returns `retrieval_unavailable` when the lookup throws.
 */
async function handleListAnomalies(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const kind = args.kind === "cryptid" ? "cryptid" : "ufo";
  const explicitDoc = typeof args.doc_id === "string" && args.doc_id !== "" ? args.doc_id : null;
  const doc_id = explicitDoc || ctx.doc_id || null;

  // NaN and values <= 0 fail the `> 0` test and take the default.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit > 0 ? Math.min(requestedLimit, 200) : 50;

  try {
    const rows = await listAnomalies({ kind, doc_id, limit });
    return { kind, doc_id, count: rows.length, anomalies: rows };
  } catch (e) {
    return { error: "retrieval_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `search_corpus`, the legacy keyword lookup over the wiki files.
 *
 * Documents match when the lowercased query occurs in "<id> <title> <first
 * 2000 chars of body>". Entities match on their file id only. At most 8 hits
 * are returned in total, documents first.
 *
 * Matching detail: entity ids are kebab-case (e.g. 'j-edgar-hoover'), so a
 * natural multi-word query such as "edgar hoover" could never match an id
 * verbatim. Ids are therefore also compared against the query with runs of
 * whitespace or underscores replaced by "-".
 *
 * `scope` accepts "all", "documents" or "entities"; any other value is
 * treated as "all" instead of silently returning nothing.
 *
 * NOTE(review): hit `type` is the folder name with one trailing "s" removed,
 * so "people" stays "people" and "uap-objects" becomes "uap-object". These do
 * not equal the `read_entity` class keys ("person", "uap_object"). Confirm
 * what consumers expect before changing it.
 */
async function handleSearch(args: Record<string, unknown>): Promise<unknown> {
  const MAX_HITS = 8;
  const query = String(args.query ?? "").trim();
  const scope = args.scope === "documents" || args.scope === "entities" ? args.scope : "all";
  if (!query) return { error: "empty_query", hits: [] };

  const ql = query.toLowerCase();
  const slug = ql.replace(/[\s_]+/g, "-");
  const idMatches = (id: string): boolean => {
    const idLc = id.toLowerCase();
    return idLc.includes(ql) || idLc.includes(slug);
  };

  const hits: Array<{ type: string; id: string; title: string; snippet: string; href: string }> = [];

  if (scope === "all" || scope === "documents") {
    const ids = await listDocuments();
    for (const id of ids) {
      const f = await readDocument(id);
      if (!f) continue;
      const title = String(f.fm.canonical_title ?? id);
      const hay = `${id} ${title} ${f.body.slice(0, 2000)}`.toLowerCase();
      if (hay.includes(ql) || idMatches(id)) {
        hits.push({
          type: "document",
          id,
          title,
          snippet: snippet(f.body, query),
          href: `/d/${id}`,
        });
      }
      if (hits.length >= MAX_HITS) break;
    }
  }

  if ((scope === "all" || scope === "entities") && hits.length < MAX_HITS) {
    const classes = ["people", "organizations", "locations", "events", "uap-objects", "vehicles", "operations", "concepts"];
    for (const cls of classes) {
      try {
        const entries = await fs.readdir(path.join(WIKI, "entities", cls));
        for (const file of entries) {
          if (!file.endsWith(".md")) continue;
          const id = file.replace(/\.md$/, "");
          if (!idMatches(id)) continue;
          const content = await fs.readFile(path.join(WIKI, "entities", cls, file), "utf-8");
          // Take the display name from the file's canonical_name line, if any.
          const cname = content.match(/canonical_name:\s*([^\n]+)/)?.[1]?.trim() ?? id;
          hits.push({
            type: cls.replace(/s$/, ""),
            id,
            title: cname,
            snippet: id,
            href: `/e/${cls}/${id}`,
          });
          if (hits.length >= MAX_HITS) break;
        }
      } catch {
        /* dir missing — fine */
      }
      if (hits.length >= MAX_HITS) break;
    }
  }

  return { query, scope, hits };
}
/**
 * Handler for `read_page`: read the legacy wiki record of one page.
 *
 * Page normalisation: the page may be given as a number, a digit string, or
 * a "p"-prefixed id with any number of digits ("7", "p7", "p007", "p1000").
 * All forms are normalised to "p" plus the number zero-padded to at least 3
 * digits. Previously "p7" and "p1000" were rejected although "7" and "1000"
 * were accepted, and a negative number produced a bogus id such as "p0-3".
 * Input that starts with digits ("7abc") is still accepted, as before.
 *
 * Returns `missing_args` for a blank `doc_id`, `bad_page` when no
 * non-negative page number can be derived, and `not_found` when the wiki has
 * no such page.
 */
async function handleReadPage(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  if (!doc_id) return { error: "missing_args" };

  const rawPage = String(args.page ?? "").trim();
  const strict = /^p?(\d+)$/i.exec(rawPage);
  // Fall back to parseInt to keep the old leniency for input like "7abc".
  const pageNumber = strict ? Number(strict[1]) : parseInt(rawPage, 10);
  if (!Number.isSafeInteger(pageNumber) || pageNumber < 0) return { error: "bad_page" };
  const page = `p${String(pageNumber).padStart(3, "0")}`;

  const md = await readPage(doc_id, page);
  if (!md) return { error: "not_found", doc_id, page };
  return {
    doc_id,
    page,
    page_type: md.fm.page_type,
    language: md.fm.language_detected,
    content_classification: md.fm.content_classification,
    redactions_count: Array.isArray(md.fm.redactions) ? (md.fm.redactions as never[]).length : 0,
    vision_description: md.fm.vision_description,
    vision_description_pt_br: md.fm.vision_description_pt_br,
    entities_extracted: md.fm.entities_extracted,
    body_excerpt: md.body.slice(0, 2000),
  };
}
/**
 * Handler for `read_document`: return a document's front-matter fields, its
 * page count with the first 20 entries of the page listing, and the first
 * 2000 characters of its body as the executive summary.
 *
 * Returns `not_found` when the document cannot be read.
 */
async function handleReadDocument(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const doc = await readDocument(doc_id);
  if (!doc) {
    return { error: "not_found", doc_id };
  }

  const pageList = await listPages(doc_id);
  const { fm, body } = doc;
  return {
    doc_id,
    canonical_title: fm.canonical_title,
    collection: fm.collection,
    document_class: fm.document_class,
    page_count: pageList.length,
    pages_index: pageList.slice(0, 20),
    content_classification: fm.content_classification,
    languages_detected: fm.languages_detected,
    key_entities: fm.key_entities,
    executive_summary: body.slice(0, 2000),
  };
}
/**
 * Handler for `read_entity`: map the class key to its wiki folder, read the
 * entity file, and return selected front-matter fields plus the first 2000
 * characters of the body.
 *
 * Returns `bad_class` when the class key maps to no folder and `not_found`
 * when the entity cannot be read. The returned `class` is the folder name,
 * not the class key that was passed in.
 */
async function handleReadEntity(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();

  const folder = classKeyToFolder(cls);
  if (!folder) {
    return { error: "bad_class", cls };
  }

  const entity = await readEntity(folder, id);
  if (entity) {
    const { fm, body } = entity;
    return {
      class: folder,
      id,
      canonical_name: fm.canonical_name,
      aliases: fm.aliases,
      total_mentions: fm.total_mentions,
      enrichment_status: fm.enrichment_status,
      external_sources: fm.external_sources,
      disambiguation_note: fm.disambiguation_note,
      body_excerpt: body.slice(0, 2000),
    };
  }
  return { error: "not_found", cls, id };
}
/**
 * Handler for `entity_neighbors`: resolve the entity, then list its
 * co-mentioned neighbours.
 *
 * - `filter_classes` is used only when it is an array, and only its
 *   non-empty string members are kept. A non-array value used to throw
 *   inside the `try` and was misreported as `graph_unavailable`; it is now
 *   ignored.
 * - `limit`: default 30, capped at 100. Fractions are truncated, and
 *   non-numeric or non-positive values fall back to the default.
 *
 * Returns `missing_args`, `entity_not_found`, or `graph_unavailable` when the
 * graph lookup throws.
 */
async function handleEntityNeighbors(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();
  if (!cls || !id) return { error: "missing_args" };

  const filterClasses = Array.isArray(args.filter_classes)
    ? args.filter_classes.filter((c): c is string => typeof c === "string" && c !== "")
    : undefined;

  // NaN and values <= 0 fail the `> 0` test and take the default.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit > 0 ? Math.min(requestedLimit, 100) : 30;

  try {
    const ent = await findEntity(cls, id);
    if (!ent) return { error: "entity_not_found", class: cls, id };
    const neighbors = await getNeighbors(ent.entity_pk, { limit, classes: filterClasses });
    return { entity: ent, count: neighbors.length, neighbors };
  } catch (e) {
    return { error: "graph_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `entity_path`: resolve both endpoints, then search for paths
 * between them.
 *
 * `max_hops` is documented as 1-4 (default 3). It is truncated to an integer
 * and capped at 4; non-numeric or non-positive values fall back to the
 * default. Previously a negative or fractional value was passed straight to
 * the path search.
 *
 * Returns `missing_args`, `from_not_found` / `to_not_found`, or
 * `graph_unavailable` when the graph lookup throws.
 */
async function handleEntityPath(args: Record<string, unknown>): Promise<unknown> {
  const fromCls = String(args.from_class ?? "").trim();
  const fromId = String(args.from_id ?? "").trim();
  const toCls = String(args.to_class ?? "").trim();
  const toId = String(args.to_id ?? "").trim();
  if (!fromCls || !fromId || !toCls || !toId) return { error: "missing_args" };

  // NaN and values <= 0 fail the `> 0` test and take the default.
  const requestedHops = Math.trunc(Number(args.max_hops));
  const maxHops = requestedHops > 0 ? Math.min(requestedHops, 4) : 3;

  try {
    const [a, b] = await Promise.all([findEntity(fromCls, fromId), findEntity(toCls, toId)]);
    if (!a) return { error: "from_not_found", class: fromCls, id: fromId };
    if (!b) return { error: "to_not_found", class: toCls, id: toId };
    const paths = await findPaths(a.entity_pk, b.entity_pk, maxHops);
    return { from: a, to: b, max_hops: maxHops, paths };
  } catch (e) {
    return { error: "graph_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `co_mention_chunks`: resolve both entities, then return the
 * chunks in which both appear.
 *
 * - `limit`: default 20, capped at 100. Fractions are truncated, and
 *   non-numeric or non-positive values fall back to the default.
 * - When an entity cannot be resolved, the `entity_not_found` error still
 *   carries `a` and `b` as before. It now also lists which side(s) failed in
 *   `missing`, so the caller knows which id to correct.
 *
 * Returns `graph_unavailable` when the graph lookup throws.
 */
async function handleCoMentionChunks(args: Record<string, unknown>): Promise<unknown> {
  const aCls = String(args.a_class ?? "").trim();
  const aId = String(args.a_id ?? "").trim();
  const bCls = String(args.b_class ?? "").trim();
  const bId = String(args.b_id ?? "").trim();
  if (!aCls || !aId || !bCls || !bId) return { error: "missing_args" };

  // NaN and values <= 0 fail the `> 0` test and take the default.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit > 0 ? Math.min(requestedLimit, 100) : 20;

  try {
    const [a, b] = await Promise.all([findEntity(aCls, aId), findEntity(bCls, bId)]);
    if (!a || !b) {
      const missing: string[] = [];
      if (!a) missing.push("a");
      if (!b) missing.push("b");
      return { error: "entity_not_found", a: aId, b: bId, missing };
    }
    const chunks = await getCoMentionChunks(a.entity_pk, b.entity_pk, limit);
    return { a, b, count: chunks.length, chunks };
  } catch (e) {
    return { error: "graph_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `request_investigation`: validate the request, build the job
 * payload for the chosen detective, insert a queued row into
 * `public.investigation_jobs`, and return the job id with a status URL.
 *
 * Per-kind payloads:
 * - hypothesis_tournament: `{ question, lang, doc_id? }`, question required.
 * - contradiction_scan:    `{ topic, lang, doc_id? }`, topic required.
 * - evidence_chain:        `{ doc_id, chunks?, claim? }`, doc_id required
 *   (taken from the argument, or from the currently-viewed doc in `ctx`).
 *
 * Changes relative to the previous version:
 * - The evidence_chain ETA follows the "~30s × n_chunks" rule stated below,
 *   using the number of chunk ids actually requested. The 5-chunk assumption
 *   is kept only when no chunks are listed.
 * - Chunk ids are trimmed and de-duplicated, and `chunks` is omitted when
 *   nothing usable remains. NOTE(review): this assumes the worker applies
 *   its "top 5 anomaly-flagged chunks" default when `chunks` is absent, as
 *   the tool description promises. Confirm how it treats an empty array.
 *
 * Errors: `bad_kind`, `question_required`, `topic_required`,
 * `doc_id_required`, `insert_failed` (no row returned), `db_unavailable`
 * (the query threw).
 */
async function handleRequestInvestigation(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const kind = String(args.kind ?? "").trim();
  if (kind !== "hypothesis_tournament" && kind !== "evidence_chain" && kind !== "contradiction_scan") {
    return { error: "bad_kind", message: "kind must be hypothesis_tournament, evidence_chain or contradiction_scan" };
  }
  const docArg = typeof args.doc_id === "string" && args.doc_id.trim()
    ? args.doc_id.trim() : ctx.doc_id || null;
  const lang = pickLang(ctx);
  const payload: Record<string, unknown> = {};
  // Chunk count assumed for Locard's ETA when the caller lists none.
  let locardChunkCount = 5;

  if (kind === "hypothesis_tournament") {
    const question = String(args.question ?? "").trim();
    if (!question) return { error: "question_required", message: "hypothesis_tournament needs a question" };
    payload.question = question;
    payload.lang = lang;
    if (docArg) payload.doc_id = docArg;
  } else if (kind === "contradiction_scan") {
    const topic = String(args.topic ?? "").trim();
    if (!topic) return { error: "topic_required", message: "contradiction_scan needs a topic" };
    payload.topic = topic;
    payload.lang = lang;
    if (docArg) payload.doc_id = docArg;
  } else {
    if (!docArg) return { error: "doc_id_required", message: "evidence_chain needs a doc_id" };
    payload.doc_id = docArg;
    if (Array.isArray(args.chunks)) {
      const chunkIds = [
        ...new Set(
          (args.chunks as unknown[])
            .filter((c): c is string => typeof c === "string")
            .map((c) => c.trim())
            .filter((c) => c !== ""),
        ),
      ];
      if (chunkIds.length > 0) {
        payload.chunks = chunkIds;
        locardChunkCount = chunkIds.length;
      }
    }
    if (typeof args.claim === "string" && args.claim.trim()) payload.claim = args.claim.trim();
  }

  const triggered_by = ctx.user_email ? `user:${ctx.user_email}` : "user:anonymous";
  // Investigation Bureau expected duration: Holmes ~60s, Dupin ~60s, Locard ~30s × n_chunks.
  const eta = kind === "evidence_chain" ? 30 * locardChunkCount : 60;

  try {
    const rows = await pgQuery<{ job_id: string; created_at: string }>(
      `INSERT INTO public.investigation_jobs (kind, payload, triggered_by, status)
VALUES ($1, $2::jsonb, $3, 'queued')
RETURNING job_id, created_at`,
      [kind, JSON.stringify(payload), triggered_by],
    );
    const row = rows[0];
    if (!row) return { error: "insert_failed" };
    return {
      job_id: row.job_id,
      kind,
      status: "queued",
      eta_seconds: eta,
      status_url: `/jobs/${row.job_id}`,
      payload_summary: payload,
      detective: kind === "hypothesis_tournament" ? "holmes"
        : kind === "contradiction_scan" ? "dupin"
        : "locard",
    };
  } catch (e) {
    return { error: "db_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for `navigate_to`: validate a navigation target and echo it back
 * with a label truncated to 40 characters, for the frontend to render as a
 * button.
 *
 * Only same-origin paths are accepted. A target must start with "/", and it
 * must not start with "//" or "/\": browsers resolve both as scheme-relative
 * URLs pointing at another host. Because the target comes from the model,
 * which may be steered by corpus text, the prefix check alone would let the
 * button lead off-site.
 *
 * Errors: `target_must_start_with_slash`, `target_must_be_same_origin`.
 */
async function handleNavigate(args: Record<string, unknown>): Promise<unknown> {
  const target = String(args.target ?? "").trim();
  const label = String(args.label ?? "").slice(0, 40);
  if (!target.startsWith("/")) return { error: "target_must_start_with_slash", target };
  if (/^\/[/\\]/.test(target)) return { error: "target_must_be_same_origin", target };
  return { ok: true, target, label };
}
/**
 * Handler for `analyze_image_region`: validate the request, lazily load the
 * vision module, run the analysis, and (when an artifact sink is present)
 * emit the analysed crop as an inline image.
 *
 * Validation:
 * - `doc_id`, `page`, `bbox` and `question` must all be present
 *   (`missing_args`, as before).
 * - `page` must be a positive integer, and `bbox` must be an object with
 *   finite numeric x, y, w, h and w, h > 0 (`bad_args`). Previously the bbox
 *   was cast unchecked, so malformed input only surfaced later as an opaque
 *   `vision_failed`, if at all.
 * - Only the four validated bbox fields are forwarded to the vision call.
 *
 * Any exception from loading or running the vision module is returned as
 * `vision_failed`.
 */
async function handleAnalyzeImageRegion(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  const rawBbox = args.bbox;
  const question = String(args.question ?? "").trim();
  if (!doc_id || !page || !rawBbox || !question) return { error: "missing_args" };

  if (!Number.isInteger(page) || page < 1) {
    return { error: "bad_args", message: "page must be a positive integer" };
  }
  const fields = (typeof rawBbox === "object" ? rawBbox : {}) as Record<string, unknown>;
  const x = fields.x;
  const y = fields.y;
  const w = fields.w;
  const h = fields.h;
  if (
    typeof x !== "number" || typeof y !== "number" ||
    typeof w !== "number" || typeof h !== "number" ||
    ![x, y, w, h].every((v) => Number.isFinite(v)) ||
    w <= 0 || h <= 0
  ) {
    return { error: "bad_args", message: "bbox needs finite numeric x, y, w, h with w > 0 and h > 0" };
  }
  const bbox = { x, y, w, h };

  try {
    // Loaded on demand so the vision module is only pulled in when used.
    const { analyzeImageRegion } = await import("./vision");
    const out = await analyzeImageRegion({
      doc_id, page, bbox, question,
      context: typeof args.context === "string" ? args.context : undefined,
      lang: ctx.lang === "en" ? "en" : "pt",
    });
    if (ctx.emitArtifact) {
      ctx.emitArtifact({
        kind: "crop_image",
        src: out.crop_url,
        doc_id, page,
        alt_en: question.slice(0, 120),
        alt_pt: question.slice(0, 120),
      });
    }
    return out;
  } catch (e) {
    return { error: "vision_failed", message: (e as Error).message };
  }
}
/**
 * Lookup table from tool name to its local handler. The keys are the same
 * names that the schemas in TOOL_DEFINITIONS declare in `function.name`, so a
 * tool call issued by the model can be routed by name.
 */
export const TOOL_HANDLERS: Record<string, ToolHandler> = {
hybrid_search: handleHybridSearch,
read_chunk: handleReadChunk,
get_page_chunks: handleGetPageChunks,
list_anomalies: handleListAnomalies,
entity_neighbors: handleEntityNeighbors,
entity_path: handleEntityPath,
co_mention_chunks: handleCoMentionChunks,
read_page: handleReadPage,
read_document: handleReadDocument,
read_entity: handleReadEntity,
// Registered as `search_corpus`; the handler itself is named handleSearch.
search_corpus: handleSearch,
analyze_image_region: handleAnalyzeImageRegion,
request_investigation: handleRequestInvestigation,
navigate_to: handleNavigate,
};