disclosure-bureau/web/lib/chat/tools.ts
Luiz Gustavo eaf282c535
Some checks failed
CI / Web — typecheck + lint + build (push) Failing after 40s
CI / Scripts — Python smoke (push) Failing after 3s
CI / Web — npm audit (push) Failing after 29s
CI / Retrieval — golden set (Recall@5 + MRR) (push) Failing after 3s
W2: rerank opt-in, analyze_image_region tool, RAG eval, graph cleanup, ADRs
- TD#8 hybrid.ts: rerank_strategy {always|when_top_k_gt|never} + threshold
  (default skips rerank for top_k ≤ 15; chat tool uses threshold 10)
- O11 vision.ts + tools.ts: analyze_image_region tool — sharp-crops the
  bbox, claude CLI reads the temp PNG via Read tool, Sonnet vision answers
- TD#12 /graph: SigmaGraph replaces ForceGraphCanvas; react-force-graph-2d
  uninstalled (-37 transitive deps); force-graph-canvas.tsx deleted
- TD#27 messages/route.ts gatherContext slice sizes via CTX_* env vars
- TD#22 tests/rag/: golden.yaml (15 queries) + run.py (Recall@k + MRR +
  negative-pass rate) + baseline.json + CI job in .forgejo/workflows/ci.yml
- docs/adrs/: ADR-001..005 published from systems-atelier deliverables

Verified live on disclosure.top: top_k=5 path skips rerank (6.7s embed-only,
was 12-15s with rerank); rerank=always still available on demand.
First RAG baseline: Recall@5 = 0.2083, MRR = 0.25, Negative pass = 1.0.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 19:20:09 -03:00

771 lines
27 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Sherlock's tool kit — OpenAI-style function-calling schema + local handlers.
*
* Each tool has:
* - definition: JSON Schema sent to the model
* - handler: Node function that runs locally and returns a JSON-serializable result
*
* Tools called by the model trigger AG-UI events streamed to the frontend
* (tool_start, tool_result, navigate). The frontend renders these inline in
* the message AND, for `navigate_to`, can offer a clickable button to scroll
* the UI to a target page.
*
* Retrieval stack (chunks-aware):
* - hybrid_search → BM25 + dense (BGE-M3) + RRF (+ BGE-Reranker rerank only when top_k > 10)
* - read_chunk → fetch a single chunk by chunk_id (cite-then-quote)
* - list_anomalies → all UFO/cryptid-flagged chunks (cheap, no LLM)
* - get_page_chunks → assemble one page from chunks
* Graph tools (entity co-mentions in chunks):
* - entity_neighbors, entity_path, co_mention_chunks
* Vision:
* - analyze_image_region → answer a question about a cropped region of a page
* Wiki-aware fallbacks (when DB not available or richer entity data needed):
* - read_page, read_document, read_entity, search_corpus (legacy grep)
* - navigate_to → emit clickable button to scroll UI
*/
import fs from "node:fs/promises";
import path from "node:path";
import {
WIKI,
readDocument,
readPage,
readEntity,
listDocuments,
listPages,
classKeyToFolder,
} from "../wiki";
import {
hybridSearch,
getChunk,
listAnomalies,
getPageChunks,
type ChunkHit,
} from "../retrieval/hybrid";
import {
findEntity,
getNeighbors,
findPaths,
getCoMentionChunks,
} from "../retrieval/graph";
/**
 * Shape of one tool entry in the function-calling schema sent to the model
 * (the file header describes it as an OpenAI-style function-calling schema).
 */
export interface ToolDefinition {
/** Discriminator; always the literal "function". */
type: "function";
function: {
/** Tool name; TOOL_HANDLERS in this file is keyed by the same strings. */
name: string;
/** Natural-language guidance telling the model when and how to call the tool. */
description: string;
/** JSON Schema object describing the tool's arguments. */
parameters: Record<string, unknown>;
};
}
/**
 * Per-call context handed to every tool handler as its second argument.
 * All fields are optional; handlers in this file fall back to "pt" when
 * `lang` is absent and skip artifact emission when `emitArtifact` is absent.
 */
export interface ToolHandlerContext {
/** Currently-viewed location, if any, to bias search. */
doc_id?: string | null;
page_id?: string | null;
/** UI language preference (pt | en). */
lang?: "pt" | "en";
/** Optional sink for inline AG-UI artifacts (citations, crops, entity cards).
* When provided, tools may push typed artifacts that the UI renders inline
* alongside the tool block. Safe to leave undefined for non-streaming callers. */
emitArtifact?: (artifact: import("./agui").Artifact) => void;
}
/**
 * Call signature shared by all tool handlers: receives the raw, untyped
 * argument object plus the per-call context and resolves to a result that,
 * per the file header, must be JSON-serializable.
 */
export interface ToolHandler {
(args: Record<string, unknown>, ctx: ToolHandlerContext): Promise<unknown>;
}
/* ─── Tool defs ─────────────────────────────────────────────────────────── */
/**
 * Schema for the `hybrid_search` tool (handled by `handleHybridSearch`).
 * Only `query` is required; every other property is an optional filter.
 * The handler in this file defaults `top_k` to 20, caps it at 50, and passes
 * rerank_strategy "when_top_k_gt" with a threshold of 10 to the search call.
 */
const hybrid_search_tool: ToolDefinition = {
type: "function",
function: {
name: "hybrid_search",
description:
"PRIMARY semantic search over the entire UAP/UFO corpus chunks. " +
"Combines BM25 keyword recall + BGE-M3 dense embeddings + cross-encoder rerank. " +
"Returns up to top_k chunks with chunk_id, doc_id, page, bbox, text snippets, " +
"classification, and relevance score. Use this for any question about content. " +
"Filter with doc_id to scope to one document; type to restrict chunk type " +
"(paragraph, heading, stamp, etc.); ufo_only=true to retrieve only anomaly-flagged chunks.",
parameters: {
type: "object",
properties: {
query: { type: "string", description: "Natural language query, PT or EN." },
lang: { type: "string", enum: ["pt", "en"], description: "Search language (default pt)." },
doc_id: { type: "string", description: "Optional: restrict to one document." },
type: {
type: "string",
description:
"Optional chunk-type filter: paragraph, heading, table_marker, image, stamp, signature, " +
"address_block, classification_marking, redaction, footer, marginalia, form_field.",
},
classification: {
type: "string",
description: "Optional: SECRET, CONFIDENTIAL, RESTRICTED, NOFORN.",
},
ufo_only: { type: "boolean", description: "Only chunks flagged with UFO anomaly." },
top_k: { type: "integer", description: "Number of final results (default 20, max 50)." },
},
required: ["query"],
},
},
};
/**
 * Schema for the `read_chunk` tool (handled by `handleReadChunk`).
 * Both `doc_id` and `chunk_id` are required.
 */
const read_chunk_tool: ToolDefinition = {
type: "function",
function: {
name: "read_chunk",
description:
"Read ONE chunk in full (verbatim text EN+PT, full bbox, metadata, anomaly flags). " +
"Use AFTER hybrid_search to expand a citation before quoting the user.",
parameters: {
type: "object",
properties: {
doc_id: { type: "string" },
chunk_id: { type: "string", description: "e.g. 'c0042'" },
},
required: ["doc_id", "chunk_id"],
},
},
};
/**
 * Schema for the `get_page_chunks` tool (handled by `handleGetPageChunks`).
 * Takes a document id and a 1-indexed integer page; both are required.
 */
const get_page_chunks_tool: ToolDefinition = {
type: "function",
function: {
name: "get_page_chunks",
description:
"Get all chunks of one page in reading order. Use to reconstruct a page or to " +
"answer 'what's on page N of doc X' questions with full structure.",
parameters: {
type: "object",
properties: {
doc_id: { type: "string" },
page: { type: "integer", description: "Page number (1-indexed)." },
},
required: ["doc_id", "page"],
},
},
};
/**
 * Schema for the `list_anomalies` tool (handled by `handleListAnomalies`).
 * `kind` is required and restricted to "ufo" | "cryptid". The handler in this
 * file defaults `limit` to 50 and caps it at 200.
 */
const list_anomalies_tool: ToolDefinition = {
type: "function",
function: {
name: "list_anomalies",
description:
"List all chunks flagged with a UFO or cryptid anomaly. Cheap query (no embedding). " +
"Use for 'show me all sightings', 'all spherical objects', 'cryptid encounters'.",
parameters: {
type: "object",
properties: {
kind: { type: "string", enum: ["ufo", "cryptid"] },
doc_id: { type: "string", description: "Optional: restrict to one doc." },
limit: { type: "integer", description: "Max results (default 50)." },
},
required: ["kind"],
},
},
};
/**
 * Schema for the legacy `read_page` tool (handled by `handleReadPage`).
 * Unlike `get_page_chunks`, `page` is declared as a string here
 * (e.g. 'p007' or '7').
 */
const read_page_tool: ToolDefinition = {
type: "function",
function: {
name: "read_page",
description:
"Read the legacy wiki page record for context (vision_description, " +
"entities_extracted, content_classification). Useful WHEN the doc isn't in the new " +
"chunk index yet OR you need page-level vision metadata. Prefer hybrid_search + " +
"read_chunk for content questions.",
parameters: {
type: "object",
properties: {
doc_id: { type: "string" },
page: { type: "string", description: "e.g. 'p007' or '7'." },
},
required: ["doc_id", "page"],
},
},
};
/**
 * Schema for the `read_document` tool (handled by `handleReadDocument`).
 * Takes a single required `doc_id`.
 */
const read_document_tool: ToolDefinition = {
type: "function",
function: {
name: "read_document",
description:
"Get the consolidated overview of a document — summary, page index, " +
"content_classification, key entities.",
parameters: {
type: "object",
properties: { doc_id: { type: "string" } },
required: ["doc_id"],
},
},
};
/**
 * Schema for the `read_entity` tool (handled by `handleReadEntity`).
 * `class` is restricted to the eight singular class keys listed in the enum;
 * the handler converts the key to a folder via `classKeyToFolder`.
 */
const read_entity_tool: ToolDefinition = {
type: "function",
function: {
name: "read_entity",
description:
"Read the detail of an entity (person, organization, location, event, " +
"uap_object, vehicle, operation, concept) including enrichment from WebSearch.",
parameters: {
type: "object",
properties: {
class: {
type: "string",
enum: [
"person",
"organization",
"location",
"event",
"uap_object",
"vehicle",
"operation",
"concept",
],
},
id: { type: "string", description: "kebab-case id, e.g. 'j-edgar-hoover'." },
},
required: ["class", "id"],
},
},
};
/**
 * Schema for the legacy `search_corpus` tool (handled by `handleSearch`).
 * `query` is required; `scope` is optional and the handler treats a missing
 * scope as "all".
 */
const search_corpus_tool: ToolDefinition = {
type: "function",
function: {
name: "search_corpus",
description:
"Legacy keyword-only search over document IDs, titles, and entity IDs. " +
"Prefer hybrid_search for content questions. Use this only to find entities/docs by name.",
parameters: {
type: "object",
properties: {
query: { type: "string" },
scope: { type: "string", enum: ["all", "documents", "entities"] },
},
required: ["query"],
},
},
};
/**
 * Schema for the `entity_neighbors` tool (handled by `handleEntityNeighbors`).
 * `class` and `id` are required; `filter_classes` and `limit` are optional.
 * The handler in this file defaults `limit` to 30 and caps it at 100.
 */
const entity_neighbors_tool: ToolDefinition = {
type: "function",
function: {
name: "entity_neighbors",
description:
"List entities co-mentioned with a given entity in the corpus chunks. " +
"Use to answer 'who/what is connected to X' questions. Returns up to " +
"limit neighbors sorted by edge weight (number of shared chunks).",
parameters: {
type: "object",
properties: {
class: {
type: "string",
enum: ["person", "organization", "location", "event", "uap_object", "vehicle", "operation", "concept"],
},
id: { type: "string", description: "kebab-case id or canonical name." },
filter_classes: {
type: "array",
items: { type: "string" },
description: "Optional: restrict neighbors to these entity classes.",
},
limit: { type: "integer", description: "Max neighbors (default 30, max 100)." },
},
required: ["class", "id"],
},
},
};
/**
 * Schema for the `entity_path` tool (handled by `handleEntityPath`).
 * Both endpoints (class + id each) are required; `max_hops` is optional.
 * NOTE(review): unlike `read_entity` / `entity_neighbors`, the class fields
 * carry no enum here — confirm whether that is intentional.
 */
const entity_path_tool: ToolDefinition = {
type: "function",
function: {
name: "entity_path",
description:
"Find paths between two entities via shared chunks (multi-hop). Useful for " +
"'how is X connected to Y' or 'show the trail between Hoover and Project Sign'.",
parameters: {
type: "object",
properties: {
from_class: { type: "string" },
from_id: { type: "string" },
to_class: { type: "string" },
to_id: { type: "string" },
max_hops: { type: "integer", description: "1-4 (default 3)." },
},
required: ["from_class", "from_id", "to_class", "to_id"],
},
},
};
/**
 * Schema for the `co_mention_chunks` tool (handled by `handleCoMentionChunks`).
 * Both entities (class + id each) are required; `limit` is optional.
 */
const co_mention_chunks_tool: ToolDefinition = {
type: "function",
function: {
name: "co_mention_chunks",
description:
"Return chunks where two specific entities both appear. Use after entity_neighbors " +
"to inspect the actual passages connecting them.",
parameters: {
type: "object",
properties: {
a_class: { type: "string" },
a_id: { type: "string" },
b_class: { type: "string" },
b_id: { type: "string" },
limit: { type: "integer", description: "Default 20, max 100." },
},
required: ["a_class", "a_id", "b_class", "b_id"],
},
},
};
/**
 * Schema for the `analyze_image_region` tool (handled by
 * `handleAnalyzeImageRegion`). `doc_id`, `page`, `bbox` and `question` are
 * required; `context` is optional. `bbox` is an object with numeric
 * x / y / w / h, described to the model as normalized to 0..1.
 *
 * The cost hint in the description previously read "~$0.005$0.02" because the
 * range separator had been lost; it is now an explicit ASCII range.
 */
const analyze_image_region_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "analyze_image_region",
    description:
      "Vision tool — answer a question about a cropped region of a document page. " +
      "Use this when the user asks about a photograph, diagram, sketch, signature, " +
      "stamp, redaction, or any visual element where the chunk's text description " +
      "isn't enough. The model reads the actual pixels via Sonnet vision. " +
      "Get the bbox + page from a prior hybrid_search hit (each chunk carries bbox). " +
      "Cost: ~$0.005-$0.02 per call. Use sparingly; prefer hybrid_search first.",
    parameters: {
      type: "object",
      properties: {
        doc_id: { type: "string" },
        page: { type: "integer", description: "1-indexed page number" },
        bbox: {
          type: "object",
          description: "Normalized bbox (0..1) of the region to analyze.",
          properties: {
            x: { type: "number" },
            y: { type: "number" },
            w: { type: "number" },
            h: { type: "number" },
          },
          required: ["x", "y", "w", "h"],
        },
        question: { type: "string", description: "What you want to know about the image." },
        context: { type: "string", description: "Optional: prose context that grounds the model." },
      },
      required: ["doc_id", "page", "bbox", "question"],
    },
  },
};
/**
 * Schema for the `navigate_to` tool (handled by `handleNavigate`).
 * Both `target` and `label` are required. The handler rejects targets that do
 * not start with "/" and truncates the label to 40 characters.
 */
const navigate_to_tool: ToolDefinition = {
type: "function",
function: {
name: "navigate_to",
description:
"Offer the user a clickable button to navigate the main UI to a specific " +
"doc, page, or chunk anchor. Target examples: '/d/<doc-id>', '/d/<doc-id>/p007', " +
"'/d/<doc-id>/p007#c0042'. Frontend renders the button — does NOT auto-redirect.",
parameters: {
type: "object",
properties: {
target: { type: "string" },
label: { type: "string", description: "Short button text (max 40 chars)." },
},
required: ["target", "label"],
},
},
};
/**
 * All thirteen tool schemas declared in this file, exported as one list.
 * Each entry's `function.name` has a matching key in TOOL_HANDLERS.
 * NOTE(review): presumably this is the list sent to the model as-is, so the
 * order here is the order the model sees — confirm at the call site.
 */
export const TOOL_DEFINITIONS: ToolDefinition[] = [
hybrid_search_tool,
read_chunk_tool,
get_page_chunks_tool,
list_anomalies_tool,
entity_neighbors_tool,
entity_path_tool,
co_mention_chunks_tool,
read_page_tool,
read_document_tool,
read_entity_tool,
search_corpus_tool,
analyze_image_region_tool,
navigate_to_tool,
];
/* ─── Helpers ───────────────────────────────────────────────────────────── */
/**
 * Resolve the language for one tool call.
 *
 * An explicit `override` of exactly "pt" or "en" wins; any other override
 * value is ignored. Otherwise the context language is used, with "pt" as the
 * fallback when the context does not say "en".
 */
function pickLang(ctx: ToolHandlerContext, override?: unknown): "pt" | "en" {
  if (override === "pt") return "pt";
  if (override === "en") return "en";
  if (ctx.lang === "en") return "en";
  return "pt";
}
/**
 * Reduce a search hit to the compact record returned to the model.
 *
 * The snippet is the first 300 characters of the hit's text in the requested
 * language (empty string when that text is missing). The score prefers
 * `rerank_score` over `score` and is rounded to four decimals. `href` links to
 * the chunk anchor inside its document.
 */
function compactHit(h: ChunkHit, lang: "pt" | "en") {
  const body = (lang === "en" ? h.content_en : h.content_pt) || "";
  const rawScore = h.rerank_score ?? h.score;
  const { chunk_id, doc_id, page, type, classification, bbox } = h;
  return {
    chunk_id,
    doc_id,
    page,
    type,
    classification,
    bbox,
    snippet: body.slice(0, 300),
    score: Number(rawScore.toFixed(4)),
    href: `/d/${doc_id}#${chunk_id}`,
  };
}
/**
 * Build a short, single-line excerpt of `text` around the query.
 *
 * The first whitespace-separated query word of at least three characters is
 * searched case-insensitively. When found, the excerpt starts up to 60
 * characters before the match, otherwise at the beginning of the text. The
 * excerpt spans `len` characters before whitespace runs are collapsed to
 * single spaces and the ends are trimmed.
 */
function snippet(text: string, query: string, len = 200): string {
  const needle = query.toLowerCase().split(/\s+/).find((word) => word.length >= 3) ?? "";
  let start = 0;
  if (needle) {
    const at = text.toLowerCase().indexOf(needle);
    if (at >= 0) start = Math.max(0, at - 60);
  }
  const excerpt = text.slice(start, start + len);
  return excerpt.replace(/\s+/g, " ").trim();
}
/* ─── Tool handlers ─────────────────────────────────────────────────────── */
/**
 * Handler for the `hybrid_search` tool.
 *
 * Validates the model-supplied arguments, runs `hybridSearch`, emits inline
 * artifacts for the first six hits when the context provides `emitArtifact`,
 * and returns the hits in compact form.
 *
 * Argument handling:
 * - `query` is required; an empty query returns `{ error: "empty_query" }`.
 * - `top_k` defaults to 20 and is truncated and clamped to the range 1..50
 *   (previously a negative or fractional value was passed straight through).
 * - `doc_id`, `type` and `classification` are used only when they are
 *   non-empty strings; `doc_id` falls back to the context's document.
 * - `ufo_only` is true only for boolean `true` or the string "true", so the
 *   string "false" no longer enables the filter.
 *
 * @returns `{ query, lang, count, hits }` on success, or
 *   `{ error: "retrieval_unavailable", message, fallback }` when the search throws.
 */
async function handleHybridSearch(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const query = String(args.query ?? "").trim();
  if (!query) return { error: "empty_query", hits: [] };

  // Arguments come from the model and are untyped at runtime.
  const optionalString = (value: unknown): string | null =>
    typeof value === "string" && value.trim() !== "" ? value.trim() : null;

  const lang = pickLang(ctx, args.lang);
  const top_k = Math.max(1, Math.min(Math.trunc(Number(args.top_k) || 20), 50));
  try {
    const hits = await hybridSearch({
      query,
      lang,
      doc_id: optionalString(args.doc_id) ?? (ctx.doc_id || null),
      type: optionalString(args.type),
      classification: optionalString(args.classification),
      ufo_only: args.ufo_only === true || args.ufo_only === "true",
      top_k,
      // W2-TD#8: chat is latency-sensitive — skip rerank when ≤10 candidates.
      // The model only cites the first few hits anyway and BGE-Reranker
      // adds 5-8s on CPU. RRF order from the RPC is plenty for the head.
      rerank_strategy: "when_top_k_gt",
      rerank_threshold: 10,
    });
    // Emit one citation (+ optional crop_image) artifact per hit so the UI can
    // render inline cards next to the assistant text. Limit to top 6 to avoid
    // flooding the chat with crops when top_k is large.
    if (ctx.emitArtifact) {
      for (const h of hits.slice(0, 6)) {
        ctx.emitArtifact({
          kind: "citation",
          chunk_id: h.chunk_id,
          doc_id: h.doc_id,
          page: h.page,
          type: h.type,
          classification: h.classification,
          bbox: h.bbox ?? null,
          snippet: ((lang === "en" ? h.content_en : h.content_pt) || "").slice(0, 300),
          score: Number((h.rerank_score ?? h.score).toFixed(4)),
        });
        // A crop only makes sense for a region with a positive area.
        if (h.bbox && h.bbox.w > 0 && h.bbox.h > 0) {
          const bb = h.bbox;
          const src =
            `/api/crop?doc=${encodeURIComponent(h.doc_id)}` +
            `&page=${h.page}&x=${bb.x}&y=${bb.y}&w=${bb.w}&h=${bb.h}&w_px=640`;
          ctx.emitArtifact({
            kind: "crop_image",
            src,
            doc_id: h.doc_id,
            page: h.page,
            chunk_id: h.chunk_id,
            alt_en: (h.content_en || h.chunk_id).slice(0, 120),
            alt_pt: (h.content_pt || h.chunk_id).slice(0, 120),
          });
        }
      }
    }
    return { query, lang, count: hits.length, hits: hits.map((h) => compactHit(h, lang)) };
  } catch (e) {
    return {
      error: "retrieval_unavailable",
      // Anything can be thrown; do not assume an Error instance.
      message: e instanceof Error ? e.message : String(e),
      fallback: "use search_corpus (legacy keyword)",
    };
  }
}
/**
 * Handler for the `read_chunk` tool: fetch one chunk and return its full text
 * in both languages together with its location metadata.
 *
 * @returns the chunk record with an `href` to its anchor, or one of
 *   `{ error: "missing_args" }`, `{ error: "not_found", doc_id, chunk_id }`,
 *   `{ error: "retrieval_unavailable", message }`.
 */
async function handleReadChunk(args: Record<string, unknown>): Promise<unknown> {
  const asTrimmed = (value: unknown): string => String(value ?? "").trim();
  const doc_id = asTrimmed(args.doc_id);
  const chunk_id = asTrimmed(args.chunk_id);
  if (doc_id === "" || chunk_id === "") {
    return { error: "missing_args" };
  }
  try {
    const found = await getChunk(doc_id, chunk_id);
    if (!found) {
      return { error: "not_found", doc_id, chunk_id };
    }
    const { page, type, bbox, classification, content_en, content_pt } = found;
    return {
      chunk_id: found.chunk_id,
      doc_id: found.doc_id,
      page,
      type,
      bbox,
      classification,
      content_en,
      content_pt,
      href: `/d/${found.doc_id}#${found.chunk_id}`,
    };
  } catch (e) {
    return { error: "retrieval_unavailable", message: (e as Error).message };
  }
}
/**
 * Handler for the `get_page_chunks` tool: return every chunk of one page,
 * with each language's text truncated to 500 characters.
 *
 * `page` must be a positive integer, given as a number or a numeric string.
 * Fractional pages and values that only coerce to a number (booleans,
 * arrays, null) are rejected instead of being sent to the database.
 *
 * @returns `{ doc_id, page, count, chunks }`, or `{ error: "bad_args" }`,
 *   or `{ error: "retrieval_unavailable", message }` when the lookup throws.
 */
async function handleGetPageChunks(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const rawPage = args.page;
  const page =
    typeof rawPage === "number"
      ? rawPage
      : typeof rawPage === "string" && rawPage.trim() !== ""
        ? Number(rawPage)
        : NaN;
  if (!doc_id || !Number.isInteger(page) || page < 1) return { error: "bad_args" };
  try {
    const chunks = await getPageChunks(doc_id, page);
    return {
      doc_id,
      page,
      count: chunks.length,
      chunks: chunks.map((c) => ({
        chunk_id: c.chunk_id,
        type: c.type,
        bbox: c.bbox,
        classification: c.classification,
        content_en: (c.content_en || "").slice(0, 500),
        content_pt: (c.content_pt || "").slice(0, 500),
      })),
    };
  } catch (e) {
    // Anything can be thrown; do not assume an Error instance.
    return { error: "retrieval_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}
/**
 * Handler for the `list_anomalies` tool.
 *
 * - `kind` is "cryptid" only when explicitly requested, otherwise "ufo".
 * - `doc_id` is used only when it is a non-empty string; otherwise the
 *   context's document (if any) scopes the listing.
 * - `limit` defaults to 50 and is truncated and clamped to 1..200
 *   (previously a negative value was passed straight through).
 *
 * @returns `{ kind, doc_id, count, anomalies }`, or
 *   `{ error: "retrieval_unavailable", message }` when the lookup throws.
 */
async function handleListAnomalies(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const kind = args.kind === "cryptid" ? "cryptid" : "ufo";
  const requestedDoc = typeof args.doc_id === "string" ? args.doc_id.trim() : "";
  const doc_id = requestedDoc || ctx.doc_id || null;
  const limit = Math.max(1, Math.min(Math.trunc(Number(args.limit) || 50), 200));
  try {
    const rows = await listAnomalies({ kind, doc_id, limit });
    return { kind, doc_id, count: rows.length, anomalies: rows };
  } catch (e) {
    // Anything can be thrown; do not assume an Error instance.
    return { error: "retrieval_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}
/**
 * Handler for the legacy `search_corpus` tool: a case-insensitive substring
 * search over documents (id, canonical title, first 2000 characters of the
 * body) and entity file names, returning at most eight hits in total.
 *
 * Changes from the earlier implementation:
 * - Entity hits report the singular class key used by `read_entity`
 *   ("person", "uap_object", ...). The old `replace(/s$/, "")` produced
 *   "people" and "uap-object", which are not valid class keys.
 * - An unreadable entity file no longer hides the remaining matches in the
 *   same folder; its title falls back to the id.
 * - A scope other than "documents" or "entities" is treated as "all".
 *
 * @returns `{ query, scope, hits }`, or `{ error: "empty_query", hits: [] }`.
 */
async function handleSearch(args: Record<string, unknown>): Promise<unknown> {
  const MAX_HITS = 8;
  // Entity folder on disk → class key accepted by the read_entity tool.
  const ENTITY_FOLDERS: ReadonlyArray<readonly [string, string]> = [
    ["people", "person"],
    ["organizations", "organization"],
    ["locations", "location"],
    ["events", "event"],
    ["uap-objects", "uap_object"],
    ["vehicles", "vehicle"],
    ["operations", "operation"],
    ["concepts", "concept"],
  ];

  const query = String(args.query ?? "").trim();
  if (!query) return { error: "empty_query", hits: [] };
  const scope = args.scope === "documents" || args.scope === "entities" ? args.scope : "all";
  const ql = query.toLowerCase();
  const hits: Array<{ type: string; id: string; title: string; snippet: string; href: string }> = [];

  if (scope !== "entities") {
    for (const id of await listDocuments()) {
      if (hits.length >= MAX_HITS) break;
      const f = await readDocument(id);
      if (!f) continue;
      const title = String(f.fm.canonical_title ?? id);
      const hay = `${id} ${title} ${f.body.slice(0, 2000)}`.toLowerCase();
      if (hay.includes(ql)) {
        hits.push({ type: "document", id, title, snippet: snippet(f.body, query), href: `/d/${id}` });
      }
    }
  }

  if (scope !== "documents") {
    for (const [folder, classKey] of ENTITY_FOLDERS) {
      if (hits.length >= MAX_HITS) break;
      const dir = path.join(WIKI, "entities", folder);
      let files: string[];
      try {
        files = await fs.readdir(dir);
      } catch {
        continue; // dir missing — fine
      }
      for (const file of files) {
        if (hits.length >= MAX_HITS) break;
        if (!file.endsWith(".md")) continue;
        const id = file.replace(/\.md$/, "");
        if (!id.toLowerCase().includes(ql)) continue;
        let title = id;
        try {
          const content = await fs.readFile(path.join(dir, file), "utf-8");
          title = content.match(/canonical_name:\s*([^\n]+)/)?.[1]?.trim() ?? id;
        } catch {
          /* unreadable file — keep the id as title */
        }
        hits.push({ type: classKey, id, title, snippet: id, href: `/e/${folder}/${id}` });
      }
    }
  }
  return { query, scope, hits };
}
/**
 * Handler for the legacy `read_page` tool: return the wiki page record's
 * front-matter fields plus the first 2000 characters of its body.
 *
 * `page` may be given as a bare number ("7", 7) or with a "p" prefix and any
 * zero padding ("p7", "p007", "p1000"); it is normalized to the "pNNN" form
 * (at least three digits). Previously "p7" and "p1000" were rejected although
 * "7" and "1000" worked, and negative numbers produced ids such as "p-3".
 *
 * @returns the page record, or `{ error: "missing_args" }` when `doc_id` is
 *   empty, `{ error: "bad_page" }` when `page` cannot be parsed, or
 *   `{ error: "not_found", doc_id, page }`.
 */
async function handleReadPage(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  if (!doc_id) return { error: "missing_args" };

  // Optional "p" prefix followed by digits only; the length cap keeps the
  // number out of exponent notation when it is formatted back to a string.
  const parsed = /^p?(\d{1,9})$/i.exec(String(args.page ?? "").trim());
  if (!parsed) return { error: "bad_page" };
  const page = `p${String(Number(parsed[1])).padStart(3, "0")}`;

  const md = await readPage(doc_id, page);
  if (!md) return { error: "not_found", doc_id, page };
  const redactions = md.fm.redactions;
  return {
    doc_id,
    page,
    page_type: md.fm.page_type,
    language: md.fm.language_detected,
    content_classification: md.fm.content_classification,
    redactions_count: Array.isArray(redactions) ? redactions.length : 0,
    vision_description: md.fm.vision_description,
    vision_description_pt_br: md.fm.vision_description_pt_br,
    entities_extracted: md.fm.entities_extracted,
    body_excerpt: md.body.slice(0, 2000),
  };
}
/**
 * Handler for the `read_document` tool: return a document overview built from
 * its front matter, the first 20 entries of its page list, and the first
 * 2000 characters of its body (as `executive_summary`).
 *
 * @returns the overview, or `{ error: "not_found", doc_id }` when the
 *   document cannot be read.
 */
async function handleReadDocument(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const record = await readDocument(doc_id);
  if (!record) {
    return { error: "not_found", doc_id };
  }
  const pageList = await listPages(doc_id);
  const { fm, body } = record;
  return {
    doc_id,
    canonical_title: fm.canonical_title,
    collection: fm.collection,
    document_class: fm.document_class,
    page_count: pageList.length,
    pages_index: pageList.slice(0, 20),
    content_classification: fm.content_classification,
    languages_detected: fm.languages_detected,
    key_entities: fm.key_entities,
    executive_summary: body.slice(0, 2000),
  };
}
/**
 * Handler for the `read_entity` tool: resolve the class key to its folder,
 * read the entity record, and return its front-matter fields plus the first
 * 2000 characters of its body. The returned `class` is the resolved folder.
 *
 * @returns the entity record, or `{ error: "bad_class", cls }` when the class
 *   key has no folder, or `{ error: "not_found", cls, id }`.
 */
async function handleReadEntity(args: Record<string, unknown>): Promise<unknown> {
  const asTrimmed = (value: unknown): string => String(value ?? "").trim();
  const cls = asTrimmed(args.class);
  const id = asTrimmed(args.id);

  const folder = classKeyToFolder(cls);
  if (!folder) {
    return { error: "bad_class", cls };
  }

  const entity = await readEntity(folder, id);
  if (!entity) {
    return { error: "not_found", cls, id };
  }

  const { fm, body } = entity;
  return {
    class: folder,
    id,
    canonical_name: fm.canonical_name,
    aliases: fm.aliases,
    total_mentions: fm.total_mentions,
    enrichment_status: fm.enrichment_status,
    external_sources: fm.external_sources,
    disambiguation_note: fm.disambiguation_note,
    body_excerpt: body.slice(0, 2000),
  };
}
/**
 * Handler for the `entity_neighbors` tool: resolve an entity and list the
 * entities connected to it.
 *
 * - `filter_classes` accepts an array (non-string and empty entries are
 *   dropped) or a single non-empty string. Previously any non-array value
 *   threw inside the try block and was misreported as `graph_unavailable`.
 * - `limit` defaults to 30 and is truncated and clamped to 1..100
 *   (previously a negative value was passed straight through).
 *
 * @returns `{ entity, count, neighbors }`, or `{ error: "missing_args" }`,
 *   `{ error: "entity_not_found", class, id }`,
 *   `{ error: "graph_unavailable", message }`.
 */
async function handleEntityNeighbors(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();
  if (!cls || !id) return { error: "missing_args" };

  const rawFilter = args.filter_classes;
  let filterClasses: string[] | undefined;
  if (Array.isArray(rawFilter)) {
    filterClasses = rawFilter.filter((c): c is string => typeof c === "string" && c !== "");
  } else if (typeof rawFilter === "string" && rawFilter.trim() !== "") {
    filterClasses = [rawFilter.trim()];
  }
  const limit = Math.max(1, Math.min(Math.trunc(Number(args.limit) || 30), 100));

  try {
    const ent = await findEntity(cls, id);
    if (!ent) return { error: "entity_not_found", class: cls, id };
    const neighbors = await getNeighbors(ent.entity_pk, { limit, classes: filterClasses });
    return { entity: ent, count: neighbors.length, neighbors };
  } catch (e) {
    // Anything can be thrown; do not assume an Error instance.
    return { error: "graph_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}
/**
 * Handler for the `entity_path` tool: resolve both endpoints and look up the
 * paths between them.
 *
 * `max_hops` defaults to 3 and is truncated and clamped to the 1..4 range the
 * tool schema advertises (previously only the upper bound was enforced, so a
 * negative or fractional value reached the path query).
 *
 * @returns `{ from, to, max_hops, paths }`, or `{ error: "missing_args" }`,
 *   `{ error: "from_not_found" | "to_not_found", class, id }`,
 *   `{ error: "graph_unavailable", message }`.
 */
async function handleEntityPath(args: Record<string, unknown>): Promise<unknown> {
  const fromCls = String(args.from_class ?? "").trim();
  const fromId = String(args.from_id ?? "").trim();
  const toCls = String(args.to_class ?? "").trim();
  const toId = String(args.to_id ?? "").trim();
  const maxHops = Math.max(1, Math.min(Math.trunc(Number(args.max_hops) || 3), 4));
  if (!fromCls || !fromId || !toCls || !toId) return { error: "missing_args" };
  try {
    // The two lookups are independent, so run them concurrently.
    const [a, b] = await Promise.all([findEntity(fromCls, fromId), findEntity(toCls, toId)]);
    if (!a) return { error: "from_not_found", class: fromCls, id: fromId };
    if (!b) return { error: "to_not_found", class: toCls, id: toId };
    const paths = await findPaths(a.entity_pk, b.entity_pk, maxHops);
    return { from: a, to: b, max_hops: maxHops, paths };
  } catch (e) {
    // Anything can be thrown; do not assume an Error instance.
    return { error: "graph_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}
/**
 * Handler for the `co_mention_chunks` tool: resolve two entities and return
 * the chunks that mention both.
 *
 * - `limit` defaults to 20 and is truncated and clamped to 1..100
 *   (previously a negative value was passed straight through).
 * - The `entity_not_found` result keeps its original `a` / `b` fields and now
 *   also carries `missing`, naming which side(s) could not be resolved.
 *
 * @returns `{ a, b, count, chunks }`, or `{ error: "missing_args" }`,
 *   `{ error: "entity_not_found", a, b, missing }`,
 *   `{ error: "graph_unavailable", message }`.
 */
async function handleCoMentionChunks(args: Record<string, unknown>): Promise<unknown> {
  const aCls = String(args.a_class ?? "").trim();
  const aId = String(args.a_id ?? "").trim();
  const bCls = String(args.b_class ?? "").trim();
  const bId = String(args.b_id ?? "").trim();
  const limit = Math.max(1, Math.min(Math.trunc(Number(args.limit) || 20), 100));
  if (!aCls || !aId || !bCls || !bId) return { error: "missing_args" };
  try {
    // The two lookups are independent, so run them concurrently.
    const [a, b] = await Promise.all([findEntity(aCls, aId), findEntity(bCls, bId)]);
    if (!a || !b) {
      const missing = [...(a ? [] : ["a"]), ...(b ? [] : ["b"])];
      return { error: "entity_not_found", a: aId, b: bId, missing };
    }
    const chunks = await getCoMentionChunks(a.entity_pk, b.entity_pk, limit);
    return { a, b, count: chunks.length, chunks };
  } catch (e) {
    // Anything can be thrown; do not assume an Error instance.
    return { error: "graph_unavailable", message: e instanceof Error ? e.message : String(e) };
  }
}
/**
 * Handler for the `navigate_to` tool: validate a navigation target and echo
 * it back with a label capped at 40 characters, for the frontend to render.
 *
 * Only app-relative paths are accepted. Starting with "/" is not sufficient:
 * browsers resolve "//host/…" as a protocol-relative URL, treat "\" like "/"
 * in http(s) URLs, and strip tabs and newlines before parsing. Such targets
 * would send the user off-site, so they are rejected. The target is chosen by
 * the model and may be influenced by document content.
 *
 * @returns `{ ok: true, target, label }`, or
 *   `{ error: "target_must_start_with_slash", target }`, or
 *   `{ error: "target_must_be_app_relative", target }`.
 */
async function handleNavigate(args: Record<string, unknown>): Promise<unknown> {
  const target = String(args.target ?? "").trim();
  const label = String(args.label ?? "").slice(0, 40);
  if (!target.startsWith("/")) return { error: "target_must_start_with_slash", target };

  const second = target.charAt(1);
  const hasControlChar = Array.from(target).some((ch) => {
    const code = ch.charCodeAt(0);
    return code < 0x20 || code === 0x7f;
  });
  if (second === "/" || second === "\\" || hasControlChar) {
    return { error: "target_must_be_app_relative", target };
  }
  return { ok: true, target, label };
}
/**
 * Handler for the `analyze_image_region` tool: validate the request, run the
 * vision analysis on the requested page region, and emit a `crop_image`
 * artifact when the context provides `emitArtifact`.
 *
 * Validation (new): `page` must be a positive integer and `bbox` must be an
 * object whose x / y / w / h are finite numbers (numeric strings accepted)
 * with x, y ≥ 0 and w, h > 0. Previously a negative or fractional page, or a
 * malformed bbox, reached the vision pipeline and surfaced as an opaque
 * `vision_failed`. Only the four validated fields are forwarded.
 *
 * The vision module is imported lazily, on the first call that needs it.
 *
 * @returns the analysis result, or `{ error: "missing_args" }`,
 *   `{ error: "bad_args", detail }`, `{ error: "vision_failed", message }`.
 */
async function handleAnalyzeImageRegion(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  const rawBox = args.bbox;
  const question = String(args.question ?? "").trim();
  if (!doc_id || !page || !rawBox || !question) return { error: "missing_args" };
  if (!Number.isInteger(page) || page < 1) {
    return { error: "bad_args", detail: "page must be a positive integer" };
  }

  // Accept numbers and numeric strings only; everything else becomes NaN.
  const toNumber = (value: unknown): number =>
    typeof value === "number"
      ? value
      : typeof value === "string" && value.trim() !== ""
        ? Number(value)
        : NaN;
  const box = typeof rawBox === "object" ? (rawBox as Record<string, unknown>) : {};
  const bbox = { x: toNumber(box.x), y: toNumber(box.y), w: toNumber(box.w), h: toNumber(box.h) };
  const bboxIsValid =
    Object.values(bbox).every((v) => Number.isFinite(v)) &&
    bbox.x >= 0 &&
    bbox.y >= 0 &&
    bbox.w > 0 &&
    bbox.h > 0;
  if (!bboxIsValid) {
    return { error: "bad_args", detail: "bbox needs finite x, y >= 0 and w, h > 0" };
  }

  try {
    const { analyzeImageRegion } = await import("./vision");
    const out = await analyzeImageRegion({
      doc_id,
      page,
      bbox,
      question,
      context: typeof args.context === "string" ? args.context : undefined,
      lang: ctx.lang === "en" ? "en" : "pt",
    });
    if (ctx.emitArtifact) {
      ctx.emitArtifact({
        kind: "crop_image",
        src: out.crop_url,
        doc_id,
        page,
        alt_en: question.slice(0, 120),
        alt_pt: question.slice(0, 120),
      });
    }
    return out;
  } catch (e) {
    // Anything can be thrown; do not assume an Error instance.
    return { error: "vision_failed", message: e instanceof Error ? e.message : String(e) };
  }
}
/**
 * Lookup table from tool name to its local handler. The keys are the same
 * strings used as `function.name` in TOOL_DEFINITIONS, one handler per tool.
 * NOTE(review): this is a plain object, so inherited keys such as
 * "constructor" also resolve to a value — callers looking up a model-supplied
 * name should presumably check for an own property; confirm at the call site.
 */
export const TOOL_HANDLERS: Record<string, ToolHandler> = {
hybrid_search: handleHybridSearch,
read_chunk: handleReadChunk,
get_page_chunks: handleGetPageChunks,
list_anomalies: handleListAnomalies,
entity_neighbors: handleEntityNeighbors,
entity_path: handleEntityPath,
co_mention_chunks: handleCoMentionChunks,
read_page: handleReadPage,
read_document: handleReadDocument,
read_entity: handleReadEntity,
search_corpus: handleSearch,
analyze_image_region: handleAnalyzeImageRegion,
navigate_to: handleNavigate,
};