disclosure-bureau/web/lib/chat/tools.ts

701 lines
24 KiB
TypeScript
Raw Normal View History

/**
* Sherlock's tool kit — OpenAI-style function-calling schema + local handlers.
*
* Each tool has:
* - definition: JSON Schema sent to the model
* - handler: Node function that runs locally and returns a JSON-serializable result
*
* Tools called by the model trigger AG-UI events streamed to the frontend
* (tool_start, tool_result, navigate). The frontend renders these inline in
* the message AND, for `navigate_to`, can offer a clickable button to scroll
* the UI to a target page.
*
* Retrieval stack (chunks-aware):
* - hybrid_search — BM25 + dense (BGE-M3) + RRF + BGE-Reranker rerank
* - read_chunk — fetch a single chunk by chunk_id (cite-then-quote)
* - list_anomalies — all UFO/cryptid-flagged chunks (cheap, no LLM)
* - get_page_chunks — assemble one page from chunks
* Wiki-aware fallbacks (when the DB is not available or richer entity data is needed):
* - read_page, read_document, read_entity, search_corpus (legacy grep)
* - navigate_to — emit a clickable button to scroll the UI
*/
import fs from "node:fs/promises";
import path from "node:path";
import {
WIKI,
readDocument,
readPage,
readEntity,
listDocuments,
listPages,
classKeyToFolder,
} from "../wiki";
import {
hybridSearch,
getChunk,
listAnomalies,
getPageChunks,
type ChunkHit,
} from "../retrieval/hybrid";
import {
findEntity,
getNeighbors,
findPaths,
getCoMentionChunks,
} from "../retrieval/graph";
/**
 * Shape of one tool schema entry in the function-calling tool list.
 *
 * The outer object is a fixed `"function"` wrapper; everything the model
 * needs to decide when and how to call the tool lives under `function`.
 */
export interface ToolDefinition {
  /** Wrapper discriminator; `"function"` is the only value allowed here. */
  type: "function";
  /** The advertised callable. */
  function: {
    /** Tool identifier the model uses when requesting a call. */
    name: string;
    /** Natural-language guidance describing what the tool does. */
    description: string;
    /** Schema object describing the accepted arguments. */
    parameters: Record<string, unknown>;
  };
}
/**
 * Per-call context handed to every tool handler alongside the model-supplied
 * arguments. All members are optional.
 */
export interface ToolHandlerContext {
  /** Currently-viewed document, if any, to bias search. */
  doc_id?: string | null;
  /** Currently-viewed page, if any. */
  page_id?: string | null;
  /** UI language preference (pt | en). */
  lang?: "pt" | "en";
  /**
   * Optional sink for inline AG-UI artifacts (citations, crops, entity cards).
   * When provided, tools may push typed artifacts that the UI renders inline
   * alongside the tool block. Safe to leave undefined for non-streaming callers.
   */
  emitArtifact?: (artifact: import("./agui").Artifact) => void;
}
/**
 * Call signature shared by all local tool handlers.
 *
 * A handler receives the raw argument bag and the per-call context, and
 * resolves to the tool result.
 */
export interface ToolHandler {
  (
    args: Record<string, unknown>,
    ctx: ToolHandlerContext,
  ): Promise<unknown>;
}
/* ─── Tool defs ─────────────────────────────────────────────────────────── */
/** Schema advertised to the model for the `hybrid_search` tool. */
const hybrid_search_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "hybrid_search",
    // One array entry per sentence; joined with single spaces.
    description: [
      "PRIMARY semantic search over the entire UAP/UFO corpus chunks.",
      "Combines BM25 keyword recall + BGE-M3 dense embeddings + cross-encoder rerank.",
      "Returns up to top_k chunks with chunk_id, doc_id, page, bbox, text snippets, classification, and relevance score.",
      "Use this for any question about content.",
      "Filter with doc_id to scope to one document; type to restrict chunk type (paragraph, heading, stamp, etc.); ufo_only=true to retrieve only anomaly-flagged chunks.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        query: {
          type: "string",
          description: "Natural language query, PT or EN.",
        },
        lang: {
          type: "string",
          enum: ["pt", "en"],
          description: "Search language (default pt).",
        },
        doc_id: {
          type: "string",
          description: "Optional: restrict to one document.",
        },
        type: {
          type: "string",
          description: [
            "Optional chunk-type filter: paragraph, heading, table_marker, image, stamp, signature,",
            "address_block, classification_marking, redaction, footer, marginalia, form_field.",
          ].join(" "),
        },
        classification: {
          type: "string",
          description: "Optional: SECRET, CONFIDENTIAL, RESTRICTED, NOFORN.",
        },
        ufo_only: {
          type: "boolean",
          description: "Only chunks flagged with UFO anomaly.",
        },
        top_k: {
          type: "integer",
          description: "Number of final results (default 20, max 50).",
        },
      },
      required: ["query"],
    },
  },
};
/** Schema advertised to the model for the `read_chunk` tool. */
const read_chunk_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_chunk",
    description: [
      "Read ONE chunk in full (verbatim text EN+PT, full bbox, metadata, anomaly flags).",
      "Use AFTER hybrid_search to expand a citation before quoting the user.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        doc_id: {
          type: "string",
        },
        chunk_id: {
          type: "string",
          description: "e.g. 'c0042'",
        },
      },
      required: ["doc_id", "chunk_id"],
    },
  },
};
/** Schema advertised to the model for the `get_page_chunks` tool. */
const get_page_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "get_page_chunks",
    description: [
      "Get all chunks of one page in reading order.",
      "Use to reconstruct a page or to answer 'what's on page N of doc X' questions with full structure.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        doc_id: {
          type: "string",
        },
        page: {
          type: "integer",
          description: "Page number (1-indexed).",
        },
      },
      required: ["doc_id", "page"],
    },
  },
};
/** Schema advertised to the model for the `list_anomalies` tool. */
const list_anomalies_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "list_anomalies",
    description: [
      "List all chunks flagged with a UFO or cryptid anomaly.",
      "Cheap query (no embedding).",
      "Use for 'show me all sightings', 'all spherical objects', 'cryptid encounters'.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        kind: {
          type: "string",
          enum: ["ufo", "cryptid"],
        },
        doc_id: {
          type: "string",
          description: "Optional: restrict to one doc.",
        },
        limit: {
          type: "integer",
          description: "Max results (default 50).",
        },
      },
      required: ["kind"],
    },
  },
};
/** Schema advertised to the model for the `read_page` tool (legacy wiki page record). */
const read_page_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_page",
    description: [
      "Read the legacy wiki page record for context (vision_description, entities_extracted, content_classification).",
      "Useful WHEN the doc isn't in the new chunk index yet OR you need page-level vision metadata.",
      "Prefer hybrid_search + read_chunk for content questions.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        doc_id: {
          type: "string",
        },
        page: {
          type: "string",
          description: "e.g. 'p007' or '7'.",
        },
      },
      required: ["doc_id", "page"],
    },
  },
};
/** Schema advertised to the model for the `read_document` tool. */
const read_document_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_document",
    description: [
      "Get the consolidated overview of a document — summary, page index,",
      "content_classification, key entities.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        doc_id: {
          type: "string",
        },
      },
      required: ["doc_id"],
    },
  },
};
/** Schema advertised to the model for the `read_entity` tool. */
const read_entity_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "read_entity",
    description: [
      "Read the detail of an entity (person, organization, location, event,",
      "uap_object, vehicle, operation, concept) including enrichment from WebSearch.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: ["person", "organization", "location", "event", "uap_object", "vehicle", "operation", "concept"],
        },
        id: {
          type: "string",
          description: "kebab-case id, e.g. 'j-edgar-hoover'.",
        },
      },
      required: ["class", "id"],
    },
  },
};
/** Schema advertised to the model for the legacy `search_corpus` tool. */
const search_corpus_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "search_corpus",
    description: [
      "Legacy keyword-only search over document IDs, titles, and entity IDs.",
      "Prefer hybrid_search for content questions.",
      "Use this only to find entities/docs by name.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        query: {
          type: "string",
        },
        scope: {
          type: "string",
          enum: ["all", "documents", "entities"],
        },
      },
      required: ["query"],
    },
  },
};
/** Schema advertised to the model for the `entity_neighbors` graph tool. */
const entity_neighbors_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_neighbors",
    description: [
      "List entities co-mentioned with a given entity in the corpus chunks.",
      "Use to answer 'who/what is connected to X' questions.",
      "Returns up to limit neighbors sorted by edge weight (number of shared chunks).",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        class: {
          type: "string",
          enum: [
            "person",
            "organization",
            "location",
            "event",
            "uap_object",
            "vehicle",
            "operation",
            "concept",
          ],
        },
        id: {
          type: "string",
          description: "kebab-case id or canonical name.",
        },
        filter_classes: {
          type: "array",
          items: { type: "string" },
          description: "Optional: restrict neighbors to these entity classes.",
        },
        limit: {
          type: "integer",
          description: "Max neighbors (default 30, max 100).",
        },
      },
      required: ["class", "id"],
    },
  },
};
/** Schema advertised to the model for the `entity_path` graph tool. */
const entity_path_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "entity_path",
    description: [
      "Find paths between two entities via shared chunks (multi-hop).",
      "Useful for 'how is X connected to Y' or 'show the trail between Hoover and Project Sign'.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        from_class: {
          type: "string",
        },
        from_id: {
          type: "string",
        },
        to_class: {
          type: "string",
        },
        to_id: {
          type: "string",
        },
        max_hops: {
          type: "integer",
          description: "1-4 (default 3).",
        },
      },
      required: ["from_class", "from_id", "to_class", "to_id"],
    },
  },
};
/** Schema advertised to the model for the `co_mention_chunks` graph tool. */
const co_mention_chunks_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "co_mention_chunks",
    description: [
      "Return chunks where two specific entities both appear.",
      "Use after entity_neighbors to inspect the actual passages connecting them.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        a_class: {
          type: "string",
        },
        a_id: {
          type: "string",
        },
        b_class: {
          type: "string",
        },
        b_id: {
          type: "string",
        },
        limit: {
          type: "integer",
          description: "Default 20, max 100.",
        },
      },
      required: ["a_class", "a_id", "b_class", "b_id"],
    },
  },
};
/** Schema advertised to the model for the `navigate_to` tool. */
const navigate_to_tool: ToolDefinition = {
  type: "function",
  function: {
    name: "navigate_to",
    description: [
      "Offer the user a clickable button to navigate the main UI to a specific doc, page, or chunk anchor.",
      "Target examples: '/d/<doc-id>', '/d/<doc-id>/p007', '/d/<doc-id>/p007#c0042'.",
      "Frontend renders the button — does NOT auto-redirect.",
    ].join(" "),
    parameters: {
      type: "object",
      properties: {
        target: {
          type: "string",
        },
        label: {
          type: "string",
          description: "Short button text (max 40 chars).",
        },
      },
      required: ["target", "label"],
    },
  },
};
/**
 * Ordered list of every tool schema defined in this module.
 * The entries are the twelve `*_tool` constants declared above.
 */
export const TOOL_DEFINITIONS: ToolDefinition[] = [
// Chunk retrieval.
hybrid_search_tool,
read_chunk_tool,
get_page_chunks_tool,
list_anomalies_tool,
// Entity graph.
entity_neighbors_tool,
entity_path_tool,
co_mention_chunks_tool,
// Legacy wiki readers and keyword search.
read_page_tool,
read_document_tool,
read_entity_tool,
search_corpus_tool,
// UI navigation offer.
navigate_to_tool,
];
/* ─── Helpers ───────────────────────────────────────────────────────────── */
/**
 * Resolve the language to use for a tool call.
 *
 * @param ctx - Handler context; its `lang` is the fallback source.
 * @param override - Raw per-call value; honoured only when it is exactly "pt" or "en".
 * @returns The override when valid, otherwise "en" if `ctx.lang` is "en", otherwise "pt".
 */
function pickLang(ctx: ToolHandlerContext, override?: unknown): "pt" | "en" {
  if (override === "pt") return "pt";
  if (override === "en") return "en";
  if (ctx.lang === "en") return "en";
  return "pt";
}
/**
 * Project a retrieval hit down to the compact record returned to the model.
 *
 * @param h - Hit returned by the retriever.
 * @param lang - Selects which content field (`content_en` / `content_pt`) feeds the snippet.
 * @returns Identifying fields, a snippet capped at 300 characters, the score
 *   (rerank score when present, otherwise the base score) rounded to 4 decimals,
 *   and an href of the form `/d/<doc_id>#<chunk_id>`.
 */
function compactHit(h: ChunkHit, lang: "pt" | "en") {
  const { chunk_id, doc_id, page, type, classification, bbox } = h;
  const body = (lang === "en" ? h.content_en : h.content_pt) || "";
  const rawScore = h.rerank_score ?? h.score;
  return {
    chunk_id,
    doc_id,
    page,
    type,
    classification,
    bbox,
    snippet: body.slice(0, 300),
    score: Number(rawScore.toFixed(4)),
    href: `/d/${doc_id}#${chunk_id}`,
  };
}
/**
 * Cut a short excerpt out of `text`, positioned near the first query word.
 *
 * The anchor is the first whitespace-separated query word of at least three
 * characters, matched case-insensitively. When it is found the window starts
 * up to 60 characters before it; otherwise the window starts at 0.
 *
 * @param text - Source text.
 * @param query - Raw query string.
 * @param len - Window length in characters before whitespace collapsing (default 200).
 * @returns The window with whitespace runs collapsed to single spaces and trimmed.
 */
function snippet(text: string, query: string, len = 200): string {
  const haystack = text.toLowerCase();
  const needle = query
    .toLowerCase()
    .split(/\s+/)
    .find((word) => word.length >= 3);
  let from = 0;
  if (needle) {
    const at = haystack.indexOf(needle);
    if (at >= 0) from = Math.max(0, at - 60);
  }
  const windowText = text.slice(from, from + len);
  return windowText.replace(/\s+/g, " ").trim();
}
/* ─── Tool handlers ─────────────────────────────────────────────────────── */
/**
 * Handler for `hybrid_search`: runs the hybrid retriever and returns compact hits.
 *
 * When `ctx.emitArtifact` is set, also pushes one `citation` artifact per hit
 * (top 6 only) plus a `crop_image` artifact for hits whose bbox has a positive
 * width and height.
 *
 * @param args - Raw tool arguments: `query` (required), optional `lang`,
 *   `doc_id`, `type`, `classification`, `ufo_only`, `top_k`.
 * @param ctx - Handler context; supplies the fallback `doc_id`, the fallback
 *   language, and the optional artifact sink.
 * @returns `{ query, lang, count, hits }` on success;
 *   `{ error: "empty_query", hits: [] }` for a blank query;
 *   `{ error: "retrieval_unavailable", message, fallback }` when anything in
 *   the retrieval/emit path throws.
 */
async function handleHybridSearch(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const query = String(args.query ?? "").trim();
  if (!query) return { error: "empty_query", hits: [] };
  const lang = pickLang(ctx, args.lang);
  // Coerce to an integer in [1, 50]; anything missing, non-numeric, zero or
  // negative falls back to the documented default of 20.
  const requestedTopK = Math.trunc(Number(args.top_k));
  const top_k = requestedTopK >= 1 ? Math.min(requestedTopK, 50) : 20;
  try {
    const hits = await hybridSearch({
      query,
      lang,
      doc_id: (args.doc_id as string) || ctx.doc_id || null,
      type: (args.type as string) || null,
      classification: (args.classification as string) || null,
      ufo_only: Boolean(args.ufo_only),
      top_k,
    });
    // Emit one citation (+ optional crop_image) artifact per hit so the UI can
    // render inline cards next to the assistant text. Limit to top 6 to avoid
    // flooding the chat with crops when top_k is large.
    if (ctx.emitArtifact) {
      for (const h of hits.slice(0, 6)) {
        ctx.emitArtifact({
          kind: "citation",
          chunk_id: h.chunk_id,
          doc_id: h.doc_id,
          page: h.page,
          type: h.type,
          classification: h.classification,
          bbox: h.bbox ?? null,
          snippet: ((lang === "en" ? h.content_en : h.content_pt) || "").slice(0, 300),
          score: Number((h.rerank_score ?? h.score).toFixed(4)),
        });
        if (h.bbox && h.bbox.w > 0 && h.bbox.h > 0) {
          const bb = h.bbox;
          const src =
            `/api/crop?doc=${encodeURIComponent(h.doc_id)}` +
            `&page=${h.page}&x=${bb.x}&y=${bb.y}&w=${bb.w}&h=${bb.h}&w_px=640`;
          ctx.emitArtifact({
            kind: "crop_image",
            src,
            doc_id: h.doc_id,
            page: h.page,
            chunk_id: h.chunk_id,
            alt_en: (h.content_en || h.chunk_id).slice(0, 120),
            alt_pt: (h.content_pt || h.chunk_id).slice(0, 120),
          });
        }
      }
    }
    return { query, lang, count: hits.length, hits: hits.map((h) => compactHit(h, lang)) };
  } catch (e) {
    return {
      error: "retrieval_unavailable",
      // Non-Error throwables still produce a readable message.
      message: e instanceof Error ? e.message : String(e),
      fallback: "use search_corpus (legacy keyword)",
    };
  }
}
/**
 * Handler for `read_chunk`: fetch one chunk and return it in full.
 *
 * @param args - Raw tool arguments; `doc_id` and `chunk_id` are both required.
 * @returns The chunk's fields plus an href of the form `/d/<doc_id>#<chunk_id>`;
 *   `{ error: "missing_args" }` when either id is blank;
 *   `{ error: "not_found", doc_id, chunk_id }` when the lookup yields nothing;
 *   `{ error: "retrieval_unavailable", message }` when the lookup throws.
 */
async function handleReadChunk(args: Record<string, unknown>): Promise<unknown> {
  const asText = (value: unknown): string => String(value ?? "").trim();
  const doc_id = asText(args.doc_id);
  const chunk_id = asText(args.chunk_id);
  if (doc_id === "" || chunk_id === "") {
    return { error: "missing_args" };
  }
  try {
    const found = await getChunk(doc_id, chunk_id);
    if (!found) {
      return { error: "not_found", doc_id, chunk_id };
    }
    const { page, type, bbox, classification, content_en, content_pt } = found;
    return {
      chunk_id: found.chunk_id,
      doc_id: found.doc_id,
      page,
      type,
      bbox,
      classification,
      content_en,
      content_pt,
      href: `/d/${found.doc_id}#${found.chunk_id}`,
    };
  } catch (err) {
    return { error: "retrieval_unavailable", message: (err as Error).message };
  }
}
/**
 * Handler for `get_page_chunks`: return the chunks of one page.
 *
 * @param args - Raw tool arguments: `doc_id` (non-blank) and `page` (a finite number >= 1).
 * @returns `{ doc_id, page, count, chunks }` where each chunk carries its id,
 *   type, bbox, classification and both content fields capped at 500 characters;
 *   `{ error: "bad_args" }` on invalid input;
 *   `{ error: "retrieval_unavailable", message }` when the lookup throws.
 */
async function handleGetPageChunks(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const page = Number(args.page);
  const pageIsValid = Number.isFinite(page) && page >= 1;
  if (doc_id === "" || !pageIsValid) {
    return { error: "bad_args" };
  }
  try {
    const rows = await getPageChunks(doc_id, page);
    const chunks = rows.map(({ chunk_id, type, bbox, classification, content_en, content_pt }) => ({
      chunk_id,
      type,
      bbox,
      classification,
      content_en: (content_en || "").slice(0, 500),
      content_pt: (content_pt || "").slice(0, 500),
    }));
    return { doc_id, page, count: rows.length, chunks };
  } catch (err) {
    return { error: "retrieval_unavailable", message: (err as Error).message };
  }
}
/**
 * Handler for `list_anomalies`: list chunks flagged with a UFO or cryptid anomaly.
 *
 * @param args - Raw tool arguments: `kind` ("cryptid" selects cryptid; any
 *   other value selects "ufo"), optional `doc_id`, optional `limit`.
 * @param ctx - Handler context; `ctx.doc_id` is used when `args.doc_id` is absent.
 * @returns `{ kind, doc_id, count, anomalies }` on success;
 *   `{ error: "retrieval_unavailable", message }` when the lookup throws.
 */
async function handleListAnomalies(
  args: Record<string, unknown>,
  ctx: ToolHandlerContext,
): Promise<unknown> {
  const kind = (args.kind as string) === "cryptid" ? "cryptid" : "ufo";
  const doc_id = (args.doc_id as string) || ctx.doc_id || null;
  // Coerce to an integer in [1, 200]; missing, non-numeric, zero or negative
  // values fall back to the default of 50.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit >= 1 ? Math.min(requestedLimit, 200) : 50;
  try {
    const rows = await listAnomalies({ kind, doc_id, limit });
    return { kind, doc_id, count: rows.length, anomalies: rows };
  } catch (e) {
    return {
      error: "retrieval_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `search_corpus`: legacy keyword search over wiki documents and
 * entity files. Returns at most 8 hits in total.
 *
 * Documents match when the lowercased query is a substring of the id, the
 * canonical title, or the first 2000 characters of the body. Entities match
 * on file id only, against either the query as typed or the query with
 * whitespace runs replaced by "-" (ids are kebab-case, so "edgar hoover"
 * finds "j-edgar-hoover").
 *
 * @param args - Raw tool arguments: `query` (required) and optional `scope`
 *   ("all" | "documents" | "entities"; defaults to "all" when absent).
 * @returns `{ query, scope, hits }`, or `{ error: "empty_query", hits: [] }`
 *   for a blank query. Entity hits report `type` as the class key accepted by
 *   `read_entity` (e.g. "person", "uap_object"), while `href` keeps the
 *   folder name.
 */
async function handleSearch(args: Record<string, unknown>): Promise<unknown> {
  const MAX_HITS = 8;
  // Wiki entity folder -> class key used by the entity tools' `class` enum.
  // Order is the scan order.
  const ENTITY_FOLDERS: ReadonlyArray<readonly [string, string]> = [
    ["people", "person"],
    ["organizations", "organization"],
    ["locations", "location"],
    ["events", "event"],
    ["uap-objects", "uap_object"],
    ["vehicles", "vehicle"],
    ["operations", "operation"],
    ["concepts", "concept"],
  ];

  const query = String(args.query ?? "").trim();
  const scope = (args.scope as string) ?? "all";
  if (!query) return { error: "empty_query", hits: [] };
  const ql = query.toLowerCase();
  const qlKebab = ql.replace(/\s+/g, "-");
  const hits: Array<{ type: string; id: string; title: string; snippet: string; href: string }> = [];

  if (scope === "all" || scope === "documents") {
    const ids = await listDocuments();
    for (const id of ids) {
      const f = await readDocument(id);
      if (!f) continue;
      const title = String(f.fm.canonical_title ?? id);
      const hay = `${id} ${title} ${f.body.slice(0, 2000)}`.toLowerCase();
      if (hay.includes(ql)) {
        hits.push({
          type: "document",
          id,
          title,
          snippet: snippet(f.body, query),
          href: `/d/${id}`,
        });
      }
      if (hits.length >= MAX_HITS) break;
    }
  }

  if ((scope === "all" || scope === "entities") && hits.length < MAX_HITS) {
    for (const [folder, classKey] of ENTITY_FOLDERS) {
      try {
        const entries = await fs.readdir(path.join(WIKI, "entities", folder));
        for (const file of entries) {
          if (!file.endsWith(".md")) continue;
          const id = file.replace(/\.md$/, "");
          const idLc = id.toLowerCase();
          if (idLc.includes(ql) || idLc.includes(qlKebab)) {
            const content = await fs.readFile(path.join(WIKI, "entities", folder, file), "utf-8");
            const cname = content.match(/canonical_name:\s*([^\n]+)/)?.[1]?.trim() ?? id;
            hits.push({
              // Explicit class key: stripping a trailing "s" left "people"
              // and "uap-object", which no entity tool accepts.
              type: classKey,
              id,
              title: cname,
              snippet: id,
              href: `/e/${folder}/${id}`,
            });
            if (hits.length >= MAX_HITS) break;
          }
        }
      } catch {
        /* Best-effort: a missing or unreadable folder is skipped. */
      }
      if (hits.length >= MAX_HITS) break;
    }
  }
  return { query, scope, hits };
}
/**
 * Handler for `read_page`: read the legacy wiki page record.
 *
 * The page argument is normalised to the `pNNN` id form (zero-padded to at
 * least three digits). Accepted spellings: "p007", "7", 7, "p7", "P007",
 * "p1234". Negative numbers and non-numeric input are rejected.
 *
 * @param args - Raw tool arguments: `doc_id` and `page`.
 * @returns Selected front-matter fields plus the first 2000 characters of the
 *   body; `{ error: "bad_page" }` when the page cannot be normalised;
 *   `{ error: "not_found", doc_id, page }` when no record exists.
 */
async function handleReadPage(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  let page = String(args.page ?? "").trim();
  if (!/^p\d{3}$/.test(page)) {
    // Drop an optional leading "p"/"P" so "p7" and "p1234" parse like "7" and "1234".
    const n = parseInt(page.replace(/^p/i, ""), 10);
    // Negative numbers would otherwise yield ids such as "p-03".
    if (!Number.isFinite(n) || n < 0) return { error: "bad_page" };
    page = `p${String(n).padStart(3, "0")}`;
  }
  const md = await readPage(doc_id, page);
  if (!md) return { error: "not_found", doc_id, page };
  return {
    doc_id,
    page,
    page_type: md.fm.page_type,
    language: md.fm.language_detected,
    content_classification: md.fm.content_classification,
    redactions_count: Array.isArray(md.fm.redactions) ? (md.fm.redactions as never[]).length : 0,
    vision_description: md.fm.vision_description,
    vision_description_pt_br: md.fm.vision_description_pt_br,
    entities_extracted: md.fm.entities_extracted,
    body_excerpt: md.body.slice(0, 2000),
  };
}
/**
 * Handler for `read_document`: return the overview of one wiki document.
 *
 * @param args - Raw tool arguments; only `doc_id` is read.
 * @returns Selected front-matter fields, the page count, the first 20 entries
 *   of the page list, and the first 2000 characters of the body as
 *   `executive_summary`; `{ error: "not_found", doc_id }` when the document
 *   cannot be read.
 */
async function handleReadDocument(args: Record<string, unknown>): Promise<unknown> {
  const doc_id = String(args.doc_id ?? "").trim();
  const record = await readDocument(doc_id);
  if (!record) {
    return { error: "not_found", doc_id };
  }
  const { fm, body } = record;
  const pageList = await listPages(doc_id);
  return {
    doc_id,
    canonical_title: fm.canonical_title,
    collection: fm.collection,
    document_class: fm.document_class,
    page_count: pageList.length,
    pages_index: pageList.slice(0, 20),
    content_classification: fm.content_classification,
    languages_detected: fm.languages_detected,
    key_entities: fm.key_entities,
    executive_summary: body.slice(0, 2000),
  };
}
/**
 * Handler for `read_entity`: return the wiki record of one entity.
 *
 * @param args - Raw tool arguments: `class` (entity class key) and `id`.
 * @returns Selected front-matter fields plus the first 2000 characters of the
 *   body, with `class` set to the resolved folder name;
 *   `{ error: "bad_class", cls }` when the class key maps to no folder;
 *   `{ error: "not_found", cls, id }` when the entity cannot be read.
 */
async function handleReadEntity(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();

  const folder = classKeyToFolder(cls);
  if (!folder) {
    return { error: "bad_class", cls };
  }

  const record = await readEntity(folder, id);
  if (!record) {
    return { error: "not_found", cls, id };
  }

  const { fm, body } = record;
  return {
    class: folder,
    id,
    canonical_name: fm.canonical_name,
    aliases: fm.aliases,
    total_mentions: fm.total_mentions,
    enrichment_status: fm.enrichment_status,
    external_sources: fm.external_sources,
    disambiguation_note: fm.disambiguation_note,
    body_excerpt: body.slice(0, 2000),
  };
}
/**
 * Handler for `entity_neighbors`: list entities co-mentioned with one entity.
 *
 * @param args - Raw tool arguments: `class` and `id` (both required), optional
 *   `filter_classes` (array of class names; a single string is accepted and
 *   treated as a one-element array) and optional `limit`.
 * @returns `{ entity, count, neighbors }` on success;
 *   `{ error: "missing_args" }` when class or id is blank;
 *   `{ error: "entity_not_found", class, id }` when the entity lookup yields nothing;
 *   `{ error: "graph_unavailable", message }` when a graph call throws.
 */
async function handleEntityNeighbors(args: Record<string, unknown>): Promise<unknown> {
  const cls = String(args.class ?? "").trim();
  const id = String(args.id ?? "").trim();
  if (!cls || !id) return { error: "missing_args" };

  // Validate the filter before the try block: a non-array value used to throw
  // on `.filter` and surface as a misleading "graph_unavailable".
  const rawFilter = args.filter_classes;
  let filterClasses: string[] | undefined;
  if (Array.isArray(rawFilter)) {
    filterClasses = rawFilter.filter((c): c is string => typeof c === "string" && c !== "");
  } else if (typeof rawFilter === "string" && rawFilter.trim() !== "") {
    filterClasses = [rawFilter.trim()];
  }

  // Coerce to an integer in [1, 100]; missing, non-numeric, zero or negative
  // values fall back to the documented default of 30.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit >= 1 ? Math.min(requestedLimit, 100) : 30;

  try {
    const ent = await findEntity(cls, id);
    if (!ent) return { error: "entity_not_found", class: cls, id };
    const neighbors = await getNeighbors(ent.entity_pk, { limit, classes: filterClasses });
    return { entity: ent, count: neighbors.length, neighbors };
  } catch (e) {
    return {
      error: "graph_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `entity_path`: find paths between two entities.
 *
 * @param args - Raw tool arguments: `from_class`, `from_id`, `to_class`,
 *   `to_id` (all required) and optional `max_hops`.
 * @returns `{ from, to, max_hops, paths }` on success;
 *   `{ error: "missing_args" }` when any required argument is blank;
 *   `{ error: "from_not_found" | "to_not_found", class, id }` when an endpoint
 *   lookup yields nothing;
 *   `{ error: "graph_unavailable", message }` when a graph call throws.
 */
async function handleEntityPath(args: Record<string, unknown>): Promise<unknown> {
  const fromCls = String(args.from_class ?? "").trim();
  const fromId = String(args.from_id ?? "").trim();
  const toCls = String(args.to_class ?? "").trim();
  const toId = String(args.to_id ?? "").trim();
  // Coerce to an integer in the documented 1-4 range; missing, non-numeric,
  // zero or negative values fall back to the default of 3.
  const requestedHops = Math.trunc(Number(args.max_hops));
  const maxHops = requestedHops >= 1 ? Math.min(requestedHops, 4) : 3;
  if (!fromCls || !fromId || !toCls || !toId) return { error: "missing_args" };
  try {
    const [a, b] = await Promise.all([findEntity(fromCls, fromId), findEntity(toCls, toId)]);
    if (!a) return { error: "from_not_found", class: fromCls, id: fromId };
    if (!b) return { error: "to_not_found", class: toCls, id: toId };
    const paths = await findPaths(a.entity_pk, b.entity_pk, maxHops);
    return { from: a, to: b, max_hops: maxHops, paths };
  } catch (e) {
    return {
      error: "graph_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `co_mention_chunks`: return chunks in which two entities both appear.
 *
 * @param args - Raw tool arguments: `a_class`, `a_id`, `b_class`, `b_id`
 *   (all required) and optional `limit`.
 * @returns `{ a, b, count, chunks }` on success;
 *   `{ error: "missing_args" }` when any required argument is blank;
 *   `{ error: "entity_not_found", a, b }` when either lookup yields nothing;
 *   `{ error: "graph_unavailable", message }` when a graph call throws.
 */
async function handleCoMentionChunks(args: Record<string, unknown>): Promise<unknown> {
  const aCls = String(args.a_class ?? "").trim();
  const aId = String(args.a_id ?? "").trim();
  const bCls = String(args.b_class ?? "").trim();
  const bId = String(args.b_id ?? "").trim();
  // Coerce to an integer in [1, 100]; missing, non-numeric, zero or negative
  // values fall back to the documented default of 20.
  const requestedLimit = Math.trunc(Number(args.limit));
  const limit = requestedLimit >= 1 ? Math.min(requestedLimit, 100) : 20;
  if (!aCls || !aId || !bCls || !bId) return { error: "missing_args" };
  try {
    const [a, b] = await Promise.all([findEntity(aCls, aId), findEntity(bCls, bId)]);
    if (!a || !b) return { error: "entity_not_found", a: aId, b: bId };
    const chunks = await getCoMentionChunks(a.entity_pk, b.entity_pk, limit);
    return { a, b, count: chunks.length, chunks };
  } catch (e) {
    return {
      error: "graph_unavailable",
      message: e instanceof Error ? e.message : String(e),
    };
  }
}
/**
 * Handler for `navigate_to`: validate a navigation offer and echo it back.
 *
 * The target comes from the model, so only same-site paths are accepted: it
 * must start with a single "/" and contain no control characters.
 *
 * @param args - Raw tool arguments: `target` (site-relative path) and `label`.
 * @returns `{ ok: true, target, label }` with the label cut to 40 characters;
 *   `{ error: "target_must_start_with_slash", target }` when the target does
 *   not begin with "/";
 *   `{ error: "target_must_be_site_relative", target }` when it begins with
 *   "//" or "/\" or contains a control character.
 */
async function handleNavigate(args: Record<string, unknown>): Promise<unknown> {
  const target = String(args.target ?? "").trim();
  const label = String(args.label ?? "").slice(0, 40);
  if (!target.startsWith("/")) return { error: "target_must_start_with_slash", target };
  // "//host/…" and "/\host/…" are resolved by browsers as URLs on another
  // host, and tabs/newlines are stripped during URL parsing, so "/\t/host"
  // collapses to the same thing.
  const leavesSite = /^\/[\/\\]/.test(target) || /[\u0000-\u001f\u007f]/.test(target);
  if (leavesSite) return { error: "target_must_be_site_relative", target };
  return { ok: true, target, label };
}
/**
 * Dispatch table from tool name to its local handler.
 * Each key equals the `function.name` of the matching tool definition in this
 * module.
 */
export const TOOL_HANDLERS: Record<string, ToolHandler> = {
// Chunk retrieval.
hybrid_search: handleHybridSearch,
read_chunk: handleReadChunk,
get_page_chunks: handleGetPageChunks,
list_anomalies: handleListAnomalies,
// Entity graph.
entity_neighbors: handleEntityNeighbors,
entity_path: handleEntityPath,
co_mention_chunks: handleCoMentionChunks,
// Legacy wiki readers and keyword search.
read_page: handleReadPage,
read_document: handleReadDocument,
read_entity: handleReadEntity,
search_corpus: handleSearch,
// UI navigation offer.
navigate_to: handleNavigate,
};