disclosure-bureau/web/lib/chat/openrouter.ts
guto c0c6652dd5 guard /admin/* by role + filter chat artifacts to cited chunks
middleware.ts now checks profiles.role on every /admin/* request and
returns a plain 404 (not a redirect) for anonymous users and any
authenticated user without role='admin'. The 404 wording matches a
non-existent route, so we don't leak the existence of an admin area.
gutomec@gmail.com promoted to admin in the live DB.

openrouter.ts chat now collects artifacts silently during the tool-call
loop instead of streaming them to the SSE. After the model writes its
final prose (and after the forced-synthesis pass if needed), we scan
the assembled text for [[doc-id/p007#cNNNN]] citations and emit ONLY the
artifacts referenced. Duplicates are deduped by chunk_id. The persisted
citations column on messages now stores the filtered set too, so old
sessions reload with the same focused card grid.

Before: every hybrid_search hit (up to 6 per call × 5 calls = 30+ citation
cards plus crop images) flooded the chat regardless of what the model
ended up using. After: only the chunks actually woven into the answer.
2026-05-18 17:41:35 -03:00

445 lines
15 KiB
TypeScript

/**
* OpenRouter provider — OpenAI-compatible chat completions.
*
* Exports:
* sendOnce() — non-streaming, single call (no tools)
* streamWithTools() — streaming + tool-call loop (Pattern C)
*
* The tool-call loop runs locally:
* 1. Call model with messages + tools, stream=true.
* 2. Read SSE deltas: text → emit `text_delta`; tool_calls → buffer args by index.
* 3. When finish_reason='tool_calls': execute handlers locally, push results
* back as tool messages, call model again. Repeat until finish='stop'.
* 4. When done: emit `done`.
*
* Free models (May 2026):
* deepseek/deepseek-v4-flash:free — primary, solid tool calling
* nvidia/nemotron-3-super-120b-a12b:free — fallback
*/
import type { AGUIEvent } from "./agui";
import { TOOL_DEFINITIONS, TOOL_HANDLERS, type ToolHandlerContext } from "./tools";
// Model tried first on every streaming turn and the default model for sendOnce().
// Override with OPENROUTER_MODEL; an empty value also falls back to the default (`||`).
const PRIMARY = process.env.OPENROUTER_MODEL || "deepseek/deepseek-v4-flash:free";
// Model used by streamWithTools() for a turn only after the PRIMARY call failed
// with an error flagged `isRateLimit`. Override with OPENROUTER_FALLBACK_MODEL.
const FALLBACK = process.env.OPENROUTER_FALLBACK_MODEL || "nvidia/nemotron-3-super-120b-a12b:free";
// Chat-completions URL that both sendOnce() and openrouterStreamCall() POST to.
const ENDPOINT = "https://openrouter.ai/api/v1/chat/completions";
/** A complete function call requested by the model (arguments are a JSON string). */
interface OAToolCall {
  id: string;
  type: "function";
  function: { name: string; arguments: string };
}

/** One message of the OpenAI-compatible `messages` array sent to the endpoint. */
type OAMsg =
  | { role: "system" | "user"; content: string }
  | { role: "assistant"; content?: string | null; tool_calls?: OAToolCall[] }
  | { role: "tool"; content: string; tool_call_id: string; name?: string };

/** Partial tool call carried by a streaming delta; pieces are joined by `index`. */
interface OAToolCallDelta {
  index: number;
  id?: string;
  type?: string;
  function?: { name?: string; arguments?: string };
}

/** Incremental payload of one streamed choice. */
interface OADelta {
  role?: string;
  content?: string;
  tool_calls?: OAToolCallDelta[];
}

/** Token accounting as reported by the provider. */
interface OAUsage {
  prompt_tokens?: number;
  completion_tokens?: number;
  total_tokens?: number;
}

/** One entry of `choices` in a streamed chunk. */
interface OAStreamChoice {
  delta?: OADelta;
  finish_reason?: string | null;
  message?: { role: string; content?: string | null; tool_calls?: OAToolCall[] };
}

/** Shape of a parsed `data:` payload from the streaming endpoint. */
interface OAStreamChunk {
  choices?: OAStreamChoice[];
  usage?: OAUsage;
  model?: string;
  error?: { message?: string };
}
/**
 * Build the HTTP headers for an OpenRouter request.
 *
 * Reads OPENROUTER_API_KEY on every call; the referer comes from
 * NEXT_PUBLIC_SITE_URL with "https://disclosure.top" as the default.
 *
 * @throws Error when OPENROUTER_API_KEY is unset or empty.
 */
function headers() {
  const token = process.env.OPENROUTER_API_KEY;
  if (token) {
    const referer = process.env.NEXT_PUBLIC_SITE_URL || "https://disclosure.top";
    return {
      "Authorization": "Bearer " + token,
      "Content-Type": "application/json",
      "HTTP-Referer": referer,
      "X-Title": "The Disclosure Bureau",
    };
  }
  throw new Error("OPENROUTER_API_KEY not set");
}
/** Input for sendOnce(). */
export interface SendOnceReq {
  /** System prompt; sent as the first message of the request. */
  system: string;
  /** Conversation so far. Only the most recent 20 entries are forwarded. */
  messages: { role: "user" | "assistant" | "system"; content: string }[];
  /** Completion token cap; 1024 is used when omitted. */
  maxTokens?: number;
}
/**
 * Non-streaming, single-shot completion (no tools).
 *
 * Sends the system prompt followed by the last 20 entries of `req.messages`.
 *
 * @param req   Prompt, history and optional token cap.
 * @param model Model id; defaults to the primary model.
 * @returns The first choice's text (empty string if absent), the model id
 *          reported by the provider (or the requested one), and token counts
 *          when the provider reports them.
 * @throws Error on a non-2xx response (first 300 chars of the body included;
 *         `isRateLimit` is set to true for HTTP 429 and 402) or when the JSON
 *         body carries an `error` field.
 */
export async function sendOnce(req: SendOnceReq, model = PRIMARY): Promise<{
  content: string;
  model: string;
  tokensIn?: number;
  tokensOut?: number;
}> {
  const recent = req.messages.slice(-20);
  const payload = JSON.stringify({
    model,
    messages: [{ role: "system", content: req.system }, ...recent],
    max_tokens: req.maxTokens ?? 1024,
  });

  const response = await fetch(ENDPOINT, {
    method: "POST",
    headers: headers(),
    body: payload,
  });

  if (response.ok) {
    const data = await response.json();
    if (data.error) throw new Error(`openrouter error: ${data.error.message}`);
    return {
      content: data.choices?.[0]?.message?.content ?? "",
      model: data.model ?? model,
      tokensIn: data.usage?.prompt_tokens,
      tokensOut: data.usage?.completion_tokens,
    };
  }

  // Failure path: surface a truncated body and tag quota/rate-limit statuses
  // so callers can decide to retry with another model.
  const detail = (await response.text()).slice(0, 300);
  const failure: Error & { isRateLimit?: boolean } = new Error(
    `openrouter HTTP ${response.status}: ${detail}`,
  );
  if ([429, 402].includes(response.status)) {
    failure.isRateLimit = true;
  }
  throw failure;
}
/* ─── Pattern C: streaming + tool-call loop ─────────────────────────────── */
/** Input for streamWithTools(). */
export interface StreamRequest {
  /** System prompt; becomes the first message. */
  system: string;
  /** Prior turns, replayed verbatim between the system prompt and `userTurn`. */
  history: { role: "user" | "assistant"; content: string }[];
  /** The new user message to answer. */
  userTurn: string;
  /** Context object spread into the context passed to every tool handler. */
  ctx: ToolHandlerContext;
  /** Upper bound on tool-call loop iterations; 5 when omitted. */
  maxTurns?: number;
}
/** Event sink supplied by the caller of streamWithTools(). */
export interface StreamCallbacks {
// Called for each event produced during the turn. `artifact` events are held
// back by streamWithTools() and delivered only after citation filtering.
emit: (ev: AGUIEvent) => void;
}
/**
 * Run one user turn through the streaming tool-call loop.
 *
 * Each round streams a completion; text deltas are forwarded to `cb.emit` as
 * they arrive. When the model finishes with `tool_calls`, every call is
 * executed locally, its result is appended as a `tool` message, and another
 * round starts — up to `req.maxTurns` rounds (default 5). If the loop ends
 * with tools called but no prose at all, one extra tool-less round is forced
 * so the model has to answer in text.
 *
 * Artifacts produced by tool handlers are NOT streamed while tools run. They
 * are collected, filtered down to the ones cited in the final text
 * (`[[doc-id/p007#c0042]]`-style links), de-duplicated, and only then emitted.
 * The returned `artifacts` array is exactly the emitted set, so a caller that
 * persists it reproduces the same cards that were shown live.
 *
 * The caller is responsible for closing the SSE stream after this resolves.
 *
 * @param req Prompt, history, tool context and loop budget.
 * @param cb  Event sink; `cb.emit` receives text, tool, navigate, error and
 *            (filtered) artifact events.
 * @returns Assembled assistant text, the model used by the last round,
 *          summed token usage, a trace of executed tools, and the emitted
 *          artifacts.
 * @throws Error when a model call fails (after the rate-limit fallback has
 *         been tried), when a response has no body, or when the stream
 *         carries an error payload. Failures of the forced-synthesis round
 *         are reported as an `error` event instead of being thrown.
 */
export async function streamWithTools(
  req: StreamRequest,
  cb: StreamCallbacks,
): Promise<{
  content: string;
  model: string;
  tokensIn: number;
  tokensOut: number;
  toolCalls: Array<{ name: string; args: Record<string, unknown>; result: unknown }>;
  artifacts: import("./agui").Artifact[];
}> {
  const maxTurns = req.maxTurns ?? 5;

  // Gate in front of the caller's sink: artifact events are parked until the
  // final prose is known; everything else passes straight through.
  const collectedArtifacts: import("./agui").Artifact[] = [];
  const originalEmit = cb.emit;
  const gated: StreamCallbacks = {
    emit: (ev) => {
      if (ev.type === "artifact") {
        collectedArtifacts.push(ev.artifact);
        return; // suppressed — emitted later after filtering
      }
      originalEmit(ev);
    },
  };

  const messages: OAMsg[] = [
    { role: "system", content: req.system },
    ...req.history.map((m): OAMsg => ({ role: m.role, content: m.content })),
    { role: "user", content: req.userTurn },
  ];

  let assembledText = "";
  let totalIn = 0;
  let totalOut = 0;
  let modelUsed = PRIMARY;
  const toolTrace: Array<{ name: string; args: Record<string, unknown>; result: unknown }> = [];

  for (let turn = 0; turn < maxTurns; turn++) {
    // Every round tries the primary model first; only an error flagged as a
    // rate limit switches this round to the fallback model.
    let model = PRIMARY;
    let res: Response;
    try {
      res = await openrouterStreamCall(messages, model);
    } catch (e) {
      const rateLimited =
        e instanceof Error && (e as Error & { isRateLimit?: boolean }).isRateLimit;
      if (!rateLimited) throw e;
      model = FALLBACK;
      res = await openrouterStreamCall(messages, model);
    }
    modelUsed = model;
    if (!res.body) throw new Error("openrouter: no response body");

    const { roundText, finishReason, toolCalls, usage } = await readSSE(res.body, gated);
    assembledText += roundText;
    totalIn += usage?.prompt_tokens ?? 0;
    totalOut += usage?.completion_tokens ?? 0;

    // No tool calls → the model is done.
    if (finishReason !== "tool_calls" || toolCalls.length === 0) break;

    // Record the assistant's tool-call turn so the follow-up `tool` messages
    // have something to refer to.
    messages.push({
      role: "assistant",
      content: roundText || null,
      tool_calls: toolCalls,
    });

    for (const tc of toolCalls) {
      const argsObj = parseToolArgs(tc.function.arguments);
      gated.emit({ type: "tool_start", id: tc.id, name: tc.function.name, args: argsObj });

      const handler = TOOL_HANDLERS[tc.function.name];
      const t0 = Date.now();
      let result: unknown;
      // Per-tool artifact sink: routes artifacts produced by the handler
      // through the gate above, where they are collected (not streamed) until
      // the citation filter has run.
      const ctxWithSink: ToolHandlerContext = {
        ...req.ctx,
        emitArtifact: (artifact) => gated.emit({ type: "artifact", artifact }),
      };
      try {
        if (!handler) throw new Error(`unknown tool: ${tc.function.name}`);
        result = await handler(argsObj, ctxWithSink);
      } catch (e) {
        // A failing tool is reported to the model as data, not thrown.
        result = { error: e instanceof Error ? e.message : String(e) };
      }
      const dt = Date.now() - t0;
      gated.emit({ type: "tool_result", id: tc.id, result, durationMs: dt });
      toolTrace.push({ name: tc.function.name, args: argsObj, result });

      // Surface navigate_to as a UI event, but only for site-relative targets.
      if (tc.function.name === "navigate_to") {
        const target = String(argsObj.target ?? "");
        const label = String(argsObj.label ?? target);
        if (target.startsWith("/")) {
          gated.emit({ type: "navigate", target, label });
        }
      }

      // Feed the (size-capped) result back to the model.
      messages.push({
        role: "tool",
        tool_call_id: tc.id,
        name: tc.function.name,
        content: serializeToolResult(result),
      });
    }
    // Loop again — the model now sees the tool results.
  }

  // Forced synthesis: if the loop ended with no prose at all although tools
  // were called, run ONE more round without tools so the model must answer
  // in plain text.
  if (!assembledText.trim() && toolTrace.length > 0) {
    // Empty delta kept from the original implementation.
    // NOTE(review): presumably a "text is about to start" signal for the UI — confirm.
    gated.emit({ type: "text_delta", delta: "" });
    messages.push({
      role: "user",
      content:
        "Com base nas ferramentas que você acabou de chamar e nos resultados acima, " +
        "responda agora ao usuário EM TEXTO (3-8 frases), em português brasileiro. " +
        "Cite os chunks no formato [[doc-id/p007#c0042]]. Não chame mais nenhuma ferramenta.",
    });
    try {
      const res = await openrouterStreamCall(messages, modelUsed, { withTools: false });
      if (res.body) {
        const final = await readSSE(res.body, gated);
        assembledText += final.roundText;
        totalIn += final.usage?.prompt_tokens ?? 0;
        totalOut += final.usage?.completion_tokens ?? 0;
      }
    } catch (e) {
      gated.emit({
        type: "error",
        message: `synthesis failed: ${e instanceof Error ? e.message : String(e)}`,
      });
    }
  }

  // Emit exactly the cited, de-duplicated artifacts — and return that same
  // list so persisted sessions reload with the cards that were shown live.
  const relevantArtifacts = selectCitedArtifacts(assembledText, collectedArtifacts);
  for (const artifact of relevantArtifacts) {
    originalEmit({ type: "artifact", artifact });
  }

  return {
    content: assembledText,
    model: modelUsed,
    tokensIn: totalIn,
    tokensOut: totalOut,
    toolCalls: toolTrace,
    artifacts: relevantArtifacts,
  };
}

/** Parse a tool call's JSON arguments; anything that is not a JSON object becomes `{}`. */
function parseToolArgs(raw: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(raw || "{}");
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    /* malformed — pass empty */
  }
  return {};
}

/**
 * Serialize a tool result for the model, capped at 8000 characters. Never throws:
 * `undefined` becomes "null" and unserializable values become an error object.
 */
function serializeToolResult(result: unknown): string {
  let json: string | undefined;
  try {
    // JSON.stringify returns undefined (not a string) for undefined/functions.
    json = JSON.stringify(result);
  } catch (e) {
    json = JSON.stringify({
      error: `unserializable tool result: ${e instanceof Error ? e.message : String(e)}`,
    });
  }
  return (json ?? "null").slice(0, 8000);
}

/**
 * Keep only the artifacts cited in `text`, de-duplicated, in original order.
 *
 * Citation grammar: [[doc-id]], [[doc-id/p007]], [[doc-id#c0042]],
 * [[doc-id/p007#c0042]], each optionally followed by `|label`. A chunk anchor
 * counts only when it follows `#` (or is the entire target), so a doc id that
 * merely ends in "c" plus digits is not mistaken for a chunk.
 *
 * - citation: kept when its chunk is cited, or — if no chunk is cited anywhere
 *   in the text — when its document is cited.
 * - crop_image: kept only when it has a chunk_id and that chunk is cited.
 * - any other kind: always kept.
 *
 * NOTE(review): chunks are matched by chunk id alone; if chunk ids are not
 * unique across documents, a same-numbered chunk of another doc also passes.
 */
function selectCitedArtifacts(
  text: string,
  artifacts: readonly import("./agui").Artifact[],
): import("./agui").Artifact[] {
  const wikiLinkRe = /\[\[([^\]|]+?)(?:\|[^\]]+)?\]\]/g;
  const chunkRe = /(?:^|#)(c\d{4,})$/i;
  const docRe = /^([A-Za-z0-9._-]+)/;

  const citedChunks = new Set<string>();
  const citedDocs = new Set<string>();
  for (const link of text.matchAll(wikiLinkRe)) {
    const target = (link[1] ?? "").trim();
    const chunkMatch = chunkRe.exec(target);
    if (chunkMatch?.[1]) citedChunks.add(chunkMatch[1].toLowerCase());
    // Doc id is whatever comes before the first `/` or `#`.
    const docMatch = docRe.exec(target);
    if (docMatch?.[1]) citedDocs.add(docMatch[1]);
  }

  const seen = new Set<string>();
  const selected: import("./agui").Artifact[] = [];
  for (const a of artifacts) {
    let keep: boolean;
    let key: string;
    if (a.kind === "citation") {
      keep =
        citedChunks.has(a.chunk_id.toLowerCase()) ||
        (citedDocs.has(a.doc_id) && citedChunks.size === 0);
      key = `cit:${a.chunk_id}`;
    } else if (a.kind === "crop_image") {
      keep = Boolean(a.chunk_id && citedChunks.has(a.chunk_id.toLowerCase()));
      key = `crop:${a.chunk_id ?? a.src}`;
    } else {
      // Entity / case / hypothesis / nav cards carry no chunk reference.
      keep = true;
      key = `${a.kind}:${JSON.stringify(a)}`;
    }
    if (!keep || seen.has(key)) continue;
    seen.add(key);
    selected.push(a);
  }
  return selected;
}
/**
 * POST a streaming chat-completions request and hand back the raw response.
 *
 * @param messages Full message list for this round.
 * @param model    Model id to call.
 * @param opts     `withTools: false` omits `tools` / `tool_choice` from the
 *                 request; tools are included in every other case.
 * @returns The successful (2xx) response; the body is left unread.
 * @throws Error on a non-2xx status (first 300 chars of the body included;
 *         `isRateLimit` is set to true for HTTP 429 and 402).
 */
async function openrouterStreamCall(
  messages: OAMsg[],
  model: string,
  opts: { withTools?: boolean } = {},
): Promise<Response> {
  const { withTools = true } = opts;

  const payload: Record<string, unknown> = {
    model,
    messages,
    stream: true,
    max_tokens: 1024,
    ...(withTools ? { tools: TOOL_DEFINITIONS, tool_choice: "auto" } : {}),
  };

  const response = await fetch(ENDPOINT, {
    method: "POST",
    headers: headers(),
    body: JSON.stringify(payload),
  });
  if (response.ok) return response;

  const detail = (await response.text()).slice(0, 300);
  const failure: Error & { isRateLimit?: boolean } = new Error(
    `openrouter HTTP ${response.status}: ${detail}`,
  );
  if ([429, 402].includes(response.status)) {
    failure.isRateLimit = true;
  }
  throw failure;
}
/**
 * Read one OpenRouter SSE response to completion.
 *
 * The stream is consumed line by line. Only `data:` lines are interpreted
 * (with or without a space after the colon); every other line — blank event
 * separators, SSE comments, other fields — is skipped. Both LF and CRLF line
 * endings are accepted, and a final line that is not newline-terminated is
 * still processed. Payloads that are not a JSON object are ignored.
 *
 * Side effects: each non-empty content delta is forwarded via `cb.emit` as a
 * `text_delta` event. The reader is always cancelled before returning or
 * throwing, so the underlying connection is released even when reading stops
 * early at `[DONE]`.
 *
 * @param body Response body of a streaming chat-completions call.
 * @param cb   Sink for `text_delta` events.
 * @returns The text assembled in this round, the last `finish_reason` seen
 *          (null if none), the tool calls assembled from deltas, and the last
 *          `usage` object seen (if any).
 * @throws Error when a payload carries an `error` field.
 */
async function readSSE(
  body: ReadableStream<Uint8Array>,
  cb: StreamCallbacks,
): Promise<{
  roundText: string;
  finishReason: string | null;
  toolCalls: OAToolCall[];
  usage?: { prompt_tokens?: number; completion_tokens?: number };
}> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let roundText = "";
  let finishReason: string | null = null;
  // tool_calls arrive as deltas across many chunks; accumulate by index.
  const toolBufs: Record<number, { id: string; name: string; args: string }> = {};
  let usage: { prompt_tokens?: number; completion_tokens?: number } | undefined;

  const snapshot = () => ({
    roundText,
    finishReason,
    toolCalls: collectToolCalls(toolBufs),
    usage,
  });

  /** Process one line of the stream; returns true when it is the `[DONE]` sentinel. */
  const handleLine = (rawLine: string): boolean => {
    // Tolerate CRLF line endings.
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
    if (!line.startsWith("data:")) return false;
    const payload = line.slice(5).trim();
    if (payload === "[DONE]") return true;

    let parsed: unknown;
    try {
      parsed = JSON.parse(payload);
    } catch {
      return false; // not JSON — skip this line
    }
    if (typeof parsed !== "object" || parsed === null) return false;
    const chunk = parsed as OAStreamChunk;

    if (chunk.error) throw new Error(`openrouter stream error: ${chunk.error.message}`);
    if (chunk.usage) usage = chunk.usage;

    const choice = chunk.choices?.[0];
    if (!choice) return false;

    const d = choice.delta ?? {};
    if (typeof d.content === "string" && d.content) {
      roundText += d.content;
      cb.emit({ type: "text_delta", delta: d.content });
    }
    if (Array.isArray(d.tool_calls)) {
      for (const tc of d.tool_calls) {
        const slot = (toolBufs[tc.index] ??= { id: "", name: "", args: "" });
        if (tc.id) slot.id = tc.id;
        if (tc.function?.name) slot.name = tc.function.name;
        // Argument JSON is streamed in fragments; concatenate in arrival order.
        if (tc.function?.arguments) slot.args += tc.function.arguments;
      }
    }
    if (choice.finish_reason) finishReason = choice.finish_reason;
    return false;
  };

  try {
    for (;;) {
      const { value, done } = await reader.read();
      // On the final read, flush whatever bytes the decoder is still holding.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

      let nl: number;
      while ((nl = buffer.indexOf("\n")) !== -1) {
        const line = buffer.slice(0, nl);
        buffer = buffer.slice(nl + 1);
        if (handleLine(line)) return snapshot();
      }

      if (done) {
        // The last line may arrive without a trailing newline.
        if (buffer) handleLine(buffer);
        return snapshot();
      }
    }
  } finally {
    // Release the connection on every exit path. Cancelling an already
    // closed stream is a no-op; a rejection from an errored one is ignored.
    await reader.cancel().catch(() => undefined);
  }
}

/**
 * Turn the per-index accumulation buffers into complete tool calls, in
 * ascending index order. Entries that never received both an id and a name
 * are dropped; empty arguments default to "{}".
 */
function collectToolCalls(
  bufs: Record<number, { id: string; name: string; args: string }>,
): OAToolCall[] {
  return Object.values(bufs)
    .filter((b) => b.id && b.name)
    .map((b) => ({
      id: b.id,
      type: "function" as const,
      function: { name: b.name, arguments: b.args || "{}" },
    }));
}