Fase 3 onda 2 — entity synthesis at scale: - scripts/synthesize/20_entity_summary.py: queries DB for entities with total_mentions ≥ threshold + top-K verbatim chunk snippets via entity_mentions JOIN, prompts Sonnet (Holmes-Watson voice, bilingual), writes narrative_summary EN+PT-BR + summary_status=synthesized. Ran on 187 candidates (mentions ≥ 20) → 158 OK · 1 err · 29 skipped (no snippets). Combined with anchor curation: 20 curated + 158 synthesized = 178 entities with real narrative (vs 0 a day ago). Fase 4 — chat with typed artifacts + persistence: - lib/chat/agui.ts: AG-UI v1 typed Artifact union (citation, crop_image, entity_card, evidence_card, hypothesis_card, case_card, navigation_offer) alongside the existing event types. - lib/chat/tools.ts + openrouter.ts: hybrid_search emits up to 6 citation + crop_image artifacts per query. Provider collects them and returns in done.artifacts so the route can persist. - api/sessions/[id]/messages: persist artifacts to messages.citations. - components/chat-bubble.tsx: ArtifactCard renders inline cards (citation, crop_image, entity_card, navigation_offer) for streamed and persisted messages. activeId now persisted in localStorage so navigation between pages keeps the same conversation. New sessions are lazy (only when user has zero). loadMessages hydrates tools + artifacts from server. CRUD UI: rename (✎) + archive (🗑) buttons per session in the list. Home search: - doc-list-filters: input now fires hybrid_search (rerank=0 for speed) in parallel with the local title filter; chunk hits render above the doc grid with snippet + score + classification. - api/search/hybrid: accept ?rerank=0 to skip the cross-encoder (1.3s vs 60s). Auth flow: - infra: SMTP_HOST=mail.spacemail.com:587 + DMARC published; mail now lands in inbox. GOTRUE_MAILER_AUTOCONFIRM=false (real email verification). - kong.yml: proxy /auth/callback on api.disclosure.top → web:3000 so PKCE email links don't 404 at the gateway. 
- web/app/auth/callback: handle both ?code= (OAuth) and ?token=&type= (PKCE); redirect to the public site host before verifyOtp so the session cookie lands on the right domain. Audit deliverables: - .nirvana/outputs/disclosure-bureau/.../systems-atelier/: 5 docs (code analysis, tech debt, discovery brief, system arch, 5 ADRs) authored by sa-principal that produced this roadmap. Kept in-tree for traceability.
359 lines
12 KiB
TypeScript
359 lines
12 KiB
TypeScript
/**
 * OpenRouter provider — OpenAI-compatible chat completions.
 *
 * Exports:
 *   sendOnce()        — non-streaming, single call (no tools)
 *   streamWithTools() — streaming + tool-call loop (Pattern C)
 *
 * The tool-call loop runs locally:
 *   1. Call model with messages + tools, stream=true.
 *   2. Read SSE deltas: text → emit `text_delta`; tool_calls → buffer args by index.
 *   3. When finish_reason='tool_calls': execute handlers locally, push results
 *      back as tool messages, call model again. Repeat until finish='stop'.
 *   4. When done: return the assembled result to the caller. (This file does not
 *      itself emit `done`; presumably the caller does — confirm.)
 *
 * Free models (May 2026):
 *   deepseek/deepseek-v4-flash:free        — primary, solid tool calling
 *   nvidia/nemotron-3-super-120b-a12b:free — fallback
 */
|
|
import type { AGUIEvent } from "./agui";
|
|
import { TOOL_DEFINITIONS, TOOL_HANDLERS, type ToolHandlerContext } from "./tools";
|
|
|
|
/** OpenRouter's OpenAI-compatible chat-completions URL; every request in this file posts here. */
const ENDPOINT = "https://openrouter.ai/api/v1/chat/completions";

/** Model tried first. OPENROUTER_MODEL overrides it; an unset or empty value keeps the default. */
const PRIMARY = process.env.OPENROUTER_MODEL || "deepseek/deepseek-v4-flash:free";

/** Model used after a rate-limited call. OPENROUTER_FALLBACK_MODEL overrides it the same way. */
const FALLBACK =
  process.env.OPENROUTER_FALLBACK_MODEL || "nvidia/nemotron-3-super-120b-a12b:free";
|
|
|
|
/** A system or user message: plain text only. */
type OATextMsg = { role: "system" | "user"; content: string };

/** An assistant message: optional text and/or the tool calls it requested. */
type OAAssistantMsg = {
  role: "assistant";
  content?: string | null;
  tool_calls?: OAToolCall[];
};

/** The result of one tool call, linked to its request through `tool_call_id`. */
type OAToolMsg = { role: "tool"; content: string; tool_call_id: string; name?: string };

/** One entry of the `messages` array sent to the chat-completions endpoint. */
type OAMsg = OATextMsg | OAAssistantMsg | OAToolMsg;
|
|
|
|
/** A fully assembled function call requested by the model. */
interface OAToolCall {
  /** Call identifier; echoed back as `tool_call_id` on the matching tool message. */
  id: string;
  /** Always the literal "function". */
  type: "function";
  /** Target tool name plus its arguments as a JSON-encoded string. */
  function: {
    name: string;
    arguments: string;
  };
}
|
|
|
|
/** A partial tool call inside a streamed delta; fragments that share an `index` belong together. */
interface OAToolCallDelta {
  index: number;
  id?: string;
  type?: string;
  function?: { name?: string; arguments?: string };
}

/** The incremental payload of one streamed choice. */
interface OADelta {
  role?: string;
  /** Next piece of assistant text, when present. */
  content?: string;
  /** Tool-call fragments to accumulate, when present. */
  tool_calls?: OAToolCallDelta[];
}
|
|
|
|
/** One decoded `data:` payload of the streaming response. Every field is optional. */
interface OAStreamChunk {
  /** Streamed choices; the reader in this file only looks at the first one. */
  choices?: {
    delta?: OADelta;
    finish_reason?: string | null;
    message?: { role: string; content?: string | null; tool_calls?: OAToolCall[] };
  }[];
  /** Token accounting, when the chunk carries it. */
  usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
  model?: string;
  /** Error reported inside the stream. */
  error?: { message?: string };
}
|
|
|
|
/**
 * Build the HTTP headers sent with every OpenRouter request.
 *
 * The referer comes from NEXT_PUBLIC_SITE_URL, falling back to the public site
 * URL when that variable is unset or empty.
 *
 * @returns Authorization, content-type and attribution headers.
 * @throws Error when OPENROUTER_API_KEY is unset or empty.
 */
function headers() {
  const token = process.env.OPENROUTER_API_KEY;
  if (token) {
    const referer = process.env.NEXT_PUBLIC_SITE_URL || "https://disclosure.top";
    return {
      "Authorization": `Bearer ${token}`,
      "Content-Type": "application/json",
      "HTTP-Referer": referer,
      "X-Title": "The Disclosure Bureau",
    };
  }
  throw new Error("OPENROUTER_API_KEY not set");
}
|
|
|
|
/** Input for sendOnce(). */
export interface SendOnceReq {
  /** System prompt; sent as the first message of the request. */
  system: string;
  /** Conversation so far; sendOnce() forwards only the last 20 entries. */
  messages: { role: "user" | "assistant" | "system"; content: string }[];
  /** Completion token cap; sendOnce() uses 1024 when omitted. */
  maxTokens?: number;
}
|
|
|
|
/**
 * Non-streaming single shot — used by claude-code fallback path and tests.
 *
 * Sends the system prompt followed by the last 20 entries of `req.messages`
 * and returns the first choice's text.
 *
 * @param req   Prompt, history and optional token cap.
 * @param model Model id to call; defaults to PRIMARY.
 * @returns The reply text ("" when the response has none), the model reported
 *          by the API (or the requested one), and token counts when reported.
 * @throws Error on a non-2xx response — tagged `isRateLimit = true` for HTTP
 *         429 and 402 — or when the response JSON carries an `error` object.
 */
export async function sendOnce(req: SendOnceReq, model = PRIMARY): Promise<{
  content: string;
  model: string;
  tokensIn?: number;
  tokensOut?: number;
}> {
  const recent = req.messages.slice(-20);
  const payload = {
    model,
    messages: [{ role: "system", content: req.system }, ...recent],
    max_tokens: req.maxTokens ?? 1024,
  };

  const res = await fetch(ENDPOINT, {
    method: "POST",
    headers: headers(),
    body: JSON.stringify(payload),
  });

  if (!res.ok) {
    const detail = (await res.text()).slice(0, 300);
    const failure: Error & { isRateLimit?: boolean } = new Error(
      `openrouter HTTP ${res.status}: ${detail}`,
    );
    // 429 and 402 are both tagged so callers can switch to another model.
    if (res.status === 429 || res.status === 402) failure.isRateLimit = true;
    throw failure;
  }

  const data = await res.json();
  if (data.error) throw new Error(`openrouter error: ${data.error.message}`);

  const content = data.choices?.[0]?.message?.content ?? "";
  const reportedModel = data.model ?? model;
  return {
    content,
    model: reportedModel,
    tokensIn: data.usage?.prompt_tokens,
    tokensOut: data.usage?.completion_tokens,
  };
}
|
|
|
|
/* ─── Pattern C: streaming + tool-call loop ─────────────────────────────── */
|
|
|
|
/** Input for streamWithTools(). */
export interface StreamRequest {
  /** System prompt; sent as the first message. */
  system: string;
  /** Earlier conversation turns, sent in order after the system prompt. */
  history: { role: "user" | "assistant"; content: string }[];
  /** The new user message, appended after the history. */
  userTurn: string;
  /** Base context handed to every tool handler. */
  ctx: ToolHandlerContext;
  /** Max tool-call loop iterations; streamWithTools() uses 5 when omitted. */
  maxTurns?: number;
}
|
|
|
|
/** Callbacks through which streamWithTools() reports progress. */
export interface StreamCallbacks {
  /** Receives each event (text deltas, tool start/result, artifacts, navigation) as it happens. */
  emit: (ev: AGUIEvent) => void;
}
|
|
|
|
/**
 * Stream a chat completion, running the tool-call loop locally.
 *
 * Each round calls the model (PRIMARY first; FALLBACK once when the call fails
 * with an error flagged `isRateLimit`), forwards streamed events through
 * `cb.emit`, and — when the round finishes with `tool_calls` — runs the
 * matching handlers from TOOL_HANDLERS and appends their results as `tool`
 * messages for the next round. The loop stops when a round ends without tool
 * calls, or after `req.maxTurns` rounds (default 5); in the latter case the
 * text streamed so far is returned as-is.
 *
 * Tool failures never abort the stream: an unknown tool or a throwing handler
 * is reported to both the UI and the model as `{ error: message }`.
 *
 * The caller is responsible for closing the SSE stream after this resolves.
 *
 * @param req System prompt, history, new user turn, tool context and loop cap.
 * @param cb  Event sink; every event is forwarded to `cb.emit` unchanged.
 * @returns Text assembled across all rounds, the model used for the last
 *          round, summed token usage, a trace of every tool call, and every
 *          artifact emitted during the call.
 * @throws Error when a model call fails (and is not rate-limited, or the
 *         fallback fails too), when a response has no body, or when the
 *         stream reports an error.
 */
export async function streamWithTools(
  req: StreamRequest,
  cb: StreamCallbacks,
): Promise<{
  content: string;
  model: string;
  tokensIn: number;
  tokensOut: number;
  toolCalls: Array<{ name: string; args: Record<string, unknown>; result: unknown }>;
  artifacts: import("./agui").Artifact[];
}> {
  const maxTurns = req.maxTurns ?? 5;

  // Capture every artifact emitted in this call so the caller can persist
  // them alongside the assistant message. Events still reach the caller's
  // emit() unchanged.
  const collectedArtifacts: import("./agui").Artifact[] = [];
  const forward = cb.emit;
  const sink: StreamCallbacks = {
    emit: (ev) => {
      if (ev.type === "artifact") collectedArtifacts.push(ev.artifact);
      forward(ev);
    },
  };

  const messages: OAMsg[] = [
    { role: "system", content: req.system },
    ...req.history.map((m): OAMsg => ({ role: m.role, content: m.content })),
    { role: "user", content: req.userTurn },
  ];

  let assembledText = "";
  let totalIn = 0;
  let totalOut = 0;
  let modelUsed = PRIMARY;
  const toolTrace: Array<{ name: string; args: Record<string, unknown>; result: unknown }> = [];

  for (let turn = 0; turn < maxTurns; turn++) {
    // Every round starts on the primary model again; the fallback is only
    // used for the round in which the primary was rate-limited.
    let model = PRIMARY;
    let res: Response;
    try {
      res = await openrouterStreamCall(messages, model);
    } catch (e) {
      if (!(e as { isRateLimit?: boolean } | null | undefined)?.isRateLimit) throw e;
      model = FALLBACK;
      res = await openrouterStreamCall(messages, model);
    }
    modelUsed = model;

    if (!res.body) throw new Error("openrouter: no response body");
    const { roundText, finishReason, toolCalls, usage } = await readSSE(res.body, sink);
    assembledText += roundText;
    totalIn += usage?.prompt_tokens ?? 0;
    totalOut += usage?.completion_tokens ?? 0;

    // No tool calls → done.
    if (finishReason !== "tool_calls" || toolCalls.length === 0) break;

    // Append the assistant's tool-call turn to the message history.
    messages.push({
      role: "assistant",
      content: roundText || null,
      tool_calls: toolCalls,
    });

    // Execute each tool locally, in the order the model requested them.
    for (const tc of toolCalls) {
      const toolName = tc.function.name;
      const argsObj = parseToolArgs(tc.function.arguments);
      sink.emit({ type: "tool_start", id: tc.id, name: toolName, args: argsObj });

      const handler = TOOL_HANDLERS[toolName];
      const startedAt = Date.now();
      let result: unknown;
      // Per-tool artifact sink: bridges typed artifacts produced by the
      // handler into the SSE stream as `artifact` events.
      const ctxWithSink: ToolHandlerContext = {
        ...req.ctx,
        emitArtifact: (artifact) => sink.emit({ type: "artifact", artifact }),
      };
      try {
        if (!handler) throw new Error(`unknown tool: ${toolName}`);
        result = await handler(argsObj, ctxWithSink);
      } catch (e) {
        result = { error: e instanceof Error ? e.message : String(e) };
      }
      sink.emit({ type: "tool_result", id: tc.id, result, durationMs: Date.now() - startedAt });
      toolTrace.push({ name: toolName, args: argsObj, result });

      // Surface navigate_to as a UI event the frontend can render as a button.
      // Only site-relative targets are surfaced.
      if (toolName === "navigate_to") {
        const target = String(argsObj.target ?? "");
        const label = String(argsObj.label ?? target);
        if (target.startsWith("/")) {
          sink.emit({ type: "navigate", target, label });
        }
      }

      // Append the tool result for the model.
      messages.push({
        role: "tool",
        tool_call_id: tc.id,
        name: toolName,
        content: serializeToolResult(result),
      });
    }
    // Loop again — the model will see the tool results and produce the next turn.
  }

  return {
    content: assembledText,
    model: modelUsed,
    tokensIn: totalIn,
    tokensOut: totalOut,
    toolCalls: toolTrace,
    artifacts: collectedArtifacts,
  };
}

/**
 * Parse model-supplied tool arguments. Malformed JSON, and JSON that is not an
 * object (`null`, arrays, primitives), yields `{}` so handlers always receive
 * an object.
 */
function parseToolArgs(raw: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(raw || "{}");
    if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    /* malformed — pass empty */
  }
  return {};
}

/**
 * Serialize a tool result for the model, capped at 8000 characters. Never
 * throws: `JSON.stringify` returns `undefined` for `undefined`/function values
 * (sent as "null") and throws on circular structures or BigInt (sent as an
 * error object).
 */
function serializeToolResult(result: unknown): string {
  let json: string | undefined;
  try {
    json = JSON.stringify(result);
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    json = JSON.stringify({ error: `unserializable tool result: ${reason}` });
  }
  return (json ?? "null").slice(0, 8000);
}
|
|
|
|
/**
 * Start one streaming chat-completions request with the tool definitions attached.
 *
 * @param messages Full message list for this round.
 * @param model    Model id to call.
 * @returns The raw fetch Response; the caller reads its body as SSE.
 * @throws Error on a non-2xx response, tagged `isRateLimit = true` for HTTP
 *         429 and 402. The message carries the first 300 characters of the body.
 */
async function openrouterStreamCall(messages: OAMsg[], model: string): Promise<Response> {
  const payload = {
    model,
    messages,
    tools: TOOL_DEFINITIONS,
    tool_choice: "auto",
    stream: true,
    max_tokens: 1024,
  };

  const res = await fetch(ENDPOINT, {
    method: "POST",
    headers: headers(),
    body: JSON.stringify(payload),
  });
  if (res.ok) return res;

  const detail = (await res.text()).slice(0, 300);
  const failure: Error & { isRateLimit?: boolean } = new Error(
    `openrouter HTTP ${res.status}: ${detail}`,
  );
  if (res.status === 429 || res.status === 402) failure.isRateLimit = true;
  throw failure;
}
|
|
|
|
/**
 * Read one OpenRouter SSE response until `[DONE]` or end of stream.
 *
 * Every `data:` line is handled on its own, so the reader accepts LF and CRLF
 * line endings, and a final event that is not followed by a blank line. Text
 * deltas are emitted through `cb.emit` as `text_delta` events as they arrive;
 * tool-call fragments are accumulated by `index`. Lines that are not `data:`
 * fields, and payloads that are not valid JSON objects, are skipped.
 *
 * The reader is always cancelled on exit (normal return, `[DONE]`, or error),
 * so the underlying response body is released.
 *
 * @param body Response body to consume.
 * @param cb   Sink for `text_delta` events.
 * @returns Text of this round, the last `finish_reason` seen (or null), the
 *          assembled tool calls, and the last usage block seen (if any).
 * @throws Error when a chunk carries an `error` object, or when reading fails.
 */
async function readSSE(
  body: ReadableStream<Uint8Array>,
  cb: StreamCallbacks,
): Promise<{
  roundText: string;
  finishReason: string | null;
  toolCalls: OAToolCall[];
  usage?: { prompt_tokens?: number; completion_tokens?: number };
}> {
  const reader = body.getReader();
  const decoder = new TextDecoder();
  let pending = "";
  let roundText = "";
  let finishReason: string | null = null;
  // tool_calls arrive as deltas across many chunks; accumulate by index.
  const toolBufs: Record<number, { id: string; name: string; args: string }> = {};
  let usage: { prompt_tokens?: number; completion_tokens?: number } | undefined;

  const snapshot = () => ({
    roundText,
    finishReason,
    toolCalls: collectToolCalls(toolBufs),
    usage,
  });

  /** Apply one SSE line; returns true once the `[DONE]` sentinel is seen. */
  const consumeLine = (line: string): boolean => {
    if (!line.startsWith("data:")) return false;
    // trim() also drops the "\r" left behind by CRLF line endings.
    const payload = line.slice(5).trim();
    if (payload === "[DONE]") return true;

    let chunk: OAStreamChunk;
    try {
      chunk = JSON.parse(payload);
    } catch {
      return false; // best effort: skip payloads that are not JSON
    }
    if (chunk === null || typeof chunk !== "object") return false;
    if (chunk.error) throw new Error(`openrouter stream error: ${chunk.error.message}`);
    if (chunk.usage) usage = chunk.usage;

    const choice = chunk.choices?.[0];
    if (!choice) return false;

    const delta = choice.delta ?? {};
    if (typeof delta.content === "string" && delta.content) {
      roundText += delta.content;
      cb.emit({ type: "text_delta", delta: delta.content });
    }
    if (Array.isArray(delta.tool_calls)) {
      for (const tc of delta.tool_calls) {
        const slot = (toolBufs[tc.index] ??= { id: "", name: "", args: "" });
        if (tc.id) slot.id = tc.id;
        if (tc.function?.name) slot.name = tc.function.name;
        if (tc.function?.arguments) slot.args += tc.function.arguments;
      }
    }
    if (choice.finish_reason) finishReason = choice.finish_reason;
    return false;
  };

  try {
    for (;;) {
      const { value, done } = await reader.read();
      // On end of stream, flush whatever the decoder is still holding.
      pending += done ? decoder.decode() : decoder.decode(value, { stream: true });

      let newline: number;
      while ((newline = pending.indexOf("\n")) !== -1) {
        const line = pending.slice(0, newline);
        pending = pending.slice(newline + 1);
        if (consumeLine(line)) return snapshot();
      }

      if (done) {
        // A last line without a trailing newline still counts.
        if (pending) consumeLine(pending);
        return snapshot();
      }
    }
  } finally {
    // Release the body even on early return or error; a failed cancel is not actionable.
    await reader.cancel().catch(() => undefined);
  }
}

/**
 * Turn the accumulated tool-call buffers into complete calls. Buffers that
 * never received both an id and a name are dropped; empty arguments become "{}".
 */
function collectToolCalls(bufs: Record<number, { id: string; name: string; args: string }>): OAToolCall[] {
  const calls: OAToolCall[] = [];
  for (const buf of Object.values(bufs)) {
    if (!buf.id || !buf.name) continue;
    calls.push({
      id: buf.id,
      type: "function",
      function: { name: buf.name, arguments: buf.args || "{}" },
    });
  }
  return calls;
}
|