- TD#8 hybrid.ts: rerank_strategy {always|when_top_k_gt|never} + threshold
(default skips rerank for top_k ≤ 15; chat tool uses threshold 10)
- O11 vision.ts + tools.ts: analyze_image_region tool — sharp-crops the
bbox, claude CLI reads the temp PNG via Read tool, Sonnet vision answers
- TD#12 /graph: SigmaGraph replaces ForceGraphCanvas; react-force-graph-2d
uninstalled (-37 transitive deps); force-graph-canvas.tsx deleted
- TD#27 messages/route.ts gatherContext slice sizes via CTX_* env vars
- TD#22 tests/rag/: golden.yaml (15 queries) + run.py (Recall@k + MRR +
negative-pass rate) + baseline.json + CI job in .forgejo/workflows/ci.yml
- docs/adrs/: ADR-001..005 published from systems-atelier deliverables
Verified live on disclosure.top: top_k=5 path skips rerank (6.7s embed-only,
was 12-15s with rerank); rerank=always still available on demand.
First RAG baseline: Recall@5 = 0.2083, MRR = 0.25, Negative pass = 1.0.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
257 lines
11 KiB
TypeScript
257 lines
11 KiB
TypeScript
/**
|
|
* POST /api/sessions/:id/messages — streams the assistant turn over SSE.
|
|
*
|
|
* Flow:
|
|
* 1. Verify session belongs to user; check budget.
|
|
* 2. Insert user message immediately.
|
|
* 3. Build system prompt with current context (doc / page).
|
|
* 4. Load conversation history.
|
|
* 5. Open SSE stream → streamChat() emits text_delta, tool_start, tool_result,
|
|
* navigate, done events.
|
|
* 6. When stream done, persist the final assistant message + tool trace.
|
|
*
|
|
* The response body is `text/event-stream`. The client uses fetch + ReadableStream
|
|
* (see components/chat-bubble.tsx).
|
|
*/
|
|
import { NextResponse } from "next/server";
|
|
import { createClient, isSupabaseConfigured } from "@/lib/supabase/server";
|
|
import { readDocument, readPage } from "@/lib/wiki";
|
|
import { streamChat } from "@/lib/chat";
|
|
import { getLocale } from "@/components/locale-toggle";
|
|
import { withRequest } from "@/lib/logger";
|
|
|
|
/**
|
|
* Context size limits per artifact type. Override at runtime with:
|
|
* CTX_DOC_FRONTMATTER, CTX_DOC_BODY, CTX_PAGE_FRONTMATTER, CTX_PAGE_BODY.
|
|
* W2-TD#27 — was hard-coded 1200 / 1500 / 1500 / 1500. Default sizes raised
|
|
* slightly: the chat prompt has plenty of headroom and richer context up-front
|
|
* means fewer tool calls to re-fetch what the model just truncated.
|
|
*/
|
|
/**
 * Reads a non-negative character limit from the environment.
 *
 * Falls back to `fallback` when the variable is unset, blank, not a number,
 * or negative. Without this guard a typo such as `CTX_DOC_BODY=3k` becomes
 * `NaN`, and `String.prototype.slice(0, NaN)` returns an empty string —
 * silently removing the whole excerpt from the prompt. A negative value
 * would make `slice(0, -n)` drop the tail instead of capping the length.
 *
 * @param name     Environment variable name.
 * @param fallback Limit used when the variable is missing or invalid.
 * @returns The limit, floored to a whole number of characters.
 */
function ctxLimit(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  if (Number.isNaN(parsed) || parsed < 0) return fallback;
  return Math.floor(parsed);
}

/** Maximum number of characters of each artifact slice injected into the prompt. */
const CTX = {
  doc_frontmatter: ctxLimit("CTX_DOC_FRONTMATTER", 1500),
  doc_body: ctxLimit("CTX_DOC_BODY", 3000),
  page_frontmatter: ctxLimit("CTX_PAGE_FRONTMATTER", 1800),
  page_body: ctxLimit("CTX_PAGE_BODY", 2000),
} as const;
|
|
|
|
/**
 * Assembles the "current view" context text that is injected into the
 * system prompt.
 *
 * @param docId  Id of the document attached to the session, or null.
 * @param pageId Page reference in `<doc>/<page>` form, or null.
 * @returns The document section followed by the page section (whichever
 *          could be loaded), separated by a `---` rule; an empty string when
 *          neither is available.
 */
async function gatherContext(docId: string | null, pageId: string | null): Promise<string> {
  const sections: string[] = [];

  // Document section — only attempted when an id was supplied.
  const doc = docId ? await readDocument(docId) : null;
  if (doc) {
    const fmJson = JSON.stringify(doc.fm, null, 2).slice(0, CTX.doc_frontmatter);
    const excerpt = doc.body.slice(0, CTX.doc_body);
    sections.push(`# Current document: ${docId}\nFrontmatter: ${fmJson}\n\nBody excerpt:\n${excerpt}`);
  }

  // Page section — the reference must carry both a document part and a page part.
  const [pageDoc, pageSlug] = pageId ? pageId.split("/") : [];
  const page = pageDoc && pageSlug ? await readPage(pageDoc, pageSlug) : null;
  if (page) {
    const fmJson = JSON.stringify(page.fm, null, 2).slice(0, CTX.page_frontmatter);
    const excerpt = page.body.slice(0, CTX.page_body);
    sections.push(`# Current page: ${pageId}\nFrontmatter: ${fmJson}\n\nBody excerpt:\n${excerpt}`);
  }

  return sections.join("\n\n---\n\n");
}
|
|
|
|
/**
 * Builds the system prompt for the chat model.
 *
 * The prompt is a single template literal containing, in order: the
 * "Sherlock" persona and corpus description, the caller-supplied context
 * (or a "no specific page selected" placeholder when `context` is empty),
 * the tool catalogue, worked workflow examples, citation rules, confidence
 * bands, and output-format rules (the prompt asks for Brazilian Portuguese
 * answers with verbatim quotes left in their original language).
 *
 * @param context Pre-rendered description of the current document/page; may be "".
 * @returns The complete system prompt text.
 */
function systemPrompt(context: string): string {
|
|
return `You are Sherlock, lead detective of The Disclosure Bureau — an AI investigation collective with DNA from Holmes, Poirot, Dupin, Locard, Schneier, Tetlock, and Taleb. You analyze the US Department of War's declassified UAP/UFO archive (war.gov/ufo) with rigor and procedural calibration.
|
|
|
|
Your knowledge base is the wiki of 116 declassified documents, 3435 pages, 14681 catalogued entities, with hundreds enriched via WebSearch.
|
|
|
|
CONTEXT IN CURRENT VIEW:
|
|
${context || "(no specific page selected — user is browsing)"}
|
|
|
|
VOCÊ TEM 12 FERRAMENTAS. Use AGRESSIVAMENTE — nunca especule quando retrieval pode responder.
|
|
|
|
🔍 RETRIEVAL PRIMÁRIO (semântico, BM25+dense+rerank sobre chunks):
|
|
- hybrid_search(query, lang?, doc_id?, type?, classification?, ufo_only?, top_k?) — sempre tente primeiro para perguntas de conteúdo
|
|
- read_chunk(doc_id, chunk_id) — texto verbatim completo de um chunk (após hybrid_search)
|
|
- get_page_chunks(doc_id, page) — todos chunks de uma página em ordem
|
|
- list_anomalies(kind, doc_id?, limit?) — chunks com flag UFO ou cryptid (sem precisar embed)
|
|
|
|
🔗 GRAFO (relações entre entidades):
|
|
- entity_neighbors(class, id, filter_classes?, limit?) — top co-mentionadas
|
|
- entity_path(from_class, from_id, to_class, to_id, max_hops?) — caminhos entre 2 entidades
|
|
- co_mention_chunks(a_class, a_id, b_class, b_id, limit?) — chunks onde ambas aparecem
|
|
|
|
📄 CONTEXTUAL (wiki + entidades):
|
|
- read_document(doc_id) — overview do documento (sumário, key entities)
|
|
- read_page(doc_id, page) — vision metadata legado de 1 página
|
|
- read_entity(class, id) — registro completo de entidade (aliases, external_sources)
|
|
- search_corpus(query, scope?) — busca legado só por nome (fallback)
|
|
|
|
🧭 UI:
|
|
- navigate_to(target, label) — botão clicável (ex: /d/<doc>#c0042)
|
|
|
|
═══════ EXEMPLOS DE WORKFLOW ═══════
|
|
|
|
Pergunta: "O que aconteceu em Olathe, Kansas em 1950?"
|
|
→ hybrid_search("Olathe Kansas 1950 avistamento", lang="pt", top_k=5)
|
|
→ pegar melhor chunk_id (digamos c0008 de doc-342)
|
|
→ read_chunk("doc-342-...", "c0008") para texto verbatim
|
|
→ responder citando [[doc-342-.../p001#c0008]]
|
|
→ navigate_to("/d/doc-342-...#c0008", "Ler relato completo")
|
|
|
|
Pergunta: "Quem está conectado a J. Edgar Hoover na investigação?"
|
|
→ entity_neighbors("person", "j-edgar-hoover", limit=10)
|
|
→ pegar top 3 nomes
|
|
→ co_mention_chunks("person", "j-edgar-hoover", "person", "<nome2>") para amostra de conexão
|
|
→ responder listando os 3 com citação chunk_id
|
|
|
|
Pergunta: "Quais avistamentos esféricos existem no corpus?"
|
|
→ list_anomalies(kind="ufo", limit=20) — agrupar por anomaly_type "spherical"
|
|
→ para os 3 mais relevantes: read_chunk verbatim
|
|
→ resposta com 3 citações + navigate_to ao mais notável
|
|
|
|
Pergunta: "Resuma o documento doc-X"
|
|
→ read_document("doc-X") — pega executive_summary
|
|
→ list_anomalies(kind="ufo", doc_id="doc-X") — anomalias específicas
|
|
→ responder + navigate_to V2
|
|
|
|
═══════ REGRAS DE CITAÇÃO ═══════
|
|
|
|
SEMPRE use a forma [[doc-id/p007#c0042]] quando citar — o frontend transforma em CARD CLICÁVEL com:
|
|
- Crop bbox do PNG original (mostra a parte exata do documento)
|
|
- Texto verbatim EN + PT-BR
|
|
- Link pro chunk anchor na página V2
|
|
|
|
NUNCA cite sem chunk_id se você sabe ele. Citações vagas tipo [[doc-id]] são fracas.
|
|
|
|
═══════ DISCIPLINA EPISTÊMICA ═══════
|
|
|
|
Use bandas de confiança Tetlock em claims não-triviais:
|
|
- (high) > 90% — evidência forte, múltiplos chunks confirmam
|
|
- (medium) 60-89% — 1 fonte ou correlação clara
|
|
- (low) 30-59% — inferência razoável, mas frágil
|
|
- (speculation) < 30% — explicitamente rotule como especulação
|
|
|
|
Quando 2 chunks dizem coisas contraditórias, mostre ambos.
|
|
Quando não tem evidência, diga "não há chunks no corpus sobre isso".
|
|
Quando ferramenta retorna error, tente fallback (search_corpus se hybrid_search down).
|
|
|
|
═══════ FORMATO ═══════
|
|
|
|
RESPONDA EM PORTUGUÊS BRASILEIRO (não europeu). Preserve acentos UTF-8.
|
|
Mantenha respostas ≤ 250 palavras a menos que peçam detalhe.
|
|
Quotes verbatim do documento mantêm idioma original (inglês), narração ao redor em PT-BR.`;
|
|
}
|
|
|
|
/**
 * Handles one chat turn for a session and streams the assistant reply as SSE.
 *
 * Steps: authenticate → validate the JSON body → load the session → check the
 * budget → persist the user message → gather doc/page context → load recent
 * history → start `streamChat()` → return the stream immediately while the
 * final assistant message is persisted in the background once `done` resolves.
 *
 * Error responses (JSON): 503 `auth_disabled`, 401 `unauthenticated`,
 * 400 `empty_message`, 404 `not_found`, 429 `budget_exceeded`,
 * 500 `persist_failed`.
 *
 * @param request Incoming request; the body is expected to be `{ content: string }`.
 * @param ctx     Route context whose `params` promise resolves to the session id.
 * @returns A `text/event-stream` response on success, otherwise a JSON error.
 */
export async function POST(request: Request, ctx: { params: Promise<{ id: string }> }) {
  const { id: sessionId } = await ctx.params;
  const t0 = Date.now();
  const baseLog = withRequest(request).child({ session_id: sessionId.slice(0, 8) });
  // Every stage log carries the elapsed time since the request arrived.
  const log = (stage: string, extra: Record<string, unknown> = {}) =>
    baseLog.info({ stage, dt_ms: Date.now() - t0, ...extra }, stage);
  log("POST received");

  if (!isSupabaseConfigured()) {
    log("auth_disabled");
    return NextResponse.json({ error: "auth_disabled" }, { status: 503 });
  }
  const supabase = await createClient();
  const { data: { user }, error: authError } = await supabase.auth.getUser();
  if (!user) {
    log("unauthenticated", { authError: authError?.message });
    return NextResponse.json({ error: "unauthenticated" }, { status: 401 });
  }
  log("user resolved", { user_id: user.id, email: user.email });

  // Parse defensively: the payload is untrusted. A JSON `null` body or a
  // non-string `content` must yield a 400, not a TypeError (→ 500) on `.trim()`.
  const rawBody: unknown = await request.json().catch(() => ({}));
  const rawContent =
    typeof rawBody === "object" && rawBody !== null
      ? (rawBody as { content?: unknown }).content
      : undefined;
  const content = typeof rawContent === "string" ? rawContent : "";
  if (!content.trim()) {
    log("empty_message", { received: rawBody });
    return NextResponse.json({ error: "empty_message" }, { status: 400 });
  }
  log("body parsed", { content_len: content.length });

  // NOTE(review): there is no explicit owner filter here — presumably
  // row-level security scopes this query to the caller; confirm.
  const { data: session, error: sessErr } = await supabase
    .from("chat_sessions").select("*").eq("id", sessionId).maybeSingle();
  if (!session) {
    log("session_not_found", { sessErr: sessErr?.message });
    return NextResponse.json({ error: "not_found" }, { status: 404 });
  }
  log("session ok", { context_doc_id: session.context_doc_id });

  // Only an explicit `false` blocks the turn; if the RPC itself errors the
  // request proceeds (fail-open), and the error is visible in the log line.
  const { data: budgetOk, error: budgetErr } = await supabase.rpc("check_budget", { p_user_id: user.id });
  log("budget checked", { budgetOk, budgetErr: budgetErr?.message });
  if (budgetOk === false) {
    return NextResponse.json({ error: "budget_exceeded" }, { status: 429 });
  }

  // Persist the user message before streaming
  const { error: userInsertErr } = await supabase.from("messages").insert({
    session_id: sessionId,
    role: "user",
    content,
  });
  if (userInsertErr) {
    log("user_msg insert FAILED", { error: userInsertErr.message });
    return NextResponse.json({ error: "persist_failed", message: userInsertErr.message }, { status: 500 });
  }
  log("user_msg persisted");

  const context = await gatherContext(session.context_doc_id, session.context_page_id);

  // Fetch the NEWEST 20 rows (descending + limit), then restore chronological
  // order. Ordering ascending with a limit would return the oldest 20, so long
  // sessions would lose all recent turns.
  const { data: recent, error: historyErr } = await supabase
    .from("messages")
    .select("role, content")
    .eq("session_id", sessionId)
    .order("created_at", { ascending: false })
    .limit(20);
  if (historyErr) log("history load FAILED", { error: historyErr.message });

  // Drop the latest user msg from history (it is passed separately as userTurn)
  const historyTurns = [...(recent ?? [])]
    .reverse()
    .filter((m) => m.role === "user" || m.role === "assistant")
    .slice(0, -1)
    .map((m) => ({ role: m.role as "user" | "assistant", content: m.content }));

  const { stream, done } = streamChat({
    system: systemPrompt(context),
    history: historyTurns,
    userTurn: content,
    ctx: {
      doc_id: session.context_doc_id,
      page_id: session.context_page_id,
      lang: (await getLocale()) === "en" ? "en" : "pt",
    },
  });

  // Persist the final assistant message AFTER the stream completes.
  // Fire-and-forget: this runs concurrently with the response and does NOT
  // block it; failures are logged rather than surfaced to the client.
  void done.then(async (result) => {
    log("stream done", {
      content_len: result.content.length,
      tool_calls: result.toolCalls.length,
      artifacts: result.artifacts?.length ?? 0,
      tokens: `${result.tokensIn}/${result.tokensOut}`,
    });
    const { error: insertErr } = await supabase.from("messages").insert({
      session_id: sessionId,
      role: "assistant",
      content: result.content,
      model: `openrouter:${result.model}`,
      tokens_in: result.tokensIn || null,
      tokens_out: result.tokensOut || null,
      cost_usd: 0,
      tool_calls: result.toolCalls.length > 0 ? result.toolCalls : null,
      citations: result.artifacts && result.artifacts.length > 0 ? result.artifacts : null,
    });
    if (insertErr) log("assistant insert FAILED", { error: insertErr.message });
    else log("assistant persisted");
  }).catch((e) => {
    log("STREAM ERROR", { error: e instanceof Error ? e.message : String(e) });
  });

  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      // Asks buffering reverse proxies (e.g. nginx) to pass chunks through immediately.
      "X-Accel-Buffering": "no",
    },
  });
}
|