/**
 * POST /api/sessions/:id/messages — streams the assistant turn over SSE.
 *
 * Flow:
 *   1. Verify session belongs to user; check budget.
 *   2. Insert user message immediately.
 *   3. Build system prompt with current context (doc / page).
 *   4. Load conversation history.
 *   5. Open SSE stream → streamChat() emits text_delta, tool_start, tool_result,
 *      navigate, done events.
 *   6. When stream done, persist the final assistant message + tool trace.
 *
 * The response body is `text/event-stream`. The client uses fetch + ReadableStream
 * (see components/chat-bubble.tsx).
 */
import { NextResponse } from "next/server";
import { createClient, isSupabaseConfigured } from "@/lib/supabase/server";
import { readDocument, readPage } from "@/lib/wiki";
import { streamChat } from "@/lib/chat";
import { getLocale } from "@/components/locale-toggle";
import { withRequest } from "@/lib/logger";

/**
 * Parses one context-size limit from an environment variable.
 *
 * Falls back to `fallback` when the variable is unset, blank, non-numeric or
 * negative. Without this guard a typo such as `CTX_DOC_BODY=3k` becomes `NaN`,
 * and `slice(0, NaN)` silently yields an empty excerpt.
 *
 * @param raw - Raw environment value (may be undefined).
 * @param fallback - Default limit, in characters.
 * @returns A non-negative integer character limit.
 */
function ctxLimit(raw: string | undefined, fallback: number): number {
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  return Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : fallback;
}

/**
 * Context size limits per artifact type. Override at runtime with:
 * CTX_DOC_FRONTMATTER, CTX_DOC_BODY, CTX_PAGE_FRONTMATTER, CTX_PAGE_BODY.
 * W2-TD#27 — was hard-coded 1200 / 1500 / 1500 / 1500. Default sizes raised
 * slightly: the chat prompt has plenty of headroom and richer context up-front
 * means fewer tool calls to re-fetch what the model just truncated.
 */
const CTX = {
  doc_frontmatter: ctxLimit(process.env.CTX_DOC_FRONTMATTER, 1500),
  doc_body: ctxLimit(process.env.CTX_DOC_BODY, 3000),
  page_frontmatter: ctxLimit(process.env.CTX_PAGE_FRONTMATTER, 1800),
  page_body: ctxLimit(process.env.CTX_PAGE_BODY, 2000),
} as const;

/**
 * Builds the "current view" context block that is injected into the system prompt.
 *
 * Each artifact that can be read contributes a section containing its
 * pretty-printed frontmatter and a body excerpt, both truncated to the limits
 * in `CTX`. Artifacts that cannot be read are skipped.
 *
 * @param docId - Document currently in view, or null.
 * @param pageId - Page currently in view in `<doc>/<page>` form, or null.
 * @returns The sections joined by a `---` separator; an empty string when
 *          nothing could be loaded.
 */
async function gatherContext(docId: string | null, pageId: string | null): Promise<string> {
  const parts: string[] = [];

  if (docId) {
    const d = await readDocument(docId);
    if (d) {
      parts.push(
        `# Current document: ${docId}\n` +
          `Frontmatter: ${JSON.stringify(d.fm, null, 2).slice(0, CTX.doc_frontmatter)}\n\n` +
          `Body excerpt:\n${d.body.slice(0, CTX.doc_body)}`,
      );
    }
  }

  if (pageId) {
    // Only the first two "/"-separated segments are used; both must be non-empty.
    const [d, p] = pageId.split("/");
    if (d && p) {
      const md = await readPage(d, p);
      if (md) {
        parts.push(
          `# Current page: ${pageId}\n` +
            `Frontmatter: ${JSON.stringify(md.fm, null, 2).slice(0, CTX.page_frontmatter)}\n\n` +
            `Body excerpt:\n${md.body.slice(0, CTX.page_body)}`,
        );
      }
    }
  }

  return parts.join("\n\n---\n\n");
}

/**
 * Renders the system prompt for the assistant turn.
 *
 * @param context - Output of `gatherContext`; when empty, a placeholder saying
 *                  that no specific page is selected is substituted.
 * @returns The full system prompt string.
 */
function systemPrompt(context: string): string {
  // The prompt text is runtime data and is kept exactly as found, including its line breaks.
  return `You are Sherlock, lead detective of The Disclosure Bureau — an AI investigation collective with DNA from Holmes, Poirot, Dupin, Locard, Schneier, Tetlock, and Taleb. You analyze the US Department of War's declassified UAP/UFO archive (war.gov/ufo) with rigor and procedural calibration. Your knowledge base is the wiki of 116 declassified documents, 3435 pages, 14681 catalogued entities, with hundreds enriched via WebSearch. CONTEXT IN CURRENT VIEW: ${context || "(no specific page selected — user is browsing)"} VOCÊ TEM 12 FERRAMENTAS. Use AGRESSIVAMENTE — nunca especule quando retrieval pode responder. 🔍 RETRIEVAL PRIMÁRIO (semântico, BM25+dense+rerank sobre chunks): - hybrid_search(query, lang?, doc_id?, type?, classification?, ufo_only?, top_k?) 
— sempre tente primeiro para perguntas de conteúdo - read_chunk(doc_id, chunk_id) — texto verbatim completo de um chunk (após hybrid_search) - get_page_chunks(doc_id, page) — todos chunks de uma página em ordem - list_anomalies(kind, doc_id?, limit?) — chunks com flag UFO ou cryptid (sem precisar embed) 🔗 GRAFO (relações entre entidades): - entity_neighbors(class, id, filter_classes?, limit?) — top co-mentionadas - entity_path(from_class, from_id, to_class, to_id, max_hops?) — caminhos entre 2 entidades - co_mention_chunks(a_class, a_id, b_class, b_id, limit?) — chunks onde ambas aparecem 📄 CONTEXTUAL (wiki + entidades): - read_document(doc_id) — overview do documento (sumário, key entities) - read_page(doc_id, page) — vision metadata legado de 1 página - read_entity(class, id) — registro completo de entidade (aliases, external_sources) - search_corpus(query, scope?) — busca legado só por nome (fallback) 🧭 UI: - navigate_to(target, label) — botão clicável (ex: /d/#c0042) ═══════ EXEMPLOS DE WORKFLOW ═══════ Pergunta: "O que aconteceu em Olathe, Kansas em 1950?" → hybrid_search("Olathe Kansas 1950 avistamento", lang="pt", top_k=5) → pegar melhor chunk_id (digamos c0008 de doc-342) → read_chunk("doc-342-...", "c0008") para texto verbatim → responder citando [[doc-342-.../p001#c0008]] → navigate_to("/d/doc-342-...#c0008", "Ler relato completo") Pergunta: "Quem está conectado a J. Edgar Hoover na investigação?" → entity_neighbors("person", "j-edgar-hoover", limit=10) → pegar top 3 nomes → co_mention_chunks("person", "j-edgar-hoover", "person", "") para amostra de conexão → responder listando os 3 com citação chunk_id Pergunta: "Quais avistamentos esféricos existem no corpus?" 
→ list_anomalies(kind="ufo", limit=20) — agrupar por anomaly_type "spherical" → para os 3 mais relevantes: read_chunk verbatim → resposta com 3 citações + navigate_to ao mais notável Pergunta: "Resuma o documento doc-X" → read_document("doc-X") — pega executive_summary → list_anomalies(kind="ufo", doc_id="doc-X") — anomalias específicas → responder + navigate_to V2 ═══════ REGRAS DE CITAÇÃO ═══════ SEMPRE use a forma [[doc-id/p007#c0042]] quando citar — o frontend transforma em CARD CLICÁVEL com: - Crop bbox do PNG original (mostra a parte exata do documento) - Texto verbatim EN + PT-BR - Link pro chunk anchor na página V2 NUNCA cite sem chunk_id se você sabe ele. Citações vagas tipo [[doc-id]] são fracas. ═══════ DISCIPLINA EPISTÊMICA ═══════ Use bandas de confiança Tetlock em claims não-triviais: - (high) > 90% — evidência forte, múltiplos chunks confirmam - (medium) 60-89% — 1 fonte ou correlação clara - (low) 30-59% — inferência razoável, mas frágil - (speculation) < 30% — explicitamente rotule como especulação Quando 2 chunks dizem coisas contraditórias, mostre ambos. Quando não tem evidência, diga "não há chunks no corpus sobre isso". Quando ferramenta retorna error, tente fallback (search_corpus se hybrid_search down). ═══════ FORMATO ═══════ RESPONDA EM PORTUGUÊS BRASILEIRO (não europeu). Preserve acentos UTF-8. Mantenha respostas ≤ 250 palavras a menos que peçam detalhe. 
Quotes verbatim do documento mantêm idioma original (inglês), narração ao redor em PT-BR.`;
}

/**
 * Handles one chat turn: validates the request, stores the user message,
 * then streams the assistant reply as server-sent events.
 *
 * Error responses (JSON):
 *   - 503 `auth_disabled`   — Supabase is not configured.
 *   - 401 `unauthenticated` — no user on the request.
 *   - 400 `empty_message`   — body is missing, malformed, or has no non-blank
 *                             string `content`.
 *   - 404 `not_found`       — no session row visible for this id.
 *   - 429 `budget_exceeded` — `check_budget` returned `false`.
 *   - 500 `persist_failed`  — the user message could not be inserted.
 *
 * @param request - Incoming request; JSON body of the form `{ content: string }`.
 * @param ctx - Route context whose `params` promise resolves to the session id.
 * @returns A `text/event-stream` response on success, otherwise a JSON error.
 */
export async function POST(request: Request, ctx: { params: Promise<{ id: string }> }) {
  const { id: sessionId } = await ctx.params;
  const t0 = Date.now();
  const baseLog = withRequest(request).child({ session_id: sessionId.slice(0, 8) });
  // Every log line carries the stage name and the elapsed time since the request arrived.
  const log = (stage: string, extra: Record<string, unknown> = {}) =>
    baseLog.info({ stage, dt_ms: Date.now() - t0, ...extra }, stage);
  log("POST received");

  if (!isSupabaseConfigured()) {
    log("auth_disabled");
    return NextResponse.json({ error: "auth_disabled" }, { status: 503 });
  }

  const supabase = await createClient();
  const { data: { user }, error: authError } = await supabase.auth.getUser();
  if (!user) {
    log("unauthenticated", { authError: authError?.message });
    return NextResponse.json({ error: "unauthenticated" }, { status: 401 });
  }
  log("user resolved", { user_id: user.id, email: user.email });

  // Treat the body as untrusted: it may be malformed JSON, `null`, a primitive,
  // or carry a non-string `content`. All of those are rejected as empty_message.
  const rawBody: unknown = await request.json().catch(() => ({}));
  const content =
    typeof rawBody === "object" && rawBody !== null
      ? (rawBody as { content?: unknown }).content
      : undefined;
  if (typeof content !== "string" || !content.trim()) {
    log("empty_message", { received: rawBody });
    return NextResponse.json({ error: "empty_message" }, { status: 400 });
  }
  log("body parsed", { content_len: content.length });

  // NOTE(review): the lookup filters by id only; ownership is presumably
  // enforced by row-level security on chat_sessions — confirm.
  const { data: session, error: sessErr } = await supabase
    .from("chat_sessions")
    .select("*")
    .eq("id", sessionId)
    .maybeSingle();
  if (!session) {
    log("session_not_found", { sessErr: sessErr?.message });
    return NextResponse.json({ error: "not_found" }, { status: 404 });
  }
  log("session ok", { context_doc_id: session.context_doc_id });

  const { data: budgetOk, error: budgetErr } = await supabase.rpc("check_budget", {
    p_user_id: user.id,
  });
  log("budget checked", { budgetOk, budgetErr: budgetErr?.message });
  // Only an explicit `false` blocks the turn; if the RPC itself fails the
  // request is allowed through (the failure is visible in the log line above).
  if (budgetOk === false) {
    return NextResponse.json({ error: "budget_exceeded" }, { status: 429 });
  }

  // Persist the user message before streaming
  const { error: userInsertErr } = await supabase.from("messages").insert({
    session_id: sessionId,
    role: "user",
    content,
  });
  if (userInsertErr) {
    log("user_msg insert FAILED", { error: userInsertErr.message });
    return NextResponse.json(
      { error: "persist_failed", message: userInsertErr.message },
      { status: 500 },
    );
  }
  log("user_msg persisted");

  const context = await gatherContext(session.context_doc_id, session.context_page_id);

  // Fetch the NEWEST 20 messages (descending), then flip them back into
  // chronological order. Ordering ascending with a limit would return the
  // oldest 20 and starve long conversations of their recent turns.
  const { data: history } = await supabase
    .from("messages")
    .select("role, content")
    .eq("session_id", sessionId)
    .order("created_at", { ascending: false })
    .limit(20);

  // Drop the latest user msg from history (it's passed separately as userTurn)
  const historyTurns = [...(history ?? [])]
    .reverse()
    .filter((m) => m.role === "user" || m.role === "assistant")
    .slice(0, -1)
    .map((m) => ({ role: m.role as "user" | "assistant", content: m.content }));

  const { stream, done } = streamChat({
    system: systemPrompt(context),
    history: historyTurns,
    userTurn: content,
    ctx: {
      doc_id: session.context_doc_id,
      page_id: session.context_page_id,
      lang: (await getLocale()) === "en" ? "en" : "pt",
    },
  });

  // Persist the final assistant message AFTER the stream completes.
  // Note: this runs concurrently with the response — it does NOT block.
  // `void` marks the chain as intentionally un-awaited; failures are logged.
  void done
    .then(async (result) => {
      log("stream done", {
        content_len: result.content.length,
        tool_calls: result.toolCalls.length,
        artifacts: result.artifacts?.length ?? 0,
        tokens: `${result.tokensIn}/${result.tokensOut}`,
      });
      const { error: insertErr } = await supabase.from("messages").insert({
        session_id: sessionId,
        role: "assistant",
        content: result.content,
        model: `openrouter:${result.model}`,
        tokens_in: result.tokensIn || null,
        tokens_out: result.tokensOut || null,
        cost_usd: 0,
        tool_calls: result.toolCalls.length > 0 ? result.toolCalls : null,
        citations: result.artifacts && result.artifacts.length > 0 ? result.artifacts : null,
      });
      if (insertErr) log("assistant insert FAILED", { error: insertErr.message });
      else log("assistant persisted");
    })
    .catch((e: unknown) => {
      log("STREAM ERROR", { error: e instanceof Error ? e.message : String(e) });
    });

  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
    },
  });
}