disclosure-bureau/web/app/api/sessions/[id]/messages/route.ts

/**
 * POST /api/sessions/:id/messages — streams the assistant turn over SSE.
 *
 * Flow:
 *   1. Verify session belongs to user; check budget.
 *   2. Insert user message immediately.
 *   3. Build system prompt with current context (doc / page).
 *   4. Load conversation history.
 *   5. Open SSE stream → streamChat() emits text_delta, tool_start, tool_result,
 *      navigate, done events.
 *   6. When stream done, persist the final assistant message + tool trace.
 *
 * The response body is `text/event-stream`. The client uses fetch + ReadableStream
 * (see components/chat-bubble.tsx).
 */
import { NextResponse } from "next/server";
import { createClient, isSupabaseConfigured } from "@/lib/supabase/server";
import { readDocument, readPage } from "@/lib/wiki";
import { streamChat } from "@/lib/chat";
import { getLocale } from "@/components/locale-toggle";
import { withRequest } from "@/lib/logger";

/**
 * Parses one context-size override taken from an environment variable.
 *
 * The result is used as the `end` argument of `String.prototype.slice`, so a
 * bad value must never reach it: `NaN` would silently produce an empty excerpt
 * and a negative number would trim characters from the tail instead of capping
 * the length.
 *
 * @param raw - Raw environment value; `undefined` when the variable is unset.
 * @param fallback - Default limit used when `raw` is unset, blank, or invalid.
 * @returns A non-negative integer character limit.
 */
function ctxLimit(raw: string | undefined, fallback: number): number {
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  // Reject NaN, ±Infinity and negatives; fractional values are floored.
  return Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : fallback;
}

/**
 * Context size limits (in characters) per artifact type. Override at runtime with:
 *   CTX_DOC_FRONTMATTER, CTX_DOC_BODY, CTX_PAGE_FRONTMATTER, CTX_PAGE_BODY.
 * Unset, blank, non-numeric, or negative overrides fall back to the defaults below.
 * W2-TD#27 — was hard-coded 1200 / 1500 / 1500 / 1500. Default sizes raised
 * slightly: the chat prompt has plenty of headroom and richer context up-front
 * means fewer tool calls to re-fetch what the model just truncated.
 */
const CTX = {
  doc_frontmatter: ctxLimit(process.env.CTX_DOC_FRONTMATTER, 1500),
  doc_body: ctxLimit(process.env.CTX_DOC_BODY, 3000),
  page_frontmatter: ctxLimit(process.env.CTX_PAGE_FRONTMATTER, 1800),
  page_body: ctxLimit(process.env.CTX_PAGE_BODY, 2000),
} as const;

/**
 * Builds the "current view" context block that is embedded in the system prompt.
 *
 * Looks up the session's document (via `readDocument`) and page (via `readPage`,
 * with `pageId` split on "/" into its first two segments) and renders a truncated
 * frontmatter + body excerpt for each one that is found.
 *
 * @param docId - Document id attached to the session, or `null`.
 * @param pageId - Page id of the form `<doc>/<page>`, or `null`.
 * @returns The rendered sections joined by a `---` separator, or an empty
 *          string when neither lookup produced anything.
 */
async function gatherContext(docId: string | null, pageId: string | null): Promise<string> {
  // Shared renderer: heading line, capped pretty-printed frontmatter, capped body.
  const renderSection = (
    heading: string,
    fm: unknown,
    text: string,
    fmMax: number,
    textMax: number,
  ): string => {
    const frontmatter = JSON.stringify(fm, null, 2).slice(0, fmMax);
    const excerpt = text.slice(0, textMax);
    return `${heading}\nFrontmatter: ${frontmatter}\n\nBody excerpt:\n${excerpt}`;
  };

  const sections: string[] = [];

  const doc = docId ? await readDocument(docId) : null;
  if (doc) {
    sections.push(
      renderSection(`# Current document: ${docId}`, doc.fm, doc.body, CTX.doc_frontmatter, CTX.doc_body),
    );
  }

  if (pageId) {
    const segments = pageId.split("/");
    const docPart = segments[0];
    const pagePart = segments[1];
    // Both halves must be non-empty; anything after the second segment is ignored.
    const page = docPart && pagePart ? await readPage(docPart, pagePart) : null;
    if (page) {
      sections.push(
        renderSection(`# Current page: ${pageId}`, page.fm, page.body, CTX.page_frontmatter, CTX.page_body),
      );
    }
  }

  return sections.join("\n\n---\n\n");
}

/**
 * Renders the system prompt for the assistant turn.
 *
 * The prompt is assembled from fixed sections (persona, current-view context,
 * tool catalogue, workflow examples, citation rules, epistemic discipline,
 * output format) separated by blank lines. Only the current-view section varies.
 *
 * @param context - Pre-rendered context for the doc/page in view. An empty
 *                  string is replaced by a "no specific page selected" placeholder.
 * @returns The complete system prompt text.
 */
function systemPrompt(context: string): string {
  const currentView = context || "(no specific page selected — user is browsing)";

  const persona = `You are Sherlock, lead detective of The Disclosure Bureau — an AI investigation collective with DNA from Holmes, Poirot, Dupin, Locard, Schneier, Tetlock, and Taleb. You analyze the US Department of War's declassified UAP/UFO archive (war.gov/ufo) with rigor and procedural calibration.

Your knowledge base is the wiki of 116 declassified documents, 3435 pages, 14681 catalogued entities, with hundreds enriched via WebSearch.`;

  const view = `CONTEXT IN CURRENT VIEW:
${currentView}`;

  const tools = `VOCÊ TEM 12 FERRAMENTAS. Use AGRESSIVAMENTE — nunca especule quando retrieval pode responder.

🔍 RETRIEVAL PRIMÁRIO (semântico, BM25+dense+rerank sobre chunks):
- hybrid_search(query, lang?, doc_id?, type?, classification?, ufo_only?, top_k?) — sempre tente primeiro para perguntas de conteúdo
- read_chunk(doc_id, chunk_id) — texto verbatim completo de um chunk (após hybrid_search)
- get_page_chunks(doc_id, page) — todos chunks de uma página em ordem
- list_anomalies(kind, doc_id?, limit?) — chunks com flag UFO ou cryptid (sem precisar embed)

🔗 GRAFO (relações entre entidades):
- entity_neighbors(class, id, filter_classes?, limit?) — top co-mentionadas
- entity_path(from_class, from_id, to_class, to_id, max_hops?) — caminhos entre 2 entidades
- co_mention_chunks(a_class, a_id, b_class, b_id, limit?) — chunks onde ambas aparecem

📄 CONTEXTUAL (wiki + entidades):
- read_document(doc_id) — overview do documento (sumário, key entities)
- read_page(doc_id, page) — vision metadata legado de 1 página
- read_entity(class, id) — registro completo de entidade (aliases, external_sources)
- search_corpus(query, scope?) — busca legado só por nome (fallback)

🧭 UI:
- navigate_to(target, label) — botão clicável (ex: /d/<doc>#c0042)`;

  const workflows = `═══════ EXEMPLOS DE WORKFLOW ═══════

Pergunta: "O que aconteceu em Olathe, Kansas em 1950?"
→ hybrid_search("Olathe Kansas 1950 avistamento", lang="pt", top_k=5)
→ pegar melhor chunk_id (digamos c0008 de doc-342)
→ read_chunk("doc-342-...", "c0008") para texto verbatim
→ responder citando [[doc-342-.../p001#c0008]]
→ navigate_to("/d/doc-342-...#c0008", "Ler relato completo")

Pergunta: "Quem está conectado a J. Edgar Hoover na investigação?"
→ entity_neighbors("person", "j-edgar-hoover", limit=10)
→ pegar top 3 nomes
→ co_mention_chunks("person", "j-edgar-hoover", "person", "<nome2>") para amostra de conexão
→ responder listando os 3 com citação chunk_id

Pergunta: "Quais avistamentos esféricos existem no corpus?"
→ list_anomalies(kind="ufo", limit=20) — agrupar por anomaly_type "spherical"
→ para os 3 mais relevantes: read_chunk verbatim
→ resposta com 3 citações + navigate_to ao mais notável

Pergunta: "Resuma o documento doc-X"
→ read_document("doc-X") — pega executive_summary
→ list_anomalies(kind="ufo", doc_id="doc-X") — anomalias específicas
→ responder + navigate_to V2`;

  const citationRules = `═══════ REGRAS DE CITAÇÃO ═══════

SEMPRE use a forma [[doc-id/p007#c0042]] quando citar — o frontend transforma em CARD CLICÁVEL com:
- Crop bbox do PNG original (mostra a parte exata do documento)
- Texto verbatim EN + PT-BR
- Link pro chunk anchor na página V2

NUNCA cite sem chunk_id se você sabe ele. Citações vagas tipo [[doc-id]] são fracas.`;

  const epistemics = `═══════ DISCIPLINA EPISTÊMICA ═══════

Use bandas de confiança Tetlock em claims não-triviais:
- (high) > 90% — evidência forte, múltiplos chunks confirmam
- (medium) 60-89% — 1 fonte ou correlação clara
- (low) 30-59% — inferência razoável, mas frágil
- (speculation) < 30% — explicitamente rotule como especulação

Quando 2 chunks dizem coisas contraditórias, mostre ambos.
Quando não tem evidência, diga "não há chunks no corpus sobre isso".
Quando ferramenta retorna error, tente fallback (search_corpus se hybrid_search down).`;

  const format = `═══════ FORMATO ═══════

RESPONDA EM PORTUGUÊS BRASILEIRO (não europeu). Preserve acentos UTF-8.
Mantenha respostas ≤ 250 palavras a menos que peçam detalhe.
Quotes verbatim do documento mantêm idioma original (inglês), narração ao redor em PT-BR.`;

  // Every section boundary in the prompt is exactly one blank line.
  return [persona, view, tools, workflows, citationRules, epistemics, format].join("\n\n");
}

/** Number of most-recent stored messages loaded as conversation history per turn. */
const HISTORY_LIMIT = 20;

/**
 * Pulls the user's message text out of a parsed JSON request body.
 *
 * @param payload - Whatever `request.json()` produced (may be `null`, an array,
 *                  a primitive, or an object without a usable `content` field).
 * @returns The original (untrimmed) `content` string, or `null` when the payload
 *          is not an object or `content` is missing, not a string, or blank.
 */
function extractContent(payload: unknown): string | null {
  if (typeof payload !== "object" || payload === null) return null;
  const content = (payload as { content?: unknown }).content;
  return typeof content === "string" && content.trim() !== "" ? content : null;
}

/**
 * Converts message rows fetched newest-first into chronological chat turns.
 *
 * Only `user` / `assistant` rows are kept, and the final (newest) turn is
 * dropped because the caller passes the just-inserted user message separately.
 *
 * @param rowsNewestFirst - Rows ordered by `created_at` descending.
 * @returns Prior turns in oldest-to-newest order.
 */
function toHistoryTurns(
  rowsNewestFirst: ReadonlyArray<{ role: string; content: string }>,
): Array<{ role: "user" | "assistant"; content: string }> {
  return [...rowsNewestFirst]
    .reverse()
    .filter((m) => m.role === "user" || m.role === "assistant")
    .slice(0, -1)
    .map((m) => ({ role: m.role as "user" | "assistant", content: m.content }));
}

/**
 * POST /api/sessions/:id/messages — runs one chat turn and streams it over SSE.
 *
 * Responds with:
 *   - 503 `auth_disabled` when Supabase is not configured,
 *   - 401 `unauthenticated` when no user is resolved,
 *   - 400 `empty_message` when the body has no non-blank string `content`,
 *   - 404 `not_found` when the session row cannot be loaded,
 *   - 429 `budget_exceeded` when `check_budget` returns `false`,
 *   - 500 `persist_failed` when the user message cannot be inserted,
 *   - otherwise a `text/event-stream` response backed by `streamChat()`.
 *
 * The assistant message is persisted in the background once the stream's
 * `done` promise resolves; a failure there is logged, not surfaced to the client.
 *
 * @param request - Incoming request; the JSON body must carry `{ content: string }`.
 * @param ctx - Route context whose `params` promise resolves to the session id.
 */
export async function POST(request: Request, ctx: { params: Promise<{ id: string }> }) {
  const { id: sessionId } = await ctx.params;
  const t0 = Date.now();
  const baseLog = withRequest(request).child({ session_id: sessionId.slice(0, 8) });
  // Every stage log carries the elapsed time since the request arrived.
  const log = (stage: string, extra: Record<string, unknown> = {}) =>
    baseLog.info({ stage, dt_ms: Date.now() - t0, ...extra }, stage);
  log("POST received");

  if (!isSupabaseConfigured()) {
    log("auth_disabled");
    return NextResponse.json({ error: "auth_disabled" }, { status: 503 });
  }
  const supabase = await createClient();
  const { data: { user }, error: authError } = await supabase.auth.getUser();
  if (!user) {
    log("unauthenticated", { authError: authError?.message });
    return NextResponse.json({ error: "unauthenticated" }, { status: 401 });
  }
  log("user resolved", { user_id: user.id, email: user.email });

  // Malformed JSON is treated like an empty object; the payload is then validated
  // instead of cast, so `null` bodies or non-string `content` yield 400, not a crash.
  const rawBody: unknown = await request.json().catch(() => ({}));
  const content = extractContent(rawBody);
  if (content === null) {
    log("empty_message", { received: rawBody });
    return NextResponse.json({ error: "empty_message" }, { status: 400 });
  }
  log("body parsed", { content_len: content.length });

  // NOTE(review): the query filters by session id only; ownership is presumably
  // enforced by row-level security on chat_sessions — confirm.
  const { data: session, error: sessErr } = await supabase
    .from("chat_sessions").select("*").eq("id", sessionId).maybeSingle();
  if (!session) {
    log("session_not_found", { sessErr: sessErr?.message });
    return NextResponse.json({ error: "not_found" }, { status: 404 });
  }
  log("session ok", { context_doc_id: session.context_doc_id });

  // Only an explicit `false` blocks the request; any other outcome (including a
  // missing result when the RPC reports an error) lets the turn proceed.
  const { data: budgetOk, error: budgetErr } = await supabase.rpc("check_budget", { p_user_id: user.id });
  log("budget checked", { budgetOk, budgetErr: budgetErr?.message });
  if (budgetOk === false) {
    return NextResponse.json({ error: "budget_exceeded" }, { status: 429 });
  }

  // Persist the user message before streaming
  const { error: userInsertErr } = await supabase.from("messages").insert({
    session_id: sessionId,
    role: "user",
    content,
  });
  if (userInsertErr) {
    log("user_msg insert FAILED", { error: userInsertErr.message });
    return NextResponse.json({ error: "persist_failed", message: userInsertErr.message }, { status: 500 });
  }
  log("user_msg persisted");

  const context = await gatherContext(session.context_doc_id, session.context_page_id);

  // Load the NEWEST messages (descending + limit), then restore chronological
  // order. Ordering ascending with a limit would return the oldest rows and
  // freeze the model's view of any session longer than the window.
  const { data: recentRows, error: historyErr } = await supabase
    .from("messages")
    .select("role, content")
    .eq("session_id", sessionId)
    .order("created_at", { ascending: false })
    .limit(HISTORY_LIMIT);
  if (historyErr) log("history load FAILED", { error: historyErr.message });

  // Drops the latest user msg from history (it is passed separately as userTurn).
  const historyTurns = toHistoryTurns(recentRows ?? []);

  const { stream, done } = streamChat({
    system: systemPrompt(context),
    history: historyTurns,
    userTurn: content,
    ctx: {
      doc_id: session.context_doc_id,
      page_id: session.context_page_id,
      lang: (await getLocale()) === "en" ? "en" : "pt",
    },
  });

  // Persist the final assistant message AFTER the stream completes.
  // Note: this runs concurrently with the response — it does NOT block.
  void done.then(async (result) => {
    log("stream done", {
      content_len: result.content.length,
      tool_calls: result.toolCalls.length,
      artifacts: result.artifacts?.length ?? 0,
      tokens: `${result.tokensIn}/${result.tokensOut}`,
    });
    const { error: insertErr } = await supabase.from("messages").insert({
      session_id: sessionId,
      role: "assistant",
      content: result.content,
      model: `openrouter:${result.model}`,
      tokens_in: result.tokensIn || null,
      tokens_out: result.tokensOut || null,
      cost_usd: 0,
      tool_calls: result.toolCalls.length > 0 ? result.toolCalls : null,
      citations: result.artifacts && result.artifacts.length > 0 ? result.artifacts : null,
    });
    if (insertErr) log("assistant insert FAILED", { error: insertErr.message });
    else log("assistant persisted");
  }).catch((e: unknown) => {
    log("STREAM ERROR", { error: e instanceof Error ? e.message : String(e) });
  });

  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      "Connection": "keep-alive",
      "X-Accel-Buffering": "no",
    },
  });
}