From 9889308bf4203fdbcc53835ae2912496fc2b264e Mon Sep 17 00:00:00 2001
From: guto
Date: Mon, 18 May 2026 15:39:46 -0300
Subject: [PATCH] fix chat: force synthesis pass + fix ambiguous-column trigger
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs combined to make the chat reply with only cards and no prose:

1. SQL trigger rollup_session_stats was failing with "column reference
   total_cost_usd is ambiguous" because the UPDATE on public.profiles had a
   FROM public.chat_sessions clause and both tables expose that column.
   Persistence of every user message died at this point — sessions were
   created in the DB but had message_count=0 forever. Applied a SQL fix that
   qualifies columns with p./s. aliases (production DB updated; ALTER
   FUNCTION run live, not yet codified in a migration file).

2. The free-tier model (nemotron-3-super:free) spent all 5 tool-loop turns
   on hybrid_search calls and never wrote any prose, returning
   content_len=0. Added a forced-synthesis pass in openrouter.ts: when the
   loop exits with empty assembledText but the model did call tools, we send
   ONE final turn with tools omitted from the request payload and a user
   message instructing the model to answer in 3-8 sentences citing chunks.
   openrouterStreamCall now accepts a `withTools` option so the synthesis
   call can disable tool calling entirely.

Verified end-to-end with the actual user query
"O que os astronautas viram? Quem foi que viu?"
on /d/nasa-uap-d6-apollo-17-...: - content_len: 0 → 947 chars (real synthesis citing Schmitt) - artifacts: 44 preserved - assistant message persisted with tool_calls + citations columns --- web/app/api/sessions/[id]/messages/route.ts | 45 +++++++++++++++++---- web/lib/chat/openrouter.ts | 44 ++++++++++++++++++-- 2 files changed, 77 insertions(+), 12 deletions(-) diff --git a/web/app/api/sessions/[id]/messages/route.ts b/web/app/api/sessions/[id]/messages/route.ts index 0a7c1c0..922dd95 100644 --- a/web/app/api/sessions/[id]/messages/route.ts +++ b/web/app/api/sessions/[id]/messages/route.ts @@ -128,34 +128,55 @@ Quotes verbatim do documento mantêm idioma original (inglês), narração ao re export async function POST(request: Request, ctx: { params: Promise<{ id: string }> }) { const { id: sessionId } = await ctx.params; + const t0 = Date.now(); + const log = (stage: string, extra: Record = {}) => + console.log(`[chat ${sessionId.slice(0, 8)}] ${stage}`, { dt: Date.now() - t0, ...extra }); + log("POST received"); if (!isSupabaseConfigured()) { + log("auth_disabled"); return NextResponse.json({ error: "auth_disabled" }, { status: 503 }); } const supabase = await createClient(); - const { data: { user } } = await supabase.auth.getUser(); - if (!user) return NextResponse.json({ error: "unauthenticated" }, { status: 401 }); + const { data: { user }, error: authError } = await supabase.auth.getUser(); + if (!user) { + log("unauthenticated", { authError: authError?.message }); + return NextResponse.json({ error: "unauthenticated" }, { status: 401 }); + } + log("user resolved", { user_id: user.id, email: user.email }); const body = (await request.json().catch(() => ({}))) as { content: string }; if (!body.content?.trim()) { + log("empty_message", { received: body }); return NextResponse.json({ error: "empty_message" }, { status: 400 }); } + log("body parsed", { content_len: body.content.length }); - const { data: session } = await supabase + const { data: session, error: 
sessErr } = await supabase .from("chat_sessions").select("*").eq("id", sessionId).maybeSingle(); - if (!session) return NextResponse.json({ error: "not_found" }, { status: 404 }); + if (!session) { + log("session_not_found", { sessErr: sessErr?.message }); + return NextResponse.json({ error: "not_found" }, { status: 404 }); + } + log("session ok", { context_doc_id: session.context_doc_id }); - const { data: budgetOk } = await supabase.rpc("check_budget", { p_user_id: user.id }); + const { data: budgetOk, error: budgetErr } = await supabase.rpc("check_budget", { p_user_id: user.id }); + log("budget checked", { budgetOk, budgetErr: budgetErr?.message }); if (budgetOk === false) { return NextResponse.json({ error: "budget_exceeded" }, { status: 429 }); } // Persist the user message before streaming - await supabase.from("messages").insert({ + const { error: userInsertErr } = await supabase.from("messages").insert({ session_id: sessionId, role: "user", content: body.content, }); + if (userInsertErr) { + log("user_msg insert FAILED", { error: userInsertErr.message }); + return NextResponse.json({ error: "persist_failed", message: userInsertErr.message }, { status: 500 }); + } + log("user_msg persisted"); const context = await gatherContext(session.context_doc_id, session.context_page_id); @@ -186,7 +207,13 @@ export async function POST(request: Request, ctx: { params: Promise<{ id: string // Persist the final assistant message AFTER the stream completes. // Note: this runs concurrently with the response — it does NOT block. done.then(async (result) => { - await supabase.from("messages").insert({ + log("stream done", { + content_len: result.content.length, + tool_calls: result.toolCalls.length, + artifacts: result.artifacts?.length ?? 
0, + tokens: `${result.tokensIn}/${result.tokensOut}`, + }); + const { error: insertErr } = await supabase.from("messages").insert({ session_id: sessionId, role: "assistant", content: result.content, @@ -197,8 +224,10 @@ export async function POST(request: Request, ctx: { params: Promise<{ id: string tool_calls: result.toolCalls.length > 0 ? result.toolCalls : null, citations: result.artifacts && result.artifacts.length > 0 ? result.artifacts : null, }); + if (insertErr) log("assistant insert FAILED", { error: insertErr.message }); + else log("assistant persisted"); }).catch((e) => { - console.error("[chat] persist failed:", e); + log("STREAM ERROR", { error: e instanceof Error ? e.message : String(e) }); }); return new Response(stream, { diff --git a/web/lib/chat/openrouter.ts b/web/lib/chat/openrouter.ts index 0e89297..71824e2 100644 --- a/web/lib/chat/openrouter.ts +++ b/web/lib/chat/openrouter.ts @@ -245,6 +245,35 @@ export async function streamWithTools( break; } + // Forced synthesis: free-tier models often exhaust the tool-call budget + // without ever producing prose, returning content_len=0. If the loop ended + // with empty text but the model did call tools, force ONE more turn without + // tools so the model must answer in plain text. + if (!assembledText.trim() && toolTrace.length > 0) { + cb.emit({ + type: "text_delta", + delta: "", + }); + messages.push({ + role: "user", + content: + "Com base nas ferramentas que você acabou de chamar e nos resultados acima, " + + "responda agora ao usuário EM TEXTO (3-8 frases), em português brasileiro. " + + "Cite os chunks no formato [[doc-id/p007#c0042]]. Não chame mais nenhuma ferramenta.", + }); + try { + const res = await openrouterStreamCall(messages, modelUsed, { withTools: false }); + if (res.body) { + const final = await readSSE(res.body, cb); + assembledText += final.roundText; + totalIn += final.usage?.prompt_tokens ?? 0; + totalOut += final.usage?.completion_tokens ?? 
0; + } + } catch (e) { + cb.emit({ type: "error", message: `synthesis failed: ${e instanceof Error ? e.message : String(e)}` }); + } + } + return { content: assembledText, model: modelUsed, @@ -255,15 +284,22 @@ export async function streamWithTools( }; } -async function openrouterStreamCall(messages: OAMsg[], model: string): Promise { - const body = { +async function openrouterStreamCall( + messages: OAMsg[], + model: string, + opts: { withTools?: boolean } = {}, +): Promise { + const withTools = opts.withTools !== false; + const body: Record = { model, messages, - tools: TOOL_DEFINITIONS, - tool_choice: "auto", stream: true, max_tokens: 1024, }; + if (withTools) { + body.tools = TOOL_DEFINITIONS; + body.tool_choice = "auto"; + } const res = await fetch(ENDPOINT, { method: "POST", headers: headers(),