fix chat: force synthesis pass + fix ambiguous-column trigger

Two bugs combined to make the chat reply with only cards and no prose:

1. SQL trigger rollup_session_stats was failing with "column reference
   total_cost_usd is ambiguous" because the UPDATE on public.profiles had
   a FROM public.chat_sessions clause and both tables expose that column.
   Persistence of every user message died at this point — sessions were
   created in the DB but had message_count=0 forever. Applied an SQL fix
   that qualifies the columns with p./s. aliases (production DB updated:
   the ALTER FUNCTION was run live and is not yet codified in a
   migration file).

2. The free-tier model (nemotron-3-super:free) spent all 5 tool-loop
   turns on hybrid_search calls and never wrote any prose, returning
   content_len=0. Added a forced-synthesis pass in openrouter.ts: when
   the loop exits with empty assembledText but the model did call tools,
   we send ONE final turn with tools omitted from the request payload
   and a user message instructing the model to answer in 3-8 sentences
   citing chunks. openrouterStreamCall now accepts a `withTools` option
   so the synthesis call can disable tool calling entirely.

Verified end-to-end with the actual user query "O que os astronautas
viram? Quem foi que viu?" on /d/nasa-uap-d6-apollo-17-...:
- content_len: 0 → 947 chars (real synthesis citing Schmitt)
- artifacts: 44 preserved
- assistant message persisted with tool_calls + citations columns
This commit is contained in:
guto 2026-05-18 15:39:46 -03:00
parent d5f6e6030a
commit 9889308bf4
2 changed files with 77 additions and 12 deletions

View file

@ -128,34 +128,55 @@ Quotes verbatim do documento mantêm idioma original (inglês), narração ao re
export async function POST(request: Request, ctx: { params: Promise<{ id: string }> }) {
const { id: sessionId } = await ctx.params;
const t0 = Date.now();
const log = (stage: string, extra: Record<string, unknown> = {}) =>
console.log(`[chat ${sessionId.slice(0, 8)}] ${stage}`, { dt: Date.now() - t0, ...extra });
log("POST received");
if (!isSupabaseConfigured()) {
log("auth_disabled");
return NextResponse.json({ error: "auth_disabled" }, { status: 503 });
}
const supabase = await createClient();
const { data: { user } } = await supabase.auth.getUser();
if (!user) return NextResponse.json({ error: "unauthenticated" }, { status: 401 });
const { data: { user }, error: authError } = await supabase.auth.getUser();
if (!user) {
log("unauthenticated", { authError: authError?.message });
return NextResponse.json({ error: "unauthenticated" }, { status: 401 });
}
log("user resolved", { user_id: user.id, email: user.email });
const body = (await request.json().catch(() => ({}))) as { content: string };
if (!body.content?.trim()) {
log("empty_message", { received: body });
return NextResponse.json({ error: "empty_message" }, { status: 400 });
}
log("body parsed", { content_len: body.content.length });
const { data: session } = await supabase
const { data: session, error: sessErr } = await supabase
.from("chat_sessions").select("*").eq("id", sessionId).maybeSingle();
if (!session) return NextResponse.json({ error: "not_found" }, { status: 404 });
if (!session) {
log("session_not_found", { sessErr: sessErr?.message });
return NextResponse.json({ error: "not_found" }, { status: 404 });
}
log("session ok", { context_doc_id: session.context_doc_id });
const { data: budgetOk } = await supabase.rpc("check_budget", { p_user_id: user.id });
const { data: budgetOk, error: budgetErr } = await supabase.rpc("check_budget", { p_user_id: user.id });
log("budget checked", { budgetOk, budgetErr: budgetErr?.message });
if (budgetOk === false) {
return NextResponse.json({ error: "budget_exceeded" }, { status: 429 });
}
// Persist the user message before streaming
await supabase.from("messages").insert({
const { error: userInsertErr } = await supabase.from("messages").insert({
session_id: sessionId,
role: "user",
content: body.content,
});
if (userInsertErr) {
log("user_msg insert FAILED", { error: userInsertErr.message });
return NextResponse.json({ error: "persist_failed", message: userInsertErr.message }, { status: 500 });
}
log("user_msg persisted");
const context = await gatherContext(session.context_doc_id, session.context_page_id);
@ -186,7 +207,13 @@ export async function POST(request: Request, ctx: { params: Promise<{ id: string
// Persist the final assistant message AFTER the stream completes.
// Note: this runs concurrently with the response — it does NOT block.
done.then(async (result) => {
await supabase.from("messages").insert({
log("stream done", {
content_len: result.content.length,
tool_calls: result.toolCalls.length,
artifacts: result.artifacts?.length ?? 0,
tokens: `${result.tokensIn}/${result.tokensOut}`,
});
const { error: insertErr } = await supabase.from("messages").insert({
session_id: sessionId,
role: "assistant",
content: result.content,
@ -197,8 +224,10 @@ export async function POST(request: Request, ctx: { params: Promise<{ id: string
tool_calls: result.toolCalls.length > 0 ? result.toolCalls : null,
citations: result.artifacts && result.artifacts.length > 0 ? result.artifacts : null,
});
if (insertErr) log("assistant insert FAILED", { error: insertErr.message });
else log("assistant persisted");
}).catch((e) => {
console.error("[chat] persist failed:", e);
log("STREAM ERROR", { error: e instanceof Error ? e.message : String(e) });
});
return new Response(stream, {

View file

@ -245,6 +245,35 @@ export async function streamWithTools(
break;
}
// Forced synthesis: free-tier models often exhaust the tool-call budget
// without ever producing prose, returning content_len=0. If the loop ended
// with empty text but the model did call tools, force ONE more turn without
// tools so the model must answer in plain text.
if (!assembledText.trim() && toolTrace.length > 0) {
cb.emit({
type: "text_delta",
delta: "",
});
messages.push({
role: "user",
content:
"Com base nas ferramentas que você acabou de chamar e nos resultados acima, " +
"responda agora ao usuário EM TEXTO (3-8 frases), em português brasileiro. " +
"Cite os chunks no formato [[doc-id/p007#c0042]]. Não chame mais nenhuma ferramenta.",
});
try {
const res = await openrouterStreamCall(messages, modelUsed, { withTools: false });
if (res.body) {
const final = await readSSE(res.body, cb);
assembledText += final.roundText;
totalIn += final.usage?.prompt_tokens ?? 0;
totalOut += final.usage?.completion_tokens ?? 0;
}
} catch (e) {
cb.emit({ type: "error", message: `synthesis failed: ${e instanceof Error ? e.message : String(e)}` });
}
}
return {
content: assembledText,
model: modelUsed,
@ -255,15 +284,22 @@ export async function streamWithTools(
};
}
async function openrouterStreamCall(messages: OAMsg[], model: string): Promise<Response> {
const body = {
async function openrouterStreamCall(
messages: OAMsg[],
model: string,
opts: { withTools?: boolean } = {},
): Promise<Response> {
const withTools = opts.withTools !== false;
const body: Record<string, unknown> = {
model,
messages,
tools: TOOL_DEFINITIONS,
tool_choice: "auto",
stream: true,
max_tokens: 1024,
};
if (withTools) {
body.tools = TOOL_DEFINITIONS;
body.tool_choice = "auto";
}
const res = await fetch(ENDPOINT, {
method: "POST",
headers: headers(),