/**
 * /api/admin/batch — Live batch rebuild progress from raw/_batch-rebuild/progress.jsonl.
 *
 * No auth (read-only public status, no secrets). Returns aggregated stats +
 * the last N events.
 */
import fs from "node:fs/promises";
import path from "node:path";
import { UFO_ROOT } from "@/lib/wiki";

export const runtime = "nodejs";
export const dynamic = "force-dynamic";

/** Queue size that `progress_pct` and `eta_minutes` are measured against. */
const QUEUE_TOTAL = 115;

/** Anthropic Max 20x: 5h rolling window. */
const QUOTA_WINDOW_MS = 5 * 60 * 60 * 1000;

/** How many of the most recent successes feed the throughput estimate. */
const THROUGHPUT_SAMPLE = 10;

/** How many of the most recent docs are echoed back in `recent_docs`. */
const RECENT_DOCS_LIMIT = 20;

/** One line of progress.jsonl. Only `doc_id` is required; everything else is optional. */
interface ProgressRow {
  doc_id: string;
  page_count?: number;
  started_at?: string;
  finished_at?: string;
  wall_seconds?: number;
  returncode?: number;
  timed_out?: boolean;
  success?: boolean;
  chunks_count?: number;
  images_count?: number;
  total_cost_usd?: number | null;
  quota_error?: boolean;
  result_excerpt?: string;
}

/** Quota-throttle summary derived from the most recent quota-error row. */
interface QuotaStatus {
  state: "ok" | "throttled";
  resumeEtaMinutes: number | null;
  latestQuotaAt: string | null;
}

/** Serializes `data` as a JSON response with caching disabled. */
function json(data: unknown, status = 200): Response {
  return new Response(JSON.stringify(data), {
    status,
    headers: { "content-type": "application/json", "cache-control": "no-cache" },
  });
}

/** Structural guard: a parsed line counts as a row only if it is an object with a string `doc_id`. */
function isProgressRow(value: unknown): value is ProgressRow {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as { doc_id?: unknown }).doc_id === "string"
  );
}

/**
 * Parses JSONL text into progress rows, in file order.
 * Blank lines, lines that are not valid JSON, and values that are not
 * row-shaped are skipped rather than failing the whole request.
 */
function parseProgressLog(raw: string): ProgressRow[] {
  const rows: ProgressRow[] = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    try {
      const parsed: unknown = JSON.parse(line);
      if (isProgressRow(parsed)) rows.push(parsed);
    } catch {
      // Unparseable line: ignore it and keep going.
    }
  }
  return rows;
}

/**
 * Dedupes by doc_id, keeping the LAST entry per doc (retries supersede earlier attempts).
 * The key is deleted before being re-set because `Map.set` on an existing key
 * keeps its original insertion position; without the delete a retried doc
 * would stay ordered by its first attempt.
 */
function latestPerDoc(rows: readonly ProgressRow[]): ProgressRow[] {
  const byDoc = new Map<string, ProgressRow>();
  for (const row of rows) {
    byDoc.delete(row.doc_id);
    byDoc.set(row.doc_id, row);
  }
  return Array.from(byDoc.values());
}

/**
 * Detects quota-block state from ALL rows (not dedup-filtered, to catch the latest).
 * Throttled while the last quota error's `finished_at` plus the quota window
 * is still in the future; a missing or unparseable timestamp reports "ok".
 */
function quotaStatus(allRows: readonly ProgressRow[], nowMs: number): QuotaStatus {
  const quotaRows = allRows.filter((r) => r.quota_error);
  const latestQuotaAt = quotaRows[quotaRows.length - 1]?.finished_at ?? null;
  const status: QuotaStatus = { state: "ok", resumeEtaMinutes: null, latestQuotaAt };
  if (latestQuotaAt) {
    // NaN from an unparseable timestamp fails the `> 0` check and leaves state "ok".
    const remainingMs = Date.parse(latestQuotaAt) + QUOTA_WINDOW_MS - nowMs;
    if (remainingMs > 0) {
      status.state = "throttled";
      status.resumeEtaMinutes = Math.ceil(remainingMs / 60_000);
    }
  }
  return status;
}

/**
 * Throughput (docs/hour) over the last `THROUGHPUT_SAMPLE` successes, measured
 * from the first sample's `started_at` to the last sample's `finished_at`.
 * Returns null with fewer than two samples or missing/unparseable timestamps.
 */
function docsPerHour(successes: readonly ProgressRow[]): number | null {
  const recent = successes.slice(-THROUGHPUT_SAMPLE);
  const startedAt = recent[0]?.started_at;
  const finishedAt = recent[recent.length - 1]?.finished_at;
  if (recent.length < 2 || !startedAt || !finishedAt) return null;
  const elapsedMs = Date.parse(finishedAt) - Date.parse(startedAt);
  if (!Number.isFinite(elapsedMs)) return null;
  // Floor the window so a zero or negative span cannot divide by zero.
  const elapsedH = Math.max(0.001, elapsedMs / 3_600_000);
  return recent.length / elapsedH;
}

/**
 * GET handler: reads the progress log under UFO_ROOT and returns aggregate
 * stats, quota state, the most recent docs (newest first) and the failed docs.
 * Any failure to read the log yields `{ status: "no_log" }` with HTTP 200.
 */
export async function GET(): Promise<Response> {
  const logPath = path.join(UFO_ROOT, "raw", "_batch-rebuild", "progress.jsonl");
  let raw: string;
  try {
    raw = await fs.readFile(logPath, "utf-8");
  } catch {
    return json({ status: "no_log", docs: [], stats: null });
  }

  const allRows = parseProgressLog(raw);
  const rows = latestPerDoc(allRows);
  const quota = quotaStatus(allRows, Date.now());

  const successes = rows.filter((r) => r.success);
  const failures = rows.filter((r) => !r.success);

  // Sums one optional numeric field across successful docs, treating absent values as 0.
  const sumOf = (pick: (r: ProgressRow) => number | null | undefined): number =>
    successes.reduce((acc, r) => acc + (pick(r) ?? 0), 0);

  const totalCost = sumOf((r) => r.total_cost_usd);
  const totalChunks = sumOf((r) => r.chunks_count);
  const totalImages = sumOf((r) => r.images_count);
  const totalPages = sumOf((r) => r.page_count);
  const wallSum = sumOf((r) => r.wall_seconds);

  const throughput = docsPerHour(successes);
  const hasThroughput = throughput !== null && throughput > 0;
  // Clamp so a log with more docs than the queue cannot give a negative ETA.
  const remainingDocs = Math.max(0, QUEUE_TOTAL - rows.length);

  return json({
    status: "ok",
    queue_total: QUEUE_TOTAL,
    completed: rows.length,
    successes: successes.length,
    failures: failures.length,
    progress_pct: Math.min(100, Math.round((rows.length / QUEUE_TOTAL) * 100)),
    quota_state: quota.state,
    quota_resume_eta_minutes: quota.resumeEtaMinutes,
    latest_quota_at: quota.latestQuotaAt,
    stats: {
      total_cost_usd: Number(totalCost.toFixed(2)),
      total_chunks: totalChunks,
      total_images: totalImages,
      total_pages_processed: totalPages,
      avg_seconds_per_doc: successes.length ? Math.round(wallSum / successes.length) : null,
      avg_chunks_per_doc: successes.length ? Math.round(totalChunks / successes.length) : null,
      throughput_docs_per_hour: hasThroughput ? Number(throughput.toFixed(2)) : null,
      eta_minutes: hasThroughput ? Math.round((remainingDocs / throughput) * 60) : null,
    },
    recent_docs: rows
      .slice(-RECENT_DOCS_LIMIT)
      .reverse()
      .map((r) => ({
        doc_id: r.doc_id,
        pages: r.page_count,
        chunks: r.chunks_count,
        cost_usd: r.total_cost_usd,
        wall_s: r.wall_seconds,
        success: r.success,
        finished_at: r.finished_at,
      })),
    failed_docs: failures.map((r) => ({
      doc_id: r.doc_id,
      timed_out: r.timed_out,
      returncode: r.returncode,
    })),
  });
}