disclosure-bureau/web/app/api/admin/batch/route.ts

142 lines
4.7 KiB
TypeScript
Raw Normal View History

/**
* GET /api/admin/batch — live batch-rebuild progress, read from raw/_batch-rebuild/progress.jsonl.
*
* No auth (read-only public status, no secrets). Returns aggregated stats plus
* the 20 most recent per-document entries.
*/
import fs from "node:fs/promises";
import path from "node:path";
import { UFO_ROOT } from "@/lib/wiki";
// Route-level config exports. NOTE(review): presumably consumed by the Next.js
// App Router as route segment config — confirm against the framework version in use.
// "nodejs": the handler below relies on node:fs and node:path.
export const runtime = "nodejs";
// "force-dynamic": the handler re-reads the progress log on every request,
// so the response should not be statically cached.
export const dynamic = "force-dynamic";
/**
* One parsed line of progress.jsonl. Only `doc_id` is required; every other
* field is optional and is read defensively by GET.
*
* NOTE(review): field meanings are described from how GET uses them; the
* writer of the log is not visible here — confirm against the batch script.
*/
interface ProgressRow {
/** Document identifier; GET keeps one row per distinct value. */
doc_id: string;
/** Summed over successful rows into `total_pages_processed`. */
page_count?: number;
/** Timestamp string parsed with `Date.parse` for the throughput window. */
started_at?: string;
/** Timestamp string parsed with `Date.parse` (throughput window, quota detection). */
finished_at?: string;
/** Summed over successful rows to compute `avg_seconds_per_doc`. */
wall_seconds?: number;
/** Reported as-is in `failed_docs`; presumably the worker's exit code. */
returncode?: number;
/** Reported as-is in `failed_docs`. */
timed_out?: boolean;
/** Truthy rows count as successes; falsy or missing counts as a failure. */
success?: boolean;
/** Summed over successful rows into `total_chunks`. */
chunks_count?: number;
/** Summed over successful rows into `total_images`. */
images_count?: number;
/** Summed over successful rows; `null`/missing is treated as 0. */
total_cost_usd?: number | null;
/** When truthy, the row's `finished_at` is used for quota-throttle detection. */
quota_error?: boolean;
/** Not read by GET. */
result_excerpt?: string;
}
/**
 * Build a JSON HTTP response.
 *
 * @param data   Value to serialize with `JSON.stringify` as the response body.
 * @param status HTTP status code; defaults to 200.
 * @returns A `Response` with `content-type: application/json` and
 *          `cache-control: no-cache` headers.
 */
function json(data: unknown, status = 200) {
  const headers = {
    "content-type": "application/json",
    "cache-control": "no-cache",
  };
  const body = JSON.stringify(data);
  return new Response(body, { status, headers });
}
/** Size of the rebuild queue; used for `queue_total`, `progress_pct` and `eta_minutes`. */
const QUEUE_TOTAL = 115;

/** Anthropic Max 20x: 5h rolling window after a quota error. */
const QUOTA_WINDOW_MS = 5 * 60 * 60 * 1000;

/** Runtime guard: a parsed JSONL value is usable only if it is an object with a string `doc_id`. */
function isProgressRow(value: unknown): value is ProgressRow {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as { doc_id?: unknown }).doc_id === "string"
  );
}

/**
 * GET handler: reads `raw/_batch-rebuild/progress.jsonl` under `UFO_ROOT` and
 * returns aggregated rebuild progress as JSON.
 *
 * - If the log cannot be read, responds `{ status: "no_log", docs: [], stats: null }`.
 * - Lines that are empty, not valid JSON, or not an object with a string
 *   `doc_id` are skipped.
 * - Rows are deduplicated by `doc_id`, keeping the last entry per document and
 *   ordering documents by their latest attempt.
 *
 * @returns A JSON `Response` with counts, quota state, aggregate stats, the 20
 *          most recently attempted documents, and the list of failed documents.
 */
export async function GET() {
  const logPath = path.join(UFO_ROOT, "raw", "_batch-rebuild", "progress.jsonl");
  let raw: string;
  try {
    raw = await fs.readFile(logPath, "utf-8");
  } catch {
    // Best-effort status endpoint: any read failure is reported as "no log yet".
    return json({ status: "no_log", docs: [], stats: null });
  }

  const allRows: ProgressRow[] = [];
  for (const line of raw.split("\n")) {
    if (!line) continue;
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue; // tolerate partially written / corrupt lines
    }
    if (isProgressRow(parsed)) allRows.push(parsed);
  }

  // Dedupe by doc_id, keeping the LAST entry per doc (retries supersede earlier attempts).
  // Map.set on an existing key keeps the key's ORIGINAL insertion slot, so delete
  // first: a retried doc must move to the end, otherwise "recent" slices below
  // are ordered by first attempt instead of latest attempt.
  const byDoc = new Map<string, ProgressRow>();
  for (const r of allRows) {
    byDoc.delete(r.doc_id);
    byDoc.set(r.doc_id, r);
  }
  const rows = Array.from(byDoc.values());

  // Detect quota-block state from ALL rows (not dedup-filtered, to catch latest)
  const quotaRows = allRows.filter((r) => r.quota_error);
  const latestQuota = quotaRows[quotaRows.length - 1]?.finished_at ?? null;
  let quota_state: "ok" | "throttled" = "ok";
  let quota_resume_eta_minutes: number | null = null;
  if (latestQuota) {
    // An unparseable timestamp yields NaN, which fails the `> 0` test and leaves state "ok".
    const remainingMs = Date.parse(latestQuota) + QUOTA_WINDOW_MS - Date.now();
    if (remainingMs > 0) {
      quota_state = "throttled";
      quota_resume_eta_minutes = Math.ceil(remainingMs / 60_000);
    }
  }

  const successes = rows.filter((r) => r.success);
  const failures = rows.filter((r) => !r.success);
  const totalCost = successes.reduce((s, r) => s + (r.total_cost_usd ?? 0), 0);
  const totalChunks = successes.reduce((s, r) => s + (r.chunks_count ?? 0), 0);
  const totalImages = successes.reduce((s, r) => s + (r.images_count ?? 0), 0);
  const totalPages = successes.reduce((s, r) => s + (r.page_count ?? 0), 0);
  const wallSum = successes.reduce((s, r) => s + (r.wall_seconds ?? 0), 0);

  // Compute throughput (docs/hour) over last 10 successes
  const recent = successes.slice(-10);
  const windowStart = recent[0]?.started_at;
  const windowEnd = recent[recent.length - 1]?.finished_at;
  let throughput_docs_per_hour: number | null = null;
  if (recent.length >= 2 && windowStart && windowEnd) {
    // Floor the window at 0.001h so a zero/negative span cannot divide by zero.
    const elapsedH = Math.max(
      0.001,
      (Date.parse(windowEnd) - Date.parse(windowStart)) / 3_600_000,
    );
    const rate = recent.length / elapsedH;
    // Unparseable timestamps produce NaN; report that as "unknown" (null).
    if (Number.isFinite(rate)) throughput_docs_per_hour = rate;
  }

  // The log may contain more distinct docs than the queue constant; clamp so
  // progress never exceeds 100% and the ETA never goes negative.
  const remainingDocs = Math.max(0, QUEUE_TOTAL - rows.length);
  const progressPct = Math.min(100, Math.round((rows.length / QUEUE_TOTAL) * 100));

  return json({
    status: "ok",
    queue_total: QUEUE_TOTAL,
    completed: rows.length,
    successes: successes.length,
    failures: failures.length,
    progress_pct: progressPct,
    quota_state,
    quota_resume_eta_minutes,
    latest_quota_at: latestQuota,
    stats: {
      total_cost_usd: Number(totalCost.toFixed(2)),
      total_chunks: totalChunks,
      total_images: totalImages,
      total_pages_processed: totalPages,
      avg_seconds_per_doc: successes.length ? Math.round(wallSum / successes.length) : null,
      avg_chunks_per_doc: successes.length ? Math.round(totalChunks / successes.length) : null,
      throughput_docs_per_hour:
        throughput_docs_per_hour !== null
          ? Number(throughput_docs_per_hour.toFixed(2))
          : null,
      eta_minutes:
        throughput_docs_per_hour !== null && throughput_docs_per_hour > 0
          ? Math.round((remainingDocs / throughput_docs_per_hour) * 60)
          : null,
    },
    // Newest attempt first.
    recent_docs: rows
      .slice(-20)
      .reverse()
      .map((r) => ({
        doc_id: r.doc_id,
        pages: r.page_count,
        chunks: r.chunks_count,
        cost_usd: r.total_cost_usd,
        wall_s: r.wall_seconds,
        success: r.success,
        finished_at: r.finished_at,
      })),
    failed_docs: failures.map((r) => ({
      doc_id: r.doc_id,
      timed_out: r.timed_out,
      returncode: r.returncode,
    })),
  });
}