/**
 * /api/admin/batch — Live batch rebuild progress from raw/_batch-rebuild/progress.jsonl.
 *
 * No auth (read-only public status, no secrets). Returns aggregated stats, the
 * last 20 documents in the log, and the list of failed documents.
 */
|
|
import fs from "node:fs/promises";
|
|
import path from "node:path";
|
|
import { UFO_ROOT } from "@/lib/wiki";
|
|
|
|
// Route-level configuration exports.
// NOTE(review): these names look like Next.js route segment config — confirm
// against the framework version in use.

// This module reads the progress log through `node:fs`, so it needs the
// Node.js runtime.
export const runtime = "nodejs";

// Presumably opts the route out of static caching so each request re-reads the
// log — TODO confirm.
export const dynamic = "force-dynamic";
|
|
|
|
/**
 * One parsed line of the batch-rebuild progress log.
 *
 * Only `doc_id` is required; every other field is optional and is treated as
 * absent (or zero, for summed numbers) when missing.
 */
interface ProgressRow {
  // --- Identity -----------------------------------------------------------

  /** Document identifier. Rows sharing a `doc_id` are treated as attempts at the same document. */
  doc_id: string;

  // --- Timing -------------------------------------------------------------

  /** Timestamp string for when the attempt began; parsed with `Date.parse` for throughput. */
  started_at?: string;
  /** Timestamp string for when the attempt ended; parsed with `Date.parse` for throughput and quota timing. */
  finished_at?: string;
  /** Duration of the attempt in seconds; averaged over successful docs. */
  wall_seconds?: number;

  // --- Outcome ------------------------------------------------------------

  /** Truthy when the attempt succeeded; any other value counts as a failure. */
  success?: boolean;
  /** Reported for failed docs. Presumably the worker's exit code — TODO confirm. */
  returncode?: number;
  /** Reported for failed docs. Presumably set when the worker hit a time limit — TODO confirm. */
  timed_out?: boolean;
  /** Truthy when the attempt hit a quota error; drives the throttle detection. */
  quota_error?: boolean;
  /** Not read by the handler in this file. */
  result_excerpt?: string;

  // --- Output size and cost -----------------------------------------------

  /** Summed over successful docs into the processed-pages total. */
  page_count?: number;
  /** Summed over successful docs into the chunk total. */
  chunks_count?: number;
  /** Summed over successful docs into the image total. */
  images_count?: number;
  /** Cost in USD; `null` or missing contributes zero to the total. */
  total_cost_usd?: number | null;
}
|
|
|
|
/**
 * Build a JSON HTTP response.
 *
 * @param data - Value to serialize with `JSON.stringify` as the response body.
 * @param status - HTTP status code; defaults to 200.
 * @returns A `Response` with `content-type: application/json` and
 *   `cache-control: no-cache` headers.
 */
function json(data: unknown, status = 200) {
  const body = JSON.stringify(data);
  const headers = {
    "content-type": "application/json",
    "cache-control": "no-cache",
  };
  return new Response(body, { status, headers });
}
|
|
|
|
/** Queue size reported as `queue_total` and used for progress and ETA. */
const QUEUE_TOTAL = 115;

/** Rolling window applied after a quota error before work is assumed to resume (5 hours). */
const QUOTA_WINDOW_MS = 5 * 60 * 60 * 1000;

/** How many of the most recent documents are returned in `recent_docs`. */
const RECENT_DOCS_LIMIT = 20;

/** How many of the most recent successes feed the throughput estimate. */
const THROUGHPUT_SAMPLE_SIZE = 10;

/** Numeric `ProgressRow` fields that are aggregated into totals. */
type NumericField =
  | "page_count"
  | "wall_seconds"
  | "chunks_count"
  | "images_count"
  | "total_cost_usd";

/** Quota/throttle status derived from the log. */
interface QuotaStatus {
  state: "ok" | "throttled";
  resumeEtaMinutes: number | null;
  latestQuotaAt: string | null;
}

/** True when a parsed JSON value is an object carrying a string `doc_id`. */
function isProgressRow(value: unknown): value is ProgressRow {
  return (
    typeof value === "object" &&
    value !== null &&
    typeof (value as { doc_id?: unknown }).doc_id === "string"
  );
}

/** True when `err` is a Node filesystem error with code `ENOENT`. */
function isMissingFileError(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    (err as { code?: unknown }).code === "ENOENT"
  );
}

/** Parse JSONL text into rows, skipping blank, unparseable, or id-less lines. */
function parseProgressLog(raw: string): ProgressRow[] {
  const rows: ProgressRow[] = [];
  for (const line of raw.split("\n")) {
    if (!line.trim()) continue;
    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      // A line may be corrupt or only partially written; skip it.
      continue;
    }
    if (isProgressRow(parsed)) rows.push(parsed);
  }
  return rows;
}

/**
 * Keep only the last row per `doc_id`, ordered by where that last row appears
 * in the log.
 */
function latestRowPerDoc(rows: readonly ProgressRow[]): ProgressRow[] {
  const byDoc = new Map<string, ProgressRow>();
  for (const row of rows) {
    // `Map.set` on an existing key keeps the key's original position, so delete
    // first: a retried doc must move to the position of its latest attempt.
    byDoc.delete(row.doc_id);
    byDoc.set(row.doc_id, row);
  }
  return Array.from(byDoc.values());
}

/** Sum a numeric field across rows, ignoring missing, null, or non-finite values. */
function sumField(rows: readonly ProgressRow[], field: NumericField): number {
  let total = 0;
  for (const row of rows) {
    const value = row[field];
    if (typeof value === "number" && Number.isFinite(value)) total += value;
  }
  return total;
}

/** Derive throttle state from the most recent quota-error row in the full log. */
function detectQuotaStatus(rows: readonly ProgressRow[], nowMs: number): QuotaStatus {
  const quotaRows = rows.filter((row) => row.quota_error);
  const latestQuotaAt = quotaRows[quotaRows.length - 1]?.finished_at ?? null;
  const status: QuotaStatus = { state: "ok", resumeEtaMinutes: null, latestQuotaAt };
  if (!latestQuotaAt) return status;

  const lastMs = Date.parse(latestQuotaAt);
  // `Date.parse` yields NaN for an unparseable timestamp; treat that as "ok".
  if (!Number.isFinite(lastMs)) return status;

  const remainingMs = lastMs + QUOTA_WINDOW_MS - nowMs;
  if (remainingMs > 0) {
    status.state = "throttled";
    status.resumeEtaMinutes = Math.ceil(remainingMs / 60_000);
  }
  return status;
}

/** Estimate docs/hour over the most recent successes, or null when it cannot be computed. */
function estimateThroughput(successes: readonly ProgressRow[]): number | null {
  const sample = successes.slice(-THROUGHPUT_SAMPLE_SIZE);
  if (sample.length < 2) return null;

  const startedAt = sample[0]?.started_at;
  const finishedAt = sample[sample.length - 1]?.finished_at;
  if (!startedAt || !finishedAt) return null;

  const startMs = Date.parse(startedAt);
  const endMs = Date.parse(finishedAt);
  if (!Number.isFinite(startMs) || !Number.isFinite(endMs)) return null;

  // Clamp to a small positive window so a zero or negative span cannot divide by zero.
  const elapsedHours = Math.max(0.001, (endMs - startMs) / 3_600_000);
  return sample.length / elapsedHours;
}

/**
 * GET handler: report batch rebuild progress from the JSONL progress log.
 *
 * Reads `raw/_batch-rebuild/progress.jsonl` under `UFO_ROOT`, keeps the latest
 * row per document, and returns counts, quota/throttle state, aggregate stats
 * over successful documents, the most recent documents, and the failed ones.
 *
 * @returns A JSON response. When the log cannot be read the body is
 *   `{ status: "no_log", docs: [], stats: null }`; otherwise `status` is "ok".
 */
export async function GET(): Promise<Response> {
  const logPath = path.join(UFO_ROOT, "raw", "_batch-rebuild", "progress.jsonl");

  let raw: string;
  try {
    raw = await fs.readFile(logPath, "utf-8");
  } catch (err: unknown) {
    // A missing log is the normal "nothing started yet" case. Anything else
    // (permissions, I/O) is still reported as no_log, but is logged.
    if (!isMissingFileError(err)) {
      console.warn(`batch status: could not read ${logPath}`, err);
    }
    return json({ status: "no_log", docs: [], stats: null });
  }

  const allRows = parseProgressLog(raw);
  // Retries supersede earlier attempts for the same document.
  const rows = latestRowPerDoc(allRows);
  // Quota detection looks at every row, not just the latest per document.
  const quota = detectQuotaStatus(allRows, Date.now());

  const successes = rows.filter((row) => row.success);
  const failures = rows.filter((row) => !row.success);

  const totalCost = sumField(successes, "total_cost_usd");
  const totalChunks = sumField(successes, "chunks_count");
  const totalImages = sumField(successes, "images_count");
  const totalPages = sumField(successes, "page_count");
  const wallSum = sumField(successes, "wall_seconds");

  const throughput = estimateThroughput(successes);
  // Clamp so a log with more documents than the queue size cannot yield
  // progress above 100% or a negative ETA.
  const remainingDocs = Math.max(0, QUEUE_TOTAL - rows.length);
  const progressPct = Math.min(100, Math.round((rows.length / QUEUE_TOTAL) * 100));

  return json({
    status: "ok",
    queue_total: QUEUE_TOTAL,
    completed: rows.length,
    successes: successes.length,
    failures: failures.length,
    progress_pct: progressPct,
    quota_state: quota.state,
    quota_resume_eta_minutes: quota.resumeEtaMinutes,
    latest_quota_at: quota.latestQuotaAt,
    stats: {
      total_cost_usd: Number(totalCost.toFixed(2)),
      total_chunks: totalChunks,
      total_images: totalImages,
      total_pages_processed: totalPages,
      avg_seconds_per_doc: successes.length ? Math.round(wallSum / successes.length) : null,
      avg_chunks_per_doc: successes.length ? Math.round(totalChunks / successes.length) : null,
      throughput_docs_per_hour: throughput !== null ? Number(throughput.toFixed(2)) : null,
      eta_minutes:
        throughput !== null && throughput > 0
          ? Math.round((remainingDocs / throughput) * 60)
          : null,
    },
    recent_docs: rows
      .slice(-RECENT_DOCS_LIMIT)
      .reverse()
      .map((row) => ({
        doc_id: row.doc_id,
        pages: row.page_count,
        chunks: row.chunks_count,
        cost_usd: row.total_cost_usd,
        wall_s: row.wall_seconds,
        success: row.success,
        finished_at: row.finished_at,
      })),
    failed_docs: failures.map((row) => ({
      doc_id: row.doc_id,
      timed_out: row.timed_out,
      returncode: row.returncode,
    })),
  });
}
|