/**
 * /api/admin/indexer — Status of the Postgres indexing layer.
 *
 * Compares what is on disk with what has been indexed into Postgres:
 *   - docs on disk     → entries of <UFO_ROOT>/raw whose name ends in "--subagent"
 *   - docs in DB       → COUNT(*) FROM public.documents
 *   - chunks on disk   → files named c*.md under raw/<doc>/chunks, summed over docs
 *   - chunks in DB     → COUNT(*) FROM public.chunks
 *   - chunks embedded  → COUNT(*) FROM public.chunks WHERE embedding IS NOT NULL
 *   - entities         → COUNT(*) FROM public.entities
 *   - entity_mentions  → COUNT(*) FROM public.entity_mentions
 *
 * NOTE(review): an earlier version of this header said a doc counts as "on
 * disk" when its `_index.json` exists. The code only matches on the directory
 * name suffix and never looks for `_index.json` — confirm which is intended.
 */
import fs from "node:fs/promises";
import path from "node:path";
import { UFO_ROOT } from "@/lib/wiki";
import { pgQuery } from "@/lib/retrieval/db";

// Presumably Next.js route segment config (Node runtime, no static caching) —
// confirm against the framework version in use.
export const runtime = "nodejs";
export const dynamic = "force-dynamic";

/** One row of aggregate counts, as produced by INDEX_COUNTS_SQL. */
interface DbStat {
  documents_count: number;
  chunks_count: number;
  chunks_with_embedding: number;
  entities_count: number;
  entity_mentions_count: number;
}

/** Counts gathered from the filesystem by diskStats(). */
interface DiskStat {
  docs_on_disk: number;
  chunks_on_disk: number;
}

/** Outcome of the DB probe: `db` on success, otherwise `dbError` holds the reason. */
interface DbProbe {
  db: DbStat | null;
  dbError: string | null;
}

// Single round-trip that gathers every count. The statement text (including its
// embedded line break) is unchanged from the previous inline literal.
const INDEX_COUNTS_SQL = `SELECT (SELECT COUNT(*) FROM public.documents)::INT AS documents_count, (SELECT COUNT(*) FROM public.chunks)::INT AS chunks_count, (SELECT COUNT(*) FROM public.chunks WHERE embedding IS NOT NULL)::INT AS chunks_with_embedding, (SELECT COUNT(*) FROM public.entities)::INT AS entities_count, (SELECT COUNT(*) FROM 
public.entity_mentions)::INT AS entity_mentions_count`;

/**
 * Serialises `data` as a JSON response.
 *
 * @param data   Value passed to JSON.stringify.
 * @param status HTTP status code (defaults to 200).
 * @returns A Response with `content-type: application/json` and `cache-control: no-cache`.
 */
function json(data: unknown, status = 200): Response {
  return new Response(JSON.stringify(data), {
    status,
    headers: { "content-type": "application/json", "cache-control": "no-cache" },
  });
}

/**
 * Counts the chunk files (`c*.md`) directly inside `chunksDir`.
 * A directory that cannot be read (e.g. it does not exist) counts as zero.
 */
async function countChunkFiles(chunksDir: string): Promise<number> {
  try {
    const files = await fs.readdir(chunksDir);
    return files.filter((f) => f.startsWith("c") && f.endsWith(".md")).length;
  } catch {
    // Best effort: a doc without a readable chunks/ directory contributes nothing.
    return 0;
  }
}

/**
 * Scans `<UFO_ROOT>/raw` and reports how many documents and chunk files exist.
 *
 * @returns `docs_on_disk` — number of entries whose name ends in "--subagent";
 *          `chunks_on_disk` — total `c*.md` files across those entries' `chunks/` dirs.
 *          Both are 0 when the raw directory cannot be read.
 */
async function diskStats(): Promise<DiskStat> {
  const rawRoot = path.join(UFO_ROOT, "raw");

  let docsOnDisk: string[] = [];
  try {
    const entries = await fs.readdir(rawRoot);
    docsOnDisk = entries.filter((e) => e.endsWith("--subagent"));
  } catch {
    /* missing dir — report zero docs */
  }

  // The per-document listings are independent of each other, so issue them
  // together instead of awaiting one directory at a time.
  const perDoc = await Promise.all(
    docsOnDisk.map((d) => countChunkFiles(path.join(rawRoot, d, "chunks"))),
  );
  const chunksOnDisk = perDoc.reduce((sum, n) => sum + n, 0);

  return { docs_on_disk: docsOnDisk.length, chunks_on_disk: chunksOnDisk };
}

/**
 * Runs the aggregate count query. Never throws: a failure is reported through
 * `dbError` so the endpoint can still return the disk-side numbers.
 */
async function dbStats(): Promise<DbProbe> {
  try {
    const rows = await pgQuery(INDEX_COUNTS_SQL, []);
    return { db: rows[0] ?? null, dbError: null };
  } catch (e: unknown) {
    // A thrown non-Error value has no `.message`; stringify it so `db_error`
    // is always populated instead of silently vanishing from the JSON body.
    return { db: null, dbError: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * GET handler: returns disk counts, DB counts, and the gap between them.
 *
 * Response body:
 *   - `disk`     — result of diskStats()
 *   - `db`       — DbStat row, or null if the query failed / returned no row
 *   - `db_error` — error message when the query failed, else null
 *   - `gap`      — null when `db` is null; otherwise the outstanding work, with
 *                  each difference clamped at 0 so a DB that is ahead of the
 *                  disk never yields a negative number. `ready_for_retrieval`
 *                  is true once at least one chunk has an embedding.
 */
export async function GET(): Promise<Response> {
  // The filesystem scan and the DB query do not depend on each other.
  const [disk, { db, dbError }] = await Promise.all([diskStats(), dbStats()]);

  return json({
    disk,
    db,
    db_error: dbError,
    gap: db
      ? {
          docs_to_index: Math.max(0, disk.docs_on_disk - db.documents_count),
          chunks_to_index: Math.max(0, disk.chunks_on_disk - db.chunks_count),
          chunks_without_embedding: Math.max(0, db.chunks_count - db.chunks_with_embedding),
          ready_for_retrieval: db.chunks_with_embedding > 0,
        }
      : null,
  });
}