// Source: disclosure-bureau/web/app/api/admin/indexer/route.ts (TypeScript; originally 89 lines, 2.8 KiB)

/**
 * /api/admin/indexer — Status of the Postgres indexing layer.
 *
 * Reports and compares:
 *   - docs on disk    → entries under raw/ named <doc>--subagent (matched by name only;
 *                       the presence of _index.json is not checked)
 *   - docs in DB      → COUNT(*) FROM public.documents
 *   - chunks on disk  → number of c*.md files across raw/<doc>--subagent/chunks/
 *   - chunks in DB    → COUNT(*) FROM public.chunks
 *   - chunks embedded → COUNT(*) FROM public.chunks WHERE embedding IS NOT NULL
 *   - entities        → COUNT(*) FROM public.entities
 *   - entity_mentions → COUNT(*) FROM public.entity_mentions
 */
import fs from "node:fs/promises";
import path from "node:path";
import { UFO_ROOT } from "@/lib/wiki";
import { pgQuery } from "@/lib/retrieval/db";
// Route-level configuration exports. NOTE(review): presumably Next.js route segment
// options (this file lives at app/api/.../route.ts) — confirm against the framework in use.
// "nodejs": this module imports node:fs/promises and node:path, which need a Node.js runtime.
export const runtime = "nodejs";
// "force-dynamic": presumably disables static caching so every request recomputes the
// counts — TODO confirm.
export const dynamic = "force-dynamic";
/**
 * Shape of the single row returned by the counts query issued in GET.
 * Each field mirrors a column alias of that SELECT (all cast to INT in SQL).
 */
interface DbStat {
/** COUNT(*) of public.documents. */
documents_count: number;
/** COUNT(*) of public.chunks. */
chunks_count: number;
/** COUNT(*) of public.chunks rows whose embedding IS NOT NULL. */
chunks_with_embedding: number;
/** COUNT(*) of public.entities. */
entities_count: number;
/** COUNT(*) of public.entity_mentions. */
entity_mentions_count: number;
}
/**
 * Wraps a value in a JSON HTTP response.
 *
 * @param data   Value to serialise with JSON.stringify.
 * @param status HTTP status code; defaults to 200.
 * @returns A Response carrying the serialised body, a JSON content type and
 *          a `cache-control: no-cache` header.
 */
function json(data: unknown, status = 200) {
  const payload = JSON.stringify(data);
  const responseHeaders = {
    "content-type": "application/json",
    "cache-control": "no-cache",
  };
  const init = { status, headers: responseHeaders };
  return new Response(payload, init);
}
/**
 * Counts documents and chunk files found under `<UFO_ROOT>/raw`.
 *
 * A "document" is any entry of `raw/` whose name ends in `--subagent`
 * (matched by name only). A "chunk" is any file in that entry's `chunks/`
 * sub-directory whose name starts with `c` and ends with `.md`.
 *
 * The scan is best-effort: a missing or unreadable `raw/` yields zero
 * documents, and a missing or unreadable `chunks/` contributes zero chunks
 * for that document. The function does not throw for filesystem errors.
 *
 * @returns `docs_on_disk` and `chunks_on_disk` totals.
 */
async function diskStats(): Promise<{ docs_on_disk: number; chunks_on_disk: number }> {
  const rawRoot = path.join(UFO_ROOT, "raw");

  let docsOnDisk: string[] = [];
  try {
    const entries = await fs.readdir(rawRoot);
    docsOnDisk = entries.filter((e) => e.endsWith("--subagent"));
  } catch {
    /* raw/ missing or unreadable: report zero documents */
  }

  // The per-document listings are independent of each other, so issue them
  // concurrently instead of awaiting one readdir at a time.
  const chunkCounts = await Promise.all(
    docsOnDisk.map(async (d) => {
      try {
        const files = await fs.readdir(path.join(rawRoot, d, "chunks"));
        return files.filter((f) => f.startsWith("c") && f.endsWith(".md")).length;
      } catch {
        /* chunks/ missing or unreadable: this document contributes nothing */
        return 0;
      }
    }),
  );

  const chunksOnDisk = chunkCounts.reduce((total, count) => total + count, 0);
  return { docs_on_disk: docsOnDisk.length, chunks_on_disk: chunksOnDisk };
}
/**
 * GET /api/admin/indexer — reports on-disk versus in-database indexing counts.
 *
 * The response is built with `json` at its default status and contains:
 *   - `disk`:     the result of `diskStats()`;
 *   - `db`:       the counts row, or `null` when the query failed or returned no row;
 *   - `db_error`: a message describing the query failure, otherwise `null`;
 *   - `gap`:      derived differences (clamped at 0), or `null` when `db` is `null`.
 *
 * A database failure does not fail the request: it is surfaced through `db_error`.
 */
export async function GET(): Promise<Response> {
  const disk = await diskStats();

  let db: DbStat | null = null;
  let dbError: string | null = null;
  try {
    const rows = await pgQuery<DbStat>(
      `SELECT
(SELECT COUNT(*) FROM public.documents)::INT AS documents_count,
(SELECT COUNT(*) FROM public.chunks)::INT AS chunks_count,
(SELECT COUNT(*) FROM public.chunks WHERE embedding IS NOT NULL)::INT AS chunks_with_embedding,
(SELECT COUNT(*) FROM public.entities)::INT AS entities_count,
(SELECT COUNT(*) FROM public.entity_mentions)::INT AS entity_mentions_count`,
      [],
    );
    db = rows[0] ?? null;
  } catch (e: unknown) {
    // A thrown value is not guaranteed to be an Error. Without narrowing, a
    // non-Error throw would leave `message` undefined and JSON.stringify would
    // drop `db_error`, hiding the failure from the client.
    dbError = e instanceof Error ? e.message : String(e);
  }

  return json({
    disk,
    db,
    db_error: dbError,
    gap: db
      ? {
          // Clamp at 0: the DB may hold more rows than the disk currently shows.
          docs_to_index: Math.max(0, disk.docs_on_disk - db.documents_count),
          chunks_to_index: Math.max(0, disk.chunks_on_disk - db.chunks_count),
          chunks_without_embedding: Math.max(0, db.chunks_count - db.chunks_with_embedding),
          ready_for_retrieval: db.chunks_with_embedding > 0,
        }
      : null,
  });
}