90 lines
2.8 KiB
TypeScript
90 lines
2.8 KiB
TypeScript
|
|
/**
 * /api/admin/indexer — Status of the Postgres indexing layer.
 *
 * Compares:
 *   - docs on disk     → raw/<doc>--subagent/_index.json exists
 *   - docs in DB       → public.documents rows
 *   - chunks on disk   → sum of raw/<doc>--subagent/chunks/*.md
 *   - chunks in DB     → COUNT(*) FROM public.chunks
 *   - chunks embedded  → COUNT(*) FROM public.chunks WHERE embedding IS NOT NULL
 *   - entities         → COUNT(*) FROM public.entities
 *   - entity_mentions  → COUNT(*) FROM public.entity_mentions
 */
|
||
|
|
import fs from "node:fs/promises";
|
||
|
|
import path from "node:path";
|
||
|
|
import { UFO_ROOT } from "@/lib/wiki";
|
||
|
|
import { pgQuery } from "@/lib/retrieval/db";
|
||
|
|
|
||
|
|
// Presumably framework route configuration (looks like Next.js route segment
// config — TODO confirm): selects the Node.js runtime. This module imports
// node:fs/promises and node:path, which need a Node environment.
export const runtime = "nodejs";
|
||
|
|
// Presumably framework route configuration as well (TODO confirm): "force-dynamic"
// is expected to make every request re-run the handler instead of serving a cached result.
export const dynamic = "force-dynamic";
|
||
|
|
|
||
|
|
/**
 * One row of aggregate counts read from Postgres by the `GET` handler.
 * Every property name matches a column alias of that handler's SELECT.
 */
interface DbStat {
  /** Rows in `public.documents`. */
  documents_count: number;

  /** Rows in `public.chunks`. */
  chunks_count: number;

  /** Rows in `public.chunks` whose `embedding` is not NULL. */
  chunks_with_embedding: number;

  /** Rows in `public.entities`. */
  entities_count: number;

  /** Rows in `public.entity_mentions`. */
  entity_mentions_count: number;
}
|
||
|
|
|
||
|
|
/**
 * Wrap a value in a JSON `Response`.
 *
 * @param data   Value serialized with `JSON.stringify` to form the body.
 * @param status HTTP status code of the response; 200 when omitted.
 * @returns A `Response` carrying `content-type: application/json` and
 *          `cache-control: no-cache` headers.
 */
function json(data: unknown, status = 200) {
  const headers = {
    "content-type": "application/json",
    "cache-control": "no-cache",
  };
  const body = JSON.stringify(data);
  return new Response(body, { status, headers });
}
|
||
|
|
|
||
|
|
/**
 * Count the documents and chunk files currently present under `<UFO_ROOT>/raw`.
 *
 * - A document is any entry of `raw/` whose name ends in `--subagent`.
 * - A chunk is any file named `c*.md` inside that entry's `chunks/` directory.
 *
 * The scan is best-effort: if `raw/` or a `chunks/` directory cannot be read,
 * that part contributes zero instead of failing the whole request.
 *
 * NOTE(review): the header comment of this file describes "docs on disk" as
 * `raw/<doc>--subagent/_index.json exists`, but only the entry name is checked
 * here — confirm which definition is intended.
 *
 * @returns `docs_on_disk` and `chunks_on_disk` totals.
 */
async function diskStats() {
  const rawRoot = path.join(UFO_ROOT, "raw");

  let docsOnDisk: string[] = [];
  try {
    const entries = await fs.readdir(rawRoot);
    docsOnDisk = entries.filter((e) => e.endsWith("--subagent"));
  } catch {
    /* raw/ is missing or unreadable: report zero documents */
  }

  // The per-document listings are independent of each other, so issue them
  // concurrently instead of awaiting one directory at a time.
  const chunkCounts = await Promise.all(
    docsOnDisk.map(async (doc) => {
      try {
        const files = await fs.readdir(path.join(rawRoot, doc, "chunks"));
        return files.filter((f) => f.startsWith("c") && f.endsWith(".md")).length;
      } catch {
        /* this document has no readable chunks/ directory */
        return 0;
      }
    }),
  );
  const chunksOnDisk = chunkCounts.reduce((total, count) => total + count, 0);

  return { docs_on_disk: docsOnDisk.length, chunks_on_disk: chunksOnDisk };
}
|
||
|
|
|
||
|
|
/**
 * Read the aggregate table counts from Postgres in a single round trip.
 *
 * Never rejects: a failed query is reported through `dbError` so the endpoint
 * can still return the disk-side numbers.
 */
async function loadDbStats(): Promise<{ db: DbStat | null; dbError: string | null }> {
  try {
    const rows = await pgQuery<DbStat>(
      `SELECT
        (SELECT COUNT(*) FROM public.documents)::INT AS documents_count,
        (SELECT COUNT(*) FROM public.chunks)::INT AS chunks_count,
        (SELECT COUNT(*) FROM public.chunks WHERE embedding IS NOT NULL)::INT AS chunks_with_embedding,
        (SELECT COUNT(*) FROM public.entities)::INT AS entities_count,
        (SELECT COUNT(*) FROM public.entity_mentions)::INT AS entity_mentions_count`,
      [],
    );
    return { db: rows[0] ?? null, dbError: null };
  } catch (e: unknown) {
    // Anything can be thrown; only read `.message` from real Error objects so
    // `db_error` is always a string rather than silently `undefined`.
    return { db: null, dbError: e instanceof Error ? e.message : String(e) };
  }
}

/**
 * GET handler: report disk-side and database-side index counts plus the gap
 * between them.
 *
 * Response body fields:
 * - `disk`     — result of `diskStats()`.
 * - `db`       — the `DbStat` row, or `null` when the query failed or returned no row.
 * - `db_error` — message of the database failure, or `null`.
 * - `gap`      — `null` when `db` is `null`; otherwise the non-negative
 *                differences `docs_to_index`, `chunks_to_index`,
 *                `chunks_without_embedding`, and `ready_for_retrieval`
 *                (true once at least one chunk has an embedding).
 *
 * The response status is 200 even when the database query fails; the failure
 * is conveyed through `db_error`.
 */
export async function GET() {
  // The filesystem scan and the database query do not depend on each other.
  const [disk, { db, dbError }] = await Promise.all([diskStats(), loadDbStats()]);

  return json({
    disk,
    db,
    db_error: dbError,
    gap: db
      ? {
          // Clamp at zero: the database may hold more rows than exist on disk.
          docs_to_index: Math.max(0, disk.docs_on_disk - db.documents_count),
          chunks_to_index: Math.max(0, disk.chunks_on_disk - db.chunks_count),
          chunks_without_embedding: Math.max(0, db.chunks_count - db.chunks_with_embedding),
          ready_for_retrieval: db.chunks_with_embedding > 0,
        }
      : null,
  });
}
|