disclosure-bureau/web/app/api/chunk/route.ts

70 lines
2 KiB
TypeScript
Raw Permalink Normal View History

/**
* /api/chunk?doc=<doc-id>&chunk=<c0042>
*
* Returns one chunk's payload (source, chunk_id, doc_id, page, type, bbox,
* classification, content_en, content_pt). Used by the client-side
* InlineCitation component to expand `[[doc/p007#c0042]]` links into rich cards.
*
* Tries DB first (with embeddings already indexed), falls back to filesystem
* (raw/<doc>--subagent/chunks/cXXXX.md) so the UX works even before the
* indexer has run on a particular doc.
*/
import { NextRequest } from "next/server";
import { getChunk } from "@/lib/retrieval/hybrid";
import { readChunk as readChunkFs } from "@/lib/chunks";
// Next.js route segment config: run this handler on the Node.js runtime.
// NOTE(review): presumably required by the filesystem fallback described in
// the header comment — confirm against "@/lib/chunks".
export const runtime = "nodejs";
// Next.js route segment config: always evaluate the handler per request
// instead of serving a statically cached result.
export const dynamic = "force-dynamic";
/**
 * Serialises `data` into a JSON HTTP response.
 *
 * Only successful (2xx) responses are publicly cacheable for five minutes.
 * Any other status is sent with `no-store`, so a transient failure or a
 * "not found" for a chunk that is about to appear is not pinned in browser
 * or shared caches.
 *
 * @param data   Value passed to `JSON.stringify` to form the body.
 * @param status HTTP status code; defaults to 200.
 * @returns A `Response` with `content-type: application/json`.
 */
function json(data: unknown, status = 200): Response {
  const cacheable = status >= 200 && status < 300;
  return new Response(JSON.stringify(data), {
    status,
    headers: {
      "content-type": "application/json",
      "cache-control": cacheable ? "public, max-age=300" : "no-store",
    },
  });
}
/**
 * Reports whether an id taken from the query string is safe to pass on to the
 * chunk loaders: it must not be a dot-only segment and must not contain a
 * path separator or NUL byte.
 */
function isSafeId(id: string): boolean {
  return id !== "." && id !== ".." && !/[\\/\0]/.test(id);
}

/**
 * GET /api/chunk?doc=<doc-id>&chunk=<chunk-id>
 *
 * Looks the chunk up via `getChunk` first and, when that returns nothing or
 * throws, falls back to `readChunkFs`. The response's `source` field says
 * which path produced the payload ("db" or "fs").
 *
 * Responses:
 *  - 400 when `doc` or `chunk` is missing, blank, or contains path-like characters
 *  - 404 when neither lookup yields a chunk
 *  - 200 with the chunk payload otherwise
 *
 * @param req Incoming request; only its URL query string is read.
 */
export async function GET(req: NextRequest): Promise<Response> {
  const params = new URL(req.url).searchParams;
  const doc = params.get("doc")?.trim();
  const chunk = params.get("chunk")?.trim();
  if (!doc || !chunk) return json({ error: "doc and chunk required" }, 400);

  // Both ids are untrusted and are handed to a filesystem-backed loader below.
  // NOTE(review): readChunkFs may already sanitise its inputs — this is
  // defence in depth; confirm against "@/lib/chunks".
  if (!isSafeId(doc) || !isSafeId(chunk)) {
    return json({ error: "invalid doc or chunk" }, 400);
  }

  // 1. Try DB
  try {
    const c = await getChunk(doc, chunk);
    if (c) {
      return json({
        source: "db",
        chunk_id: c.chunk_id,
        doc_id: c.doc_id,
        page: c.page,
        type: c.type,
        bbox: c.bbox,
        classification: c.classification,
        content_en: c.content_en,
        content_pt: c.content_pt,
      });
    }
  } catch (err: unknown) {
    // Best-effort: a DB failure must not break the request, but it should be
    // visible in logs rather than silently discarded.
    console.warn("[api/chunk] db lookup failed, falling back to filesystem", err);
  }

  // 2. Filesystem fallback
  const fs = await readChunkFs(doc, chunk);
  if (!fs) return json({ error: "not_found", doc_id: doc, chunk_id: chunk }, 404);
  return json({
    source: "fs",
    chunk_id: fs.fm.chunk_id,
    doc_id: doc,
    page: fs.fm.page,
    type: fs.fm.type,
    bbox: fs.fm.bbox,
    classification: fs.fm.classification,
    content_en: fs.content_en,
    content_pt: fs.content_pt,
  });
}