disclosure-bureau/web/app/api/timeline/route.ts
Luiz Gustavo a7e9dce6d2 rebuild entity layer from Sonnet-vision reextract pipeline
Add reextract pipeline (scripts/reextract/) that rebuilds doc-level entity
JSON from Sonnet-vision chunks via Opus, replacing the noisy per-page
extraction. Add synthesize scripts to regenerate wiki/entities from the 116
_reextract.json (30), aggregate missing page.md from chunks (31), and reprocess
805 pages the doc-rebuilder agent dropped on context overflow (32). Add
maintain scripts 43-56 for chunk-page sync, dedup, generic-entity marking, and
typed relation extraction.

Web: wire relations API + entity-relations component; entity/timeline/doc
pages consume the rebuilt layer.

Note: raw/, processing/, wiki/ remain gitignored (bulk data managed
separately); the 116 reextract JSONs and 7,798 rebuilt entity files live on
disk only. The 27 curated anchor events under wiki/entities/events/ are
preserved.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 12:20:24 -03:00

137 lines
4.9 KiB
TypeScript

/**
 * /api/timeline?from=1940&to=2026&class=event&limit=200
 *
 * Returns entities sorted by date_start, read from the frontmatter of
 * wiki/entities/<class folder>/*.md (events/ by default).
 * No DB required — pure filesystem read of YAML frontmatter.
 *
 * Optional filters:
 * - from / to: ISO dates or year strings (e.g. "1947" or "1947-06-24")
 * - class: "event" | "uap_object" | "operation" (default: event)
 * - limit: 1..500 (default 200)
 * - q: case-insensitive substring match in canonical_name or narrative_summary
 * - include_unsynthesized: "1" to also return entries whose summary_status is "none"
 */
import fs from "node:fs/promises";
import path from "node:path";
import matter from "gray-matter";
import { WIKI } from "@/lib/wiki";
// Route-level configuration exported as plain string literals.
// NOTE(review): these look like Next.js App Router route segment options — confirm.
// "nodejs": presumably required because this handler imports node:fs and node:path.
export const runtime = "nodejs";
// "force-dynamic": presumably opts this route out of static rendering so each
// request re-reads the wiki files — confirm against the framework docs.
export const dynamic = "force-dynamic";
/**
 * Maps the public `class` query value to its folder name under wiki/entities.
 *
 * The map is built on a null-prototype object because it is indexed with an
 * untrusted query-string value: on a plain object literal, keys such as
 * "constructor" or "__proto__" would resolve to inherited Object.prototype
 * members (truthy, non-string) instead of undefined.
 */
const CLASS_FOLDER: Record<string, string> = Object.assign(Object.create(null), {
  event: "events",
  uap_object: "uap-objects",
  operation: "operations",
});
/**
 * State of an entry's narrative summary. The handler falls back to "none"
 * when the frontmatter value is missing (and no narrative exists) or is not
 * one of these literals.
 */
type TimelineSummaryStatus = "none" | "synthesized" | "curated" | "red_teamed";

/** One item of the `entries` array returned by the timeline endpoint. */
interface TimelineEntry {
  /** The `class` query value the entry was listed under (e.g. "event"). */
  entity_class: string;
  /** Source file name with the ".md" extension removed. */
  entity_id: string;
  /** Frontmatter `canonical_name`, or the file name stem when absent. */
  canonical_name: string;
  /** Start date as found in frontmatter (year-only or fuller date text). */
  date_start: string | null;
  /** Frontmatter `date_end`, or null when absent. */
  date_end: string | null;
  /** Frontmatter `primary_location`, or null when absent. */
  primary_location?: string | null;
  /** Narrative summary truncated to at most 280 characters, or null when empty. */
  narrative_summary?: string | null;
  /** Summary lifecycle state used to hide stub entries by default. */
  summary_status: TimelineSummaryStatus;
  /** Frontmatter `total_mentions` when it is a number; otherwise omitted. */
  total_mentions?: number;
  /** Site-relative link of the form /e/<folder>/<entity_id>. */
  href: string;
}
/**
 * Serialises `data` as a JSON HTTP response.
 *
 * @param data   Any JSON-serialisable value to send as the body.
 * @param status HTTP status code; defaults to 200.
 * @returns A Response carrying a JSON content type and a 60-second public
 *          cache-control header (the same headers are used for every status).
 */
function json(data: unknown, status = 200) {
  const headers = {
    "content-type": "application/json",
    "cache-control": "public, max-age=60",
  };
  const body = JSON.stringify(data);
  return new Response(body, { status, headers });
}
/**
 * Picks the first present date-like frontmatter field and returns it as text.
 *
 * Fields are tried in order: date_start, date, event_date, observation_date,
 * start_date.
 *
 * @param fm Parsed frontmatter key/value map.
 * @returns The trimmed date text, or null when no usable value exists
 *          (missing, falsy, blank, or an invalid Date).
 */
function pickDate(fm: Record<string, unknown>): string | null {
  const cand =
    fm.date_start ?? fm.date ?? fm.event_date ?? fm.observation_date ?? fm.start_date ?? null;
  if (!cand) return null;
  if (cand instanceof Date) {
    // YAML loaders commonly turn unquoted ISO dates (date: 1947-06-24) into
    // Date objects. String(date) would give "Tue Jun 24 1947 ...", which the
    // YYYY[-MM[-DD]] sort/filter logic cannot parse, so emit ISO text instead.
    if (Number.isNaN(cand.getTime())) return null;
    const iso = cand.toISOString();
    // Keep a date-only value date-only; preserve a real time-of-day if present.
    return iso.endsWith("T00:00:00.000Z") ? iso.slice(0, 10) : iso;
  }
  const text = String(cand).trim();
  // A whitespace-only value carries no date; report it as absent.
  return text === "" ? null : text;
}
/**
 * Converts a date string into a key that orders correctly under plain string
 * comparison.
 *
 * - null or empty input maps to "9999", which sorts after four-digit years.
 * - Text starting with YYYY, YYYY-MM or YYYY-MM-DD becomes YYYY-MM-DD, with
 *   "00" standing in for a missing month or day ("1947" -> "1947-00-00").
 *   Anything after the matched prefix is ignored.
 * - Any other text is returned unchanged.
 */
function dateSortable(s: string | null): string {
  if (!s) return "9999";
  const parts = /^(\d{4})(?:-(\d{2}))?(?:-(\d{2}))?/.exec(s);
  if (parts === null) return s;
  // Unmatched optional groups are undefined, so the defaults supply the padding.
  const [, year, month = "00", day = "00"] = parts;
  return [year, month, day].join("-");
}
/** Page size used when `limit` is absent, blank, or not a number. */
const TIMELINE_DEFAULT_LIMIT = 200;
/** Largest page size a caller may request. */
const TIMELINE_MAX_LIMIT = 500;
/** Values accepted for the `summary_status` frontmatter field. */
const TIMELINE_SUMMARY_STATUSES: readonly string[] = [
  "none",
  "synthesized",
  "curated",
  "red_teamed",
];

/** Parses the `limit` query value into an integer clamped to 1..500. */
function parseTimelineLimit(raw: string | null): number {
  if (raw === null || raw.trim() === "") return TIMELINE_DEFAULT_LIMIT;
  const n = Math.trunc(Number(raw));
  // NaN would otherwise flow through Math.min/Math.max and make slice(0, NaN)
  // return nothing, so fall back to the default for unparseable input.
  if (Number.isNaN(n)) return TIMELINE_DEFAULT_LIMIT;
  return Math.min(Math.max(n, 1), TIMELINE_MAX_LIMIT);
}

/** Builds an inclusive upper-bound sort key, so `to=1947` covers all of 1947. */
function timelineUpperBound(to: string): string {
  const sortable = dateSortable(to);
  const m = /^(\d{4})-(\d{2})-(\d{2})$/.exec(sortable);
  if (!m) return sortable;
  // "00" marks a missing month/day; "99" sorts after every real month/day.
  const month = m[2] === "00" ? "99" : m[2];
  const day = m[3] === "00" ? "99" : m[3];
  return `${m[1]}-${month}-${day}`;
}

/**
 * GET handler for the timeline endpoint.
 *
 * Reads every `.md` file in the folder selected by `class`, parses its
 * frontmatter, applies the date-range, status, generic-entity and text
 * filters, and returns the matches sorted by start date.
 *
 * Query parameters: `class` (default "event"), `from`, `to` (both inclusive),
 * `q`, `limit` (1..500, default 200) and `include_unsynthesized` ("1").
 *
 * @returns 400 `{ error: "bad_class" }` for an unknown class; otherwise 200
 *          with `{ count, limit, from, to, class, entries }`, where `count` is
 *          the number of matches before `limit` is applied. An unreadable
 *          folder yields `{ entries: [], count: 0 }`.
 */
export async function GET(req: Request) {
  const u = new URL(req.url);
  const cls = u.searchParams.get("class") ?? "event";
  // Own-property check: inherited keys such as "constructor" must be rejected
  // as unknown classes rather than reaching path.join with a non-string.
  const folder = Object.prototype.hasOwnProperty.call(CLASS_FOLDER, cls)
    ? CLASS_FOLDER[cls]
    : undefined;
  if (!folder) return json({ error: "bad_class", class: cls }, 400);

  const from = u.searchParams.get("from") ?? "";
  const to = u.searchParams.get("to") ?? "";
  const q = (u.searchParams.get("q") ?? "").toLowerCase().trim();
  const limit = parseTimelineLimit(u.searchParams.get("limit"));
  // Default: only show events with curated/synthesised narrative — never stubs.
  // Opt-in `?include_unsynthesized=1` returns everything (admin / debug).
  const includeUnsynthesized = u.searchParams.get("include_unsynthesized") === "1";

  // Range bounds are loop-invariant, so compute them once.
  const fromBound = from ? dateSortable(from) : "";
  const toBound = to ? timelineUpperBound(to) : "";

  const dir = path.join(WIKI, "entities", folder);
  let files: string[] = [];
  try {
    files = (await fs.readdir(dir)).filter((f) => f.endsWith(".md"));
  } catch {
    // A missing or unreadable folder is treated as "no entries".
    return json({ entries: [], count: 0 });
  }

  // Keep each entry with its sort key so the key is not recomputed while sorting.
  const dated: { key: string; entry: TimelineEntry }[] = [];
  for (const f of files) {
    const entityId = f.replace(/\.md$/, "");
    try {
      const raw = await fs.readFile(path.join(dir, f), "utf-8");
      const fm = matter(raw).data as Record<string, unknown>;

      const date_start = pickDate(fm);
      if (!date_start) continue;
      const key = dateSortable(date_start);
      if (fromBound && key < fromBound) continue;
      if (toBound && key > toBound) continue;

      const canonical = String(fm.canonical_name ?? entityId);
      const narrative = typeof fm.narrative_summary === "string" ? fm.narrative_summary : "";
      const statusRaw = String(fm.summary_status ?? (narrative ? "synthesized" : "none"));
      const summary_status = (
        TIMELINE_SUMMARY_STATUSES.includes(statusRaw) ? statusRaw : "none"
      ) as TimelineEntry["summary_status"];

      // Default: hide events without a real narrative.
      if (!includeUnsynthesized && summary_status === "none") continue;
      // Always hide generic concept-entities (categories, not real events).
      if (fm.is_generic === true) continue;
      if (q && !canonical.toLowerCase().includes(q) && !narrative.toLowerCase().includes(q)) {
        continue;
      }

      dated.push({
        key,
        entry: {
          entity_class: cls,
          entity_id: entityId,
          canonical_name: canonical,
          date_start,
          date_end: (fm.date_end as string) ?? null,
          primary_location: (fm.primary_location as string) ?? null,
          narrative_summary: narrative ? narrative.slice(0, 280) : null,
          summary_status,
          total_mentions: typeof fm.total_mentions === "number" ? fm.total_mentions : undefined,
          href: `/e/${folder}/${entityId}`,
        },
      });
    } catch {
      /* skip malformed: one unreadable or unparsable file must not fail the listing */
    }
  }

  dated.sort((a, b) => a.key.localeCompare(b.key));
  return json({
    count: dated.length,
    limit,
    from: from || null,
    to: to || null,
    class: cls,
    entries: dated.slice(0, limit).map((d) => d.entry),
  });
}