Add reextract pipeline (scripts/reextract/) that rebuilds doc-level entity JSON from Sonnet-vision chunks via Opus, replacing the noisy per-page extraction. Add synthesize scripts to regenerate wiki/entities from the 116 _reextract.json (30), aggregate missing page.md from chunks (31), and reprocess 805 pages the doc-rebuilder agent dropped on context overflow (32). Add maintain scripts 43-56 for chunk-page sync, dedup, generic-entity marking, and typed relation extraction. Web: wire relations API + entity-relations component; entity/timeline/doc pages consume the rebuilt layer. Note: raw/, processing/, wiki/ remain gitignored (bulk data managed separately); the 116 reextract JSONs and 7,798 rebuilt entity files live on disk only. The 27 curated anchor events under wiki/entities/events/ are preserved. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
137 lines
4.9 KiB
TypeScript
137 lines
4.9 KiB
TypeScript
/**
|
|
* /api/timeline?from=1940&to=2026&class=event&limit=200
|
|
*
|
|
* Returns events sorted by date_start from wiki/entities/events/*.md frontmatter.
|
|
* No DB required — pure filesystem read of YAML frontmatter.
|
|
*
|
|
* Optional filters:
|
|
* - from / to: ISO dates or year strings (e.g. "1947" or "1947-06-24")
|
|
* - class: "event" | "uap_object" | "operation" (default: event)
|
|
* - limit: 1..500 (default 200)
|
|
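* - q: case-insensitive substring match in canonical_name or narrative_summary
* - include_unsynthesized: "1" also returns entries whose summary_status is "none"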
|
|
*/
|
|
import fs from "node:fs/promises";
|
|
import path from "node:path";
|
|
import matter from "gray-matter";
|
|
import { WIKI } from "@/lib/wiki";
|
|
|
|
export const runtime = "nodejs";
|
|
export const dynamic = "force-dynamic";
|
|
|
|
/**
 * Maps the public `class` query value to the sub-folder of
 * `entities/` that holds the matching markdown files.
 *
 * Built on a null-prototype object so that lookups with caller-supplied
 * keys such as "constructor" or "__proto__" yield `undefined` instead of
 * an inherited `Object.prototype` member.
 */
const CLASS_FOLDER: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    event: "events",
    uap_object: "uap-objects",
    operation: "operations",
  },
);
|
|
|
|
/** One row of the timeline response, built from a single entity markdown file. */
interface TimelineEntry {
  // --- identity -----------------------------------------------------------
  /** The `class` query value the entry was listed under. */
  entity_class: string;
  /** Markdown file name without its `.md` extension. */
  entity_id: string;
  /** Frontmatter `canonical_name`, or the file name when that is absent. */
  canonical_name: string;
  /** Link to the entity page. */
  href: string;

  // --- dating -------------------------------------------------------------
  /** Start date as found in the frontmatter (year-only or ISO-like text). */
  date_start: string | null;
  /** End date from the frontmatter, when present. */
  date_end: string | null;

  // --- descriptive content ------------------------------------------------
  primary_location?: string | null;
  /** Narrative text, shortened by the handler before it is returned. */
  narrative_summary?: string | null;
  /** How the narrative was produced; "none" marks an entry without one. */
  summary_status: "none" | "synthesized" | "curated" | "red_teamed";
  total_mentions?: number;
}
|
|
|
|
/**
 * Wrap a value in a JSON `Response`.
 *
 * @param data   Value to serialise with `JSON.stringify`.
 * @param status HTTP status code; 200 when omitted.
 * @returns A response carrying the JSON body, a JSON content type and a
 *          60-second public cache directive.
 */
function json(data: unknown, status = 200): Response {
  const body = JSON.stringify(data);
  const headers = {
    "content-type": "application/json",
    "cache-control": "public, max-age=60",
  };
  return new Response(body, { status, headers });
}
|
|
|
|
/**
 * Pick the start date of an entity from its frontmatter.
 *
 * The fields `date_start`, `date`, `event_date`, `observation_date` and
 * `start_date` are tried in that order; the first one holding a usable
 * value wins. Missing, falsy and whitespace-only values are skipped so a
 * blank primary field does not hide a populated fallback.
 *
 * @param fm Parsed frontmatter of one entity file.
 * @returns The date as trimmed text, or `null` when no field has one.
 */
function pickDate(fm: Record<string, unknown>): string | null {
  const candidates = ["date_start", "date", "event_date", "observation_date", "start_date"];
  for (const key of candidates) {
    const value = fm[key];
    if (value instanceof Date) {
      // YAML parsers turn unquoted dates (`date_start: 1947-06-24`) into Date
      // objects; String(date) would give "Tue Jun 24 1947 ...", which neither
      // sorts nor filters as a date. Emit the ISO calendar day instead.
      if (!Number.isNaN(value.getTime())) return value.toISOString().slice(0, 10);
      continue;
    }
    if (!value) continue;
    const text = String(value).trim();
    if (text) return text;
  }
  return null;
}
|
|
|
|
/**
 * Turn a date string into a key that orders chronologically under plain
 * string comparison.
 *
 * - `null` or an empty string becomes "9999".
 * - Text starting with `YYYY`, `YYYY-MM` or `YYYY-MM-DD` becomes
 *   `YYYY-MM-DD`, with "00" standing in for a missing month or day
 *   ("1947" → "1947-00-00"); anything after the matched prefix is dropped.
 * - Any other text is returned unchanged.
 */
function dateSortable(s: string | null): string {
  if (s === null || s === "") return "9999";
  const parts = /^(\d{4})(?:-(\d{2}))?(?:-(\d{2}))?/.exec(s);
  if (parts === null) return s;
  // Unmatched optional groups are undefined, so the defaults supply the padding.
  const [, year, month = "00", day = "00"] = parts;
  return [year, month, day].join("-");
}
|
|
|
|
/**
 * GET /api/timeline — list the dated entities of one class, oldest first.
 *
 * Reads every `*.md` file in the class folder under `WIKI/entities`, parses
 * its frontmatter and keeps the entries that pass the query filters:
 *
 * - `class`: key of `CLASS_FOLDER` (default "event"); unknown → 400 `bad_class`
 * - `from` / `to`: inclusive bounds, year or ISO date; a partial `to` such as
 *   "1947" covers the whole year
 * - `q`: case-insensitive substring of the canonical name or narrative
 * - `limit`: 1..500, default 200 (also used when the value is not a number)
 * - `include_unsynthesized=1`: also return entries without a narrative
 *
 * Entries without a start date, and entries flagged `is_generic`, are always
 * omitted. `count` in the response is the number of matches before `limit`
 * is applied. An unreadable folder yields an empty result rather than an
 * error, and files that fail to read or parse are skipped.
 */
export async function GET(req: Request): Promise<Response> {
  const u = new URL(req.url);

  const cls = u.searchParams.get("class") ?? "event";
  // Own-property check: a bare `CLASS_FOLDER[cls]` would accept inherited keys
  // such as "constructor", hand a non-string to path.join and crash the request.
  const folder = Object.prototype.hasOwnProperty.call(CLASS_FOLDER, cls)
    ? CLASS_FOLDER[cls]
    : undefined;
  if (!folder) return json({ error: "bad_class", class: cls }, 400);

  const from = (u.searchParams.get("from") ?? "").trim();
  const to = (u.searchParams.get("to") ?? "").trim();
  const q = (u.searchParams.get("q") ?? "").toLowerCase().trim();
  const limit = parseLimit(u.searchParams.get("limit"));
  // Default: only show events with curated/synthesised narrative — never stubs.
  // Opt-in `?include_unsynthesized=1` returns everything (admin / debug).
  const includeUnsynthesized = u.searchParams.get("include_unsynthesized") === "1";

  // The bounds do not change per file, so compute their sort keys once.
  const fromKey = from ? dateSortable(from) : null;
  const toKey = to ? inclusiveUpperBound(to) : null;

  const dir = path.join(WIKI, "entities", folder);
  let files: string[] = [];
  try {
    files = (await fs.readdir(dir)).filter((f) => f.endsWith(".md"));
  } catch {
    // Best effort: a missing or unreadable folder is reported as "no entries".
    return json({ entries: [], count: 0 });
  }

  const knownStatuses: readonly string[] = ["none", "synthesized", "curated", "red_teamed"];
  const matches: { key: string; entry: TimelineEntry }[] = [];

  for (const f of files) {
    try {
      const raw = await fs.readFile(path.join(dir, f), "utf-8");
      const fm = matter(raw).data as Record<string, unknown>;

      const date_start = pickDate(fm);
      if (!date_start) continue;
      const key = dateSortable(date_start);
      if (fromKey !== null && key < fromKey) continue;
      if (toKey !== null && key > toKey) continue;

      const id = f.replace(/\.md$/, "");
      const canonical = String(fm.canonical_name ?? id);
      const narrative = typeof fm.narrative_summary === "string" ? fm.narrative_summary : "";

      // Without an explicit status, the presence of a narrative implies "synthesized".
      const statusRaw = String(fm.summary_status ?? (narrative ? "synthesized" : "none"));
      const summary_status = knownStatuses.includes(statusRaw)
        ? (statusRaw as TimelineEntry["summary_status"])
        : "none";

      // Default: hide events without a real narrative.
      if (!includeUnsynthesized && summary_status === "none") continue;
      // Always hide generic concept-entities (categories, not real events).
      if (fm.is_generic === true) continue;
      if (q && !canonical.toLowerCase().includes(q) && !narrative.toLowerCase().includes(q)) {
        continue;
      }

      matches.push({
        key,
        entry: {
          entity_class: cls,
          entity_id: id,
          canonical_name: canonical,
          date_start,
          date_end: frontmatterDate(fm.date_end),
          primary_location: (fm.primary_location as string) ?? null,
          narrative_summary: narrative ? narrative.slice(0, 280) : null,
          summary_status,
          total_mentions: typeof fm.total_mentions === "number" ? fm.total_mentions : undefined,
          href: `/e/${folder}/${id}`,
        },
      });
    } catch {
      /* skip malformed */
    }
  }

  // Sort on the key computed during filtering instead of re-deriving it per comparison.
  matches.sort((a, b) => a.key.localeCompare(b.key));
  const entries = matches.map((m) => m.entry);

  return json({
    count: entries.length,
    limit,
    from: from || null,
    to: to || null,
    class: cls,
    entries: entries.slice(0, limit),
  });
}

/**
 * Parse the `limit` query value: absent, blank or non-numeric input gives the
 * default of 200; everything else is truncated to an integer and clamped to 1..500.
 */
function parseLimit(raw: string | null): number {
  if (raw === null || raw.trim() === "") return 200;
  const n = Number(raw);
  // NaN would otherwise survive Math.min/Math.max and make slice() return nothing.
  if (Number.isNaN(n)) return 200;
  return Math.min(Math.max(Math.trunc(n), 1), 500);
}

/**
 * Sort key for an inclusive `to` bound. A missing month or day is padded with
 * "99" so that "1947" admits every date in 1947 and "1947-06" every day of that
 * month. Text that does not start with a four-digit year is returned unchanged.
 */
function inclusiveUpperBound(to: string): string {
  const m = /^(\d{4})(?:-(\d{2}))?(?:-(\d{2}))?/.exec(to);
  if (m === null) return to;
  return `${m[1]}-${m[2] ?? "99"}-${m[3] ?? "99"}`;
}

/**
 * Normalise a frontmatter date value to text: `Date` objects (produced by the
 * YAML parser for unquoted dates) become `YYYY-MM-DD`, strings and numbers
 * become trimmed text, and anything else — or blank text — becomes `null`.
 */
function frontmatterDate(value: unknown): string | null {
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? null : value.toISOString().slice(0, 10);
  }
  if (typeof value === "string" || typeof value === "number") {
    return String(value).trim() || null;
  }
  return null;
}
|