Add reextract pipeline (scripts/reextract/) that rebuilds doc-level entity JSON from Sonnet-vision chunks via Opus, replacing the noisy per-page extraction. Add synthesize scripts to regenerate wiki/entities from the 116 _reextract.json (30), aggregate missing page.md from chunks (31), and reprocess 805 pages the doc-rebuilder agent dropped on context overflow (32). Add maintain scripts 43-56 for chunk-page sync, dedup, generic-entity marking, and typed relation extraction. Web: wire relations API + entity-relations component; entity/timeline/doc pages consume the rebuilt layer. Note: raw/, processing/, wiki/ remain gitignored (bulk data managed separately); the 116 reextract JSONs and 7,798 rebuilt entity files live on disk only. The 27 curated anchor events under wiki/entities/events/ are preserved. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
122 lines
4.1 KiB
TypeScript
122 lines
4.1 KiB
TypeScript
/**
 * /e/[cls] — list page for an entity class (e.g. /e/people, /e/locations).
 *
 * Reads wiki/entities/<class>/*.md frontmatter on the server; renders a
 * paginated, searchable grid with mention counts. Click → /e/<class>/<id>.
 */
|
|
import Link from "next/link";
|
|
import { notFound } from "next/navigation";
|
|
import fs from "node:fs/promises";
|
|
import path from "node:path";
|
|
import matter from "gray-matter";
|
|
import { AuthBar } from "@/components/auth-bar";
|
|
import { WIKI, classKeyToFolder, type EntityClass } from "@/lib/wiki";
|
|
import { EntityListFilter } from "@/components/entity-list-filter";
|
|
|
|
// Next.js route segment config: opt this page out of static rendering so it is
// rendered per request. Presumably chosen so edits to the entity files on disk
// show up without a rebuild — confirm.
export const dynamic = "force-dynamic";
|
|
|
|
/**
 * Page heading shown for each entity class, keyed by the class folder name.
 * The labels are user-facing Portuguese strings.
 */
const CLASS_TITLE: Record<EntityClass, string> = {
  "people": "Pessoas",
  "organizations": "Organizações",
  "locations": "Locais",
  "events": "Eventos",
  "uap-objects": "Objetos UAP",
  "vehicles": "Veículos",
  "operations": "Operações",
  "concepts": "Conceitos",
};
|
|
|
|
/**
 * Text + border colour utility classes for the class badge, keyed by the
 * class folder name.
 *
 * NOTE(review): these look like Tailwind arbitrary-value utilities. If so,
 * each value must remain a complete literal string in source (not assembled
 * from a hex code at runtime) so the class scanner can find it — confirm.
 */
const CLASS_COLOR: Record<EntityClass, string> = {
  "people": "text-[#ff6ec7] border-[#ff6ec7]",
  "organizations": "text-[#ff8a4d] border-[#ff8a4d]",
  "locations": "text-[#3fde6a] border-[#3fde6a]",
  "events": "text-[#ffa500] border-[#ffa500]",
  "uap-objects": "text-[#ff3344] border-[#ff3344]",
  "vehicles": "text-[#5b9bd5] border-[#5b9bd5]",
  "operations": "text-[#9b5de5] border-[#9b5de5]",
  "concepts": "text-[#06d6a0] border-[#06d6a0]",
};
|
|
|
|
/** One entity as shown in the list grid, built from a single entity file's frontmatter. */
interface EntityRow {
  /** Entity file name without the `.md` extension. */
  id: string;
  /** Display name from the `canonical_name` frontmatter field; falls back to `id`. */
  canonical_name: string;
  /** Alternative names from the `aliases` frontmatter field; empty when absent. */
  aliases: string[];
  /** Value of the `total_mentions` frontmatter field (0 when absent); the list's sort key. */
  total_mentions: number;
  /** Value of the `documents_count` frontmatter field (0 when absent). */
  documents_count: number;
  /** Value of the `enrichment_status` frontmatter field, or `null` when absent. */
  enrichment_status: string | null;
}
|
|
|
|
/**
 * Upper bound on entity files read at the same time. Keeps the listing fast
 * on large classes without opening every file in the directory at once.
 */
const ENTITY_READ_BATCH_SIZE = 64;

/** Coerces a frontmatter value to a finite number; anything else becomes 0. */
function toFiniteCount(value: unknown): number {
  const n = Number(value ?? 0);
  return Number.isFinite(n) ? n : 0;
}

/**
 * Normalizes the `aliases` frontmatter value to a list of strings. YAML may
 * parse a bare alias such as `1947` as a number, so numbers are stringified;
 * any other non-string item is dropped.
 */
function toAliasList(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  const aliases: string[] = [];
  for (const item of value) {
    if (typeof item === "string") aliases.push(item);
    else if (typeof item === "number") aliases.push(String(item));
  }
  return aliases;
}

/**
 * Reads one entity file and maps its frontmatter to an EntityRow.
 * Returns `null` when the entity is generic (and generics are hidden) or when
 * the file cannot be read or parsed.
 */
async function readEntityRow(
  dir: string,
  file: string,
  includeGeneric: boolean,
): Promise<EntityRow | null> {
  try {
    const raw = await fs.readFile(path.join(dir, file), "utf-8");
    const fm = matter(raw).data as Record<string, unknown>;
    // Hide generic concept-entities (e.g. "Flying disc sighting reports") —
    // they're categories, not real instances. Opt-in via ?include_generic=1.
    if (!includeGeneric && fm.is_generic === true) return null;
    const id = file.replace(/\.md$/, "");
    return {
      id,
      canonical_name: String(fm.canonical_name ?? id),
      aliases: toAliasList(fm.aliases),
      total_mentions: toFiniteCount(fm.total_mentions),
      documents_count: toFiniteCount(fm.documents_count),
      enrichment_status: typeof fm.enrichment_status === "string" ? fm.enrichment_status : null,
    };
  } catch {
    // Deliberate best-effort: one malformed file must not break the whole list.
    return null;
  }
}

/**
 * Lists the entities of one class from `<WIKI>/entities/<cls>/*.md`.
 *
 * @param cls Entity class folder to read.
 * @param includeGeneric When false (default), entities whose frontmatter has
 *   `is_generic: true` are left out.
 * @returns Rows sorted by `total_mentions`, highest first. Returns an empty
 *   list when the class directory cannot be read. Unreadable or unparsable
 *   files are skipped.
 */
async function listEntities(cls: EntityClass, includeGeneric = false): Promise<EntityRow[]> {
  const dir = path.join(WIKI, "entities", cls);
  let files: string[];
  try {
    files = (await fs.readdir(dir)).filter((f) => f.endsWith(".md"));
  } catch {
    return [];
  }

  const rows: EntityRow[] = [];
  // Read in bounded batches; Promise.all keeps each batch in directory order,
  // so entities with equal mention counts keep their relative order.
  for (let start = 0; start < files.length; start += ENTITY_READ_BATCH_SIZE) {
    const batch = await Promise.all(
      files
        .slice(start, start + ENTITY_READ_BATCH_SIZE)
        .map((file) => readEntityRow(dir, file, includeGeneric)),
    );
    for (const row of batch) {
      if (row !== null) rows.push(row);
    }
  }

  // Counts are guaranteed finite here, so the comparator is always consistent.
  rows.sort((a, b) => b.total_mentions - a.total_mentions);
  return rows;
}
|
|
|
|
/**
 * Server component for the entity-class list route.
 *
 * Resolves the `cls` route param to a class folder via `classKeyToFolder`
 * and calls `notFound()` when that yields nothing. Otherwise it loads the
 * class's entities and renders the header plus the client-side filter.
 * Generic entities are included only when the query string has
 * `include_generic=1`.
 */
export default async function EntityListPage({
  params,
  searchParams,
}: {
  params: Promise<{ cls: string }>;
  searchParams?: Promise<{ include_generic?: string }>;
}) {
  const { cls } = await params;
  const query = await searchParams;
  const includeGeneric = query?.include_generic === "1";

  const folder = classKeyToFolder(cls);
  if (!folder) notFound();

  const entityClass = folder as EntityClass;
  const entities = await listEntities(entityClass, includeGeneric);
  const badgeClassName = `px-2 py-0.5 border rounded ${CLASS_COLOR[entityClass]}`;
  const heading = CLASS_TITLE[entityClass];

  return (
    <main className="min-h-screen p-6 md:p-10 max-w-5xl mx-auto">
      <div className="flex items-start justify-between gap-4 mb-6">
        <Link href="/" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
          ← home
        </Link>
        <AuthBar />
      </div>

      <header className="mb-6">
        <div className="font-mono text-[10px] text-[#5a6678] tracking-widest uppercase mb-2 flex items-center gap-2">
          <span className={badgeClassName}>
            {folder}
          </span>
          <span>· {entities.length} entidades</span>
        </div>
        <h1 className="font-mono text-2xl text-[#00ff9c] mb-1">
          ▍ {heading}
        </h1>
        <p className="text-[#8896aa] text-sm">
          Ordenadas por número de menções no corpus. Filtre por nome/alias abaixo.
        </p>
      </header>

      <EntityListFilter entities={entities} folder={folder} />
    </main>
  );
}
|