disclosure-bureau/web/app/e/[cls]/page.tsx
Luiz Gustavo a7e9dce6d2 rebuild entity layer from Sonnet-vision reextract pipeline
Add reextract pipeline (scripts/reextract/) that rebuilds doc-level entity
JSON from Sonnet-vision chunks via Opus, replacing the noisy per-page
extraction. Add synthesize scripts to regenerate wiki/entities from the 116
_reextract.json (30), aggregate missing page.md from chunks (31), and reprocess
805 pages the doc-rebuilder agent dropped on context overflow (32). Add
maintain scripts 43-56 for chunk-page sync, dedup, generic-entity marking, and
typed relation extraction.

Web: wire relations API + entity-relations component; entity/timeline/doc
pages consume the rebuilt layer.

Note: raw/, processing/, wiki/ remain gitignored (bulk data managed
separately); the 116 reextract JSONs and 7,798 rebuilt entity files live on
disk only. The 27 curated anchor events under wiki/entities/events/ are
preserved.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 12:20:24 -03:00

122 lines
4.1 KiB
TypeScript

/**
* /e/[cls] — list page for an entity class (e.g. /e/people, /e/locations).
*
* Reads wiki/entities/<class>/*.md frontmatter on the server; renders a
* paginated, searchable grid with mention counts. Click → /e/<class>/<id>.
*/
import Link from "next/link";
import { notFound } from "next/navigation";
import fs from "node:fs/promises";
import path from "node:path";
import matter from "gray-matter";
import { AuthBar } from "@/components/auth-bar";
import { WIKI, classKeyToFolder, type EntityClass } from "@/lib/wiki";
import { EntityListFilter } from "@/components/entity-list-filter";
// Next.js route segment config: render this route on every request instead of
// serving a statically generated page. The entity list below is read from the
// filesystem at request time, so a build-time snapshot would go stale.
export const dynamic = "force-dynamic";
/**
 * Human-readable (Portuguese) page heading for each entity class.
 * Keyed by `EntityClass`, so adding a class to that type without adding a
 * title here is a compile error. Rendered as the page's `<h1>`.
 */
const CLASS_TITLE: Record<EntityClass, string> = {
people: "Pessoas",
organizations: "Organizações",
locations: "Locais",
events: "Eventos",
"uap-objects": "Objetos UAP",
vehicles: "Veículos",
operations: "Operações",
concepts: "Conceitos",
};
/**
 * Text + border color utility classes for the small class badge shown above
 * the page heading, one entry per entity class.
 *
 * NOTE(review): these look like Tailwind arbitrary-value classes. If so, they
 * presumably need to stay as complete string literals (not built by string
 * concatenation) so the class scanner can find them — confirm before refactoring.
 */
const CLASS_COLOR: Record<EntityClass, string> = {
people: "text-[#ff6ec7] border-[#ff6ec7]",
organizations: "text-[#ff8a4d] border-[#ff8a4d]",
locations: "text-[#3fde6a] border-[#3fde6a]",
events: "text-[#ffa500] border-[#ffa500]",
"uap-objects": "text-[#ff3344] border-[#ff3344]",
vehicles: "text-[#5b9bd5] border-[#5b9bd5]",
operations: "text-[#9b5de5] border-[#9b5de5]",
concepts: "text-[#06d6a0] border-[#06d6a0]",
};
/**
 * One entry of the entity list, derived from the YAML frontmatter of a single
 * `wiki/entities/<class>/<id>.md` file. Passed as-is to `EntityListFilter`.
 */
interface EntityRow {
/** File name without the `.md` extension; used as the entity's identifier. */
id: string;
/** Display name from frontmatter; falls back to `id` when the field is absent. */
canonical_name: string;
/** Alternative names from frontmatter; empty when the field is missing or not an array. */
aliases: string[];
/** Mention count from frontmatter (0 when absent); the list is sorted by this, descending. */
total_mentions: number;
/** Document count from frontmatter (0 when absent). */
documents_count: number;
/** Frontmatter `enrichment_status` value, or `null` when absent. */
enrichment_status: string | null;
}
/**
 * Maximum number of entity files read at the same time. Reading in bounded
 * batches overlaps I/O without opening thousands of file descriptors at once.
 */
const ENTITY_READ_BATCH_SIZE = 32;

/** Coerces a frontmatter counter to a finite number, defaulting to 0 (never NaN). */
function toCount(value: unknown): number {
  const parsed = Number(value ?? 0);
  return Number.isFinite(parsed) ? parsed : 0;
}

/** Keeps only usable alias values: strings as-is, finite numbers stringified. */
function toAliases(value: unknown): string[] {
  if (!Array.isArray(value)) return [];
  const aliases: string[] = [];
  for (const item of value) {
    if (typeof item === "string") {
      aliases.push(item);
    } else if (typeof item === "number" && Number.isFinite(item)) {
      // YAML parses bare numerals (e.g. a year) as numbers; keep them searchable.
      aliases.push(String(item));
    }
  }
  return aliases;
}

/** Builds a list row from parsed frontmatter, validating every field at runtime. */
function toEntityRow(id: string, fm: Record<string, unknown>): EntityRow {
  const rawName = fm.canonical_name;
  // Only scalars make a sensible display name; objects/dates/blank strings fall back to the id.
  const name =
    typeof rawName === "string" || typeof rawName === "number" ? String(rawName) : "";
  const status = fm.enrichment_status;
  return {
    id,
    canonical_name: name.trim() === "" ? id : name,
    aliases: toAliases(fm.aliases),
    total_mentions: toCount(fm.total_mentions),
    documents_count: toCount(fm.documents_count),
    enrichment_status: typeof status === "string" ? status : null,
  };
}

/**
 * Reads and parses one entity file.
 * Returns `null` when the file is unreadable/malformed or is a hidden generic entity.
 */
async function readEntityRow(
  dir: string,
  file: string,
  includeGeneric: boolean,
): Promise<EntityRow | null> {
  try {
    const raw = await fs.readFile(path.join(dir, file), "utf-8");
    const fm: Record<string, unknown> = matter(raw).data;
    // Hide generic concept-entities (e.g. "Flying disc sighting reports") —
    // they're categories, not real instances. Opt-in via ?include_generic=1.
    if (!includeGeneric && fm.is_generic === true) return null;
    return toEntityRow(file.slice(0, -".md".length), fm);
  } catch {
    // Best-effort listing: one broken file must not take down the whole page.
    return null;
  }
}

/**
 * Lists every entity of a class from `wiki/entities/<cls>/*.md` frontmatter.
 *
 * @param cls Entity class folder to scan.
 * @param includeGeneric When `false` (default), entities whose frontmatter has
 *   `is_generic: true` are omitted.
 * @returns Rows sorted by `total_mentions` descending, ties broken by name.
 *   Returns an empty array when the class directory cannot be read.
 */
async function listEntities(cls: EntityClass, includeGeneric = false): Promise<EntityRow[]> {
  const dir = path.join(WIKI, "entities", cls);
  let files: string[];
  try {
    // Require a non-empty stem so a stray ".md" file cannot yield an empty id.
    files = (await fs.readdir(dir)).filter((f) => f.endsWith(".md") && f.length > ".md".length);
  } catch {
    return [];
  }

  const rows: EntityRow[] = [];
  for (let start = 0; start < files.length; start += ENTITY_READ_BATCH_SIZE) {
    const batch = files.slice(start, start + ENTITY_READ_BATCH_SIZE);
    const parsed = await Promise.all(
      batch.map((file) => readEntityRow(dir, file, includeGeneric)),
    );
    for (const row of parsed) {
      if (row !== null) rows.push(row);
    }
  }

  // Counts are guaranteed finite, so the comparator is a valid total order;
  // the name tie-break makes the result independent of readdir order.
  rows.sort(
    (a, b) =>
      b.total_mentions - a.total_mentions ||
      a.canonical_name.localeCompare(b.canonical_name),
  );
  return rows;
}
/**
 * Server component for `/e/[cls]`.
 *
 * Resolves the route segment to an entity-class folder (404 when it does not
 * map to one), loads that class's entities, and renders the header plus the
 * client-side filterable list. Generic entities are only included when the
 * query string contains `include_generic=1`.
 */
export default async function EntityListPage({
  params,
  searchParams,
}: {
  params: Promise<{ cls: string }>;
  searchParams?: Promise<{ include_generic?: string }>;
}) {
  const routeParams = await params;
  const query = await searchParams;
  const showGeneric = query?.include_generic === "1";

  const folder = classKeyToFolder(routeParams.cls);
  if (!folder) {
    notFound();
  }

  const entityClass = folder as EntityClass;
  const entities = await listEntities(entityClass, showGeneric);

  // Derived presentation values, computed once ahead of the markup.
  const badgeClassName = `px-2 py-0.5 border rounded ${CLASS_COLOR[entityClass]}`;
  const heading = CLASS_TITLE[entityClass];

  return (
    <main className="min-h-screen p-6 md:p-10 max-w-5xl mx-auto">
      <div className="flex items-start justify-between gap-4 mb-6">
        <Link href="/" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
          home
        </Link>
        <AuthBar />
      </div>
      <header className="mb-6">
        <div className="font-mono text-[10px] text-[#5a6678] tracking-widest uppercase mb-2 flex items-center gap-2">
          <span className={badgeClassName}>{folder}</span>
          <span>· {entities.length} entidades</span>
        </div>
        <h1 className="font-mono text-2xl text-[#00ff9c] mb-1">{heading}</h1>
        <p className="text-[#8896aa] text-sm">
          Ordenadas por número de menções no corpus. Filtre por nome/alias abaixo.
        </p>
      </header>
      <EntityListFilter entities={entities} folder={folder} />
    </main>
  );
}