// disclosure-bureau/web/app/e/[cls]/[id]/page.tsx

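/**
 * Entity detail page — YAML-first.
 * Counts and signal metadata come from the entity's frontmatter (kept in sync
 * by scripts/maintain/42_sync_entity_stats.py); the DB is consulted only for
 * live chunk previews. The wiki frontmatter and body also serve as the static
 * fallback for the canonical name, aliases and narrative.
 */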
import Link from "next/link";
import { notFound } from "next/navigation";
import Image from "next/image";
import { readEntity, classKeyToFolder, type EntityClass } from "@/lib/wiki";
import { MarkdownBody } from "@/components/markdown-body";
import { ChatBubble } from "@/components/chat-bubble";
import { AuthBar } from "@/components/auth-bar";
import { EntityGraphMini } from "@/components/entity-graph-mini";
import {
getEntityCore,
getEntityMentionsByDoc,
getEntityChunks,
} from "@/lib/retrieval/entity-pages";
// Maps the plural folder key used in the URL (/e/<folder>/<id>) to the singular
// entity-class name that EntityPage passes to the retrieval helpers and to
// EntityGraphMini. Note that "uap-objects" becomes "uap_object" (underscore).
// Keys not listed here fall back to the folder name itself at the call site.
const CLASS_TO_SINGULAR: Record<string, string> = {
people: "person",
organizations: "organization",
locations: "location",
events: "event",
"uap-objects": "uap_object",
vehicles: "vehicle",
operations: "operation",
concepts: "concept",
};
// Next.js route segment config: opt this page out of static rendering so it is
// rendered on every request.
export const dynamic = "force-dynamic";
// Human-readable (pt-BR) label for each entity class, shown in the badge at the
// top of the hero header.
const CLASS_TITLE: Record<EntityClass, string> = {
people: "Pessoa",
organizations: "Organização",
locations: "Local",
events: "Evento",
"uap-objects": "Objeto UAP",
vehicles: "Veículo",
operations: "Operação",
concepts: "Conceito",
};
// Accent colour per entity class, as two space-separated CSS classes: text
// colour first, border colour second. The order matters: EntityPage applies the
// whole string to the class badge and takes only the second token (the border
// class) for the hero header.
// NOTE(review): these look like Tailwind arbitrary-value classes; if so, keep
// them as complete string literals so the class scanner can find them — confirm.
const CLASS_COLOR: Record<EntityClass, string> = {
people: "text-[#ff6ec7] border-[#ff6ec7]",
organizations: "text-[#ff8a4d] border-[#ff8a4d]",
locations: "text-[#3fde6a] border-[#3fde6a]",
events: "text-[#ffa500] border-[#ffa500]",
"uap-objects": "text-[#ff3344] border-[#ff3344]",
vehicles: "text-[#5b9bd5] border-[#5b9bd5]",
operations: "text-[#9b5de5] border-[#9b5de5]",
concepts: "text-[#06d6a0] border-[#06d6a0]",
};
// Gradient start colour per entity class (the same hues as CLASS_COLOR at 10%
// alpha). EntityPage combines it with `bg-gradient-to-br to-[#040810]` on the
// hero header.
// NOTE(review): presumably Tailwind arbitrary-value classes; keep them as
// complete string literals — confirm.
const CLASS_BG: Record<EntityClass, string> = {
people: "from-[rgba(255,110,199,0.10)]",
organizations: "from-[rgba(255,138,77,0.10)]",
locations: "from-[rgba(63,222,106,0.10)]",
events: "from-[rgba(255,165,0,0.10)]",
"uap-objects": "from-[rgba(255,51,68,0.10)]",
vehicles: "from-[rgba(91,155,213,0.10)]",
operations: "from-[rgba(155,93,229,0.10)]",
concepts: "from-[rgba(6,214,160,0.10)]",
};
/**
 * Builds the compact page list shown under each document in the sidebar.
 *
 * - no pages      → an em dash
 * - up to 5 pages → every page, e.g. "p1, p2, p3"
 * - more than 5   → the first four pages followed by "+N" for the remainder
 *
 * @param pages Page numbers on which the entity occurs, in display order.
 * @returns The formatted label.
 */
function pageOcurrencesText(pages: number[]): string {
  const total = pages.length;
  if (total === 0) {
    return "—";
  }

  const fitsInFull = total <= 5;
  const shown = fitsInFull ? pages : pages.slice(0, 4);
  const listed = "p" + shown.join(", p");

  return fitsInFull ? listed : listed + " +" + (total - 4);
}
/** Border class, text class and pt-BR label used to render one signal-strength value. */
type StrengthStyle = { border: string; text: string; label: string };

/** Neutral styling, also used as the fallback for unknown strength values. */
const UNVERIFIED_STYLE: StrengthStyle = {
  border: "border-[#5a6678]",
  text: "text-[#8896aa]",
  label: "não verificada",
};

/** Styling for each known `signal_strength` value. Class names are kept as full literals. */
const STRENGTH_STYLE: Record<string, StrengthStyle> = {
  strong: { border: "border-[#00ff9c]", text: "text-[#00ff9c]", label: "forte" },
  weak: { border: "border-[#ffa500]", text: "text-[#ffa500]", label: "fraca" },
  orphan: { border: "border-[#ff6b6b]", text: "text-[#ff6b6b]", label: "órfã" },
  unverified: UNVERIFIED_STYLE,
};

/** A wiki body must be longer than this (after trimming) to be shown as the narrative. */
const MIN_NARRATIVE_CHARS = 30;

/** Logs a failed best-effort fetch so a degraded page is still diagnosable from server logs. */
function logEntityPageError(step: string, entityRef: string, err: unknown): void {
  console.error(`[entity-page] ${step} failed for ${entityRef}`, err);
}

/**
 * Entity detail page for `/e/[cls]/[id]`.
 *
 * Resolves the class folder from the URL, loads the entity core record and the
 * wiki entry, and renders: a hero header (name, aliases, counts, signal-strength
 * badge), chunk previews, the wiki narrative, and a sidebar with the documents
 * that mention the entity plus a mini graph.
 *
 * Responds with a 404 (via `notFound()`) when the class key is unknown or when
 * neither the core record nor the wiki entry exists. Failures of the core,
 * mention and chunk lookups are logged and degrade to empty data; a failure
 * while reading the wiki entry propagates.
 *
 * @param params Route params promise carrying the class key (`cls`) and entity id (`id`).
 */
export default async function EntityPage({
  params,
}: {
  params: Promise<{ cls: string; id: string }>;
}) {
  const { cls, id } = await params;
  const folder = classKeyToFolder(cls);
  if (!folder) notFound();

  const entityClass = folder as EntityClass;
  const entityClassSingular = CLASS_TO_SINGULAR[folder as string] ?? folder;
  const entityRef = `${entityClassSingular}/${id}`;

  // YAML-first: every count comes from the entity's frontmatter (kept in sync
  // by scripts/maintain/42_sync_entity_stats.py). The DB is consulted ONLY for
  // chunk previews, not for counts.
  // The core record and the wiki entry are independent, so load them together.
  const [core, wiki] = await Promise.all([
    getEntityCore(entityClassSingular, id).catch((err: unknown) => {
      logEntityPageError("getEntityCore", entityRef, err);
      return null;
    }),
    readEntity(entityClass, id),
  ]);
  if (!core && !wiki) notFound();

  const canonical = core?.canonical_name ?? (wiki?.fm.canonical_name as string) ?? id;

  // Frontmatter is loosely typed, so validate the shape; de-duplicate because
  // each alias is also used as a React key below.
  const rawAliases: unknown = core?.aliases ?? wiki?.fm.aliases;
  const aliases: string[] = Array.isArray(rawAliases)
    ? Array.from(
        new Set(
          rawAliases.filter(
            (a): a is string => typeof a === "string" && a !== canonical,
          ),
        ),
      )
    : [];

  // Mentions and chunk previews only exist for entities with a core record;
  // both lookups are best-effort and independent of each other.
  let mentionGroups: Awaited<ReturnType<typeof getEntityMentionsByDoc>> = [];
  let sampleChunks: Awaited<ReturnType<typeof getEntityChunks>> = [];
  if (core) {
    [mentionGroups, sampleChunks] = await Promise.all([
      getEntityMentionsByDoc(entityClassSingular, id, 100).catch((err: unknown) => {
        logEntityPageError("getEntityMentionsByDoc", entityRef, err);
        return [];
      }),
      getEntityChunks(core.entity_pk, 12).catch((err: unknown) => {
        logEntityPageError("getEntityChunks", entityRef, err);
        return [];
      }),
    ]);
  }

  const totalMentions = core?.total_mentions ?? 0;
  const documentsCount = core?.documents_count ?? 0;
  const strength = core?.signal_strength ?? "unverified";
  const strengthStyle = STRENGTH_STYLE[strength] ?? UNVERIFIED_STYLE;
  const sigs = core?.signal_sources ?? { db_chunks: 0, page_refs: 0, cross_refs: 0 };

  const classColor = CLASS_COLOR[entityClass];
  const classBg = CLASS_BG[entityClass];
  // CLASS_COLOR values are "<text class> <border class>"; the hero only needs the border.
  const classBorder = classColor.split(" ")[1] ?? "";

  // Use one threshold for both "show narrative" and "show empty state", so a
  // very short body with no chunks does not leave the main column blank.
  const narrative = wiki?.body ?? "";
  const hasNarrative = narrative.trim().length > MIN_NARRATIVE_CHARS;

  return (
    <main className="min-h-screen p-6 md:p-10 max-w-6xl mx-auto">
      <div className="flex items-start justify-between gap-4 mb-6">
        <div className="flex items-center gap-3">
          <Link href="/" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
            home
          </Link>
          <Link href={`/e/${folder}`} className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
            todos {folder}
          </Link>
          <Link href="/graph" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
            🕸 ver no grafo
          </Link>
        </div>
        <AuthBar />
      </div>

      {/* Hero header */}
      <header
        className={`mb-8 p-6 rounded-lg border-2 bg-gradient-to-br to-[#040810] ${classBg} ${classBorder}`}
      >
        <div className="font-mono text-[10px] text-[#5a6678] tracking-widest uppercase mb-2 flex items-center gap-3 flex-wrap">
          <span className={`px-2 py-0.5 border rounded ${classColor}`}>
            {CLASS_TITLE[entityClass]}
          </span>
          <span>· /e/{folder}/{id}</span>
        </div>
        <h1 className="font-mono text-3xl md:text-4xl text-[#00ff9c] mb-3 leading-tight">
          {canonical}
        </h1>
        {aliases.length > 0 && (
          <div className="mb-4 flex flex-wrap gap-1.5">
            {aliases.slice(0, 12).map((a) => (
              <span
                key={a}
                className="font-mono text-[11px] px-2 py-0.5 bg-[rgba(127,219,255,0.06)] border border-[rgba(127,219,255,0.20)] text-[#7fdbff] rounded"
              >
                {a}
              </span>
            ))}
          </div>
        )}
        <div className="flex flex-wrap gap-3 mt-4">
          <div className="px-4 py-3 bg-[#0a121e] border border-[rgba(0,255,156,0.20)] rounded">
            <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">menções</div>
            <div className="font-mono text-2xl text-[#00ff9c] mt-0.5 tabular-nums">{totalMentions}</div>
          </div>
          <div className="px-4 py-3 bg-[#0a121e] border border-[rgba(127,219,255,0.20)] rounded">
            <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">documentos</div>
            <div className="font-mono text-2xl text-[#7fdbff] mt-0.5 tabular-nums">{documentsCount}</div>
          </div>
          {core?.enrichment_status && core.enrichment_status !== "none" && (
            <div className="px-4 py-3 bg-[#0a121e] border border-[rgba(167,139,250,0.20)] rounded">
              <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">enrichment</div>
              <div className="font-mono text-sm text-[#a78bfa] mt-0.5">{core.enrichment_status}</div>
            </div>
          )}
          {/* Signal-strength badge with the per-source breakdown */}
          <div
            className={`px-4 py-3 bg-[#0a121e] border rounded ${strengthStyle.border}`}
            title="Cruzamento dos 3 sinais que confirmam esta entidade no corpus."
          >
            <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">
              força do sinal
            </div>
            <div className={`font-mono text-sm mt-0.5 ${strengthStyle.text}`}>
              {strengthStyle.label}
            </div>
            <div className="font-mono text-[9px] text-[#5a6678] mt-1 leading-tight">
              {sigs.db_chunks} chunks · {sigs.page_refs} págs · {sigs.cross_refs} backlinks
            </div>
          </div>
        </div>
        {strength === "orphan" && (
          <p className="mt-4 text-xs text-[#ff6b6b] font-mono">
            entidade não confirmada: nenhuma página, chunk ou outra entidade aponta para
            ela. Pode ser extração ruidosa do pipeline original.
          </p>
        )}
      </header>

      <div className="grid grid-cols-1 lg:grid-cols-[1fr_320px] gap-8">
        {/* MAIN — narrative + live chunks */}
        <article>
          {/* Live chunk previews — most impactful section */}
          {sampleChunks.length > 0 && (
            <section className="mb-10">
              <h2 className="font-mono text-sm text-[#7fdbff] uppercase tracking-widest mb-3 border-l-2 border-[#7fdbff] pl-3">
                Aparece em {sampleChunks.length}+ trechos · top {sampleChunks.length}
              </h2>
              <div className="space-y-2">
                {sampleChunks.map((c) => {
                  const text = (c.content_pt || c.content_en || "").trim();
                  const docPretty = c.doc_id.replace(/^doc-/, "").replace(/-/g, " ").slice(0, 60);
                  // Thumbnail of the chunk's bounding box, when one is available.
                  const cropUrl = c.bbox
                    ? `/api/crop?doc=${encodeURIComponent(c.doc_id)}&page=${c.page}&x=${c.bbox.x}&y=${c.bbox.y}&w=${c.bbox.w}&h=${c.bbox.h}&w_px=200`
                    : null;
                  return (
                    <Link
                      key={c.chunk_pk}
                      href={`/d/${c.doc_id}#${c.chunk_id}`}
                      className="flex items-start gap-3 p-3 bg-[#0a121e] border border-[rgba(127,219,255,0.15)] hover:border-[#00ff9c] rounded transition"
                    >
                      {cropUrl && (
                        <Image
                          src={cropUrl}
                          alt=""
                          width={120}
                          height={80}
                          unoptimized
                          className="block w-28 h-20 object-cover bg-black rounded flex-shrink-0"
                        />
                      )}
                      <div className="flex-1 min-w-0">
                        <div className="flex items-center gap-2 text-[10px] font-mono mb-1 flex-wrap">
                          <span className="text-[#00ff9c]">{c.chunk_id}</span>
                          <span className="text-[#5a6678]">p{c.page}</span>
                          <span className="text-[#5a6678]">{c.type}</span>
                          {c.classification && (
                            <span className="text-[#ff6b6b]">{c.classification}</span>
                          )}
                          {c.ufo_anomaly && (
                            <span className="text-[#00ff9c]">🛸 {c.ufo_anomaly_type ?? "UAP"}</span>
                          )}
                        </div>
                        <div className="text-sm text-[#c8d4e6] line-clamp-3 leading-snug">{text}</div>
                        <div className="text-[10px] font-mono text-[#5a6678] truncate mt-1">
                          {docPretty}
                        </div>
                      </div>
                    </Link>
                  );
                })}
              </div>
            </section>
          )}

          {/* Narrative body (a Haiku stub is fine when it is rich enough) */}
          {hasNarrative && (
            <section className="pt-6 border-t border-[rgba(0,255,156,0.12)]">
              <h2 className="font-mono text-sm text-[#7fdbff] uppercase tracking-widest mb-3 border-l-2 border-[#7fdbff] pl-3">
                Narrativa
              </h2>
              <MarkdownBody>{narrative}</MarkdownBody>
            </section>
          )}

          {/* Empty state: nothing else is rendered in the main column */}
          {sampleChunks.length === 0 && !hasNarrative && (
            <div className="text-[#5a6678] italic text-sm p-6 border border-[rgba(255,165,0,0.30)] bg-[rgba(255,165,0,0.05)] rounded">
              Entidade ainda sem chunks indexados na DB. Aguarde o indexer terminar.
            </div>
          )}
        </article>

        {/* SIDEBAR — documents where the entity appears + mini graph */}
        <aside className="lg:sticky lg:top-6 lg:self-start space-y-6">
          <section>
            <h3 className="font-mono text-[10px] text-[#8896aa] uppercase tracking-widest mb-2">
              Aparece em {documentsCount} documento(s)
            </h3>
            {mentionGroups.length === 0 ? (
              <p className="text-[#5a6678] text-xs italic">Sem dados de mention ainda.</p>
            ) : (
              <ul className="space-y-1 max-h-[50vh] overflow-y-auto pr-1">
                {mentionGroups.map((m) => (
                  <li key={m.doc_id}>
                    <Link
                      href={`/d/${m.doc_id}`}
                      className="group block p-2 border border-transparent hover:border-[rgba(0,255,156,0.32)] hover:bg-[rgba(0,255,156,0.04)] rounded transition"
                    >
                      <div className="flex items-baseline gap-2 font-mono text-[11px]">
                        <span className="text-[#7fdbff] group-hover:text-[#00ff9c] truncate flex-1">
                          {m.canonical_title ?? m.doc_id}
                        </span>
                        <span className="text-[#00ff9c] tabular-nums shrink-0">{m.mention_count}×</span>
                      </div>
                      <div className="flex items-center gap-2 mt-0.5 font-mono text-[10px] text-[#5a6678]">
                        {m.classification && (
                          <span className="text-[#ff6b6b]">{m.classification}</span>
                        )}
                        <span>· {pageOcurrencesText(m.pages ?? [])}</span>
                      </div>
                    </Link>
                  </li>
                ))}
              </ul>
            )}
          </section>
          <EntityGraphMini
            entityClassSingular={entityClassSingular}
            entityId={id}
          />
        </aside>
      </div>

      {/* Chat is seeded with the first document that mentions the entity, if any */}
      <ChatBubble context={{ doc_id: mentionGroups[0]?.doc_id }} />
    </main>
  );
}