disclosure-bureau/web/app/e/[cls]/[id]/page.tsx
guto 291748df63 sanitize entities: single YAML source of truth, signal_strength badge
The corpus had two parallel reverse-reference signals: the wiki/pages
entities_extracted blocks (Haiku page-level) and public.entity_mentions
(Sonnet chunk-level, ILIKE-matched). The entity page only consulted the
DB, so it showed "0 menções" for thousands of entities that were anchored
in pages or in cross-entity links the DB never indexed.

Resolved by collapsing all signals into the YAML frontmatter, which is
now the single runtime source for entity metadata.

scripts/maintain/42_sync_entity_stats.py walks every entity and writes:

  mentioned_in:        [...]        # consolidated page refs
  total_mentions:      max(db, pages)
  documents_count:     max(db_docs, distinct page docs)
  signal_sources:
    db_chunks:         int
    page_refs:         int
    cross_refs:        int
  signal_strength:     strong | weak | orphan | unverified
  referenced_by:       [[class/id]]  # cross-entity backlinks

Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the
entity's own cross_refs so anchored-but-not-mentioned entities don't
register as orphan.

OBJ canonical names like "7m long, 1.3m high, two rocket motors,
smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)"
are rewritten to "Peyerl shot down UAP" derived from observed_in_event,
preserving the full description as an alias. --fix-obj-names did this
for every OBJ-* with >80 char canonical_name.

Default behaviour is conservative: --archive-only-junk archives only
single/double-char names and pure-numeric noise. Everything else stays
on disk with signal_strength marked, so the user can review later.

web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first.
The /e/[cls]/[id] page now reads counts straight from YAML and renders
a "força do sinal" badge with the per-source breakdown. Orphan entities
get a banner explaining they have no cross-references.

DB is still queried for ONE thing: the chunk text for preview cards on
the entity page, so we don't re-parse 21k markdown files on every render.

First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-
PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink"
in the live UI.
2026-05-18 19:49:31 -03:00

331 lines
14 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

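/**
 * Entity detail page — YAML-first.
 * Counts and signal strength come from the entity's YAML frontmatter (kept in
 * sync by scripts/maintain/42_sync_entity_stats.py); the DB is consulted for
 * chunk previews, not for counts. The wiki entry supplies the fallback
 * name/aliases and the narrative body.
 */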
import Link from "next/link";
import { notFound } from "next/navigation";
import Image from "next/image";
import { readEntity, classKeyToFolder, type EntityClass } from "@/lib/wiki";
import { MarkdownBody } from "@/components/markdown-body";
import { ChatBubble } from "@/components/chat-bubble";
import { AuthBar } from "@/components/auth-bar";
import { EntityGraphMini } from "@/components/entity-graph-mini";
import {
getEntityCore,
getEntityMentionsByDoc,
getEntityChunks,
} from "@/lib/retrieval/entity-pages";
/**
 * Plural wiki folder name → singular entity-class key handed to the retrieval
 * helpers. A folder that is missing here is used unchanged at the call site.
 */
const CLASS_TO_SINGULAR: Record<string, string> = {
  concepts: "concept",
  events: "event",
  locations: "location",
  operations: "operation",
  organizations: "organization",
  people: "person",
  "uap-objects": "uap_object",
  vehicles: "vehicle",
};
// Next.js route segment config: render this page on every request instead of
// statically generating/caching it. Must stay a plain string literal export.
export const dynamic = "force-dynamic";
/**
 * Human-readable (Portuguese) label for each entity class, shown in the pill
 * at the top of the hero header.
 */
const CLASS_TITLE: Record<EntityClass, string> = {
  concepts: "Conceito",
  events: "Evento",
  locations: "Local",
  operations: "Operação",
  organizations: "Organização",
  people: "Pessoa",
  "uap-objects": "Objeto UAP",
  vehicles: "Veículo",
};
/**
 * Tailwind text + border colour utilities for each entity class.
 * Every value is "<text class> <border class>", in that order: the class pill
 * applies the whole string, while the hero header keeps only the second
 * (border) token via `split(" ")[1]` — do not reorder the two tokens.
 * NOTE(review): values are full literal class names, presumably so Tailwind's
 * source scanner can detect them — confirm before generating them dynamically.
 */
const CLASS_COLOR: Record<EntityClass, string> = {
people: "text-[#ff6ec7] border-[#ff6ec7]",
organizations: "text-[#ff8a4d] border-[#ff8a4d]",
locations: "text-[#3fde6a] border-[#3fde6a]",
events: "text-[#ffa500] border-[#ffa500]",
"uap-objects": "text-[#ff3344] border-[#ff3344]",
vehicles: "text-[#5b9bd5] border-[#5b9bd5]",
operations: "text-[#9b5de5] border-[#9b5de5]",
concepts: "text-[#06d6a0] border-[#06d6a0]",
};
/**
 * Tailwind gradient start colour (`from-*`) for each entity class, used on the
 * hero header together with `bg-gradient-to-br to-[#040810]`.
 * Each colour is the same RGB as the class's CLASS_COLOR entry at 0.10 alpha.
 * NOTE(review): values are full literal class names, presumably so Tailwind's
 * source scanner can detect them — confirm before generating them dynamically.
 */
const CLASS_BG: Record<EntityClass, string> = {
people: "from-[rgba(255,110,199,0.10)]",
organizations: "from-[rgba(255,138,77,0.10)]",
locations: "from-[rgba(63,222,106,0.10)]",
events: "from-[rgba(255,165,0,0.10)]",
"uap-objects": "from-[rgba(255,51,68,0.10)]",
vehicles: "from-[rgba(91,155,213,0.10)]",
operations: "from-[rgba(155,93,229,0.10)]",
concepts: "from-[rgba(6,214,160,0.10)]",
};
/**
 * Renders a list of page numbers as a compact label.
 *
 * - no pages: "—"
 * - one to five pages: every page, e.g. "p1, p2, p3"
 * - more than five: the first four followed by how many were left out,
 *   e.g. "p1, p2, p3, p4 +2"
 *
 * @param pages page numbers, rendered in the order given
 * @returns the label text
 */
function pageOcurrencesText(pages: number[]): string {
  const total = pages.length;
  if (total === 0) {
    return "—";
  }

  const render = (nums: number[]): string => "p" + nums.join(", p");

  if (total > 5) {
    const hidden = total - 4;
    return `${render(pages.slice(0, 4))} +${hidden}`;
  }
  return render(pages);
}
/** Maximum number of chunk preview cards requested for the main column. */
const CHUNK_PREVIEW_LIMIT = 12;

/** Maximum number of per-document mention groups requested for the sidebar. */
const MENTION_GROUP_LIMIT = 100;

/** A wiki body must be longer than this (after trimming) to be shown as narrative. */
const MIN_NARRATIVE_CHARS = 30;

/** Border class, text class and label of the "força do sinal" badge. */
type StrengthBadge = { border: string; text: string; label: string };

/** Badge used for "unverified" and for any signal_strength value not listed below. */
const UNVERIFIED_BADGE: StrengthBadge = {
  border: "border-[#5a6678]",
  text: "text-[#8896aa]",
  label: "não verificada",
};

/** Badge presentation per signal_strength value (class names kept as full literals). */
const STRENGTH_BADGES = new Map<string, StrengthBadge>([
  ["strong", { border: "border-[#00ff9c]", text: "text-[#00ff9c]", label: "forte" }],
  ["weak", { border: "border-[#ffa500]", text: "text-[#ffa500]", label: "fraca" }],
  ["orphan", { border: "border-[#ff6b6b]", text: "text-[#ff6b6b]", label: "órfã" }],
  ["unverified", UNVERIFIED_BADGE],
]);

/**
 * Builds a `.catch` handler that logs the failure and resolves to `fallback`,
 * so a failing lookup degrades the page instead of failing the whole render.
 */
function fallbackOnError<T>(label: string, fallback: T): (err: unknown) => T {
  return (err) => {
    console.error(`[entity-page] ${label} failed`, err);
    return fallback;
  };
}

/**
 * Entity detail page for `/e/[cls]/[id]`.
 *
 * Resolves the class folder from the route, loads the entity core record and
 * the wiki entry, and renders: a hero header (name, aliases, counters, signal
 * strength badge), chunk preview cards, the wiki narrative, and a sidebar with
 * the per-document mention list and the mini graph.
 *
 * Responds with `notFound()` when the class is unknown or when neither the
 * core record nor the wiki entry exists. Failures of the retrieval helpers are
 * logged and treated as "no data"; a rejection from `readEntity` propagates.
 *
 * @param params route params promise carrying `cls` (class key) and `id`
 */
export default async function EntityPage({
  params,
}: {
  params: Promise<{ cls: string; id: string }>;
}) {
  const { cls, id } = await params;
  const folder = classKeyToFolder(cls);
  if (!folder) notFound();
  const entityClassSingular = CLASS_TO_SINGULAR[folder as string] ?? folder;

  // YAML-first: every count comes from the entity's frontmatter (kept in sync
  // by scripts/maintain/42_sync_entity_stats.py). The DB is consulted ONLY for
  // chunk previews, not for counts.
  // The core record and the wiki entry do not depend on each other, so both
  // are fetched concurrently.
  const [core, wiki] = await Promise.all([
    getEntityCore(entityClassSingular, id).catch(fallbackOnError("getEntityCore", null)),
    readEntity(folder as EntityClass, id),
  ]);
  if (!core && !wiki) notFound();

  const canonical = core?.canonical_name ?? (wiki?.fm.canonical_name as string) ?? id;
  // De-duplicate: each alias is used as a React key below, and the canonical
  // name is already shown as the page title.
  const aliases = Array.from(
    new Set(core?.aliases ?? (wiki?.fm.aliases as string[]) ?? []),
  ).filter((a) => a !== canonical);

  // Both lookups need the core record but not each other; without a core
  // record there is nothing to look up.
  const [mentionGroups, sampleChunks] = await Promise.all([
    core
      ? getEntityMentionsByDoc(entityClassSingular, id, MENTION_GROUP_LIMIT).catch(
          fallbackOnError("getEntityMentionsByDoc", []),
        )
      : [],
    core
      ? getEntityChunks(core.entity_pk, CHUNK_PREVIEW_LIMIT).catch(
          fallbackOnError("getEntityChunks", []),
        )
      : [],
  ]);

  const totalMentions = core?.total_mentions ?? 0;
  const documentsCount = core?.documents_count ?? 0;
  const strength = core?.signal_strength ?? "unverified";
  const sigs = core?.signal_sources ?? { db_chunks: 0, page_refs: 0, cross_refs: 0 };
  const badge = STRENGTH_BADGES.get(strength) ?? UNVERIFIED_BADGE;
  const classColor = CLASS_COLOR[folder as EntityClass];
  const classBg = CLASS_BG[folder as EntityClass];

  // One flag drives both the narrative section and the empty-state banner, so
  // a very short body can no longer leave the article column blank.
  const narrativeBody = wiki?.body ?? "";
  const hasNarrative = narrativeBody.trim().length > MIN_NARRATIVE_CHARS;
  // Hitting the request limit means more chunks may exist than are listed.
  const chunksTruncated = sampleChunks.length >= CHUNK_PREVIEW_LIMIT;

  return (
    <main className="min-h-screen p-6 md:p-10 max-w-6xl mx-auto">
      <div className="flex items-start justify-between gap-4 mb-6">
        <div className="flex items-center gap-3">
          <Link href="/" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
            home
          </Link>
          <Link href={`/e/${folder}`} className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
            todos {folder}
          </Link>
          <Link href="/graph" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
            🕸 ver no grafo
          </Link>
        </div>
        <AuthBar />
      </div>
      {/* Hero header — only the border token of classColor is applied here */}
      <header
        className={`mb-8 p-6 rounded-lg border-2 bg-gradient-to-br to-[#040810] ${classBg} ${classColor.split(" ")[1]}`}
      >
        <div className="font-mono text-[10px] text-[#5a6678] tracking-widest uppercase mb-2 flex items-center gap-3 flex-wrap">
          <span className={`px-2 py-0.5 border rounded ${classColor}`}>
            {CLASS_TITLE[folder as EntityClass]}
          </span>
          <span>· /e/{folder}/{id}</span>
        </div>
        <h1 className="font-mono text-3xl md:text-4xl text-[#00ff9c] mb-3 leading-tight">
          {canonical}
        </h1>
        {aliases.length > 0 && (
          <div className="mb-4 flex flex-wrap gap-1.5">
            {aliases.slice(0, 12).map((a) => (
              <span
                key={a}
                className="font-mono text-[11px] px-2 py-0.5 bg-[rgba(127,219,255,0.06)] border border-[rgba(127,219,255,0.20)] text-[#7fdbff] rounded"
              >
                {a}
              </span>
            ))}
          </div>
        )}
        <div className="flex flex-wrap gap-3 mt-4">
          <div className="px-4 py-3 bg-[#0a121e] border border-[rgba(0,255,156,0.20)] rounded">
            <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">menções</div>
            <div className="font-mono text-2xl text-[#00ff9c] mt-0.5 tabular-nums">{totalMentions}</div>
          </div>
          <div className="px-4 py-3 bg-[#0a121e] border border-[rgba(127,219,255,0.20)] rounded">
            <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">documentos</div>
            <div className="font-mono text-2xl text-[#7fdbff] mt-0.5 tabular-nums">{documentsCount}</div>
          </div>
          {core?.enrichment_status && core.enrichment_status !== "none" && (
            <div className="px-4 py-3 bg-[#0a121e] border border-[rgba(167,139,250,0.20)] rounded">
              <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">enrichment</div>
              <div className="font-mono text-sm text-[#a78bfa] mt-0.5">{core.enrichment_status}</div>
            </div>
          )}
          <div
            className={`px-4 py-3 bg-[#0a121e] border rounded ${badge.border}`}
            title="Cruzamento dos 3 sinais que confirmam esta entidade no corpus."
          >
            <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">
              força do sinal
            </div>
            <div className={`font-mono text-sm mt-0.5 ${badge.text}`}>
              {badge.label}
            </div>
            <div className="font-mono text-[9px] text-[#5a6678] mt-1 leading-tight">
              {sigs.db_chunks} chunks · {sigs.page_refs} págs · {sigs.cross_refs} backlinks
            </div>
          </div>
        </div>
        {strength === "orphan" && (
          <p className="mt-4 text-xs text-[#ff6b6b] font-mono">
            entidade não confirmada: nenhuma página, chunk ou outra entidade aponta para
            ela. Pode ser extração ruidosa do pipeline original.
          </p>
        )}
      </header>
      <div className="grid grid-cols-1 lg:grid-cols-[1fr_320px] gap-8">
        {/* MAIN — chunk previews + narrative */}
        <article>
          {/* Chunk previews — most impactful section */}
          {sampleChunks.length > 0 && (
            <section className="mb-10">
              <h2 className="font-mono text-sm text-[#7fdbff] uppercase tracking-widest mb-3 border-l-2 border-[#7fdbff] pl-3">
                Aparece em {sampleChunks.length}{chunksTruncated ? "+" : ""} trechos · top {sampleChunks.length}
              </h2>
              <div className="space-y-2">
                {sampleChunks.map((c) => {
                  // Prefer the Portuguese text, fall back to English.
                  const text = (c.content_pt || c.content_en || "").trim();
                  const docPretty = c.doc_id.replace(/^doc-/, "").replace(/-/g, " ").slice(0, 60);
                  // A thumbnail is only available when the chunk carries a bounding box.
                  const cropUrl = c.bbox
                    ? `/api/crop?doc=${encodeURIComponent(c.doc_id)}&page=${c.page}&x=${c.bbox.x}&y=${c.bbox.y}&w=${c.bbox.w}&h=${c.bbox.h}&w_px=200`
                    : null;
                  return (
                    <Link
                      key={c.chunk_pk}
                      href={`/d/${c.doc_id}#${c.chunk_id}`}
                      className="flex items-start gap-3 p-3 bg-[#0a121e] border border-[rgba(127,219,255,0.15)] hover:border-[#00ff9c] rounded transition"
                    >
                      {cropUrl && (
                        <Image
                          src={cropUrl}
                          alt=""
                          width={120}
                          height={80}
                          unoptimized
                          className="block w-28 h-20 object-cover bg-black rounded flex-shrink-0"
                        />
                      )}
                      <div className="flex-1 min-w-0">
                        <div className="flex items-center gap-2 text-[10px] font-mono mb-1 flex-wrap">
                          <span className="text-[#00ff9c]">{c.chunk_id}</span>
                          <span className="text-[#5a6678]">p{c.page}</span>
                          <span className="text-[#5a6678]">{c.type}</span>
                          {c.classification && (
                            <span className="text-[#ff6b6b]">{c.classification}</span>
                          )}
                          {c.ufo_anomaly && (
                            <span className="text-[#00ff9c]">🛸 {c.ufo_anomaly_type ?? "UAP"}</span>
                          )}
                        </div>
                        <div className="text-sm text-[#c8d4e6] line-clamp-3 leading-snug">{text}</div>
                        <div className="text-[10px] font-mono text-[#5a6678] truncate mt-1">
                          {docPretty}
                        </div>
                      </div>
                    </Link>
                  );
                })}
              </div>
            </section>
          )}
          {/* Narrative body (a Haiku-generated stub is fine when it is rich enough) */}
          {hasNarrative && (
            <section className="pt-6 border-t border-[rgba(0,255,156,0.12)]">
              <h2 className="font-mono text-sm text-[#7fdbff] uppercase tracking-widest mb-3 border-l-2 border-[#7fdbff] pl-3">
                Narrativa
              </h2>
              <MarkdownBody>{narrativeBody}</MarkdownBody>
            </section>
          )}
          {/* Empty state: neither chunk previews nor a narrative were rendered */}
          {sampleChunks.length === 0 && !hasNarrative && (
            <div className="text-[#5a6678] italic text-sm p-6 border border-[rgba(255,165,0,0.30)] bg-[rgba(255,165,0,0.05)] rounded">
              Entidade ainda sem chunks indexados na DB. Aguarde o indexer terminar.
            </div>
          )}
        </article>
        {/* SIDEBAR — documents where the entity appears (live DB) + mini graph */}
        <aside className="lg:sticky lg:top-6 lg:self-start space-y-6">
          <section>
            <h3 className="font-mono text-[10px] text-[#8896aa] uppercase tracking-widest mb-2">
              Aparece em {documentsCount} documento(s)
            </h3>
            {mentionGroups.length === 0 ? (
              <p className="text-[#5a6678] text-xs italic">Sem dados de mention ainda.</p>
            ) : (
              <ul className="space-y-1 max-h-[50vh] overflow-y-auto pr-1">
                {mentionGroups.map((m) => (
                  <li key={m.doc_id}>
                    <Link
                      href={`/d/${m.doc_id}`}
                      className="group block p-2 border border-transparent hover:border-[rgba(0,255,156,0.32)] hover:bg-[rgba(0,255,156,0.04)] rounded transition"
                    >
                      <div className="flex items-baseline gap-2 font-mono text-[11px]">
                        <span className="text-[#7fdbff] group-hover:text-[#00ff9c] truncate flex-1">
                          {m.canonical_title ?? m.doc_id}
                        </span>
                        <span className="text-[#00ff9c] tabular-nums shrink-0">{m.mention_count}×</span>
                      </div>
                      <div className="flex items-center gap-2 mt-0.5 font-mono text-[10px] text-[#5a6678]">
                        {m.classification && (
                          <span className="text-[#ff6b6b]">{m.classification}</span>
                        )}
                        <span>· {pageOcurrencesText(m.pages ?? [])}</span>
                      </div>
                    </Link>
                  </li>
                ))}
              </ul>
            )}
          </section>
          <EntityGraphMini
            entityClassSingular={entityClassSingular}
            entityId={id}
          />
        </aside>
      </div>
      {/* Chat is seeded with the first listed document, when there is one */}
      <ChatBubble context={{ doc_id: mentionGroups[0]?.doc_id }} />
    </main>
  );
}