disclosure-bureau/web/app/e/[cls]/[id]/page.tsx

431 lines
20 KiB
TypeScript
Raw Normal View History

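/**
* Entity detail page — YAML-first: counts and metadata come from the entity's
* frontmatter; the DB is consulted only for chunk previews (see the note next
* to the getEntityCore call below).
* Wiki frontmatter is used as a static fallback (aliases, narrative).
*/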
import Link from "next/link";
import { notFound } from "next/navigation";
import Image from "next/image";
import { readEntity, classKeyToFolder, type EntityClass } from "@/lib/wiki";
import { MarkdownBody } from "@/components/markdown-body";
import { ChatBubble } from "@/components/chat-bubble";
import { AuthBar } from "@/components/auth-bar";
import { EntityGraphMini } from "@/components/entity-graph-mini";
import { EntityRelations } from "@/components/entity-relations";
W0+W1+W1.2: security hardening, observability, autocomplete, glitchtip, forgejo CI W0 — security hardening (5 fixes verified live on disclosure.top) - middleware: gate /api/admin/* same as /admin/* (F1) - imgproxy: tighten LOCAL_FILESYSTEM_ROOT from / to /var/lib/storage (F2) - studio: real basic-auth label (bcrypt hash, middleware reference) (F3) - relations: ENABLE ROW LEVEL SECURITY + public SELECT policy (F4) - migration 0003: fold is_searchable + hybrid_search update into canonical (TD#2) W1 — observability + resilience + autocomplete - studio: HOSTNAME=0.0.0.0 so Next.js binds on loopback for healthcheck - compose: PG_POOL_MAX=20, CLAUDE_CODE_OAUTH_TOKEN gated by separate env - claude-code.ts: subprocess timeout configurable (CLAUDE_CODE_TIMEOUT_MS) - openrouter.ts: retry with exponential backoff + Retry-After + in-memory circuit breaker (promotes FALLBACK after CB_THRESHOLD failures) - lib/logger.ts: pino logger (NDJSON prod / pretty dev) + withRequest helper - middleware: mints correlation_id, stamps x-correlation-id response header, emits structured http_request log per /api/* call - messages/route.ts: switch to structured logger - 60_meili_index.py: push documents + chunks into Meilisearch - /api/search/autocomplete: parallel meili search (docs + chunks), 5-8ms p50 - search-autocomplete.tsx: debounced dropdown wired into search-panel W1.2 — Glitchtip + Forgejo self-hosted - compose: glitchtip-redis + glitchtip-web + glitchtip-worker (v4.2) - compose: forgejo + forgejo-runner (server v9, runner v6) with group_add=988 - @sentry/nextjs SDK wired (instrumentation.ts + sentry.{client,server}.config.ts) - /api/admin/throw smoke endpoint (gated by W0-F1 middleware) - Synthetic event ingestion verified at glitchtip.disclosure.top - forgejo.disclosure.top up, repo discadmin/disclosure-bureau created, runner registered (labels: ubuntu-latest, docker) - .forgejo/workflows/ci.yml: typecheck + lint + build + npm audit + python syntax + compose validation 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:18:42 +00:00
import { EntityAttributes } from "@/components/entity-attributes";
import {
getEntityCore,
getEntityMentionsByDoc,
getEntityChunks,
} from "@/lib/retrieval/entity-pages";
// Maps a plural class folder name (the value produced by classKeyToFolder)
// to the singular class identifier handed to the entity retrieval helpers.
// Typed with plain string keys because the page indexes it with
// `folder as string` and falls back to the folder name when no entry exists.
const CLASS_TO_SINGULAR: Record<string, string> = {
people: "person",
organizations: "organization",
locations: "location",
events: "event",
// Note the separator change: hyphen in the folder name, underscore in the singular form.
"uap-objects": "uap_object",
vehicles: "vehicle",
operations: "operation",
concepts: "concept",
};
// Next.js route segment config: render this page dynamically on each request
// rather than statically, so the entity data read below is not served from a
// build-time snapshot.
export const dynamic = "force-dynamic";
// Human-readable (Portuguese) label for each entity class, shown in the
// page header next to the /e/<folder>/<id> path.
const CLASS_TITLE: Record<EntityClass, string> = {
people: "Pessoa",
organizations: "Organização",
locations: "Local",
events: "Evento",
"uap-objects": "Objeto UAP",
vehicles: "Veículo",
operations: "Operação",
concepts: "Conceito",
};
// Per-class accent color expressed as two space-separated utility classes:
// "<text color> <border color>", in that order.
// The order is load-bearing: the header card takes `split(" ")[1]` to apply
// only the border class, while the class chip applies the whole string.
// NOTE(review): the class names are spelled out in full, presumably so the
// CSS tooling can detect them in source — keep them as literals; confirm
// before generating them programmatically.
const CLASS_COLOR: Record<EntityClass, string> = {
people: "text-[#ff6ec7] border-[#ff6ec7]",
organizations: "text-[#ff8a4d] border-[#ff8a4d]",
locations: "text-[#3fde6a] border-[#3fde6a]",
events: "text-[#ffa500] border-[#ffa500]",
"uap-objects": "text-[#ff3344] border-[#ff3344]",
vehicles: "text-[#5b9bd5] border-[#5b9bd5]",
operations: "text-[#9b5de5] border-[#9b5de5]",
concepts: "text-[#06d6a0] border-[#06d6a0]",
};
// Per-class gradient start color (`from-…`) for the header card background.
// Each value is the same RGB as the matching CLASS_COLOR entry at 0.10 alpha;
// the header combines it with `bg-gradient-to-br to-[#040810]`.
const CLASS_BG: Record<EntityClass, string> = {
people: "from-[rgba(255,110,199,0.10)]",
organizations: "from-[rgba(255,138,77,0.10)]",
locations: "from-[rgba(63,222,106,0.10)]",
events: "from-[rgba(255,165,0,0.10)]",
"uap-objects": "from-[rgba(255,51,68,0.10)]",
vehicles: "from-[rgba(91,155,213,0.10)]",
operations: "from-[rgba(155,93,229,0.10)]",
concepts: "from-[rgba(6,214,160,0.10)]",
};
/**
 * Builds a compact label for a list of page numbers.
 *
 * - No pages: an em dash placeholder.
 * - Up to five pages: every page, each prefixed with "p" and comma-separated.
 * - More than five: the first four pages followed by "+<remaining count>".
 *
 * @param pages Page numbers, rendered in the order given.
 * @returns The display label.
 */
function pageOcurrencesText(pages: number[]): string {
  const count = pages.length;
  if (count === 0) {
    return "—";
  }

  // Lists longer than five are truncated to four entries plus an overflow counter.
  const shownCount = count > 5 ? 4 : count;
  const head = "p" + pages.slice(0, shownCount).join(", p");
  const hiddenCount = count - shownCount;

  return hiddenCount > 0 ? `${head} +${hiddenCount}` : head;
}
export default async function EntityPage({
params,
}: {
params: Promise<{ cls: string; id: string }>;
}) {
const { cls, id } = await params;
const folder = classKeyToFolder(cls);
if (!folder) notFound();
const entityClassSingular = CLASS_TO_SINGULAR[folder as string] ?? folder;
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945- PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
// YAML-first: every count comes from the entity's frontmatter (kept in sync
// by scripts/maintain/42_sync_entity_stats.py). The DB is consulted ONLY for
// chunk previews, not for counts.
const core = await getEntityCore(entityClassSingular, id).catch(() => null);
const wiki = await readEntity(folder as EntityClass, id);
if (!core && !wiki) notFound();
const canonical = core?.canonical_name ?? (wiki?.fm.canonical_name as string) ?? id;
const aliases = (core?.aliases ?? (wiki?.fm.aliases as string[]) ?? []).filter(
(a) => a !== canonical,
);
const mentionGroups = core
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945- PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
? await getEntityMentionsByDoc(entityClassSingular, id, 100).catch(() => [])
: [];
const sampleChunks = core
? await getEntityChunks(core.entity_pk, 12).catch(() => [])
: [];
const totalMentions = core?.total_mentions ?? 0;
const documentsCount = core?.documents_count ?? 0;
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945- PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
const strength = core?.signal_strength ?? "unverified";
const sigs = core?.signal_sources ?? { db_chunks: 0, page_refs: 0, cross_refs: 0, text_refs: 0 };
// Derived display class: orphan + curated narrative is not noise — it's a
// knowledge-curated entity the corpus simply doesn't mention. Label it apart.
const displayStrength: "strong" | "weak" | "curated" | "orphan" | "unverified" =
strength === "orphan" && core?.summary_status === "curated" ? "curated" : (strength as any);
const classColor = CLASS_COLOR[folder as EntityClass];
const classBg = CLASS_BG[folder as EntityClass];
W5.5 (Phase 3C): Sun-Tzu strategist feeder + entity hero illustrations Sun-Tzu (silent backend) — builds the strongest pro-anomaly brief the corpus supports for any topic. Bilingual JSON: thesis + 2-4 pillars (each with claim + citation-backed support) + honest residual unexplained clause. NEVER surfaced reader-facing. Migration 0009 (apply as supabase_admin): public.pro_anomaly_briefs brief_pk BIGSERIAL PK brief_id B-NNNN unique topic + topic_pt_br thesis + thesis_pt_br pillars JSONB unexplained + unexplained_pt_br doc_id, job_id, created_by, created_at + brief_id_seq sequence + GIN trigram indexes on topic + topic_pt_br + RLS policies (investigator INSERT, public SELECT) + GRANTs on seq + table to investigator prompts/sun-tzu.md "Adversarial strategist who plays the pro-disclosure side with the same rigour a red-team plays skeptic" — single thesis, 2-4 pillars, honest residual. Every claim cites a chunk. No fabrication from training-time knowledge. Output INTERNAL — case-writer pulls it. Bilingual mandatory. NO_STRONG_CASE sentinel when corpus is thin. detectives/sun_tzu.ts Grounds with hybridSearch top 18 chunks, calls Sonnet, parses JSON strict, calls writeProAnomalyBrief. tools/write_pro_anomaly_brief.ts Validates 2-4 pillars with bilingual claim+support, requires at least one [[wiki-link]] citation per pillar, INSERTs. orchestrator: new kind "anomaly_brief" dispatches Sun-Tzu. Case-writer integration (detectives/case_writer.ts): - Pulls most recent matching brief via ILIKE on topic or doc_id. - Renders brief as a separate prompt section labelled "Strategic brief (internal — do NOT cite or attribute)". - Instructs the narrator to weave the thesis as a quiet through- line, use pillar facts in scenes, let the unexplained clause inform the closing paragraph. Forbidden to name "the analyst", say "a brief argues", or use the words "thesis"/"pillar" explicitly. Translate it into prose. 
Entity hero illustrations: - 3 painterly editorial illustrations generated via Nano Banana Pro at 2K, stored under /data/disclosure/processing/case-art/: * EV-1947-06-24-kenneth-arnold-sighting.png — cockpit POV of Arnold in a CallAir A-2 over Mount Rainier, 9 chevron disc objects in formation, 1947 Life-magazine register. * EV-1947-07-08-roswell-incident.png — debris field in NM desert, USAAF officer in 1947 uniform examining foil fragments, period staff car. * EV-1947-06-21-maury-island-incident.png — wooden patrol boat on Puget Sound, 6 doughnut craft hovering, one shedding glowing slag, Harold Dahl + son + dog watching. - app/e/[cls]/[id]/page.tsx: full-bleed editorial hero replaces the old gradient header card when an illustration exists for that entity_id. Title sits over the painting with gradient overlay. "Ilustração editorial" chip in the top-right. Quota note: Claude OAuth still rate-limited as of this commit, so Sun-Tzu hasn't been smoke-tested in production. Code is shipped and ready; first brief will land when the weekly quota refreshes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 19:41:20 +00:00
// Hero illustration — W5.4 generates one painterly editorial image per
// iconic entity, stored at /data/ufo/processing/case-art/<entity_id>.png
// and served via the existing /api/static/processing/ route.
let heroIllustration: string | null = null;
try {
const fs = await import("node:fs/promises");
const path = await import("node:path");
const ufoRoot = process.env.UFO_ROOT || "/data/ufo";
await fs.stat(path.join(ufoRoot, "processing", "case-art", `${id}.png`));
heroIllustration = `/api/static/processing/case-art/${id}.png`;
} catch { /* no illustration for this entity yet */ }
W0+W1+W1.2: security hardening, observability, autocomplete, glitchtip, forgejo CI W0 — security hardening (5 fixes verified live on disclosure.top) - middleware: gate /api/admin/* same as /admin/* (F1) - imgproxy: tighten LOCAL_FILESYSTEM_ROOT from / to /var/lib/storage (F2) - studio: real basic-auth label (bcrypt hash, middleware reference) (F3) - relations: ENABLE ROW LEVEL SECURITY + public SELECT policy (F4) - migration 0003: fold is_searchable + hybrid_search update into canonical (TD#2) W1 — observability + resilience + autocomplete - studio: HOSTNAME=0.0.0.0 so Next.js binds on loopback for healthcheck - compose: PG_POOL_MAX=20, CLAUDE_CODE_OAUTH_TOKEN gated by separate env - claude-code.ts: subprocess timeout configurable (CLAUDE_CODE_TIMEOUT_MS) - openrouter.ts: retry with exponential backoff + Retry-After + in-memory circuit breaker (promotes FALLBACK after CB_THRESHOLD failures) - lib/logger.ts: pino logger (NDJSON prod / pretty dev) + withRequest helper - middleware: mints correlation_id, stamps x-correlation-id response header, emits structured http_request log per /api/* call - messages/route.ts: switch to structured logger - 60_meili_index.py: push documents + chunks into Meilisearch - /api/search/autocomplete: parallel meili search (docs + chunks), 5-8ms p50 - search-autocomplete.tsx: debounced dropdown wired into search-panel W1.2 — Glitchtip + Forgejo self-hosted - compose: glitchtip-redis + glitchtip-web + glitchtip-worker (v4.2) - compose: forgejo + forgejo-runner (server v9, runner v6) with group_add=988 - @sentry/nextjs SDK wired (instrumentation.ts + sentry.{client,server}.config.ts) - /api/admin/throw smoke endpoint (gated by W0-F1 middleware) - Synthetic event ingestion verified at glitchtip.disclosure.top - forgejo.disclosure.top up, repo discadmin/disclosure-bureau created, runner registered (labels: ubuntu-latest, docker) - .forgejo/workflows/ci.yml: typecheck + lint + build + npm audit + python syntax + compose validation 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:18:42 +00:00
// The generated entity bodies hold only "# Title" + empty "## Description"
// headings — strip headings and see if any real prose remains.
const bodyProse = (wiki?.body ?? "").replace(/^#.*$/gm, "").trim();
const hasNarrativeProse = bodyProse.length > 20;
// Does the frontmatter carry any displayable description/attribute?
const fm = (wiki?.fm ?? {}) as Record<string, unknown>;
const arr = (v: unknown) => Array.isArray(v) && v.length > 0;
const fmHasContent = Boolean(
fm.narrative_summary_pt_br || fm.narrative_summary_en || fm.maneuver_notes ||
fm.shape || fm.color || fm.medium || fm.event_class || fm.person_class ||
fm.org_class || fm.geo_class || fm.date_start ||
arr(fm.countries) || arr(fm.roles) || arr(fm.affiliations) ||
arr(fm.primary_location_names) || arr(fm.regions_or_states),
);
return (
<main className="min-h-screen p-6 md:p-10 max-w-6xl mx-auto">
<div className="flex items-start justify-between gap-4 mb-6">
<div className="flex items-center gap-3">
<Link href="/" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
home
</Link>
<Link href={`/e/${folder}`} className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
todos {folder}
</Link>
<Link href="/graph" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
🕸 ver no grafo
</Link>
</div>
<AuthBar />
</div>
W5.5 (Phase 3C): Sun-Tzu strategist feeder + entity hero illustrations Sun-Tzu (silent backend) — builds the strongest pro-anomaly brief the corpus supports for any topic. Bilingual JSON: thesis + 2-4 pillars (each with claim + citation-backed support) + honest residual unexplained clause. NEVER surfaced reader-facing. Migration 0009 (apply as supabase_admin): public.pro_anomaly_briefs brief_pk BIGSERIAL PK brief_id B-NNNN unique topic + topic_pt_br thesis + thesis_pt_br pillars JSONB unexplained + unexplained_pt_br doc_id, job_id, created_by, created_at + brief_id_seq sequence + GIN trigram indexes on topic + topic_pt_br + RLS policies (investigator INSERT, public SELECT) + GRANTs on seq + table to investigator prompts/sun-tzu.md "Adversarial strategist who plays the pro-disclosure side with the same rigour a red-team plays skeptic" — single thesis, 2-4 pillars, honest residual. Every claim cites a chunk. No fabrication from training-time knowledge. Output INTERNAL — case-writer pulls it. Bilingual mandatory. NO_STRONG_CASE sentinel when corpus is thin. detectives/sun_tzu.ts Grounds with hybridSearch top 18 chunks, calls Sonnet, parses JSON strict, calls writeProAnomalyBrief. tools/write_pro_anomaly_brief.ts Validates 2-4 pillars with bilingual claim+support, requires at least one [[wiki-link]] citation per pillar, INSERTs. orchestrator: new kind "anomaly_brief" dispatches Sun-Tzu. Case-writer integration (detectives/case_writer.ts): - Pulls most recent matching brief via ILIKE on topic or doc_id. - Renders brief as a separate prompt section labelled "Strategic brief (internal — do NOT cite or attribute)". - Instructs the narrator to weave the thesis as a quiet through- line, use pillar facts in scenes, let the unexplained clause inform the closing paragraph. Forbidden to name "the analyst", say "a brief argues", or use the words "thesis"/"pillar" explicitly. Translate it into prose. 
Entity hero illustrations: - 3 painterly editorial illustrations generated via Nano Banana Pro at 2K, stored under /data/disclosure/processing/case-art/: * EV-1947-06-24-kenneth-arnold-sighting.png — cockpit POV of Arnold in a CallAir A-2 over Mount Rainier, 9 chevron disc objects in formation, 1947 Life-magazine register. * EV-1947-07-08-roswell-incident.png — debris field in NM desert, USAAF officer in 1947 uniform examining foil fragments, period staff car. * EV-1947-06-21-maury-island-incident.png — wooden patrol boat on Puget Sound, 6 doughnut craft hovering, one shedding glowing slag, Harold Dahl + son + dog watching. - app/e/[cls]/[id]/page.tsx: full-bleed editorial hero replaces the old gradient header card when an illustration exists for that entity_id. Title sits over the painting with gradient overlay. "Ilustração editorial" chip in the top-right. Quota note: Claude OAuth still rate-limited as of this commit, so Sun-Tzu hasn't been smoke-tested in production. Code is shipped and ready; first brief will land when the weekly quota refreshes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 19:41:20 +00:00
{/* Full-bleed painterly hero appears only when this entity has a
generated illustration. Otherwise the existing header card stays. */}
{heroIllustration && (
<div className="relative -mx-6 md:-mx-10 mb-8 overflow-hidden border-y border-[rgba(224,192,128,0.20)]">
<div className="relative aspect-[16/9] md:aspect-[21/9] max-h-[520px]">
{/* eslint-disable-next-line @next/next/no-img-element */}
<img
src={heroIllustration}
alt={canonical}
className="absolute inset-0 w-full h-full object-cover"
/>
<div className="absolute inset-0 bg-gradient-to-t from-[#020409] via-[#020409]/30 to-transparent" />
<div className="absolute inset-0 bg-gradient-to-r from-[#020409]/60 via-transparent to-transparent" />
<div className="absolute bottom-0 left-0 right-0 px-6 md:px-10 pb-6 md:pb-10">
<div className="font-mono text-[10px] uppercase tracking-[0.18em] text-[#e0c080] mb-2">
{CLASS_TITLE[folder as EntityClass]} · /e/{folder}/{id}
</div>
<h1 className="font-display text-4xl md:text-6xl font-semibold leading-[1.05] tracking-tight text-white drop-shadow-lg">
{canonical}
</h1>
</div>
<div className="absolute top-3 right-3 text-[9px] font-mono uppercase tracking-wider text-[#9aa6b8]/80 bg-[#020409]/70 px-2 py-1 rounded">
Ilustração editorial
</div>
</div>
</div>
)}
{/* Hero header — shown only when there's no illustration above */}
{!heroIllustration && <header
className={`mb-8 p-6 rounded-lg border-2 bg-gradient-to-br to-[#040810] ${classBg} ${classColor.split(" ")[1]}`}
>
<div className="font-mono text-[10px] text-[#5a6678] tracking-widest uppercase mb-2 flex items-center gap-3 flex-wrap">
<span className={`px-2 py-0.5 border rounded ${classColor}`}>
{CLASS_TITLE[folder as EntityClass]}
</span>
<span>· /e/{folder}/{id}</span>
</div>
<h1 className="font-mono text-3xl md:text-4xl text-[#00ff9c] mb-3 leading-tight">
{canonical}
</h1>
{aliases.length > 0 && (
<div className="mb-4 flex flex-wrap gap-1.5">
{aliases.slice(0, 12).map((a) => (
<span
key={a}
className="font-mono text-[11px] px-2 py-0.5 bg-[rgba(127,219,255,0.06)] border border-[rgba(127,219,255,0.20)] text-[#7fdbff] rounded"
>
{a}
</span>
))}
</div>
)}
<div className="flex flex-wrap gap-3 mt-4">
<div className="px-4 py-3 bg-[#0a121e] border border-[rgba(0,255,156,0.20)] rounded">
<div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">menções</div>
<div className="font-mono text-2xl text-[#00ff9c] mt-0.5 tabular-nums">{totalMentions}</div>
</div>
<div className="px-4 py-3 bg-[#0a121e] border border-[rgba(127,219,255,0.20)] rounded">
<div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">documentos</div>
<div className="font-mono text-2xl text-[#7fdbff] mt-0.5 tabular-nums">{documentsCount}</div>
</div>
{core?.enrichment_status && core.enrichment_status !== "none" && (
<div className="px-4 py-3 bg-[#0a121e] border border-[rgba(167,139,250,0.20)] rounded">
<div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">enrichment</div>
<div className="font-mono text-sm text-[#a78bfa] mt-0.5">{core.enrichment_status}</div>
</div>
)}
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945- PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
<div
className={`px-4 py-3 bg-[#0a121e] border rounded ${
displayStrength === "strong"
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945- PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
? "border-[#00ff9c]"
: displayStrength === "weak"
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
? "border-[#ffa500]"
: displayStrength === "curated"
? "border-[#a78bfa]"
: displayStrength === "orphan"
? "border-[#ff6b6b]"
: "border-[#5a6678]"
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
}`}
title="Cruzamento dos sinais que confirmam esta entidade no corpus."
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
>
<div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678]">
força do sinal
</div>
<div
className={`font-mono text-sm mt-0.5 ${
displayStrength === "strong"
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
? "text-[#00ff9c]"
: displayStrength === "weak"
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
? "text-[#ffa500]"
: displayStrength === "curated"
? "text-[#a78bfa]"
: displayStrength === "orphan"
? "text-[#ff6b6b]"
: "text-[#8896aa]"
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
}`}
>
{displayStrength === "strong" && "forte"}
{displayStrength === "weak" && "fraca"}
{displayStrength === "curated" && "curado"}
{displayStrength === "orphan" && "órfã"}
{displayStrength === "unverified" && "não verificada"}
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
</div>
<div className="font-mono text-[9px] text-[#5a6678] mt-1 leading-tight">
{sigs.db_chunks} chunks · {sigs.page_refs} págs · {sigs.cross_refs} backlinks · {sigs.text_refs} textuais
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
</div>
</div>
</div>
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
{strength === "orphan" && core?.summary_status === "curated" && (
<p className="mt-4 text-xs text-[#a78bfa] font-mono leading-relaxed">
📚 conhecimento curado · este evento/entidade faz parte do registro UAP/UFO
mundial mas <strong>não foi mencionado</strong> nos PDFs deste corpus (war.gov/ufo).
Narrativa abaixo vem de fonte curada manualmente, não de extração.
</p>
)}
{strength === "orphan" && core?.summary_status !== "curated" && (
sanitize entities: single YAML source of truth, signal_strength badge The corpus had two parallel reverse-reference signals: the wiki/pages entities_extracted blocks (Haiku page-level) and public.entity_mentions (Sonnet chunk-level, ILIKE-matched). The entity page only consulted the DB, so it showed "0 menções" for thousands of entities that were anchored in pages or in cross-entity links the DB never indexed. Resolved by collapsing all signals into the YAML frontmatter, which is now the single runtime source for entity metadata. scripts/maintain/42_sync_entity_stats.py walks every entity and writes: mentioned_in: [...] # consolidated page refs total_mentions: max(db, pages) documents_count: max(db_docs, distinct page docs) signal_sources: db_chunks: int page_refs: int cross_refs: int signal_strength: strong | weak | orphan | unverified referenced_by: [[class/id]] # cross-entity backlinks Outgoing wikilinks (e.g. OBJ.observed_in_event → EV) count toward the entity's own cross_refs so anchored-but-not-mentioned entities don't register as orphan. OBJ canonical names like "7m long, 1.3m high, two rocket motors, smooth flow, rotary drive null UAP (OBJ-EV1945-PEYERLSHOTDOWN-01)" are rewritten to "Peyerl shot down UAP" derived from observed_in_event, preserving the full description as an alias. --fix-obj-names did this for every OBJ-* with >80 char canonical_name. Default behaviour is conservative: --archive-only-junk archives only single/double-char names and pure-numeric noise. Everything else stays on disk with signal_strength marked, so the user can review later. web/lib/retrieval/entity-pages.ts swapped from db-first to yaml-first. The /e/[cls]/[id] page now reads counts straight from YAML and renders a "força do sinal" badge with the per-source breakdown. Orphan entities get a banner explaining they have no cross-references. 
DB is still queried for ONE thing: the chunk text for preview cards on the entity page, so we don't re-parse 21k markdown files on every render. First-pass result: 9020 strong / 14520 weak / 10814 orphan; OBJ-EV1945-PEYERLSHOTDOWN-01 now reads "Peyerl shot down UAP · fraca · 1 backlink" in the live UI.
2026-05-18 22:49:31 +00:00
<p className="mt-4 text-xs text-[#ff6b6b] font-mono">
entidade não confirmada: nenhuma página, chunk ou outra entidade aponta para
ela. Pode ser extração ruidosa do pipeline original.
</p>
)}
W5.5 (Phase 3C): Sun-Tzu strategist feeder + entity hero illustrations Sun-Tzu (silent backend) — builds the strongest pro-anomaly brief the corpus supports for any topic. Bilingual JSON: thesis + 2-4 pillars (each with claim + citation-backed support) + honest residual unexplained clause. NEVER surfaced reader-facing. Migration 0009 (apply as supabase_admin): public.pro_anomaly_briefs brief_pk BIGSERIAL PK brief_id B-NNNN unique topic + topic_pt_br thesis + thesis_pt_br pillars JSONB unexplained + unexplained_pt_br doc_id, job_id, created_by, created_at + brief_id_seq sequence + GIN trigram indexes on topic + topic_pt_br + RLS policies (investigator INSERT, public SELECT) + GRANTs on seq + table to investigator prompts/sun-tzu.md "Adversarial strategist who plays the pro-disclosure side with the same rigour a red-team plays skeptic" — single thesis, 2-4 pillars, honest residual. Every claim cites a chunk. No fabrication from training-time knowledge. Output INTERNAL — case-writer pulls it. Bilingual mandatory. NO_STRONG_CASE sentinel when corpus is thin. detectives/sun_tzu.ts Grounds with hybridSearch top 18 chunks, calls Sonnet, parses JSON strict, calls writeProAnomalyBrief. tools/write_pro_anomaly_brief.ts Validates 2-4 pillars with bilingual claim+support, requires at least one [[wiki-link]] citation per pillar, INSERTs. orchestrator: new kind "anomaly_brief" dispatches Sun-Tzu. Case-writer integration (detectives/case_writer.ts): - Pulls most recent matching brief via ILIKE on topic or doc_id. - Renders brief as a separate prompt section labelled "Strategic brief (internal — do NOT cite or attribute)". - Instructs the narrator to weave the thesis as a quiet through- line, use pillar facts in scenes, let the unexplained clause inform the closing paragraph. Forbidden to name "the analyst", say "a brief argues", or use the words "thesis"/"pillar" explicitly. Translate it into prose. 
Entity hero illustrations: - 3 painterly editorial illustrations generated via Nano Banana Pro at 2K, stored under /data/disclosure/processing/case-art/: * EV-1947-06-24-kenneth-arnold-sighting.png — cockpit POV of Arnold in a CallAir A-2 over Mount Rainier, 9 chevron disc objects in formation, 1947 Life-magazine register. * EV-1947-07-08-roswell-incident.png — debris field in NM desert, USAAF officer in 1947 uniform examining foil fragments, period staff car. * EV-1947-06-21-maury-island-incident.png — wooden patrol boat on Puget Sound, 6 doughnut craft hovering, one shedding glowing slag, Harold Dahl + son + dog watching. - app/e/[cls]/[id]/page.tsx: full-bleed editorial hero replaces the old gradient header card when an illustration exists for that entity_id. Title sits over the painting with gradient overlay. "Ilustração editorial" chip in the top-right. Quota note: Claude OAuth still rate-limited as of this commit, so Sun-Tzu hasn't been smoke-tested in production. Code is shipped and ready; first brief will land when the weekly quota refreshes. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 19:41:20 +00:00
</header>}
<div className="grid grid-cols-1 lg:grid-cols-[1fr_320px] gap-8">
{/* MAIN — narrative + chunks live */}
<article>
W0+W1+W1.2: security hardening, observability, autocomplete, glitchtip, forgejo CI W0 — security hardening (5 fixes verified live on disclosure.top) - middleware: gate /api/admin/* same as /admin/* (F1) - imgproxy: tighten LOCAL_FILESYSTEM_ROOT from / to /var/lib/storage (F2) - studio: real basic-auth label (bcrypt hash, middleware reference) (F3) - relations: ENABLE ROW LEVEL SECURITY + public SELECT policy (F4) - migration 0003: fold is_searchable + hybrid_search update into canonical (TD#2) W1 — observability + resilience + autocomplete - studio: HOSTNAME=0.0.0.0 so Next.js binds on loopback for healthcheck - compose: PG_POOL_MAX=20, CLAUDE_CODE_OAUTH_TOKEN gated by separate env - claude-code.ts: subprocess timeout configurable (CLAUDE_CODE_TIMEOUT_MS) - openrouter.ts: retry with exponential backoff + Retry-After + in-memory circuit breaker (promotes FALLBACK after CB_THRESHOLD failures) - lib/logger.ts: pino logger (NDJSON prod / pretty dev) + withRequest helper - middleware: mints correlation_id, stamps x-correlation-id response header, emits structured http_request log per /api/* call - messages/route.ts: switch to structured logger - 60_meili_index.py: push documents + chunks into Meilisearch - /api/search/autocomplete: parallel meili search (docs + chunks), 5-8ms p50 - search-autocomplete.tsx: debounced dropdown wired into search-panel W1.2 — Glitchtip + Forgejo self-hosted - compose: glitchtip-redis + glitchtip-web + glitchtip-worker (v4.2) - compose: forgejo + forgejo-runner (server v9, runner v6) with group_add=988 - @sentry/nextjs SDK wired (instrumentation.ts + sentry.{client,server}.config.ts) - /api/admin/throw smoke endpoint (gated by W0-F1 middleware) - Synthetic event ingestion verified at glitchtip.disclosure.top - forgejo.disclosure.top up, repo discadmin/disclosure-bureau created, runner registered (labels: ubuntu-latest, docker) - .forgejo/workflows/ci.yml: typecheck + lint + build + npm audit + python syntax + compose validation 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:18:42 +00:00
{/* Structured description + attributes from frontmatter */}
{wiki?.fm && <EntityAttributes fm={wiki.fm as Record<string, unknown>} />}
{/* Live chunk previews — most impactful section */}
{sampleChunks.length > 0 && (
<section className="mb-10">
<h2 className="font-mono text-sm text-[#7fdbff] uppercase tracking-widest mb-3 border-l-2 border-[#7fdbff] pl-3">
Aparece em {sampleChunks.length}+ trechos · top {sampleChunks.length}
</h2>
<div className="space-y-2">
{sampleChunks.map((c) => {
const text = (c.content_pt || c.content_en || "").trim();
const docPretty = c.doc_id.replace(/^doc-/, "").replace(/-/g, " ").slice(0, 60);
const cropUrl = c.bbox
? `/api/crop?doc=${encodeURIComponent(c.doc_id)}&page=${c.page}&x=${c.bbox.x}&y=${c.bbox.y}&w=${c.bbox.w}&h=${c.bbox.h}&w_px=200`
: null;
return (
<Link
key={c.chunk_pk}
href={`/d/${c.doc_id}#${c.chunk_id}`}
className="flex items-start gap-3 p-3 bg-[#0a121e] border border-[rgba(127,219,255,0.15)] hover:border-[#00ff9c] rounded transition"
>
{cropUrl && (
<Image
src={cropUrl}
alt=""
width={120}
height={80}
unoptimized
className="block w-28 h-20 object-cover bg-black rounded flex-shrink-0"
/>
)}
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2 text-[10px] font-mono mb-1 flex-wrap">
<span className="text-[#00ff9c]">{c.chunk_id}</span>
<span className="text-[#5a6678]">p{c.page}</span>
<span className="text-[#5a6678]">{c.type}</span>
{c.classification && (
<span className="text-[#ff6b6b]">{c.classification}</span>
)}
{c.ufo_anomaly && (
<span className="text-[#00ff9c]">🛸 {c.ufo_anomaly_type ?? "UAP"}</span>
)}
</div>
<div className="text-sm text-[#c8d4e6] line-clamp-3 leading-snug">{text}</div>
<div className="text-[10px] font-mono text-[#5a6678] truncate mt-1">
{docPretty}
</div>
</div>
</Link>
);
})}
</div>
</section>
)}
W0+W1+W1.2: security hardening, observability, autocomplete, glitchtip, forgejo CI W0 — security hardening (5 fixes verified live on disclosure.top) - middleware: gate /api/admin/* same as /admin/* (F1) - imgproxy: tighten LOCAL_FILESYSTEM_ROOT from / to /var/lib/storage (F2) - studio: real basic-auth label (bcrypt hash, middleware reference) (F3) - relations: ENABLE ROW LEVEL SECURITY + public SELECT policy (F4) - migration 0003: fold is_searchable + hybrid_search update into canonical (TD#2) W1 — observability + resilience + autocomplete - studio: HOSTNAME=0.0.0.0 so Next.js binds on loopback for healthcheck - compose: PG_POOL_MAX=20, CLAUDE_CODE_OAUTH_TOKEN gated by separate env - claude-code.ts: subprocess timeout configurable (CLAUDE_CODE_TIMEOUT_MS) - openrouter.ts: retry with exponential backoff + Retry-After + in-memory circuit breaker (promotes FALLBACK after CB_THRESHOLD failures) - lib/logger.ts: pino logger (NDJSON prod / pretty dev) + withRequest helper - middleware: mints correlation_id, stamps x-correlation-id response header, emits structured http_request log per /api/* call - messages/route.ts: switch to structured logger - 60_meili_index.py: push documents + chunks into Meilisearch - /api/search/autocomplete: parallel meili search (docs + chunks), 5-8ms p50 - search-autocomplete.tsx: debounced dropdown wired into search-panel W1.2 — Glitchtip + Forgejo self-hosted - compose: glitchtip-redis + glitchtip-web + glitchtip-worker (v4.2) - compose: forgejo + forgejo-runner (server v9, runner v6) with group_add=988 - @sentry/nextjs SDK wired (instrumentation.ts + sentry.{client,server}.config.ts) - /api/admin/throw smoke endpoint (gated by W0-F1 middleware) - Synthetic event ingestion verified at glitchtip.disclosure.top - forgejo.disclosure.top up, repo discadmin/disclosure-bureau created, runner registered (labels: ubuntu-latest, docker) - .forgejo/workflows/ci.yml: typecheck + lint + build + npm audit + python syntax + compose validation 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:18:42 +00:00
{/* Narrative body only when it carries real prose, not just the
empty "## Description" headings the generator leaves behind. */}
{hasNarrativeProse && (
<section className="pt-6 border-t border-[rgba(0,255,156,0.12)]">
<h2 className="font-mono text-sm text-[#7fdbff] uppercase tracking-widest mb-3 border-l-2 border-[#7fdbff] pl-3">
Narrativa
</h2>
W0+W1+W1.2: security hardening, observability, autocomplete, glitchtip, forgejo CI W0 — security hardening (5 fixes verified live on disclosure.top) - middleware: gate /api/admin/* same as /admin/* (F1) - imgproxy: tighten LOCAL_FILESYSTEM_ROOT from / to /var/lib/storage (F2) - studio: real basic-auth label (bcrypt hash, middleware reference) (F3) - relations: ENABLE ROW LEVEL SECURITY + public SELECT policy (F4) - migration 0003: fold is_searchable + hybrid_search update into canonical (TD#2) W1 — observability + resilience + autocomplete - studio: HOSTNAME=0.0.0.0 so Next.js binds on loopback for healthcheck - compose: PG_POOL_MAX=20, CLAUDE_CODE_OAUTH_TOKEN gated by separate env - claude-code.ts: subprocess timeout configurable (CLAUDE_CODE_TIMEOUT_MS) - openrouter.ts: retry with exponential backoff + Retry-After + in-memory circuit breaker (promotes FALLBACK after CB_THRESHOLD failures) - lib/logger.ts: pino logger (NDJSON prod / pretty dev) + withRequest helper - middleware: mints correlation_id, stamps x-correlation-id response header, emits structured http_request log per /api/* call - messages/route.ts: switch to structured logger - 60_meili_index.py: push documents + chunks into Meilisearch - /api/search/autocomplete: parallel meili search (docs + chunks), 5-8ms p50 - search-autocomplete.tsx: debounced dropdown wired into search-panel W1.2 — Glitchtip + Forgejo self-hosted - compose: glitchtip-redis + glitchtip-web + glitchtip-worker (v4.2) - compose: forgejo + forgejo-runner (server v9, runner v6) with group_add=988 - @sentry/nextjs SDK wired (instrumentation.ts + sentry.{client,server}.config.ts) - /api/admin/throw smoke endpoint (gated by W0-F1 middleware) - Synthetic event ingestion verified at glitchtip.disclosure.top - forgejo.disclosure.top up, repo discadmin/disclosure-bureau created, runner registered (labels: ubuntu-latest, docker) - .forgejo/workflows/ci.yml: typecheck + lint + build + npm audit + python syntax + compose validation 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:18:42 +00:00
<MarkdownBody>{wiki!.body}</MarkdownBody>
</section>
)}
W0+W1+W1.2: security hardening, observability, autocomplete, glitchtip, forgejo CI W0 — security hardening (5 fixes verified live on disclosure.top) - middleware: gate /api/admin/* same as /admin/* (F1) - imgproxy: tighten LOCAL_FILESYSTEM_ROOT from / to /var/lib/storage (F2) - studio: real basic-auth label (bcrypt hash, middleware reference) (F3) - relations: ENABLE ROW LEVEL SECURITY + public SELECT policy (F4) - migration 0003: fold is_searchable + hybrid_search update into canonical (TD#2) W1 — observability + resilience + autocomplete - studio: HOSTNAME=0.0.0.0 so Next.js binds on loopback for healthcheck - compose: PG_POOL_MAX=20, CLAUDE_CODE_OAUTH_TOKEN gated by separate env - claude-code.ts: subprocess timeout configurable (CLAUDE_CODE_TIMEOUT_MS) - openrouter.ts: retry with exponential backoff + Retry-After + in-memory circuit breaker (promotes FALLBACK after CB_THRESHOLD failures) - lib/logger.ts: pino logger (NDJSON prod / pretty dev) + withRequest helper - middleware: mints correlation_id, stamps x-correlation-id response header, emits structured http_request log per /api/* call - messages/route.ts: switch to structured logger - 60_meili_index.py: push documents + chunks into Meilisearch - /api/search/autocomplete: parallel meili search (docs + chunks), 5-8ms p50 - search-autocomplete.tsx: debounced dropdown wired into search-panel W1.2 — Glitchtip + Forgejo self-hosted - compose: glitchtip-redis + glitchtip-web + glitchtip-worker (v4.2) - compose: forgejo + forgejo-runner (server v9, runner v6) with group_add=988 - @sentry/nextjs SDK wired (instrumentation.ts + sentry.{client,server}.config.ts) - /api/admin/throw smoke endpoint (gated by W0-F1 middleware) - Synthetic event ingestion verified at glitchtip.disclosure.top - forgejo.disclosure.top up, repo discadmin/disclosure-bureau created, runner registered (labels: ubuntu-latest, docker) - .forgejo/workflows/ci.yml: typecheck + lint + build + npm audit + python syntax + compose validation 
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:18:42 +00:00
{sampleChunks.length === 0 && !hasNarrativeProse && !fmHasContent && (
<div className="text-[#5a6678] italic text-sm p-6 border border-[rgba(255,165,0,0.30)] bg-[rgba(255,165,0,0.05)] rounded">
Entidade ainda sem chunks indexados na DB. Aguarde o indexer terminar.
</div>
)}
</article>
{/* SIDEBAR — documents mentioning this entity (live from DB), typed relations, and the mini graph */}
<aside className="lg:sticky lg:top-6 lg:self-start space-y-6">
<section>
<h3 className="font-mono text-[10px] text-[#8896aa] uppercase tracking-widest mb-2">
Aparece em {mentionGroups.length} documento(s)
</h3>
{mentionGroups.length === 0 ? (
displayStrength === "curated" ? (
<p className="text-[#a78bfa] text-xs italic leading-relaxed">
Não documentado nos PDFs deste corpus. Conteúdo abaixo vem de fonte
curada (registro UAP mundial), não de extração de documentos.
</p>
) : (
<p className="text-[#5a6678] text-xs italic">Sem dados de mention ainda.</p>
)
) : (
<ul className="space-y-1 max-h-[50vh] overflow-y-auto pr-1">
{mentionGroups.map((m) => (
<li key={m.doc_id}>
<Link
href={`/d/${m.doc_id}`}
className="group block p-2 border border-transparent hover:border-[rgba(0,255,156,0.32)] hover:bg-[rgba(0,255,156,0.04)] rounded transition"
>
<div className="flex items-baseline gap-2 font-mono text-[11px]">
<span className="text-[#7fdbff] group-hover:text-[#00ff9c] truncate flex-1">
{m.canonical_title ?? m.doc_id}
</span>
{m.text_only && (
<span
title="Menção textual encontrada via back-fill (alias dentro do corpo narrativo); pipeline estruturado não pegou."
className="text-[9px] text-[#a78bfa] border border-[rgba(167,139,250,0.40)] px-1 rounded shrink-0"
>
texto
</span>
)}
<span className="text-[#00ff9c] tabular-nums shrink-0">{m.mention_count}×</span>
</div>
<div className="flex items-center gap-2 mt-0.5 font-mono text-[10px] text-[#5a6678]">
{m.classification && (
<span className="text-[#ff6b6b]">{m.classification}</span>
)}
<span>· {pageOcurrencesText(m.pages ?? [])}</span>
</div>
</Link>
</li>
))}
</ul>
)}
</section>
<section className="border-t border-[rgba(0,255,156,0.12)] pt-4">
<h3 className="font-mono text-[10px] text-[#8896aa] uppercase tracking-widest mb-3">
Relações tipadas
</h3>
<EntityRelations entityClass={entityClassSingular} entityId={id} />
</section>
<EntityGraphMini
entityClassSingular={entityClassSingular}
entityId={id}
/>
</aside>
</div>
<ChatBubble context={{ doc_id: mentionGroups[0]?.doc_id }} />
</main>
);
}