disclosure-bureau/web/components/doc-renderer-v2.tsx
Luiz Gustavo 5b62d0a3fe fix: UAP flag renders cleanly when type/rationale absent
~414 chunks have ufo_anomaly_detected=true but no type/rationale (extraction
left them null), so the flag rendered "UAP flag: anomaly —" with a dangling
em-dash. Build the label from the parts that exist: fall back to "anomalia"
for a missing type and omit the "—" when there's no rationale. The flag still
shows (the chunk genuinely contains UAP content), just without the noise.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 16:42:37 -03:00

507 lines
16 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* DocRendererV2 — render a document from agentic chunks (raw/<doc>--subagent/).
*
* Per chunk, picks the right HTML element from its type, places images at
* their bbox position via on-demand /api/crop, and renders tables when
* a related_table is attached.
*
* Modes:
* - flow: continuous reading order (one column, all pages)
* - paged: page-by-page, with mini PNG thumbnail per page
*/
"use client";
import { useState } from "react";
import Image from "next/image";
import type { ParsedChunk } from "@/lib/chunks";
type Mode = "flow" | "paged";
type Lang = "pt-br" | "en" | "both";
/**
 * Named hex colours of the renderer palette. The same hex values appear
 * inline in the Tailwind arbitrary-value classes used throughout this file.
 * NOTE(review): nothing in the visible part of this file reads CSS_VARS —
 * confirm whether it is still needed.
 */
const CSS_VARS: Record<"cyan" | "green" | "dim" | "text", string> = {
  cyan: "#7fdbff",
  green: "#00ff9c",
  dim: "#5a6678",
  text: "#c8d4e6",
};
/**
 * Chunk types that are pure visual noise on a scanned page. A chunk whose
 * `type` is in this set is never rendered.
 */
const NOISE_CHUNK_TYPES: Set<string> = new Set([
  // Empty regions and dividers
  "blank", "blank_area", "blank_page", "separator",
  // Physical artefacts of the paper
  "punch_hole", "fastener_hole", "barcode",
  // Bare redaction furniture
  "redaction_bar", "redaction_header", "redaction_footer",
]);
/**
 * Allowlist of `image_type` values whose crop is worth showing to a reader:
 * photographs, drawings, diagrams/maps, illustrations, press clippings,
 * UAP-object crops and sensor frames. Any image_type not listed here
 * (seals, stamps, signatures, labels, envelopes, page edges, holes, marks,
 * watermarks, …) is filtered out because it adds no investigative value and
 * only clutters the page.
 */
const RENDERABLE_IMAGE_TYPES: Set<string> = new Set(
  [
    // Photographs (all flavors)
    [
      "photo",
      "photograph",
      "photograph_of_document",
      "ufo_photograph",
      "surveillance_photo",
      "surveillance_photograph",
      "surveillance_infrared_photo",
      "infrared_photo",
      "infrared_photo_detail",
      "thermal_infrared_photo",
      "thermal_infrared_surveillance_frame",
      "aerial_surveillance",
      "portrait",
    ],
    // Drawings & sketches
    ["drawing", "sketch", "hand_drawn_diagram", "doodle", "artist_rendering"],
    // Diagrams & maps
    [
      "diagram",
      "map",
      "map_diagram",
      "geographical_map",
      "anatomical_diagram",
      "ufo_diagram_technical",
    ],
    // Illustrations
    [
      "illustration",
      "illustration_ufo_encounter",
      "illustration_ufo_sighting",
      "cartoon_illustration",
      "composite_rendering",
    ],
    // Newspaper / magazine clippings (informational content)
    [
      "newspaper_clipping",
      "newspaper_article_eyewitness",
      "newspaper_article_with_headline",
      "newspaper_article_mass_sighting",
      "newspaper_article_regional_sighting",
      "newspaper_clipping_collage",
      "newspaper_clipping_composite",
      "newspaper_clippings_display",
      "newspaper_collage",
      "magazine_page",
      "magazine_cover",
      "advertisement_illustration",
      "clipping",
    ],
    // UAP object crops and sensor frames
    [
      "uap_object",
      "uap_object_crop",
      "uap_object_detail",
      "uap_detail_crop",
      "uap_closeup",
      "aerial_object",
      "aerial_objects_closeup",
      "sensor_frame",
      "sensor_footage",
      "sensor_overlay",
      "sensor_reticle_with_uap",
      "thermal_sensor_frame",
      "infrared_camera_frame",
    ],
  ].flat(),
);
/**
 * Render a single parsed chunk as the HTML element that matches its type.
 *
 * Returns `null` for chunks that should not appear at all: noise chunk
 * types, image chunks whose `image_type` is not on the allowlist, and image
 * chunks that have neither usable crop coordinates nor a description.
 *
 * The outermost element of every rendered chunk carries `id={fm.chunk_id}`
 * so citations can jump to it. `scroll-mt-24` keeps the target clear of the
 * sticky page header. The id is emitted exactly once per chunk — a second
 * element with the same id would be invalid HTML and would never be the
 * jump target anyway, because the first match in tree order wins.
 *
 * @param c     The parsed chunk (front-matter `fm` plus EN / PT-BR content).
 * @param lang  Which language(s) to show. Single-text elements (headings,
 *              stamps, list items, …) show Portuguese whenever it is enabled,
 *              including in "both" mode; letterhead, address blocks and
 *              paragraphs show both texts in "both" mode.
 * @param docId Document identifier used to build crop and CSV URLs.
 */
function ChunkCard({
  c,
  lang,
  docId,
}: {
  c: ParsedChunk;
  lang: Lang;
  docId: string;
}) {
  const { fm, content_en, content_pt } = c;
  const showEn = lang === "en" || lang === "both";
  const showPt = lang === "pt-br" || lang === "both";
  const bilingual = lang === "both";
  const ptOnly = lang === "pt-br";
  // Text for elements that only have room for one language.
  const primaryText = showPt ? content_pt : content_en;

  // Skip pure visual noise (blank areas, binder holes, separators, bare redaction bars).
  if (typeof fm.type === "string" && NOISE_CHUNK_TYPES.has(fm.type)) return null;

  // Image chunks: a chunk is image-class when it is typed "image" or carries
  // any image_type at all.
  if (fm.type === "image" || fm.image_type) {
    // Only allowlisted image types are rendered (photographs, drawings, maps,
    // diagrams, illustrations, clippings, UAP crops, sensor frames). Seals,
    // stamps, signatures, labels, etc. produce meaningless crops — drop them.
    const imageType = typeof fm.image_type === "string" ? fm.image_type : "";
    if (!RENDERABLE_IMAGE_TYPES.has(imageType)) return null;

    // Use the chunk's own bbox with no default: substituting a placeholder
    // box for a missing bbox would pass validation and crop an arbitrary
    // strip of the page instead of falling back to the description.
    const bbox = fm.bbox;
    if (
      !bbox ||
      typeof bbox.x !== "number" ||
      typeof bbox.y !== "number" ||
      typeof bbox.w !== "number" ||
      typeof bbox.h !== "number" ||
      !(bbox.w > 0 && bbox.h > 0)
    ) {
      // No usable crop coordinates → don't request a broken crop. Render the
      // image's textual description so the content is still surfaced.
      const descEn = fm.image_description_en || content_en;
      const descPt = fm.image_description_pt_br || content_pt;
      if (!descEn && !descPt) return null;
      return (
        <figure className="relative my-8 scroll-mt-24" id={fm.chunk_id}>
          <div className="border border-[rgba(0,255,156,0.20)] bg-[#0a121e] rounded p-4 text-sm text-[#c8d4e6]">
            <div className="font-mono text-[10px] text-[#7fdbff] mb-2">
              🖼 {fm.chunk_id} · p{fm.page}
              {fm.image_type ? ` · ${fm.image_type}` : ""} · descrição (sem recorte)
            </div>
            {showEn && descEn && <p className="mb-1"><b>EN:</b> {descEn}</p>}
            {showPt && descPt && <p><b>PT-BR:</b> {descPt}</p>}
          </div>
        </figure>
      );
    }

    const cropUrl =
      `/api/crop?doc=${encodeURIComponent(docId)}` +
      `&page=${fm.page}&x=${bbox.x}&y=${bbox.y}&w=${bbox.w}&h=${bbox.h}&w_px=800`;

    // Treat a missing, blank or literal "null" anomaly type as uncharacterized.
    const rawAnomalyType =
      typeof fm.ufo_anomaly_type === "string" ? fm.ufo_anomaly_type.trim() : "";
    const anomalyLabel =
      rawAnomalyType && rawAnomalyType.toLowerCase() !== "null" ? rawAnomalyType : "anomalia";

    return (
      <figure className="relative my-8 scroll-mt-24" id={fm.chunk_id}>
        <div className="relative border border-[rgba(0,255,156,0.20)] bg-[#0a121e] rounded overflow-hidden">
          <Image
            src={cropUrl}
            alt={primaryText || `chunk ${fm.chunk_id}`}
            width={800}
            // Height follows the bbox aspect ratio; the floor on w guards
            // against a near-zero width blowing the ratio up.
            height={Math.round(800 * (bbox.h / Math.max(0.01, bbox.w)))}
            sizes="(max-width: 768px) 90vw, 700px"
            className="block w-full h-auto"
          />
        </div>
        <figcaption className="mt-2 text-xs text-[#8896aa] flex items-center gap-2">
          <span className="font-mono text-[#7fdbff]">{fm.chunk_id}</span>
          <span>·</span>
          <span>p{fm.page}</span>
          {fm.image_type && <span>· {fm.image_type}</span>}
          {fm.ufo_anomaly_detected && (
            <span className="text-[#00ff9c]">· 🛸 UAP: {anomalyLabel}</span>
          )}
        </figcaption>
        {(showEn || showPt) && (
          <div className="mt-2 text-xs text-[#8896aa] space-y-1">
            {showEn && fm.image_description_en && <p><b>EN:</b> {fm.image_description_en}</p>}
            {showPt && fm.image_description_pt_br && <p><b>PT-BR:</b> {fm.image_description_pt_br}</p>}
          </div>
        )}
      </figure>
    );
  }

  // Classification banner
  if (fm.type === "classification_marking" || fm.type === "classification_banner") {
    return (
      <div className="my-4 text-center scroll-mt-24" id={fm.chunk_id}>
        <span className="inline-block font-mono text-xs px-3 py-1 border border-[#ff6b6b] text-[#ff6b6b] bg-[rgba(255,107,107,0.05)]">
          {primaryText}
        </span>
      </div>
    );
  }

  // Stamp
  if (fm.type === "stamp") {
    return (
      <div className="my-3 inline-block mr-2 scroll-mt-24" id={fm.chunk_id}>
        <span
          className="font-mono text-[10px] px-2 py-1 border-2 border-[#a78bfa] text-[#a78bfa] rounded"
          style={{ transform: "rotate(-2deg)" }}
        >
          🏛 {primaryText}
        </span>
      </div>
    );
  }

  // Signature
  if (fm.type === "signature" || fm.type === "signature_block") {
    return (
      <div className="my-3 italic text-[#8896aa] font-serif scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </div>
    );
  }

  // Letterhead
  if (fm.type === "letterhead") {
    return (
      <header className="my-6 text-center border-b border-[rgba(0,255,156,0.20)] pb-3 scroll-mt-24" id={fm.chunk_id}>
        {showEn && <div className="font-mono text-sm uppercase tracking-wider text-[#7fdbff]">{content_en}</div>}
        {bilingual && (
          <div className="font-mono text-xs uppercase tracking-wider text-[#8896aa] mt-1">{content_pt}</div>
        )}
        {ptOnly && <div className="font-mono text-sm uppercase tracking-wider text-[#7fdbff]">{content_pt}</div>}
      </header>
    );
  }

  // Address block
  if (fm.type === "address_block" || fm.type === "addressee_block") {
    return (
      <address className="my-4 not-italic font-mono text-sm text-[#c8d4e6] whitespace-pre-line scroll-mt-24" id={fm.chunk_id}>
        {showEn && <div>{content_en}</div>}
        {bilingual && <div className="text-[#8896aa] mt-1">{content_pt}</div>}
        {ptOnly && <div>{content_pt}</div>}
      </address>
    );
  }

  // Heading
  if (
    fm.type === "heading" ||
    fm.type === "header_block" ||
    fm.type === "subject_line" ||
    fm.type === "section_header"
  ) {
    return (
      <h3 className="mt-8 mb-3 text-lg font-mono text-[#00ff9c] border-l-2 border-[#00ff9c] pl-3 scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </h3>
    );
  }

  // Footer
  if (fm.type === "footer") {
    return (
      <footer className="my-4 text-xs text-[#5a6678] font-mono text-center scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </footer>
    );
  }

  // Form field
  if (fm.type === "form_field" || fm.type === "form_reference") {
    return (
      <div className="my-2 font-mono text-sm scroll-mt-24" id={fm.chunk_id}>
        {/* NOTE(review): this marker span is empty — a leading glyph may be missing; confirm. */}
        <span className="text-[#7fdbff]"></span> {primaryText}
      </div>
    );
  }

  // Bulleted / numbered
  if (fm.type === "bulleted_item") {
    return (
      <li className="my-1 ml-6 scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </li>
    );
  }
  if (fm.type === "numbered_item") {
    return (
      <li className="my-1 ml-6 list-decimal scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </li>
    );
  }

  // Quote
  if (fm.type === "quote_block") {
    return (
      <blockquote className="my-4 ml-4 pl-4 border-l-4 border-[#7fdbff] italic text-[#c8d4e6] scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </blockquote>
    );
  }

  // Marginalia
  if (fm.type === "marginalia") {
    return (
      <aside className="my-3 ml-8 text-xs text-[#a78bfa] font-handwriting italic scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </aside>
    );
  }

  // Redaction: the exemption code, when present, is shown after a separator
  // so it does not run into the word "REDACTED".
  if (fm.type === "redaction") {
    return (
      <span
        className="my-2 inline-block px-3 py-1 bg-black text-[#5a6678] font-mono text-xs rounded scroll-mt-24"
        id={fm.chunk_id}
        title={fm.redaction_code ?? "redacted"}
      >
        [REDACTED{fm.redaction_code ? ` · ${fm.redaction_code}` : ""}]
      </span>
    );
  }

  // Table marker (basic — full table render needs related_table data).
  // A table_marker without related_table falls through to the paragraph case.
  if (fm.type === "table_marker" && fm.related_table) {
    return (
      <div className="my-6 border border-[rgba(127,219,255,0.30)] bg-[#0a121e] p-4 rounded scroll-mt-24" id={fm.chunk_id}>
        <div className="text-xs font-mono text-[#7fdbff] mb-2">📊 {fm.related_table}</div>
        <div className="text-sm text-[#c8d4e6]">{primaryText}</div>
        <a
          href={`/api/static/raw/${docId}--subagent/tables/${fm.related_table}.csv`}
          target="_blank"
          rel="noopener"
          className="mt-2 inline-block text-xs text-[#00ff9c] hover:underline font-mono"
        >
          download CSV
        </a>
      </div>
    );
  }

  // Paragraph (default for every remaining chunk type)
  return (
    <div className="my-3 text-[15px] leading-relaxed text-[#c8d4e6] scroll-mt-24" id={fm.chunk_id}>
      {showEn && <p>{content_en}</p>}
      {bilingual && <p className="mt-1 text-[#8896aa] text-sm">{content_pt}</p>}
      {ptOnly && <p>{content_pt}</p>}
      {fm.ufo_anomaly_detected && <UapFlag fm={fm} />}
    </div>
  );
}
/**
 * UAP anomaly flag — shows the anomaly type and, when present, its rationale.
 *
 * Parts that are absent are omitted so an uncharacterized flag does not
 * render a dangling "anomaly —": a missing type falls back to "anomalia",
 * and the " — " separator is emitted only when there is a rationale to
 * follow it.
 *
 * @param fm Chunk front-matter; reads `ufo_anomaly_type` and
 *           `ufo_anomaly_rationale`.
 */
function UapFlag({ fm }: { fm: ParsedChunk["fm"] }) {
  // Normalize a field to a trimmed non-empty string, or null. Non-strings,
  // blank strings and the literal text "null" (any case) count as absent.
  const clean = (v: unknown): string | null => {
    const s = typeof v === "string" ? v.trim() : "";
    return s && s.toLowerCase() !== "null" ? s : null;
  };
  const type = clean(fm.ufo_anomaly_type);
  const rationale = clean(fm.ufo_anomaly_rationale);
  return (
    <div className="mt-1 text-xs text-[#00ff9c] font-mono">
      🛸 UAP flag: {type ?? "anomalia"}
      {rationale ? ` — ${rationale}` : ""}
    </div>
  );
}
/**
 * Decide whether a chunk should be hidden from the rendered page.
 *
 * A chunk is hidden when its `type` is a noise type, or when it is
 * image-class — `type === "image"` or any string `image_type` — and that
 * image_type is not on the renderable allowlist. The second rule also
 * catches non-image chunks that carry a stray image_type (e.g. type "stamp"
 * with image_type "stamp"): no visual crop is rendered for them.
 *
 * @param c The parsed chunk to classify.
 * @returns `true` when the chunk must not be rendered.
 */
function isNoiseChunk(c: ParsedChunk): boolean {
  const chunkType = c.fm.type;
  const imageType = c.fm.image_type;

  if (typeof chunkType === "string" && NOISE_CHUNK_TYPES.has(chunkType)) {
    return true;
  }

  const onAllowlist =
    typeof imageType === "string" && RENDERABLE_IMAGE_TYPES.has(imageType);
  const isImageClass = chunkType === "image" || typeof imageType === "string";
  return isImageClass && !onAllowlist;
}
/**
 * One page of the document in paged mode: a sticky header with the page
 * number, the count of visible chunks and a link to the original scan,
 * followed by the page's chunks.
 *
 * Chunks rejected by `isNoiseChunk` are dropped first, and a page with
 * nothing left renders nothing at all (no empty header).
 *
 * @param page   Page number shown in the header and used in the scan link.
 * @param chunks All chunks belonging to this page.
 * @param lang   Language selection forwarded to each chunk.
 * @param docId  Document identifier used for the scan link and chunk URLs.
 */
function PageGroup({
  page,
  chunks,
  lang,
  docId,
}: {
  page: number;
  chunks: ParsedChunk[];
  lang: Lang;
  docId: string;
}) {
  const visibleChunks = chunks.filter((c) => !isNoiseChunk(c));
  if (visibleChunks.length === 0) return null;

  const chunkCount = visibleChunks.length;
  // The scan route takes a zero-padded, three-digit page segment (p001, p012, …).
  const scanHref = `/d/${docId}/p${String(page).padStart(3, "0")}`;

  return (
    <section className="mb-12">
      <div
        className="sticky top-0 z-10 bg-[#040810] border-b border-[rgba(0,255,156,0.15)] py-2 mb-4 flex items-center justify-between"
      >
        <h2 className="font-mono text-xs uppercase tracking-widest text-[#5a6678]">
          {/* Separator keeps the page number and the count from reading as one number. */}
          página <span className="text-[#7fdbff]">{page}</span> · {chunkCount}{" "}
          {chunkCount === 1 ? "trecho" : "trechos"}
        </h2>
        <a
          href={scanHref}
          className="font-mono text-[10px] text-[#7fdbff] hover:text-[#00ff9c]"
        >
          ver scan original
        </a>
      </div>
      {visibleChunks.map((c) => (
        <ChunkCard key={c.fm.chunk_id} c={c} lang={lang} docId={docId} />
      ))}
    </section>
  );
}
// Toggle options in display order. Typed arrays replace inline `as` casts so
// a typo in an option is a compile error.
const LANG_OPTIONS: readonly Lang[] = ["pt-br", "en", "both"];
const MODE_OPTIONS: readonly Mode[] = ["paged", "flow"];

/**
 * Top-level document renderer with language and layout toggles.
 *
 * State starts at language "pt-br" and mode "paged". In paged mode each page
 * is rendered as a `PageGroup`; in flow mode every chunk of every page is
 * rendered in order as a bare `ChunkCard`, with no page headers.
 *
 * @param docId        Document identifier forwarded to pages and chunks.
 * @param chunksByPage `[pageNumber, chunks]` pairs, rendered in the order given.
 */
export function DocRendererV2({
  docId,
  chunksByPage,
}: {
  docId: string;
  chunksByPage: Array<[number, ParsedChunk[]]>;
}) {
  const [lang, setLang] = useState<Lang>("pt-br");
  const [mode, setMode] = useState<Mode>("paged");

  return (
    <div>
      <div className="mb-6 flex items-center justify-between flex-wrap gap-3">
        <div className="flex items-center gap-2 font-mono text-xs">
          {LANG_OPTIONS.map((l) => (
            <button
              key={l}
              // Explicit type: a bare <button> submits any enclosing form.
              type="button"
              aria-pressed={lang === l}
              onClick={() => setLang(l)}
              className={`px-3 py-1.5 border rounded ${
                lang === l
                  ? "border-[#00ff9c] text-[#00ff9c] bg-[rgba(0,255,156,0.08)]"
                  : "border-[rgba(0,255,156,0.20)] text-[#8896aa]"
              }`}
            >
              {l}
            </button>
          ))}
        </div>
        <div className="flex items-center gap-2 font-mono text-xs">
          {MODE_OPTIONS.map((m) => (
            <button
              key={m}
              type="button"
              aria-pressed={mode === m}
              onClick={() => setMode(m)}
              className={`px-3 py-1.5 border rounded ${
                mode === m
                  ? "border-[#7fdbff] text-[#7fdbff] bg-[rgba(127,219,255,0.08)]"
                  : "border-[rgba(127,219,255,0.20)] text-[#8896aa]"
              }`}
            >
              {m}
            </button>
          ))}
        </div>
      </div>
      {mode === "paged" ? (
        chunksByPage.map(([page, chunks]) => (
          <PageGroup key={page} page={page} chunks={chunks} lang={lang} docId={docId} />
        ))
      ) : (
        <div>
          {chunksByPage.flatMap(([, chunks]) =>
            chunks.map((c) => <ChunkCard key={c.fm.chunk_id} c={c} lang={lang} docId={docId} />),
          )}
        </div>
      )}
    </div>
  );
}