disclosure-bureau/web/components/doc-renderer-v2.tsx

468 lines
14 KiB
TypeScript
Raw Normal View History

/**
* DocRendererV2 — renders a document from agentic chunks (raw/<doc>--subagent/).
*
* Per chunk, picks the right HTML element from its type, places images at
* their bbox position via on-demand /api/crop, and renders tables when
* a related_table is attached.
*
* Modes:
* - flow: continuous reading order (one column, all pages)
* - paged: page-by-page, with mini PNG thumbnail per page
*/
"use client";
import { useState } from "react";
import Image from "next/image";
import type { ParsedChunk } from "@/lib/chunks";
// Layout mode: "flow" renders every chunk in one continuous column,
// "paged" groups chunks under a per-page header.
type Mode = "flow" | "paged";
// Which translation(s) of a chunk's content to display.
type Lang = "pt-br" | "en" | "both";
/**
 * Named hex colours of the viewer palette.
 *
 * NOTE(review): nothing in the visible part of this file reads this object —
 * the components spell the same hex values out inside their Tailwind class
 * names. Confirm whether it is still needed.
 */
const CSS_VARS = {
  cyan: "#7fdbff", // accent: chunk ids, links
  green: "#00ff9c", // accent: headings, active toggles
  dim: "#5a6678", // muted labels
  text: "#c8d4e6", // body text
};
/**
 * Chunk `type` values that carry no readable content on a scanned page.
 * Chunks of these types are never rendered.
 */
const NOISE_CHUNK_TYPES = new Set<string>([
  // Empty regions
  "blank", "blank_area", "blank_page",
  // Layout artefacts
  "separator",
  // Physical marks left by binding / filing
  "punch_hole", "fastener_hole",
  // Machine-readable marks
  "barcode",
  // Bare redaction furniture
  "redaction_bar", "redaction_header", "redaction_footer",
]);
/**
 * ALLOWLIST of `image_type` values whose crop is worth showing to a reader:
 * photographs, drawings, diagrams/maps, illustrations, press clippings,
 * UAP-object crops and sensor frames.
 *
 * Any image chunk whose `image_type` is NOT listed here (seals, stamps,
 * signatures, labels, envelopes, page edges, holes, marks, watermarks, …)
 * is dropped — those crops add no investigative value and clutter the page.
 */
const RENDERABLE_IMAGE_TYPES = new Set<string>([
  // Photographs (all flavors)
  "photo", "photograph", "photograph_of_document", "ufo_photograph",
  "surveillance_photo", "surveillance_photograph", "surveillance_infrared_photo",
  "infrared_photo", "infrared_photo_detail",
  "thermal_infrared_photo", "thermal_infrared_surveillance_frame",
  "aerial_surveillance", "portrait",

  // Drawings & sketches
  "drawing", "sketch", "hand_drawn_diagram", "doodle", "artist_rendering",

  // Diagrams & maps
  "diagram", "map", "map_diagram", "geographical_map",
  "anatomical_diagram", "ufo_diagram_technical",

  // Illustrations
  "illustration", "illustration_ufo_encounter", "illustration_ufo_sighting",
  "cartoon_illustration", "composite_rendering",

  // Newspaper / magazine clippings (informational content)
  "newspaper_clipping",
  "newspaper_article_eyewitness", "newspaper_article_with_headline",
  "newspaper_article_mass_sighting", "newspaper_article_regional_sighting",
  "newspaper_clipping_collage", "newspaper_clipping_composite",
  "newspaper_clippings_display", "newspaper_collage",
  "magazine_page", "magazine_cover", "advertisement_illustration", "clipping",

  // UAP object crops & sensor frames
  "uap_object", "uap_object_crop", "uap_object_detail", "uap_detail_crop", "uap_closeup",
  "aerial_object", "aerial_objects_closeup",
  "sensor_frame", "sensor_footage", "sensor_overlay", "sensor_reticle_with_uap",
  "thermal_sensor_frame", "infrared_camera_frame",
]);
/**
 * Renders one parsed chunk as the HTML element that matches its `fm.type`.
 *
 * Returns `null` for chunks that should not be shown: types listed in
 * `NOISE_CHUNK_TYPES`, and image chunks (type `"image"` or any truthy
 * `image_type`) whose `image_type` is not in `RENDERABLE_IMAGE_TYPES`.
 *
 * Every rendered wrapper carries `id={fm.chunk_id}` so `#<chunk_id>` links can
 * jump to it. The id lives on the wrapper only: an additional `<a id=…>`
 * child would repeat the same id inside the wrapper (ids must be unique, and
 * fragment navigation picks the first match — the wrapper — anyway).
 * `scroll-mt-24` gives the jump target the clearance that such an offset
 * anchor would otherwise provide.
 *
 * Language handling: letterhead, address and paragraph chunks show EN and/or
 * PT-BR according to `lang`; every other text chunk shows a single string —
 * PT-BR whenever `lang` includes it ("pt-br" or "both"), otherwise EN.
 *
 * @param c      Chunk to render (front-matter `fm` plus EN / PT-BR content).
 * @param lang   Which language(s) to display.
 * @param docId  Document id used to build the crop and table-CSV URLs.
 * @returns The rendered element, or `null` when the chunk is filtered out.
 */
function ChunkCard({
  c,
  lang,
  docId,
}: {
  c: ParsedChunk;
  lang: Lang;
  docId: string;
}) {
  const { fm, content_en, content_pt } = c;
  const bbox = fm.bbox ?? { x: 0, y: 0, w: 1, h: 0.05 };
  const showEn = lang === "en" || lang === "both";
  const showPt = lang === "pt-br" || lang === "both";
  // Single-string chunks prefer PT-BR whenever it is selected.
  const primaryText = showPt ? content_pt : content_en;

  // Skip pure visual noise (blank areas, binder holes, separators, bare redaction bars).
  if (typeof fm.type === "string" && NOISE_CHUNK_TYPES.has(fm.type)) return null;

  // Image chunks: render the crop inline, but only when image_type is on the
  // allowlist. Seals, stamps, signatures, labels, etc. produce visually
  // meaningless crops, so those chunks are dropped entirely.
  const isImageChunk = fm.type === "image" || Boolean(fm.image_type);
  if (isImageChunk) {
    const imageType = typeof fm.image_type === "string" ? fm.image_type : "";
    if (!RENDERABLE_IMAGE_TYPES.has(imageType)) return null;

    const cropWidthPx = 800;
    const cropUrl =
      `/api/crop?doc=${encodeURIComponent(docId)}` +
      `&page=${fm.page}&x=${bbox.x}&y=${bbox.y}&w=${bbox.w}&h=${bbox.h}&w_px=${cropWidthPx}`;
    // Layout hint only (the image is displayed w-full / h-auto). Clamp so a
    // zero-height or malformed bbox can never yield 0 or NaN.
    const aspect = bbox.h / Math.max(0.01, bbox.w);
    const cropHeightPx = Math.max(1, Math.round(cropWidthPx * aspect) || 0);

    return (
      <figure className="relative my-8 scroll-mt-24" id={fm.chunk_id}>
        <div className="relative border border-[rgba(0,255,156,0.20)] bg-[#0a121e] rounded overflow-hidden">
          <Image
            src={cropUrl}
            alt={primaryText || `chunk ${fm.chunk_id}`}
            width={cropWidthPx}
            height={cropHeightPx}
            sizes="(max-width: 768px) 90vw, 700px"
            className="block w-full h-auto"
          />
        </div>
        <figcaption className="mt-2 text-xs text-[#8896aa] flex items-center gap-2">
          <span className="font-mono text-[#7fdbff]">{fm.chunk_id}</span>
          <span>·</span>
          <span>p{fm.page}</span>
          {fm.image_type && <span>· {fm.image_type}</span>}
          {fm.ufo_anomaly_detected && (
            <span className="text-[#00ff9c]">· 🛸 UAP: {fm.ufo_anomaly_type ?? "anomaly"}</span>
          )}
        </figcaption>
        {(showEn || showPt) && (
          <div className="mt-2 text-xs text-[#8896aa] space-y-1">
            {showEn && fm.image_description_en && <p><b>EN:</b> {fm.image_description_en}</p>}
            {showPt && fm.image_description_pt_br && <p><b>PT-BR:</b> {fm.image_description_pt_br}</p>}
          </div>
        )}
      </figure>
    );
  }

  // Classification banner
  if (fm.type === "classification_marking" || fm.type === "classification_banner") {
    return (
      <div className="my-4 text-center scroll-mt-24" id={fm.chunk_id}>
        <span className="inline-block font-mono text-xs px-3 py-1 border border-[#ff6b6b] text-[#ff6b6b] bg-[rgba(255,107,107,0.05)]">
          {primaryText}
        </span>
      </div>
    );
  }

  // Stamp
  if (fm.type === "stamp") {
    return (
      <div className="my-3 inline-block mr-2 scroll-mt-24" id={fm.chunk_id}>
        <span
          className="font-mono text-[10px] px-2 py-1 border-2 border-[#a78bfa] text-[#a78bfa] rounded"
          style={{ transform: "rotate(-2deg)" }}
        >
          🏛 {primaryText}
        </span>
      </div>
    );
  }

  // Signature
  if (fm.type === "signature" || fm.type === "signature_block") {
    return (
      <div className="my-3 italic text-[#8896aa] font-serif scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </div>
    );
  }

  // Letterhead: EN first, PT-BR as a secondary line in "both" mode.
  if (fm.type === "letterhead") {
    return (
      <header
        className="my-6 text-center border-b border-[rgba(0,255,156,0.20)] pb-3 scroll-mt-24"
        id={fm.chunk_id}
      >
        {showEn && <div className="font-mono text-sm uppercase tracking-wider text-[#7fdbff]">{content_en}</div>}
        {showPt && lang === "both" && (
          <div className="font-mono text-xs uppercase tracking-wider text-[#8896aa] mt-1">{content_pt}</div>
        )}
        {!showEn && showPt && <div className="font-mono text-sm uppercase tracking-wider text-[#7fdbff]">{content_pt}</div>}
      </header>
    );
  }

  // Address block
  if (fm.type === "address_block" || fm.type === "addressee_block") {
    return (
      <address
        className="my-4 not-italic font-mono text-sm text-[#c8d4e6] whitespace-pre-line scroll-mt-24"
        id={fm.chunk_id}
      >
        {showEn && <div>{content_en}</div>}
        {showPt && lang === "both" && <div className="text-[#8896aa] mt-1">{content_pt}</div>}
        {!showEn && showPt && <div>{content_pt}</div>}
      </address>
    );
  }

  // Heading
  if (
    fm.type === "heading" ||
    fm.type === "header_block" ||
    fm.type === "subject_line" ||
    fm.type === "section_header"
  ) {
    return (
      <h3
        className="mt-8 mb-3 text-lg font-mono text-[#00ff9c] border-l-2 border-[#00ff9c] pl-3 scroll-mt-24"
        id={fm.chunk_id}
      >
        {primaryText}
      </h3>
    );
  }

  // Footer
  if (fm.type === "footer") {
    return (
      <footer className="my-4 text-xs text-[#5a6678] font-mono text-center scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </footer>
    );
  }

  // Form field
  if (fm.type === "form_field" || fm.type === "form_reference") {
    return (
      <div className="my-2 font-mono text-sm scroll-mt-24" id={fm.chunk_id}>
        {/* NOTE(review): this marker span is empty — a leading glyph looks to be missing; confirm. */}
        <span className="text-[#7fdbff]"></span> {primaryText}
      </div>
    );
  }

  // Bulleted / numbered
  // NOTE(review): these <li> elements are emitted without a parent <ul>/<ol>,
  // because each chunk is rendered on its own — consider grouping at the caller.
  if (fm.type === "bulleted_item") {
    return (
      <li className="my-1 ml-6 scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </li>
    );
  }
  if (fm.type === "numbered_item") {
    return (
      <li className="my-1 ml-6 list-decimal scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </li>
    );
  }

  // Quote
  if (fm.type === "quote_block") {
    return (
      <blockquote
        className="my-4 ml-4 pl-4 border-l-4 border-[#7fdbff] italic text-[#c8d4e6] scroll-mt-24"
        id={fm.chunk_id}
      >
        {primaryText}
      </blockquote>
    );
  }

  // Marginalia
  if (fm.type === "marginalia") {
    return (
      <aside className="my-3 ml-8 text-xs text-[#a78bfa] font-handwriting italic scroll-mt-24" id={fm.chunk_id}>
        {primaryText}
      </aside>
    );
  }

  // Redaction: the code (when present) is separated from the label so it
  // reads "[REDACTED: b6]" rather than "[REDACTEDb6]".
  if (fm.type === "redaction") {
    return (
      <span
        className="my-2 inline-block px-3 py-1 bg-black text-[#5a6678] font-mono text-xs rounded scroll-mt-24"
        id={fm.chunk_id}
        title={fm.redaction_code ?? "redacted"}
      >
        [REDACTED{fm.redaction_code ? `: ${fm.redaction_code}` : ""}]
      </span>
    );
  }

  // Table marker (basic — shows the marker text plus a CSV download link).
  // A table_marker without related_table falls through to the paragraph render.
  if (fm.type === "table_marker" && fm.related_table) {
    // Encode both path segments, matching how docId is encoded for the crop URL.
    const csvHref =
      `/api/static/raw/${encodeURIComponent(docId)}--subagent/tables/` +
      `${encodeURIComponent(String(fm.related_table))}.csv`;
    return (
      <div
        className="my-6 border border-[rgba(127,219,255,0.30)] bg-[#0a121e] p-4 rounded scroll-mt-24"
        id={fm.chunk_id}
      >
        <div className="text-xs font-mono text-[#7fdbff] mb-2">📊 {fm.related_table}</div>
        <div className="text-sm text-[#c8d4e6]">{primaryText}</div>
        <a
          href={csvHref}
          target="_blank"
          rel="noopener"
          className="mt-2 inline-block text-xs text-[#00ff9c] hover:underline font-mono"
        >
          download CSV
        </a>
      </div>
    );
  }

  // Paragraph (default for every type not handled above)
  return (
    <div className="my-3 text-[15px] leading-relaxed text-[#c8d4e6] scroll-mt-24" id={fm.chunk_id}>
      {showEn && <p>{content_en}</p>}
      {showPt && lang === "both" && <p className="mt-1 text-[#8896aa] text-sm">{content_pt}</p>}
      {!showEn && showPt && <p>{content_pt}</p>}
      {fm.ufo_anomaly_detected && (
        <div className="mt-1 text-xs text-[#00ff9c] font-mono">
          🛸 UAP flag: {fm.ufo_anomaly_type ?? "anomaly"} {fm.ufo_anomaly_rationale}
        </div>
      )}
    </div>
  );
}
/**
 * Tells whether a chunk should be hidden from the rendered document.
 *
 * A chunk is noise when:
 * - its `type` is listed in `NOISE_CHUNK_TYPES`, or
 * - it is image-class — `type === "image"` or it carries any truthy
 *   `image_type` — and that `image_type` is not a string found in
 *   `RENDERABLE_IMAGE_TYPES`. This also hides non-image chunks with a stray
 *   image_type (e.g. type "stamp" with image_type "stamp"): no visual crop
 *   is rendered for them.
 *
 * The image-class test uses truthiness on purpose so this predicate agrees
 * with the filter inside `ChunkCard`: an empty-string `image_type` means
 * "no image type" (the chunk renders as text), and a truthy non-string
 * `image_type` is hidden. That keeps paged mode, which pre-filters with this
 * function, showing the same chunks as flow mode.
 *
 * @param c Chunk to classify.
 * @returns `true` when the chunk must not be rendered.
 */
function isNoiseChunk(c: ParsedChunk): boolean {
  const { type, image_type: imageType } = c.fm;

  if (typeof type === "string" && NOISE_CHUNK_TYPES.has(type)) return true;

  const isImageChunk = type === "image" || Boolean(imageType);
  if (!isImageChunk) return false;

  const isAllowlisted = typeof imageType === "string" && RENDERABLE_IMAGE_TYPES.has(imageType);
  return !isAllowlisted;
}
/**
 * Renders all visible chunks of one page under a sticky page header.
 *
 * Chunks rejected by `isNoiseChunk` are removed first; when nothing is left
 * the whole section is omitted. The header shows the page number, the number
 * of visible chunks, and a link to the original scan at
 * `/d/<docId>/p<NNN>` (page number zero-padded to three digits).
 *
 * @param page    Page number shown in the header and used in the scan link.
 * @param chunks  All chunks belonging to this page.
 * @param lang    Language selection forwarded to every `ChunkCard`.
 * @param docId   Document id used for the scan link and forwarded to `ChunkCard`.
 */
function PageGroup({
  page,
  chunks,
  lang,
  docId,
}: {
  page: number;
  chunks: ParsedChunk[];
  lang: Lang;
  docId: string;
}) {
  const visibleChunks = chunks.filter((c) => !isNoiseChunk(c));
  if (visibleChunks.length === 0) return null;

  const chunkCount = visibleChunks.length;
  const scanHref = `/d/${docId}/p${String(page).padStart(3, "0")}`;

  return (
    <section className="mb-12">
      <div
        className="sticky top-0 z-10 bg-[#040810] border-b border-[rgba(0,255,156,0.15)] py-2 mb-4 flex items-center justify-between"
      >
        <h2 className="font-mono text-xs uppercase tracking-widest text-[#5a6678]">
          {/* Separator keeps the page number and the count from reading as one
              number ("página 3 12 trechos"); singular form for a single chunk. */}
          página <span className="text-[#7fdbff]">{page}</span> · {chunkCount}{" "}
          {chunkCount === 1 ? "trecho" : "trechos"}
        </h2>
        <a
          href={scanHref}
          className="font-mono text-[10px] text-[#7fdbff] hover:text-[#00ff9c]"
        >
          ver scan original
        </a>
      </div>
      {visibleChunks.map((c) => (
        <ChunkCard key={c.fm.chunk_id} c={c} lang={lang} docId={docId} />
      ))}
    </section>
  );
}
// Toggle options, in display order. Declared once at module level (typed, no
// `as` assertion) instead of being rebuilt on every render.
const LANG_OPTIONS: readonly Lang[] = ["pt-br", "en", "both"];
const MODE_OPTIONS: readonly Mode[] = ["paged", "flow"];

/**
 * Top-level document viewer: a language toggle, a layout-mode toggle, and the
 * rendered chunks.
 *
 * - "paged" (initial mode) renders one `PageGroup` per entry of `chunksByPage`.
 * - "flow" renders every chunk of every page as a flat list of `ChunkCard`s,
 *   without page headers; filtered chunks simply render nothing.
 *
 * The initial language is "pt-br".
 *
 * @param docId         Document id forwarded to the page and chunk renderers.
 * @param chunksByPage  `[pageNumber, chunks]` pairs, rendered in the given order.
 */
export function DocRendererV2({
  docId,
  chunksByPage,
}: {
  docId: string;
  chunksByPage: Array<[number, ParsedChunk[]]>;
}) {
  const [lang, setLang] = useState<Lang>("pt-br");
  const [mode, setMode] = useState<Mode>("paged");

  return (
    <div>
      <div className="mb-6 flex items-center justify-between flex-wrap gap-3">
        <div className="flex items-center gap-2 font-mono text-xs">
          {LANG_OPTIONS.map((l) => (
            <button
              key={l}
              // Explicit type: a bare <button> submits any enclosing <form>.
              type="button"
              // Expose the selected state to assistive technology.
              aria-pressed={lang === l}
              onClick={() => setLang(l)}
              className={`px-3 py-1.5 border rounded ${
                lang === l
                  ? "border-[#00ff9c] text-[#00ff9c] bg-[rgba(0,255,156,0.08)]"
                  : "border-[rgba(0,255,156,0.20)] text-[#8896aa]"
              }`}
            >
              {l}
            </button>
          ))}
        </div>
        <div className="flex items-center gap-2 font-mono text-xs">
          {MODE_OPTIONS.map((m) => (
            <button
              key={m}
              type="button"
              aria-pressed={mode === m}
              onClick={() => setMode(m)}
              className={`px-3 py-1.5 border rounded ${
                mode === m
                  ? "border-[#7fdbff] text-[#7fdbff] bg-[rgba(127,219,255,0.08)]"
                  : "border-[rgba(127,219,255,0.20)] text-[#8896aa]"
              }`}
            >
              {m}
            </button>
          ))}
        </div>
      </div>
      {mode === "paged" ? (
        chunksByPage.map(([page, chunks]) => (
          <PageGroup key={page} page={page} chunks={chunks} lang={lang} docId={docId} />
        ))
      ) : (
        <div>
          {chunksByPage.flatMap(([, chunks]) =>
            chunks.map((c) => <ChunkCard key={c.fm.chunk_id} c={c} lang={lang} docId={docId} />),
          )}
        </div>
      )}
    </div>
  );
}