/**
 * DocRendererV2 — render a document from agentic chunks (raw/--subagent/).
 *
 * Per chunk, picks the right HTML element from its type, places images at
 * their bbox position via on-demand /api/crop, and renders tables when
 * a related_table is attached.
 *
 * Modes:
 *   - flow:  continuous reading order (one column, all pages)
 *   - paged: page-by-page, with mini PNG thumbnail per page
 */
"use client";

import { useState } from "react";
import Image from "next/image";
import type { ParsedChunk } from "@/lib/chunks";

/** Layout mode of the renderer; see "Modes" in the file header. */
type Mode = "flow" | "paged";

/** Which chunk text to show: Portuguese, English, or both. */
type Lang = "pt-br" | "en" | "both";

/** Named hex colour constants. */
const CSS_VARS = {
  cyan: "#7fdbff",
  green: "#00ff9c",
  dim: "#5a6678",
  text: "#c8d4e6",
};

// Chunks that are pure visual noise on a scanned page — skip rendering entirely.
// Typed as ReadonlySet because this is a fixed lookup table that is only ever
// queried with `.has`; nothing should add to or delete from it at runtime.
const NOISE_CHUNK_TYPES: ReadonlySet<string> = new Set([
  "blank",
  "blank_area",
  "blank_page",
  "separator",
  "punch_hole",
  "fastener_hole",
  "barcode",
  "redaction_bar",
  "redaction_header",
  "redaction_footer",
]);

// ALLOWLIST: only render an image crop when image_type is something the
// reader actually benefits from seeing — photographs, drawings, maps,
// illustrations, newspaper clippings, UAP-object crops, sensor frames.
// Everything else (seals, stamps, signatures, labels, envelopes, page
// edges, holes, marks, watermarks, etc.) is filtered out — it adds no
// investigative value and clutters the page.
const RENDERABLE_IMAGE_TYPES = new Set([ // Photographs (all flavors) "photo", "photograph", "photograph_of_document", "ufo_photograph", "surveillance_photo", "surveillance_photograph", "surveillance_infrared_photo", "infrared_photo", "infrared_photo_detail", "thermal_infrared_photo", "thermal_infrared_surveillance_frame", "aerial_surveillance", "portrait", // Drawings & sketches "drawing", "sketch", "hand_drawn_diagram", "doodle", "artist_rendering", // Diagrams & maps "diagram", "map", "map_diagram", "geographical_map", "anatomical_diagram", "ufo_diagram_technical", // Illustrations "illustration", "illustration_ufo_encounter", "illustration_ufo_sighting", "cartoon_illustration", "composite_rendering", // Newspaper / magazine clippings (informational content) "newspaper_clipping", "newspaper_article_eyewitness", "newspaper_article_with_headline", "newspaper_article_mass_sighting", "newspaper_article_regional_sighting", "newspaper_clipping_collage", "newspaper_clipping_composite", "newspaper_clippings_display", "newspaper_collage", "magazine_page", "magazine_cover", "advertisement_illustration", "clipping", // UAP object crops "uap_object", "uap_object_crop", "uap_object_detail", "uap_detail_crop", "uap_closeup", "aerial_object", "aerial_objects_closeup", "sensor_frame", "sensor_footage", "sensor_overlay", "sensor_reticle_with_uap", "thermal_sensor_frame", "infrared_camera_frame", ]); /** Renders one chunk. Returns null when fm.type is in NOISE_CHUNK_TYPES, or when the chunk is an image (fm.type is "image" or fm.image_type is truthy) whose image_type is not in RENDERABLE_IMAGE_TYPES. lang decides whether content_en, content_pt, or both are shown, and a UAP flag line is added when fm.ufo_anomaly_detected is set. bbox falls back to a full-width strip (x 0, y 0, w 1, h 0.05) when fm.bbox is missing. NOTE(review): the element markup is not visible in this view and docId/bbox are not referenced in the visible part; confirm the remaining branches against the original file. */ function ChunkCard({ c, lang, docId, }: { c: ParsedChunk; lang: Lang; docId: string; }) { const { fm, content_en, content_pt } = c; const bbox = fm.bbox ?? 
{ x: 0, y: 0, w: 1, h: 0.05 }; const showEn = lang === "en" || lang === "both"; const showPt = lang === "pt-br" || lang === "both"; // Skip pure visual noise (blank areas, binder holes, separators, bare redaction bars) if (typeof fm.type === "string" && NOISE_CHUNK_TYPES.has(fm.type)) return null; // Image chunks: only render the crop if image_type is on the allowlist // (photographs, drawings, maps, diagrams, illustrations, clippings, UAP // crops, sensor frames). Seals, stamps, signatures, labels, envelopes, // marks, holes, etc. produce visually meaningless crops — drop them. if (fm.type === "image" || fm.image_type) { const it = typeof fm.image_type === "string" ? fm.image_type : ""; if (!RENDERABLE_IMAGE_TYPES.has(it)) return null; } // Anchor for citation jumps const anchor = ( ); } // Paragraph (default) return (
{anchor} {showEn &&

{content_en}

} {showPt && lang === "both" &&

{content_pt}

} {!showEn && showPt &&

{content_pt}

} {fm.ufo_anomaly_detected && (
🛸 UAP flag: {fm.ufo_anomaly_type ?? "anomaly"} — {fm.ufo_anomaly_rationale}
)}
); } /** Returns true when a chunk should be hidden: either its type is in NOISE_CHUNK_TYPES, or it is image-class (type is "image" or image_type is any string) and its image_type is not in RENDERABLE_IMAGE_TYPES. NOTE(review): ChunkCard repeats this filtering but tests image_type for truthiness instead of typeof string, so a non-image chunk with an empty-string image_type is hidden here yet passes ChunkCard's check; confirm which is intended. */ function isNoiseChunk(c: ParsedChunk): boolean { const t = c.fm.type; const it = c.fm.image_type; if (typeof t === "string" && NOISE_CHUNK_TYPES.has(t)) return true; // For chunks that have an image_type but ARE image-class (type === "image"), // hide unless they're on the allowlist. Non-image chunks with a stray // image_type field (e.g. type "stamp" with image_type "stamp") are also // skipped — we don't render visual crops for them. const isImageChunk = t === "image" || typeof it === "string"; if (isImageChunk && !(typeof it === "string" && RENDERABLE_IMAGE_TYPES.has(it))) { return true; } return false; } /** Renders the chunks of one page after dropping every chunk isNoiseChunk rejects; returns null when no visible chunk remains, otherwise shows the page number and the visible-chunk count above the chunks. */ function PageGroup({ page, chunks, lang, docId, }: { page: number; chunks: ParsedChunk[]; lang: Lang; docId: string; }) { const visibleChunks = chunks.filter((c) => !isNoiseChunk(c)); if (visibleChunks.length === 0) return null; return (

▍ página {page} — {visibleChunks.length} trechos

ver scan original →
{visibleChunks.map((c) => ( ))}
); } /** Top-level renderer. Keeps the language and mode selections in component state, starting at "pt-br" and "paged". In paged mode it maps over chunksByPage one page at a time; otherwise it flattens the chunks of every page into a single sequence. NOTE(review): useState is called without a type argument here, so the state would infer as string rather than Lang / Mode; confirm against the original file. */ export function DocRendererV2({ docId, chunksByPage, }: { docId: string; chunksByPage: Array<[number, ParsedChunk[]]>; }) { const [lang, setLang] = useState("pt-br"); const [mode, setMode] = useState("paged"); return (
{(["pt-br", "en", "both"] as Lang[]).map((l) => ( ))}
{(["paged", "flow"] as Mode[]).map((m) => ( ))}
{mode === "paged" ? ( chunksByPage.map(([page, chunks]) => ( )) ) : (
{chunksByPage.flatMap(([, chunks]) => chunks.map((c) => ), )}
)}
); }