"use client";

import { useState } from "react";
import Image from "next/image";
import { FmBboxThumb } from "@/components/fm/bbox-thumb";
import { EntityModal } from "@/components/entity-modal";
import { FmContentChip, FmPageTypeChip, FmClassification } from "@/components/fm/badges";
import type { Inline, RenderedPage } from "@/lib/doc-renderer";
import type { Match } from "@/components/reader-content";

interface Props {
  pages: RenderedPage[];
  locale: "en" | "pt-br";
}

/**
 * Splits `text` into consecutive plain and matched segments.
 *
 * Matches are treated as half-open `[start, end)` ranges into `text` and are
 * processed in ascending `start` order. A match is skipped when it overlaps an
 * earlier accepted match, is empty or inverted (`end <= start`), starts at or
 * beyond the end of `text`, or has a non-numeric bound. As a result no
 * zero-length matched segment is emitted and no part of `text` is emitted
 * twice.
 *
 * @param text    The string to segment.
 * @param matches Candidate ranges; the array itself is not mutated.
 * @returns Segments that concatenate back to `text`. Segments that cover an
 *          accepted match carry it in `match`. When nothing is accepted the
 *          result is the single segment `[{ text }]`.
 */
function segmentText(text: string, matches: Match[]): Array<{ text: string; match?: Match }> {
  if (!matches || matches.length === 0) return [{ text }];
  const sorted = [...matches].sort((a, b) => a.start - b.start);
  const segs: Array<{ text: string; match?: Match }> = [];
  let cursor = 0;
  for (const m of sorted) {
    // Written as a positive predicate so that NaN bounds also fail it.
    const usable = m.start >= cursor && m.start < text.length && m.end > m.start;
    if (!usable) continue;
    if (m.start > cursor) segs.push({ text: text.slice(cursor, m.start) });
    // slice() clamps an `end` that runs past the text.
    segs.push({ text: text.slice(m.start, m.end), match: m });
    cursor = m.end;
  }
  if (cursor < text.length) segs.push({ text: text.slice(cursor) });
  // Mirror the no-matches path when every match was rejected on empty text.
  return segs.length > 0 ? segs : [{ text }];
}

/** Splits OCR into N+1 segments using bbox.y positions to mark cut points. 
*/
// Each y is turned into a line index by multiplying it by the number of OCR
// lines, rounding, and clamping to [0, total]. The indices are sorted ascending
// and used as cut points, so the result always has ys.length + 1 entries (some
// may be empty strings). With no ys the whole OCR comes back as one segment.
// NOTE(review): presumably each y is a 0..1 fraction of the page height — confirm.
// NOTE(review): cuts are sorted here, but the caller pairs segs[i] with
// inline[i] in array order; that assumes `inline` is already ordered by bboxY — verify.
function splitOcrByY(ocr: string, ys: number[]): string[] {
  const lines = ocr.split("\n");
  const total = lines.length;
  // split() always yields at least one element, so in practice only the
  // empty-ys condition can trigger this early return.
  if (total === 0 || ys.length === 0) return [ocr];
  const cuts = [...ys].map((y) => Math.max(0, Math.min(total, Math.round(y * total))));
  cuts.sort((a, b) => a - b);
  const segs: string[] = [];
  let prev = 0;
  for (const c of cuts) {
    segs.push(lines.slice(prev, c).join("\n"));
    prev = c;
  }
  // Everything after the last cut point.
  segs.push(lines.slice(prev).join("\n"));
  return segs;
}

// English display labels keyed by image type; `other` is the generic fallback.
const IMAGE_TYPE_LABEL_EN: Record = {
  photo: "Photograph",
  sketch: "Sketch",
  map: "Map",
  chart: "Chart",
  stamp: "Stamp",
  signature: "Signature",
  redaction: "Redaction",
  logo: "Logo",
  seal: "Seal",
  diagram: "Diagram",
  other: "Image",
};

// Portuguese display labels (used when locale is "pt-br"), same keys as the English table.
const IMAGE_TYPE_LABEL_PT: Record = {
  photo: "Fotografia",
  sketch: "Esboço",
  map: "Mapa",
  chart: "Gráfico",
  stamp: "Carimbo",
  signature: "Assinatura",
  redaction: "Censura",
  logo: "Logo",
  seal: "Selo",
  diagram: "Diagrama",
  other: "Imagem",
};

/**
 * Renders every page of a document.
 *
 * For each page the OCR text is split with splitOcrByY at the bboxY of each
 * inline item, so that text segment i is emitted before inline item i and the
 * final segment follows all inline items. Each non-empty segment is passed
 * through segmentText; activating a matched span (its key handler accepts
 * Enter or Space) stores the match's class and entity id in `modalEntity`,
 * and the modal is rendered while that state is non-null.
 *
 * @param pages  Pages to render.
 * @param locale Selects the Portuguese or English image-type labels and the
 *               `visionPt` / `visionEn` description.
 *
 * NOTE(review): segmentText is called with a single OCR segment but with the
 * page's full `p.matches` list. If match offsets are relative to the whole
 * page OCR (presumably — verify against whatever produces `matches`), the
 * highlights will be misplaced in every segment after the first.
 */
export function FullDocRenderer({ pages, locale }: Props) {
  const [modalEntity, setModalEntity] = useState<{ cls: string; id: string } | null>(null);
  const labels = locale === "pt-br" ? IMAGE_TYPE_LABEL_PT : IMAGE_TYPE_LABEL_EN;
  return ( <>
{pages.map((p) => { const ys = p.inline.map((i) => i.bboxY); const segs = splitOcrByY(p.ocr, ys); // segs[i] comes BEFORE inline[i] (when i < inline.length). // The last seg (segs[inline.length]) comes AFTER all inlines. return (
▍ {p.pageStem} {p.pageType && } {p.contentClassification?.slice(0, 4).map((c) => ( [0]["kind"]} /> ))} {p.classification && ( [0]["level"]} /> )}
open page →
{/* Bilingual vision description */} {(locale === "pt-br" ? p.visionPt : p.visionEn) && (
{locale === "pt-br" ? p.visionPt : p.visionEn}
)} {/* OCR + interleaved inlines */}
{segs.map((seg, i) => { const inline = p.inline[i]; return (
{/* The text segment */} {seg && (
                          {segmentText(seg, p.matches as Match[]).map((s, j) =>
                            s.match ? (
                               setModalEntity({ cls: s.match!.class, id: s.match!.entity_id })}
                                onKeyDown={(e) => {
                                  if (e.key === "Enter" || e.key === " ") setModalEntity({ cls: s.match!.class, id: s.match!.entity_id });
                                }}
                              >
                                {s.text}
                              
                            ) : (
                              {s.text}
                            ),
                          )}
                        
)} {/* Inline block at this Y position */} {inline && }
); })}
); })}
{modalEntity && ( setModalEntity(null)} /> )} ); }
/**
 * Renders one inline item according to `inline.kind`: "image", "table",
 * "redaction", and — as the fall-through case — a signature.
 *
 * For images, the document id is read from the sixth "/"-separated component
 * of `inline.src`, and the page number is parsed (base 10) from the text
 * between "/p-" and ".png", falling back to "001" when "/p-" is absent.
 * Tables whose `csv` has more than one row use the first row as headers and
 * the remaining rows as the body; otherwise a fallback block is rendered.
 *
 * @param inline Inline item to render.
 * @param labels Image-type labels for the active locale; `labels.other` is
 *               used when the image type is missing or has no entry.
 */
function InlineBlock({ inline, labels }: { inline: Inline; labels: Record }) { if (inline.kind === "image") { const docId = inline.src.split("/")[5]; // /api/static/processing/png//p-NNN.png const padded = inline.src.split("/p-")[1]?.replace(".png", "") ?? "001"; const pageNum = parseInt(padded, 10); return (
{labels[inline.imageType ?? "other"] ?? labels.other} {inline.caption ? — {inline.caption} : null}
); } if (inline.kind === "table") { if (inline.csv && inline.csv.length > 1) { const [headers, ...rows] = inline.csv; return (
{headers.map((h, i) => )} {rows.map((row, i) => ( {row.map((c, j) => )} ))}
{h}
{c}
); } // Fallback: bbox crop of the table area return (
Table {inline.colEstimate && inline.rowEstimate ? · {inline.colEstimate}×{inline.rowEstimate} (not extracted) : null} {inline.headersSummary ? — {inline.headersSummary} : null}
); } if (inline.kind === "redaction") { return (
▓▓▓ {inline.code ?? "REDACTED"} ▓▓▓ {inline.description && — {inline.description}}
); } // signature return (
✍ {inline.signer ?? "signature"}
); }