disclosure-bureau/web/components/full-doc-renderer.tsx

233 lines
9.2 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use client";
import { useState } from "react";
import Image from "next/image";
import { FmBboxThumb } from "@/components/fm/bbox-thumb";
import { EntityModal } from "@/components/entity-modal";
import { FmContentChip, FmPageTypeChip, FmClassification } from "@/components/fm/badges";
import type { Inline, RenderedPage } from "@/lib/doc-renderer";
import type { Match } from "@/components/reader-content";
/** Props accepted by FullDocRenderer. */
interface Props {
/** Pages to render; each one becomes its own <section>, in array order. */
pages: RenderedPage[];
/** UI locale: selects the image-type label table and which vision description (visionPt / visionEn) is shown. */
locale: "en" | "pt-br";
}
/**
 * Splits `text` into consecutive segments, tagging the ranges covered by
 * `matches` so the caller can render them as entity links.
 *
 * Matches are processed in ascending `start` order. A match is ignored when it
 * - overlaps a match that was already emitted (`start` before the cursor),
 * - is empty or inverted (`end <= start`), or
 * - starts at or beyond the end of `text`.
 * A match that runs past the end of `text` is truncated to the text length.
 *
 * @param text    The string to segment.
 * @param matches Ranges (`start` inclusive, `end` exclusive) relative to `text`.
 * @returns Segments whose `text` values concatenate back to `text`; segments
 *          produced by a match carry that match in `match`.
 */
function segmentText(text: string, matches: Match[]): Array<{ text: string; match?: Match }> {
  if (!matches || matches.length === 0) return [{ text }];
  // Copy before sorting so the caller's array is not reordered.
  const ordered = [...matches].sort((a, b) => a.start - b.start);
  const segs: Array<{ text: string; match?: Match }> = [];
  let cursor = 0;
  for (const m of ordered) {
    const end = Math.min(m.end, text.length);
    // Overlapping with an earlier match: first one wins.
    if (m.start < cursor) continue;
    // Empty, inverted or fully out-of-range: emitting it would create an empty
    // clickable span, and an inverted range would move the cursor backwards
    // and duplicate text.
    if (end <= m.start) continue;
    if (m.start > cursor) segs.push({ text: text.slice(cursor, m.start) });
    segs.push({ text: text.slice(m.start, end), match: m });
    cursor = end;
  }
  if (cursor < text.length) segs.push({ text: text.slice(cursor) });
  // Keep the "no usable match" result shape identical to the early return above.
  return segs.length > 0 ? segs : [{ text }];
}
/**
 * Splits OCR text into N+1 line-based segments, where N is `ys.length`.
 *
 * Each `y` is scaled by the number of lines, rounded to a line index and
 * clamped to `[0, lineCount]`; the resulting cut points are sorted ascending.
 * Segment `k` holds the lines between cut `k-1` and cut `k`, and the final
 * segment holds everything after the last cut. Segments may be empty strings.
 *
 * A `y` that does not yield a number (NaN) is treated as a cut at line 0 so it
 * cannot corrupt the running position and cause lines to be emitted twice.
 *
 * @param ocr Page text, one OCR line per "\n"-separated line.
 * @param ys  Vertical positions of the cut points — presumably normalized to
 *            0..1 of the page height (TODO confirm against the bbox producer).
 * @returns `ys.length + 1` segments, or `[ocr]` when `ys` is empty.
 */
function splitOcrByY(ocr: string, ys: number[]): string[] {
  if (ys.length === 0) return [ocr];
  // split() always returns at least one element, so lineCount >= 1.
  const lines = ocr.split("\n");
  const lineCount = lines.length;
  const cuts = ys
    .map((y) => {
      const idx = Math.round(y * lineCount);
      return Number.isNaN(idx) ? 0 : Math.max(0, Math.min(lineCount, idx));
    })
    .sort((a, b) => a - b);
  const segs: string[] = [];
  let prev = 0;
  for (const cut of cuts) {
    segs.push(lines.slice(prev, cut).join("\n"));
    prev = cut;
  }
  segs.push(lines.slice(prev).join("\n"));
  return segs;
}
/** English captions for inline images, keyed by image type; `other` is the fallback entry. */
const IMAGE_TYPE_LABEL_EN: Record<string, string> = {
  photo: "Photograph",
  sketch: "Sketch",
  map: "Map",
  chart: "Chart",
  stamp: "Stamp",
  signature: "Signature",
  redaction: "Redaction",
  logo: "Logo",
  seal: "Seal",
  diagram: "Diagram",
  other: "Image",
};
/** Brazilian Portuguese captions for inline images, keyed by image type; `other` is the fallback entry. */
const IMAGE_TYPE_LABEL_PT: Record<string, string> = {
  photo: "Fotografia",
  sketch: "Esboço",
  map: "Mapa",
  chart: "Gráfico",
  stamp: "Carimbo",
  signature: "Assinatura",
  redaction: "Censura",
  logo: "Logo",
  seal: "Selo",
  diagram: "Diagrama",
  other: "Imagem",
};
/**
 * Renders every page of a document as one continuous article.
 *
 * For each page it shows a header (anchor, page-type / content / classification
 * chips and a link to the single-page view), the vision description for the
 * active locale, and the OCR text with inline blocks (images, tables,
 * redactions, signatures) interleaved at the line positions derived from each
 * block's `bboxY`. Entity matches inside the text are keyboard-accessible
 * buttons that open the EntityModal.
 *
 * @param pages  Pages to render, in order.
 * @param locale Selects the image-type labels and the vision description language.
 */
export function FullDocRenderer({ pages, locale }: Props) {
  const [modalEntity, setModalEntity] = useState<{ cls: string; id: string } | null>(null);
  const labels = locale === "pt-br" ? IMAGE_TYPE_LABEL_PT : IMAGE_TYPE_LABEL_EN;
  return (
    <>
      <article className="space-y-12">
        {pages.map((p) => {
          // splitOcrByY sorts its cut points, so the inline blocks must be in
          // the same (ascending Y) order for segs[i] / inlines[i] to line up.
          // The sort is stable, so an already-ordered list is left unchanged.
          const inlines = [...p.inline].sort((a, b) => a.bboxY - b.bboxY);
          const segs = splitOcrByY(p.ocr, inlines.map((i) => i.bboxY));
          const vision = locale === "pt-br" ? p.visionPt : p.visionEn;
          // segs[i] comes BEFORE inlines[i] (when i < inlines.length).
          // The last seg (segs[inlines.length]) comes AFTER all inlines.
          return (
            <section key={p.pageStem} id={p.pageStem} className="border-b border-[rgba(0,255,156,0.12)] pb-8">
              <header className="flex items-center justify-between gap-2 mb-3">
                <div className="flex items-center gap-2">
                  <a href={`#${p.pageStem}`} className="font-mono text-sm text-[#00ff9c]">
                    {p.pageStem}
                  </a>
                  {p.pageType && <FmPageTypeChip type={p.pageType} />}
                  {p.contentClassification?.slice(0, 4).map((c) => (
                    <FmContentChip key={c} kind={c as Parameters<typeof FmContentChip>[0]["kind"]} />
                  ))}
                  {p.classification && (
                    <FmClassification level={p.classification as Parameters<typeof FmClassification>[0]["level"]} />
                  )}
                </div>
                <a
                  href={`/d/${p.pageId.split("/")[0]}/${p.pageStem}`}
                  className="font-mono text-[10px] text-[#7fdbff] hover:text-[#00ff9c] uppercase tracking-widest"
                >
                  open page
                </a>
              </header>
              {/* Bilingual vision description */}
              {vision && (
                <blockquote className="border-l-2 border-[#00ff9c] pl-3 mb-4 text-sm text-[#8896aa] italic">
                  {vision}
                </blockquote>
              )}
              {/* OCR + interleaved inlines */}
              <div className="reader-content">
                {segs.map((seg, i) => {
                  const inline = inlines[i];
                  // NOTE(review): every segment is matched against the page's full
                  // match list, with start/end interpreted relative to `seg`. If
                  // Match offsets are relative to the whole page OCR, highlights in
                  // segments after the first cut would be misplaced — confirm
                  // against the producer of `p.matches`.
                  const pieces = segmentText(seg, p.matches as Match[]);
                  return (
                    <div key={i}>
                      {/* The text segment */}
                      {seg && (
                        <pre className="font-sans whitespace-pre-wrap text-sm leading-relaxed text-[#c8d4e6]">
                          {pieces.map((s, j) => {
                            const match = s.match;
                            if (!match) return <span key={j}>{s.text}</span>;
                            const openEntity = () => setModalEntity({ cls: match.class, id: match.entity_id });
                            return (
                              <span
                                key={j}
                                className="entity-link"
                                data-class={match.class}
                                role="button"
                                tabIndex={0}
                                onClick={openEntity}
                                onKeyDown={(e) => {
                                  if (e.key === "Enter" || e.key === " ") {
                                    // Without this, Space also scrolls the page.
                                    e.preventDefault();
                                    openEntity();
                                  }
                                }}
                              >
                                {s.text}
                              </span>
                            );
                          })}
                        </pre>
                      )}
                      {/* Inline block at this Y position */}
                      {inline && <InlineBlock inline={inline} labels={labels} />}
                    </div>
                  );
                })}
              </div>
            </section>
          );
        })}
      </article>
      {modalEntity && (
        <EntityModal
          cls={modalEntity.cls}
          id={modalEntity.id}
          open={!!modalEntity}
          onClose={() => setModalEntity(null)}
        />
      )}
    </>
  );
}
/**
 * Renders one inline block found on a page, chosen by `inline.kind`:
 * - "image": a bbox crop of the page with a localized type label and caption;
 * - "table": an HTML table when extracted CSV rows are available (header row
 *   plus at least one data row), otherwise a bbox crop of the table area;
 * - "redaction": a badge with the redaction code and optional description;
 * - anything else is rendered as a signature badge.
 *
 * @param inline The inline block to render.
 * @param labels Localized image-type labels; `labels.other` is the fallback.
 */
function InlineBlock({ inline, labels }: { inline: Inline; labels: Record<string, string> }) {
  if (inline.kind === "image") {
    const docId = inline.src.split("/")[5]; // /api/static/processing/png/<doc>/p-NNN.png
    const padded = inline.src.split("/p-")[1]?.replace(".png", "") ?? "001";
    const parsedPage = parseInt(padded, 10);
    // A non-numeric page suffix would otherwise hand NaN to the thumbnail;
    // fall back to page 1, same as the "001" default used when "/p-" is absent.
    const pageNum = Number.isNaN(parsedPage) ? 1 : parsedPage;
    return (
      <figure className="my-4 inline-block">
        <FmBboxThumb
          bbox={inline.bbox}
          docId={docId}
          pageNum={pageNum}
          width={Math.min(640, Math.round(inline.bbox.w * 720))}
          height={Math.min(800, Math.round(inline.bbox.h * 960))}
          label={inline.caption}
        />
        <figcaption className="mt-1 font-mono text-[10px] text-[#8896aa] tracking-widest uppercase">
          {labels[inline.imageType ?? "other"] ?? labels.other}
          {inline.caption ? <span className="ml-2 text-[#c8d4e6] normal-case tracking-normal"> {inline.caption}</span> : null}
        </figcaption>
      </figure>
    );
  }
  if (inline.kind === "table") {
    if (inline.csv && inline.csv.length > 1) {
      // First CSV row is the header; the remaining rows are the body.
      const [headers, ...rows] = inline.csv;
      return (
        <div className="md-table-wrap my-4">
          <table>
            <thead><tr>{headers.map((h, i) => <th key={i}>{h}</th>)}</tr></thead>
            <tbody>
              {rows.map((row, i) => (
                <tr key={i}>{row.map((c, j) => <td key={j}>{c}</td>)}</tr>
              ))}
            </tbody>
          </table>
        </div>
      );
    }
    // Fallback: bbox crop of the table area
    return (
      <figure className="my-4">
        <FmBboxThumb
          bbox={inline.bbox}
          docId={inline.docId}
          pageNum={inline.pageNum}
          width={Math.min(720, Math.round(inline.bbox.w * 800))}
          height={Math.min(800, Math.round(inline.bbox.h * 1000))}
          label={inline.headersSummary}
        />
        <figcaption className="mt-1 font-mono text-[10px] text-[#8896aa] tracking-widest uppercase">
          Table
          {inline.colEstimate && inline.rowEstimate
            ? <span className="ml-1 text-[#5a6678]">· {inline.colEstimate}×{inline.rowEstimate} (not extracted)</span>
            : null}
          {inline.headersSummary
            ? <span className="ml-2 text-[#c8d4e6] normal-case tracking-normal"> {inline.headersSummary}</span>
            : null}
        </figcaption>
      </figure>
    );
  }
  if (inline.kind === "redaction") {
    return (
      <div className="my-2 inline-flex items-center gap-2 px-2 py-1 border border-[#ff3344] bg-[rgba(255,51,68,0.06)] rounded text-[11px] font-mono">
        <span className="text-[#ff3344]"> {inline.code ?? "REDACTED"} </span>
        {inline.description && <span className="text-[#8896aa]"> {inline.description}</span>}
      </div>
    );
  }
  // signature
  return (
    <div className="my-2 inline-flex items-center gap-2 px-2 py-1 border border-[#bb6bd9] bg-[rgba(187,107,217,0.06)] rounded text-[11px] font-mono">
      <span className="text-[#bb6bd9]"> {inline.signer ?? "signature"}</span>
    </div>
  );
}