// Source: disclosure-bureau/web/components/full-doc-renderer.tsx (TypeScript, 234 lines, 9.2 KiB)


"use client";
import { useState } from "react";
import Image from "next/image";
import { FmBboxThumb } from "@/components/fm/bbox-thumb";
import { EntityModal } from "@/components/entity-modal";
import { FmContentChip, FmPageTypeChip, FmClassification } from "@/components/fm/badges";
import type { Inline, RenderedPage } from "@/lib/doc-renderer";
import type { Match } from "@/components/reader-content";
/** Props accepted by FullDocRenderer. */
interface Props {
/** Pages to render; each one becomes a <section>, in array order. */
pages: RenderedPage[];
/** UI locale: selects which vision description (visionPt / visionEn) and which image-type label table is used. */
locale: "en" | "pt-br";
}
/**
 * Splits `text` into consecutive pieces, tagging the pieces covered by an
 * entity match so the caller can render them as links.
 *
 * Matches are processed in ascending `start` order. A match is ignored when it
 * - overlaps a match that was already emitted (starts before the cursor),
 * - starts at or beyond the end of `text`, or
 * - is empty or inverted (`end <= start`), or has non-numeric offsets.
 * A match that runs past the end of `text` is clamped to the text length.
 * Ignoring these avoids emitting empty clickable pieces and, for inverted
 * ranges, avoids rewinding the cursor and repeating text.
 *
 * @param text    The string to segment.
 * @param matches Entity matches whose `start`/`end` are offsets into `text`.
 * @returns Pieces in document order; `match` is set only on matched pieces.
 */
function segmentText(text: string, matches: Match[]): Array<{ text: string; match?: Match }> {
  if (!matches || matches.length === 0) return [{ text }];

  const sorted = [...matches].sort((a, b) => a.start - b.start);
  const segs: Array<{ text: string; match?: Match }> = [];
  let cursor = 0;

  for (const m of sorted) {
    const end = Math.min(m.end, text.length);
    // Overlaps an earlier match (or has a negative start).
    if (m.start < cursor) continue;
    // Written as a negation so NaN offsets are rejected as well.
    if (!(end > m.start)) continue;

    if (m.start > cursor) segs.push({ text: text.slice(cursor, m.start) });
    segs.push({ text: text.slice(m.start, end), match: m });
    cursor = end;
  }

  if (cursor < text.length) segs.push({ text: text.slice(cursor) });
  return segs;
}
/**
 * Cuts OCR text into `ys.length + 1` consecutive chunks made of whole lines.
 *
 * Every entry of `ys` is multiplied by the number of lines, rounded, and
 * clamped to `[0, lineCount]` to obtain a line index; the indices are then
 * sorted ascending and used as cut points. Chunk `k` holds the lines between
 * cut `k - 1` and cut `k`; the final chunk holds everything after the last cut.
 * Chunks may be empty strings. With no `ys` the text is returned unsplit.
 *
 * NOTE(review): `ys` are presumably normalized (0–1) vertical bbox positions —
 * confirm against the Inline type.
 *
 * @param ocr Raw OCR text, lines separated by "\n".
 * @param ys  Vertical positions marking where to cut.
 * @returns The chunks, in top-to-bottom order.
 */
function splitOcrByY(ocr: string, ys: number[]): string[] {
  const rows = ocr.split("\n");
  const rowCount = rows.length;
  if (rowCount === 0 || ys.length === 0) return [ocr];

  const toRowIndex = (y: number): number =>
    Math.max(0, Math.min(rowCount, Math.round(y * rowCount)));
  const boundaries = ys.map((y) => toRowIndex(y)).sort((lo, hi) => lo - hi);

  // Chunk k starts where chunk k-1 ended; the last chunk has no end boundary,
  // so `boundaries[k]` is undefined there and slice runs to the end.
  const starts = [0, ...boundaries];
  return starts.map((from, k) => rows.slice(from, boundaries[k]).join("\n"));
}
/**
 * English caption labels for inline images, keyed by image type.
 * `other` doubles as the fallback label when a type has no entry.
 */
const IMAGE_TYPE_LABEL_EN: Record<string, string> = {
  photo: "Photograph",
  sketch: "Sketch",
  map: "Map",
  chart: "Chart",
  stamp: "Stamp",
  signature: "Signature",
  redaction: "Redaction",
  logo: "Logo",
  seal: "Seal",
  diagram: "Diagram",
  other: "Image",
};
/**
 * Brazilian-Portuguese caption labels for inline images, keyed by image type.
 * `other` doubles as the fallback label when a type has no entry.
 */
const IMAGE_TYPE_LABEL_PT: Record<string, string> = {
  photo: "Fotografia",
  sketch: "Esboço",
  map: "Mapa",
  chart: "Gráfico",
  stamp: "Carimbo",
  signature: "Assinatura",
  redaction: "Censura",
  logo: "Logo",
  seal: "Selo",
  diagram: "Diagrama",
  other: "Imagem",
};
/**
 * Character offset inside `ocr` at which each segment returned by
 * `splitOcrByY(ocr, ys)` begins (one entry per segment).
 *
 * This deliberately mirrors splitOcrByY's cut rule — round(y * lineCount),
 * clamped to [0, lineCount], sorted ascending — so the two must be kept in
 * sync. It is needed because the segment strings alone do not say where they
 * sit in the page text.
 */
function segmentStartOffsets(ocr: string, ys: number[]): number[] {
  if (ys.length === 0) return [0];
  const lines = ocr.split("\n");
  const total = lines.length;

  // lineStarts[k] = offset of line k; the extra last entry is one past the text.
  const lineStarts: number[] = [0];
  let offset = 0;
  for (const line of lines) {
    offset += line.length + 1; // +1 for the "\n" removed by split
    lineStarts.push(offset);
  }

  const cuts = ys
    .map((y) => Math.max(0, Math.min(total, Math.round(y * total))))
    .sort((a, b) => a - b);
  return [0, ...cuts.map((c) => lineStarts[c] ?? 0)];
}

/**
 * Returns the matches that overlap the window `[base, base + length)` of the
 * page text, re-expressed as offsets relative to `base` and clamped to the
 * window, so they can be applied to that segment's own string.
 */
function matchesForSegment(matches: Match[], base: number, length: number): Match[] {
  const limit = base + length;
  return matches
    .filter((m) => m.start < limit && m.end > base)
    .map((m) => ({
      ...m,
      start: Math.max(0, m.start - base),
      end: Math.min(length, m.end - base),
    }));
}

/**
 * Renders every page of a document as one long article.
 *
 * Each page becomes a `<section>` containing: a header (anchor to the page
 * stem, type / content / classification chips, and an "open page" link), the
 * vision description for the active locale, and the OCR text with inline
 * blocks (images, tables, redactions, signatures) interleaved at the cut
 * points derived from their `bboxY`. Entity matches inside the OCR text are
 * rendered as keyboard-accessible buttons that open an EntityModal.
 *
 * @param pages  Pages to render, in order.
 * @param locale Chooses the vision description and the image label table.
 */
export function FullDocRenderer({ pages, locale }: Props) {
  const [modalEntity, setModalEntity] = useState<{ cls: string; id: string } | null>(null);
  const labels = locale === "pt-br" ? IMAGE_TYPE_LABEL_PT : IMAGE_TYPE_LABEL_EN;

  return (
    <>
      <article className="space-y-12">
        {pages.map((p) => {
          // splitOcrByY sorts its cut points, so the inlines must be in the same
          // (ascending Y) order for segs[i] / inlines[i] to line up.
          const inlines = [...p.inline].sort((a, b) => a.bboxY - b.bboxY);
          const ys = inlines.map((i) => i.bboxY);
          const segs = splitOcrByY(p.ocr, ys);
          const offsets = segmentStartOffsets(p.ocr, ys);
          // NOTE(review): assumes match offsets index into the full page OCR
          // (p.ocr); they are shifted per segment below. Confirm with the producer.
          const pageMatches = (p.matches as Match[] | undefined) ?? [];
          const vision = locale === "pt-br" ? p.visionPt : p.visionEn;
          // segs[i] comes BEFORE inlines[i] (when i < inlines.length).
          // The last seg (segs[inlines.length]) comes AFTER all inlines.
          return (
            <section key={p.pageStem} id={p.pageStem} className="border-b border-[rgba(0,255,156,0.12)] pb-8">
              <header className="flex items-center justify-between gap-2 mb-3">
                <div className="flex items-center gap-2">
                  <a href={`#${p.pageStem}`} className="font-mono text-sm text-[#00ff9c]">
                    {p.pageStem}
                  </a>
                  {p.pageType && <FmPageTypeChip type={p.pageType} />}
                  {p.contentClassification?.slice(0, 4).map((c) => (
                    <FmContentChip key={c} kind={c as Parameters<typeof FmContentChip>[0]["kind"]} />
                  ))}
                  {p.classification && (
                    <FmClassification level={p.classification as Parameters<typeof FmClassification>[0]["level"]} />
                  )}
                </div>
                <a
                  href={`/d/${p.pageId.split("/")[0]}/${p.pageStem}`}
                  className="font-mono text-[10px] text-[#7fdbff] hover:text-[#00ff9c] uppercase tracking-widest"
                >
                  open page
                </a>
              </header>
              {/* Bilingual vision description */}
              {vision && (
                <blockquote className="border-l-2 border-[#00ff9c] pl-3 mb-4 text-sm text-[#8896aa] italic">
                  {vision}
                </blockquote>
              )}
              {/* OCR + interleaved inlines */}
              <div className="reader-content">
                {segs.map((seg, i) => {
                  const inline = inlines[i];
                  const localMatches = matchesForSegment(pageMatches, offsets[i] ?? 0, seg.length);
                  return (
                    <div key={i}>
                      {/* The text segment */}
                      {seg && (
                        <pre className="font-sans whitespace-pre-wrap text-sm leading-relaxed text-[#c8d4e6]">
                          {segmentText(seg, localMatches).map((s, j) => {
                            const match = s.match;
                            if (!match) return <span key={j}>{s.text}</span>;
                            const openEntity = () =>
                              setModalEntity({ cls: match.class, id: match.entity_id });
                            return (
                              <span
                                key={j}
                                className="entity-link"
                                data-class={match.class}
                                role="button"
                                tabIndex={0}
                                onClick={openEntity}
                                onKeyDown={(e) => {
                                  if (e.key === "Enter" || e.key === " ") {
                                    // Stop Space from scrolling the page while activating.
                                    e.preventDefault();
                                    openEntity();
                                  }
                                }}
                              >
                                {s.text}
                              </span>
                            );
                          })}
                        </pre>
                      )}
                      {/* Inline block at this Y position */}
                      {inline && <InlineBlock inline={inline} labels={labels} />}
                    </div>
                  );
                })}
              </div>
            </section>
          );
        })}
      </article>
      {modalEntity && (
        <EntityModal
          cls={modalEntity.cls}
          id={modalEntity.id}
          open={!!modalEntity}
          onClose={() => setModalEntity(null)}
        />
      )}
    </>
  );
}
/**
 * Reads the page number out of an image path whose file name looks like
 * `p-NNN.png` (e.g. `/api/static/processing/png/<doc>/p-007.png` -> 7).
 * Only the last path segment is inspected, so a directory that happens to
 * start with `p-` cannot be mistaken for the page file. Returns 1 when no
 * numeric page can be found, instead of letting NaN reach the thumbnail.
 */
function pageNumFromSrc(src: string): number {
  const parts = src.split("/");
  const fileName = parts[parts.length - 1] ?? "";
  const digits = /^p-(\d+)/.exec(fileName)?.[1];
  const parsed = digits === undefined ? NaN : parseInt(digits, 10);
  return Number.isNaN(parsed) ? 1 : parsed;
}

/**
 * Renders one inline element of a page, chosen by `inline.kind`:
 * - "image": a bbox thumbnail with a localized type label and optional caption;
 * - "table": an HTML table when extracted CSV rows exist (header + at least
 *   one row), otherwise a bbox thumbnail of the table area with its estimated
 *   size and header summary;
 * - "redaction": a badge with the redaction code (default "REDACTED") and
 *   optional description;
 * - anything else is treated as a signature badge showing the signer.
 *
 * @param inline The inline element to render.
 * @param labels Image-type labels for the active locale; `labels.other` is the fallback.
 */
function InlineBlock({ inline, labels }: { inline: Inline; labels: Record<string, string> }) {
  if (inline.kind === "image") {
    const docId = inline.src.split("/")[5]; // /api/static/processing/png/<doc>/p-NNN.png
    const pageNum = pageNumFromSrc(inline.src);
    return (
      <figure className="my-4 inline-block">
        <FmBboxThumb
          bbox={inline.bbox}
          docId={docId}
          pageNum={pageNum}
          width={Math.min(640, Math.round(inline.bbox.w * 720))}
          height={Math.min(800, Math.round(inline.bbox.h * 960))}
          label={inline.caption}
        />
        <figcaption className="mt-1 font-mono text-[10px] text-[#8896aa] tracking-widest uppercase">
          {labels[inline.imageType ?? "other"] ?? labels.other}
          {inline.caption ? <span className="ml-2 text-[#c8d4e6] normal-case tracking-normal"> {inline.caption}</span> : null}
        </figcaption>
      </figure>
    );
  }

  if (inline.kind === "table") {
    // A usable extraction needs a header row plus at least one data row.
    if (inline.csv && inline.csv.length > 1) {
      const [headers, ...rows] = inline.csv;
      return (
        <div className="md-table-wrap my-4">
          <table>
            <thead><tr>{headers.map((h, i) => <th key={i}>{h}</th>)}</tr></thead>
            <tbody>
              {rows.map((row, i) => (
                <tr key={i}>{row.map((c, j) => <td key={j}>{c}</td>)}</tr>
              ))}
            </tbody>
          </table>
        </div>
      );
    }
    // Fallback: bbox crop of the table area
    return (
      <figure className="my-4">
        <FmBboxThumb
          bbox={inline.bbox}
          docId={inline.docId}
          pageNum={inline.pageNum}
          width={Math.min(720, Math.round(inline.bbox.w * 800))}
          height={Math.min(800, Math.round(inline.bbox.h * 1000))}
          label={inline.headersSummary}
        />
        <figcaption className="mt-1 font-mono text-[10px] text-[#8896aa] tracking-widest uppercase">
          Table
          {inline.colEstimate && inline.rowEstimate
            ? <span className="ml-1 text-[#5a6678]">· {inline.colEstimate}×{inline.rowEstimate} (not extracted)</span>
            : null}
          {inline.headersSummary
            ? <span className="ml-2 text-[#c8d4e6] normal-case tracking-normal"> {inline.headersSummary}</span>
            : null}
        </figcaption>
      </figure>
    );
  }

  if (inline.kind === "redaction") {
    return (
      <div className="my-2 inline-flex items-center gap-2 px-2 py-1 border border-[#ff3344] bg-[rgba(255,51,68,0.06)] rounded text-[11px] font-mono">
        <span className="text-[#ff3344]"> {inline.code ?? "REDACTED"} </span>
        {inline.description && <span className="text-[#8896aa]"> {inline.description}</span>}
      </div>
    );
  }

  // signature
  return (
    <div className="my-2 inline-flex items-center gap-2 px-2 py-1 border border-[#bb6bd9] bg-[rgba(187,107,217,0.06)] rounded text-[11px] font-mono">
      <span className="text-[#bb6bd9]"> {inline.signer ?? "signature"}</span>
    </div>
  );
}