233 lines
9.2 KiB
TypeScript
233 lines
9.2 KiB
TypeScript
"use client";
|
||
|
||
import { useState } from "react";
|
||
import Image from "next/image";
|
||
import { FmBboxThumb } from "@/components/fm/bbox-thumb";
|
||
import { EntityModal } from "@/components/entity-modal";
|
||
import { FmContentChip, FmPageTypeChip, FmClassification } from "@/components/fm/badges";
|
||
import type { Inline, RenderedPage } from "@/lib/doc-renderer";
|
||
import type { Match } from "@/components/reader-content";
|
||
|
||
/** Props accepted by {@link FullDocRenderer}. */
interface Props {
  /** Pages to render; they are emitted in array order. */
  pages: RenderedPage[];

  /** UI locale; selects the vision description and the image-type labels. */
  locale: "en" | "pt-br";
}
|
||
|
||
/**
 * Splits `text` into consecutive segments, tagging the spans covered by `matches`.
 *
 * Matches are visited in ascending `start` order. A match is skipped when it
 * - starts before the end of a previously accepted match (overlap),
 * - covers no characters of `text` (empty, inverted, or starting at/after the
 *   end of `text`), or
 * - has a non-numeric (`NaN`) offset.
 *
 * A match that runs past the end of `text` is clipped to the text length.
 * Skipping degenerate matches guarantees that every returned match segment has
 * non-empty text and that no part of `text` is emitted twice.
 *
 * @param text - The text to segment.
 * @param matches - Matches whose `start`/`end` are treated as offsets into `text`.
 * @returns The segments in order; concatenating their `text` reproduces `text`.
 */
function segmentText(text: string, matches: Match[]): Array<{ text: string; match?: Match }> {
  if (!matches || matches.length === 0) return [{ text }];

  // Copy before sorting so the caller's array is never reordered.
  const sorted = [...matches].sort((a, b) => a.start - b.start);
  const segs: Array<{ text: string; match?: Match }> = [];
  let cursor = 0;

  for (const m of sorted) {
    const end = Math.min(m.end, text.length);
    // `!(end > m.start)` (rather than `end <= m.start`) also rejects NaN offsets.
    if (m.start < cursor || !(end > m.start)) continue;
    if (m.start > cursor) segs.push({ text: text.slice(cursor, m.start) });
    segs.push({ text: text.slice(m.start, end), match: m });
    cursor = end;
  }

  if (cursor < text.length) segs.push({ text: text.slice(cursor) });
  return segs;
}
|
||
|
||
/**
 * Cuts OCR text into `ys.length + 1` line-aligned pieces.
 *
 * Each `y` is scaled by the number of lines in `ocr`, rounded to a line index
 * and clamped to `[0, lineCount]`. The indices are then sorted ascending and
 * used as cut points, so the returned pieces are always in top-to-bottom order
 * regardless of the order of `ys`.
 *
 * @param ocr - The OCR text; lines are separated by `"\n"`.
 * @param ys - Cut positions, scaled by the line count to pick a line index.
 * @returns The pieces in order; `[ocr]` when `ys` is empty.
 */
function splitOcrByY(ocr: string, ys: number[]): string[] {
  const ocrLines = ocr.split("\n");
  const lineCount = ocrLines.length;
  if (lineCount === 0 || ys.length === 0) return [ocr];

  const toLineIndex = (y: number): number =>
    Math.max(0, Math.min(lineCount, Math.round(y * lineCount)));
  const cutIndices = ys.map(toLineIndex).sort((lo, hi) => lo - hi);

  // Piece k runs from boundary k up to cut k; the final piece has no cut and
  // therefore runs to the end of the text.
  const starts = [0, ...cutIndices];
  return starts.map((from, k) => ocrLines.slice(from, cutIndices[k]).join("\n"));
}
|
||
|
||
/** English caption labels, keyed by inline image type. */
const IMAGE_TYPE_LABEL_EN: Record<string, string> = {
  photo: "Photograph",
  sketch: "Sketch",
  map: "Map",
  chart: "Chart",
  stamp: "Stamp",
  signature: "Signature",
  redaction: "Redaction",
  logo: "Logo",
  seal: "Seal",
  diagram: "Diagram",
  // Also used as the fallback label when an image type has no entry.
  other: "Image",
};
|
||
/** Brazilian Portuguese caption labels, keyed by inline image type. */
const IMAGE_TYPE_LABEL_PT: Record<string, string> = {
  photo: "Fotografia",
  sketch: "Esboço",
  map: "Mapa",
  chart: "Gráfico",
  stamp: "Carimbo",
  signature: "Assinatura",
  redaction: "Censura",
  logo: "Logo",
  seal: "Selo",
  diagram: "Diagrama",
  // Also used as the fallback label when an image type has no entry.
  other: "Imagem",
};
|
||
|
||
/**
 * Renders every page of a document as one continuous article.
 *
 * For each page it shows a header (anchor, page-type / content / classification
 * chips and an "open page" link), the vision description for the active locale,
 * and the page's OCR text with its inline blocks (images, tables, redactions,
 * signatures) interleaved at the line positions derived from their `bboxY`.
 * Entity matches inside the text are rendered as keyboard-accessible buttons
 * that open an {@link EntityModal}.
 *
 * @param pages - Pages to render, in array order.
 * @param locale - Selects the vision description and image-type labels.
 */
export function FullDocRenderer({ pages, locale }: Props) {
  const [modalEntity, setModalEntity] = useState<{ cls: string; id: string } | null>(null);
  const labels = locale === "pt-br" ? IMAGE_TYPE_LABEL_PT : IMAGE_TYPE_LABEL_EN;

  return (
    <>
      <article className="space-y-12">
        {pages.map((p) => {
          // splitOcrByY sorts its cut points ascending, so the inlines must be in
          // the same (ascending bboxY) order for segs[i] to pair with inlines[i].
          // Sorting a copy is a no-op when p.inline is already ordered.
          const inlines = [...p.inline].sort((a, b) => a.bboxY - b.bboxY);
          const segs = splitOcrByY(p.ocr, inlines.map((i) => i.bboxY));
          // segs[i] comes BEFORE inlines[i] (when i < inlines.length).
          // The last seg (segs[inlines.length]) comes AFTER all inlines.

          const vision = locale === "pt-br" ? p.visionPt : p.visionEn;

          return (
            <section key={p.pageStem} id={p.pageStem} className="border-b border-[rgba(0,255,156,0.12)] pb-8">
              <header className="flex items-center justify-between gap-2 mb-3">
                <div className="flex items-center gap-2">
                  <a href={`#${p.pageStem}`} className="font-mono text-sm text-[#00ff9c]">
                    ▍ {p.pageStem}
                  </a>
                  {p.pageType && <FmPageTypeChip type={p.pageType} />}
                  {p.contentClassification?.slice(0, 4).map((c) => (
                    <FmContentChip key={c} kind={c as Parameters<typeof FmContentChip>[0]["kind"]} />
                  ))}
                  {p.classification && (
                    <FmClassification level={p.classification as Parameters<typeof FmClassification>[0]["level"]} />
                  )}
                </div>
                <a
                  href={`/d/${p.pageId.split("/")[0]}/${p.pageStem}`}
                  className="font-mono text-[10px] text-[#7fdbff] hover:text-[#00ff9c] uppercase tracking-widest"
                >
                  open page →
                </a>
              </header>

              {/* Bilingual vision description */}
              {vision && (
                <blockquote className="border-l-2 border-[#00ff9c] pl-3 mb-4 text-sm text-[#8896aa] italic">
                  {vision}
                </blockquote>
              )}

              {/* OCR + interleaved inlines */}
              <div className="reader-content">
                {segs.map((seg, i) => {
                  const inline = inlines[i];
                  return (
                    <div key={i}>
                      {/* The text segment */}
                      {/*
                        NOTE(review): every segment is matched against the page-wide
                        p.matches. If match offsets are relative to the full p.ocr
                        rather than to each segment, highlights after the first cut
                        will be misplaced — confirm, and rebase offsets per segment if so.
                      */}
                      {seg && (
                        <pre className="font-sans whitespace-pre-wrap text-sm leading-relaxed text-[#c8d4e6]">
                          {segmentText(seg, p.matches as Match[]).map((s, j) => {
                            const match = s.match;
                            if (!match) return <span key={j}>{s.text}</span>;

                            const openEntity = () =>
                              setModalEntity({ cls: match.class, id: match.entity_id });

                            return (
                              <span
                                key={j}
                                className="entity-link"
                                data-class={match.class}
                                role="button"
                                tabIndex={0}
                                onClick={openEntity}
                                onKeyDown={(e) => {
                                  if (e.key === "Enter" || e.key === " ") {
                                    // Stop Space from also scrolling the page.
                                    e.preventDefault();
                                    openEntity();
                                  }
                                }}
                              >
                                {s.text}
                              </span>
                            );
                          })}
                        </pre>
                      )}
                      {/* Inline block at this Y position */}
                      {inline && <InlineBlock inline={inline} labels={labels} />}
                    </div>
                  );
                })}
              </div>
            </section>
          );
        })}
      </article>

      {modalEntity && (
        <EntityModal
          cls={modalEntity.cls}
          id={modalEntity.id}
          open={!!modalEntity}
          onClose={() => setModalEntity(null)}
        />
      )}
    </>
  );
}
|
||
|
||
/**
 * Renders one inline element found on a page.
 *
 * - `image`: a cropped thumbnail of the bounding box with a localized type label
 *   and optional caption.
 * - `table`: an HTML table when extracted CSV rows (header + at least one row)
 *   are available, otherwise a cropped thumbnail of the table area.
 * - `redaction`: a red badge with the redaction code and optional description.
 * - anything else is rendered as a signature badge.
 *
 * @param inline - The inline element to render.
 * @param labels - Image-type labels for the active locale; `labels.other` is the fallback.
 */
function InlineBlock({ inline, labels }: { inline: Inline; labels: Record<string, string> }) {
  if (inline.kind === "image") {
    const docId = inline.src.split("/")[5]; // /api/static/processing/png/<doc>/p-NNN.png

    // Read the page number from the final path component only. Splitting the
    // whole path on "/p-" would pick up the document directory when the doc id
    // itself starts with "p-" and produce NaN. Falls back to page 1.
    const fileName = inline.src.slice(inline.src.lastIndexOf("/") + 1);
    const padded = /^p-(\d+)/.exec(fileName)?.[1] ?? "001";
    const pageNum = parseInt(padded, 10);

    return (
      <figure className="my-4 inline-block">
        <FmBboxThumb
          bbox={inline.bbox}
          docId={docId}
          pageNum={pageNum}
          width={Math.min(640, Math.round(inline.bbox.w * 720))}
          height={Math.min(800, Math.round(inline.bbox.h * 960))}
          label={inline.caption}
        />
        <figcaption className="mt-1 font-mono text-[10px] text-[#8896aa] tracking-widest uppercase">
          {labels[inline.imageType ?? "other"] ?? labels.other}
          {inline.caption ? <span className="ml-2 text-[#c8d4e6] normal-case tracking-normal">— {inline.caption}</span> : null}
        </figcaption>
      </figure>
    );
  }

  if (inline.kind === "table") {
    // Only render a real table when there is a header row plus at least one data row.
    if (inline.csv && inline.csv.length > 1) {
      const [headers, ...rows] = inline.csv;
      return (
        <div className="md-table-wrap my-4">
          <table>
            <thead><tr>{headers.map((h, i) => <th key={i}>{h}</th>)}</tr></thead>
            <tbody>
              {rows.map((row, i) => (
                <tr key={i}>{row.map((c, j) => <td key={j}>{c}</td>)}</tr>
              ))}
            </tbody>
          </table>
        </div>
      );
    }

    // Fallback: bbox crop of the table area
    return (
      <figure className="my-4">
        <FmBboxThumb
          bbox={inline.bbox}
          docId={inline.docId}
          pageNum={inline.pageNum}
          width={Math.min(720, Math.round(inline.bbox.w * 800))}
          height={Math.min(800, Math.round(inline.bbox.h * 1000))}
          label={inline.headersSummary}
        />
        <figcaption className="mt-1 font-mono text-[10px] text-[#8896aa] tracking-widest uppercase">
          Table
          {inline.colEstimate && inline.rowEstimate
            ? <span className="ml-1 text-[#5a6678]">· {inline.colEstimate}×{inline.rowEstimate} (not extracted)</span>
            : null}
          {inline.headersSummary
            ? <span className="ml-2 text-[#c8d4e6] normal-case tracking-normal">— {inline.headersSummary}</span>
            : null}
        </figcaption>
      </figure>
    );
  }

  if (inline.kind === "redaction") {
    return (
      <div className="my-2 inline-flex items-center gap-2 px-2 py-1 border border-[#ff3344] bg-[rgba(255,51,68,0.06)] rounded text-[11px] font-mono">
        <span className="text-[#ff3344]">▓▓▓ {inline.code ?? "REDACTED"} ▓▓▓</span>
        {inline.description && <span className="text-[#8896aa]">— {inline.description}</span>}
      </div>
    );
  }

  // signature
  return (
    <div className="my-2 inline-flex items-center gap-2 px-2 py-1 border border-[#bb6bd9] bg-[rgba(187,107,217,0.06)] rounded text-[11px] font-mono">
      <span className="text-[#bb6bd9]">✍ {inline.signer ?? "signature"}</span>
    </div>
  );
}
|