disclosure-bureau/web/app/d/[docId]/page.tsx
Luiz Gustavo e75ca5eda2 add clean LLM reading version of documents (the core goal)
Scanned docs are messy — duplicate transcriptions (typed + handwritten),
two classification variants of the same narrative, OCR noise, repeated
banners. The doc page showed raw chunks, so everything appeared twice.

40_reading_version.py generates ONE clean, deduplicated, well-structured
bilingual Markdown reading version per doc (Sonnet): merges duplicate versions
without losing unique lines, drops page furniture, formats transcripts as
dialogue. Faithful — invents nothing; redactions kept as markers.

/d/[docId] now defaults to a "📖 leitura" tab rendering this clean version,
with "🔍 trechos · scan original" preserving the faithful per-chunk + per-page
scan view. reading.md lives in raw/<doc>--subagent/ alongside the chunks.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 17:23:36 -03:00

144 lines
5.4 KiB
TypeScript

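/**
 * /d/<docId> — Document view (Sonnet 4.6 agentic chunks v0.2.0).
 *
 * Server component: loads the chunk index, the per-page chunks, the document
 * record and the clean reading version from disk, then hands them to
 * <DocReadingView>.
 *
 * NOTE(review): this header used to describe <DocRendererV2> as "the only view"
 * (language + flow/paged toggles, image crops on-demand, table CSVs). This file
 * no longer renders it directly; presumably it now lives behind the chunk/scan
 * view inside <DocReadingView> — confirm and update.
 */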
import Link from "next/link";
import { notFound } from "next/navigation";
import { readChunksByPage, readIndex, hasChunks, readReadingVersion } from "@/lib/chunks";
import { readDocument } from "@/lib/wiki";
import { pickPitch } from "@/lib/doc-summary";
import { getLocale } from "@/components/locale-toggle";
import { AuthBar } from "@/components/auth-bar";
import { ChatBubble } from "@/components/chat-bubble";
import { DocReadingView } from "@/components/doc-reading-view";
import { MarkdownBody } from "@/components/markdown-body";
// Next.js route segment config: opt this route out of static rendering so the
// page is rendered on every request instead of being cached at build time.
export const dynamic = "force-dynamic";
/**
 * Server-rendered page for `/d/<docId>`.
 *
 * Flow:
 * 1. Resolve the route params and the current locale.
 * 2. If `hasChunks` reports nothing for the document, return a
 *    "not yet processed" notice.
 * 3. Otherwise fetch the chunk index, per-page chunks, document record and
 *    reading version concurrently; a missing index triggers `notFound()`.
 * 4. Derive header data (classification, collection, title, pitch, the six
 *    most frequent chunk types, UFO / cryptid / image counters) and render the
 *    header followed by `<DocReadingView>` and `<ChatBubble>`.
 *
 * @param params - Promise resolving to the route params (`docId`).
 */
export default async function DocPage({
  params,
}: {
  params: Promise<{ docId: string }>;
}) {
  const { docId } = await params;
  const locale = await getLocale();

  // Navigation strip shared by the "not processed" notice and the full view.
  const topBar = (
    <div className="flex items-start justify-between gap-4 mb-6">
      <Link href="/" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
        home
      </Link>
      <AuthBar />
    </div>
  );

  // Guard: nothing has been chunked for this document yet.
  const processed = await hasChunks(docId);
  if (!processed) {
    return (
      <main className="min-h-screen p-6 md:p-10 max-w-4xl mx-auto">
        {topBar}
        <div className="border border-[rgba(255,165,0,0.30)] bg-[rgba(255,165,0,0.05)] rounded p-6">
          <h1 className="font-mono text-lg text-[#ffa500] mb-2"> Documento ainda não processado</h1>
          <p className="text-[#c8d4e6] text-sm">
            Este documento ainda não foi indexado.
          </p>
          <p className="font-mono text-xs text-[#5a6678] mt-4">doc_id: {docId}</p>
        </div>
      </main>
    );
  }

  // The four reads are independent of each other, so run them concurrently.
  const [index, pages, record, readingVersion] = await Promise.all([
    readIndex(docId),
    readChunksByPage(docId),
    readDocument(docId),
    readReadingVersion(docId),
  ]);
  if (!index) notFound();

  // [pageNumber, chunks] pairs in ascending page order.
  const pagesInOrder = [...pages.entries()].sort(([left], [right]) => left - right);

  const pitchLang = locale === "en" ? "en" : "pt";
  const pitch = pickPitch(record?.fm as Record<string, unknown> | undefined, pitchLang);

  // Chunk-type histogram; only the six most frequent types are displayed.
  const perType = new Map<string, number>();
  for (const { type } of index.chunks ?? []) {
    perType.set(type, (perType.get(type) ?? 0) + 1);
  }
  const topTypes = [...perType.entries()]
    .sort(([, countA], [, countB]) => countB - countA)
    .slice(0, 6);

  // Tally anomaly flags and image chunks across every page.
  const tally = { ufo: 0, cryptid: 0, image: 0 };
  for (const chunks of pages.values()) {
    for (const { fm } of chunks) {
      if (fm.ufo_anomaly_detected) tally.ufo += 1;
      if (fm.cryptid_anomaly_detected) tally.cryptid += 1;
      if (fm.type === "image") tally.image += 1;
    }
  }

  // Header fields, each with a fallback when the document record lacks them.
  const classification = (record?.fm.highest_classification as string) ?? "—";
  const collection = (record?.fm.collection as string) ?? "—";
  const title = (record?.fm.canonical_title as string) ?? docId;

  return (
    <main className="min-h-screen p-6 md:p-10 max-w-5xl mx-auto">
      {topBar}
      <header className="mb-6 pb-6 border-b border-[rgba(0,255,156,0.32)]">
        <div className="font-mono text-[10px] text-[#5a6678] tracking-widest uppercase mb-2 flex items-center gap-2 flex-wrap">
          <span className="px-2 py-0.5 border border-[#ff3344] text-[#ff3344] rounded">
            {classification}
          </span>
          <span>· {collection}</span>
          <span>· doc_id: <span className="text-[#7fdbff]">{docId}</span></span>
        </div>
        <h1 className="font-mono text-3xl text-[#00ff9c] mb-3">
          {title}
        </h1>
        {pitch && (
          <div className="mt-4 mb-4 p-4 border-l-4 border-[#7fdbff] bg-[rgba(127,219,255,0.04)]">
            <div className="text-[15px] leading-relaxed text-[#c8d4e6] markdown-body">
              <MarkdownBody>{pitch}</MarkdownBody>
            </div>
          </div>
        )}
        <div className="flex flex-wrap items-center gap-4 mt-4 font-mono text-xs text-[#8896aa]">
          <span><span className="text-[#7fdbff]">{index.total_pages}</span> páginas</span>
          <span><span className="text-[#00ff9c]">{index.total_chunks}</span> trechos</span>
          {tally.image > 0 && <span><span className="text-[#a78bfa]">{tally.image}</span> imagens</span>}
          {tally.ufo > 0 && <span><span className="text-[#ff3344]">🛸 {tally.ufo}</span> UAP flags</span>}
          {tally.cryptid > 0 && <span><span className="text-[#9b5de5]">{tally.cryptid}</span> cryptid</span>}
        </div>
        {topTypes.length > 0 && (
          <div className="mt-3 flex flex-wrap gap-1.5 font-mono text-[10px]">
            {topTypes.map(([chunkType, count]) => (
              <span
                key={chunkType}
                className="px-2 py-0.5 border border-[rgba(127,219,255,0.20)] text-[#8896aa] rounded"
              >
                {chunkType} <span className="text-[#7fdbff]">{count}</span>
              </span>
            ))}
          </div>
        )}
      </header>
      <DocReadingView docId={docId} reading={readingVersion} chunksByPage={pagesInOrder} />
      <ChatBubble context={{ doc_id: docId }} />
    </main>
  );
}