disclosure-bureau/web/app/admin/indexer/page.tsx

98 lines
4.2 KiB
TypeScript
Raw Permalink Normal View History

/**
 * /admin/indexer — standalone admin view of the retrieval index health.
 *
 * Renders the same component as /admin/batch, but on its own page with deeper
 * context about the pgvector schema, embed-service status, and recovery commands.
 */
import Link from "next/link";
import { AuthBar } from "@/components/auth-bar";
import { IndexerStatus } from "@/components/indexer-status";
// Next.js route segment config: "force-dynamic" opts this page out of static
// rendering so it is rendered on every request.
export const dynamic = "force-dynamic";
/**
 * Admin page for the retrieval index.
 *
 * Layout, top to bottom: a navigation row (home link, batch-monitor link,
 * auth bar), a header describing the retrieval stack, the live
 * `IndexerStatus` panel, and a numbered list of ingestion pipeline commands.
 */
export default function AdminIndexerPage() {
  // Ingestion pipeline entries in display order. `note` is an optional
  // paragraph rendered beneath the command; entries without one render nothing there.
  const pipelineSteps = [
    {
      title: "1. rebuild de chunks (Sonnet 4.6 via subagents)",
      command: "python3 scripts/28-batch-rebuild-all.py --workers 2",
      note: (
        <p className="text-xs text-[#8896aa] mt-1">
          Cada doc roda em <code>claude -p</code> isolado. Saída em <code>raw/&lt;doc&gt;--subagent/</code>.
        </p>
      ),
    },
    {
      title: "2. index Postgres + embeddings BGE-M3",
      command: "python3 scripts/30-index-chunks-to-db.py --skip-existing",
    },
    {
      title: "3. materializa entity_mentions (chunk entity)",
      command: "python3 scripts/31-populate-entity-mentions.py",
    },
    {
      title: "4. sync mentioned_in[] markdown (fecha o loop wiki DB)",
      command: "python3 scripts/32-sync-mentioned-in-yaml.py",
    },
    {
      title: "5. compact progress.jsonl (manutenção)",
      command: "python3 scripts/33-compact-progress-log.py",
    },
  ];

  return (
    <main className="min-h-screen p-6 md:p-10 max-w-5xl mx-auto">
      {/* Top navigation row */}
      <div className="flex items-start justify-between gap-4 mb-6">
        <Link href="/" className="font-mono text-xs text-[#7fdbff] hover:text-[#00ff9c]">
          home
        </Link>
        <div className="flex items-center gap-2">
          <Link
            href="/admin/batch"
            className="font-mono text-xs px-3 py-1.5 border border-[rgba(0,255,156,0.30)] text-[#00ff9c] hover:bg-[rgba(0,255,156,0.10)] rounded"
          >
            📈 batch monitor
          </Link>
          <AuthBar />
        </div>
      </div>

      {/* Page heading and description of the retrieval stack */}
      <header className="mb-6">
        <div className="font-mono text-[10px] text-[#5a6678] tracking-widest uppercase mb-2">
          admin · retrieval index health
        </div>
        <h1 className="font-mono text-2xl text-[#00ff9c] mb-1"> Postgres + pgvector + BGE-M3</h1>
        <p className="text-[#8896aa] text-sm">
          Estado da camada de retrieval que alimenta o chat e o <code>/search</code>.
          Postgres faz BM25 (tsvector bilíngue) + dense (pgvector HNSW 1024-dim);
          embed-service (BGE-M3 self-host) provê vetores e reranker BGE-Reranker-v2-M3.
        </p>
      </header>

      <IndexerStatus />

      {/* Ingestion pipeline commands */}
      <section className="mt-10 pt-6 border-t border-[rgba(0,255,156,0.12)]">
        <h2 className="font-mono text-sm text-[#7fdbff] uppercase tracking-widest mb-3 border-l-2 border-[#7fdbff] pl-3">
          Pipeline de ingestão
        </h2>
        <ol className="space-y-3 text-sm text-[#c8d4e6]">
          {pipelineSteps.map(({ title, command, note }) => (
            <li key={command}>
              <div className="font-mono text-[10px] uppercase tracking-widest text-[#5a6678] mb-1">
                {title}
              </div>
              <code className="block p-2 bg-[#060a13] border border-[rgba(0,255,156,0.12)] rounded text-[11px]">
                {command}
              </code>
              {note}
            </li>
          ))}
        </ol>
      </section>
    </main>
  );
}