/**
 * /admin/indexer — Standalone admin view of the retrieval index health.
 *
 * Same component as in /admin/batch, but on its own page with deeper context
 * about pgvector schema, embed-service status, and recovery commands.
 *
 * AdminIndexerPage is the default export of this module and takes no props.
 * The page copy (user-facing, in Portuguese) covers:
 *   - navigation labels for "home" and the "batch monitor";
 *   - a short description of the retrieval layer (Postgres BM25 + pgvector
 *     dense search, with an embed-service providing vectors and a reranker);
 *   - the five-step ingestion pipeline, each step paired with the script
 *     command that runs it (scripts 28, 30, 31, 32 and 33).
 *
 * `dynamic = "force-dynamic"` is Next.js route segment config that opts the
 * route out of static rendering — presumably so the index status shown here
 * is computed per request rather than at build time; confirm against
 * IndexerStatus.
 */
import Link from "next/link"; import { AuthBar } from "@/components/auth-bar"; import { IndexerStatus } from "@/components/indexer-status"; export const dynamic = "force-dynamic"; export default function AdminIndexerPage() { return (
← home
📈 batch monitor
admin · retrieval index health

▍ Postgres + pgvector + BGE-M3

Estado da camada de retrieval que alimenta o chat e o /search. Postgres faz BM25 (tsvector bilíngue) + dense (pgvector HNSW 1024-dim); embed-service (BGE-M3 self-host) provê vetores e reranker BGE-Reranker-v2-M3.

Pipeline de ingestão

  1. 1. rebuild de chunks (Sonnet 4.6 via subagents)
    python3 scripts/28-batch-rebuild-all.py --workers 2

    Cada doc roda em claude -p isolado. Saída em raw/<doc>--subagent/.

  2. 2. index → Postgres + embeddings BGE-M3
    python3 scripts/30-index-chunks-to-db.py --skip-existing
  3. 3. materializa entity_mentions (chunk ↔ entity)
    python3 scripts/31-populate-entity-mentions.py
  4. 4. sync mentioned_in[] → markdown (fecha o loop wiki ↔ DB)
    python3 scripts/32-sync-mentioned-in-yaml.py
  5. 5. compact progress.jsonl (manutenção)
    python3 scripts/33-compact-progress-log.py
); }