#!/usr/bin/env python3
"""
22-update-stub-messages.py — Phase 0: replace misleading "Will be enriched in
Phase 6" stubs with an honest low-signal message.

For each entity file whose body still has the stub phrasing:
  - Read total_mentions, documents_count from frontmatter
  - Rewrite body with calibrated message that reflects reality
  - Preserve frontmatter as-is
"""
from __future__ import annotations

import re
import sys
from pathlib import Path

try:
    import yaml
except ImportError:
    # Reported by main(), so that merely importing this module never exits.
    yaml = None

ENTITIES = Path("/Users/guto/ufo/wiki/entities")

# Full-body pattern of the generated EN / PT-BR stub. It is not referenced by
# the code in this file: main() detects stubs by the "Phase 6" / "Phase 7"
# substrings instead.
STUB_RE = re.compile(
    r"^# [^\n]+\n\n## Description \(EN\)\n\n_Stub generated by entity dedup\..*?_\n\n"
    r"## Descrição \(PT-BR\)\n\n_Stub gerado pela deduplicação de entidades\..*?_\n*$",
    re.DOTALL,
)

# Closing front-matter delimiter: a line that holds nothing but "---".
# Anchoring to a whole line keeps a "---" inside a YAML value from being
# mistaken for the end of the front matter.
_FM_CLOSE_RE = re.compile(r"^---[ \t]*$", re.MULTILINE)


def new_body(canonical: str, total: int, docs: int) -> str:
    """Build the bilingual low-signal body for one entity page.

    Args:
        canonical: Entity name used as the page heading.
        total: Number of mentions reported in the message.
        docs: Number of documents reported in the message.

    Returns:
        Markdown text with an English and a Brazilian-Portuguese section,
        ending with a single newline.
    """
    return (
        f"# {canonical}\n\n"
        "## Description (EN)\n\n"
        f"_Low-signal entity — referenced **{total} time(s)** across **{docs} document(s)**. "
        "No external enrichment performed (criteria: ≥3 mentions). "
        "Use the page references below for raw context._\n\n"
        "## Descrição (PT-BR)\n\n"
        f"_Entidade de baixo sinal — referenciada **{total} vez(es)** em **{docs} documento(s)**. "
        "Sem enriquecimento externo (critério: ≥3 menções). "
        "Use as referências de páginas abaixo para contexto bruto._\n"
    )


def split_frontmatter(text: str) -> tuple[str, str, str] | None:
    """Split a Markdown document into its front matter and its body.

    Args:
        text: Full file content.

    Returns:
        ``(header, yaml_text, body)`` where ``header`` is everything up to and
        including the closing ``---`` line (always newline-terminated),
        ``yaml_text`` is the stripped text between the delimiters, and
        ``body`` is the remainder with leading newlines removed. ``None`` when
        the text does not start with ``---`` or has no closing delimiter line.
    """
    if not text.startswith("---"):
        return None
    # Start after the opening delimiter; with MULTILINE, "^" only matches
    # right after a newline, so the opening line itself can never match.
    closing = _FM_CLOSE_RE.search(text, 3)
    if closing is None:
        return None
    header = text[:closing.end()] + "\n"
    yaml_text = text[3:closing.start()].strip()
    body = text[closing.end():].lstrip("\n")
    return header, yaml_text, body


def _warn(path: Path, reason: str) -> None:
    """Report on stderr why *path* was left untouched."""
    sys.stderr.write(f"skip {path}: {reason}\n")


def main() -> None:
    """Rewrite every stub entity page under ENTITIES and print a summary.

    Files without parseable front matter, without the stub phrasing, or with
    unusable counts are left untouched and counted as skipped. Files marked
    as enriched are counted separately. Exits with status 1 when PyYAML is
    not installed.
    """
    if yaml is None:
        sys.stderr.write("pip3 install pyyaml\n")
        sys.exit(1)

    updated = 0
    skipped = 0
    enriched = 0

    # Sorted so that processing order (and any warnings) is reproducible.
    for path in sorted(ENTITIES.glob("*/*.md")):
        text = path.read_text(encoding="utf-8")

        parts = split_frontmatter(text)
        if parts is None:
            skipped += 1
            continue
        header, yaml_text, body = parts

        # One malformed file must not abort the batch half-way through.
        try:
            fm = yaml.safe_load(yaml_text) or {}
        except yaml.YAMLError as err:
            _warn(path, f"invalid front matter ({err})")
            skipped += 1
            continue
        if not isinstance(fm, dict):
            _warn(path, "front matter is not a mapping")
            skipped += 1
            continue

        # Don't touch entities that have real enrichment content
        if fm.get("enrichment_status") in ("deep", "shallow") and "external_sources" in body:
            enriched += 1
            continue

        # Don't touch the seeded entities that had hand-curated bodies
        if "Phase 6" not in body and "Phase 7" not in body:
            skipped += 1
            continue

        try:
            total = int(fm.get("total_mentions") or 0)
            docs = int(fm.get("documents_count") or 0)
        except (TypeError, ValueError):
            # Better to leave the stub than to publish made-up numbers.
            _warn(path, "non-numeric total_mentions / documents_count")
            skipped += 1
            continue

        canonical = fm.get("canonical_name") or path.stem
        new_full = header + "\n" + new_body(canonical, total, docs)
        if new_full == text:
            skipped += 1
            continue

        path.write_text(new_full, encoding="utf-8")
        updated += 1

    print(f"Updated: {updated}\nSkipped (no stub / hand-curated): {skipped}\nKept enriched: {enriched}")


if __name__ == "__main__":
    main()