disclosure-bureau/scripts/maintain/54_sync_is_generic.py

56 lines
1.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Sync `is_generic` flag from each entity YAML to public.entities table.
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
import psycopg
import yaml
# Root directory that main() scans recursively for entity ``*.md`` files.
# NOTE(review): absolute path under one user's home directory — presumably
# has to be adjusted when the script runs on another machine; confirm.
WIKI_ENT = Path("/Users/guto/ufo/wiki/entities")
def _load_rows(root: Path) -> list[tuple[str, str, bool]]:
    """Collect one ``(entity_class, entity_id, is_generic)`` row per entity.

    Every ``*.md`` file under *root* (skipping anything with an ``_archived``
    path component) is expected to start with a ``---``-delimited YAML front
    matter block. Files that cannot be read or parsed, whose front matter is
    not a mapping, or that lack a class/id are skipped.

    Args:
        root: Directory searched recursively for entity files.

    Returns:
        Rows ready to be COPY-ed into the staging table, at most one per
        ``(entity_class, entity_id)`` pair.
    """
    found: dict[tuple[str, str], bool] = {}
    # Sorted so that the winner among duplicate declarations is stable
    # from run to run instead of depending on directory traversal order.
    for f in sorted(root.rglob("*.md")):
        if "_archived" in f.parts:
            continue
        try:
            text = f.read_text(encoding="utf-8")
            if not text.startswith("---"):
                continue
            # Only the chunk between the first two markers is front matter,
            # so there is no need to split the rest of the document.
            fm = yaml.safe_load(text.split("---", 2)[1]) or {}
        except Exception as exc:
            # Best-effort sync: one unreadable/malformed file must not abort
            # the run, but it should not disappear silently either.
            print(f"warning: skipping {f}: {exc}", file=sys.stderr)
            continue
        if not isinstance(fm, dict):
            # Front matter parsed to a scalar or list; there are no keys to read.
            continue
        cls = fm.get("entity_class")
        # Fall back to the class-specific key, e.g. "<entity_class>_id".
        eid = fm.get("entity_id") or (fm.get(f"{cls}_id") if cls else None)
        if not (cls and eid):
            continue
        key = (str(cls), str(eid))
        if key in found:
            # Duplicate staging rows would make UPDATE ... FROM pick an
            # arbitrary one; keep a single, deterministic value instead.
            print(
                f"warning: duplicate entity {key[0]}/{key[1]} in {f}; "
                "last file wins",
                file=sys.stderr,
            )
        found[key] = bool(fm.get("is_generic"))
    return [(cls, eid, flag) for (cls, eid), flag in found.items()]


def main() -> int:
    """Push the ``is_generic`` flag from entity front matter into the database.

    Reads the connection string from ``DATABASE_URL`` (or ``SUPABASE_DB_URL``),
    loads the flags from the files under ``WIKI_ENT``, bulk-loads them into a
    temporary table and updates only those ``entities`` rows whose stored flag
    differs.

    Returns:
        0 on success.

    Raises:
        SystemExit: If neither environment variable is set.
    """
    dburl = os.environ.get("DATABASE_URL") or os.environ.get("SUPABASE_DB_URL")
    if not dburl:
        sys.exit("DATABASE_URL not set")

    rows = _load_rows(WIKI_ENT)
    print(f"Loaded {len(rows)} entities from YAML")

    with psycopg.connect(dburl) as conn:
        with conn.cursor() as cur:
            cur.execute("CREATE TEMP TABLE _gen (entity_class TEXT, entity_id TEXT, is_generic BOOL)")
            with cur.copy("COPY _gen (entity_class, entity_id, is_generic) FROM STDIN") as cp:
                for row in rows:
                    cp.write_row(row)
            # IS DISTINCT FROM keeps the update (and the reported rowcount)
            # limited to rows whose flag actually changes, NULLs included.
            cur.execute("""
                UPDATE entities e SET is_generic = g.is_generic
                FROM _gen g
                WHERE e.entity_class = g.entity_class
                AND e.entity_id = g.entity_id
                AND e.is_generic IS DISTINCT FROM g.is_generic
            """)
            print(f" rows updated: {cur.rowcount}")
            cur.execute("SELECT COUNT(*) FROM entities WHERE is_generic")
            print(f" total is_generic=TRUE in DB: {cur.fetchone()[0]}")
        conn.commit()
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())