disclosure-bureau/scripts/synthesize/20_entity_summary.py

#!/usr/bin/env python3
"""
20_entity_summary.py — Synthesise narrative_summary EN+PT-BR for entities with
total_mentions >= threshold, via Claude Code OAuth subprocess (Sonnet).

Strategy per entity:
  1. Query DB for top-K verbatim chunk snippets that mention the entity
     (joined via public.entity_mentions + public.chunks). K=10 default.
  2. Build a Holmes-Watson voice prompt with the entity's canonical_name,
     class, alias list, and the verbatim snippets.
  3. Call `claude -p --model sonnet --output-format json` → JSON with
     narrative_summary + narrative_summary_pt_br.
  4. Update wiki/entities/<class>/<id>.md frontmatter:
     - narrative_summary, narrative_summary_pt_br
     - summary_status: 'synthesized'
     - summary_confidence: 'medium'
     - last_lint: now()

Idempotent: entities with summary_status in {'synthesized','curated','red_teamed'}
are skipped. Re-run safely advances any new ones.

Throttle: 1 entity at a time (sequential). Max 20x plan: 5h window.

Usage:
  ./20_entity_summary.py --min-mentions 20 --limit 200      # top entities
  ./20_entity_summary.py --classes person,organization      # subset
  ./20_entity_summary.py --dry-run --limit 5                # preview
"""
from __future__ import annotations

import argparse
import json
import os
import re
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path

try:
    import yaml
    import psycopg
except ImportError as e:
    sys.stderr.write(f"pip3 install pyyaml psycopg[binary]  # missing: {e}\n")
    sys.exit(1)

# Repository root: two directory levels above the folder that contains this script.
UFO_ROOT = Path(__file__).resolve().parents[2]
# Directory holding one sub-folder of entity markdown pages per entity class.
ENTITIES_BASE = UFO_ROOT / "wiki" / "entities"
# Markdown log that main() appends a run record to after a non-dry run.
LOG_PATH = UFO_ROOT / "wiki" / "log.md"

# Postgres connection string; DATABASE_URL wins, SUPABASE_DB_URL is the fallback.
# Stays None/empty when neither variable is set (main() checks for that).
DATABASE_URL = os.getenv("DATABASE_URL") or os.getenv("SUPABASE_DB_URL")

# Map DB entity_class → folder name under ENTITIES_BASE.
# Classes missing from this table are rejected by resolve_path().
CLASS_FOLDER = {
    "person": "people",
    "organization": "organizations",
    "location": "locations",
    "event": "events",
    "uap_object": "uap-objects",
    "vehicle": "vehicles",
    "operation": "operations",
    "concept": "concepts",
}


def utc_iso() -> str:
    """Return the current UTC time as a second-resolution ISO-8601 string ending in ``Z``."""
    current = datetime.now(tz=timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")


# Prompt sent to the model for every entity. It is filled in with str.format(),
# so the single-brace fields ({entity_class}, {name}, {aliases}, {total_mentions},
# {documents_count}, {n_snippets}, {snippets}) are placeholders, while the doubled
# braces around the OUTPUT example are escapes that render as literal "{" / "}".
PROMPT_TEMPLATE = """You are writing an encyclopedic entry for an investigative UAP/UFO wiki ("The Disclosure Bureau"). Voice rules:

- Holmes–Watson narrator: precise, fact-dense, no hype, no breathless language.
- Open with what this entity is and how it figures in the corpus. Cite who/where/when. Optionally mention notable patterns across the snippets.
- 3–6 sentences. No editorial speculation beyond what the snippets support.
- Original-language verbatim quotes stay as-is; the EN summary is in English, the PT-BR summary in Brazilian Portuguese (with full UTF-8 accents).
- If snippets contradict each other or are sparse, say so plainly.
- NEVER include placeholder text like "Will be enriched in Phase N", "[REDACTED]", or markdown headings — pure prose only.

ENTITY:
- Class: {entity_class}
- Canonical name: {name}
- Aliases: {aliases}
- Total mentions across corpus: {total_mentions}
- Documents that mention it: {documents_count}

TOP {n_snippets} VERBATIM SNIPPETS FROM THE CORPUS:
{snippets}

OUTPUT (STRICT JSON, no markdown fences, no commentary):
{{
  "narrative_summary": "<EN, 3-6 sentences>",
  "narrative_summary_pt_br": "<PT-BR brasileiro, 3-6 sentences>"
}}"""


def call_sonnet(prompt: str, timeout_s: int = 180) -> dict:
    """Run ``claude -p --model sonnet --output-format json`` and return the model's JSON object.

    The prompt is piped to the CLI on stdin. The CLI's stdout is decoded as a
    JSON envelope; the model text is taken from its ``result`` (falling back to
    ``response`` / ``content``) key, stripped of Markdown code fences, and
    decoded as JSON. If the text is not pure JSON, the span from the first
    ``{`` that precedes ``"narrative_summary"`` to the last ``}`` is tried.

    Args:
        prompt: Full prompt text written to the subprocess's stdin.
        timeout_s: Seconds to wait for the subprocess before giving up.

    Returns:
        The JSON object (``dict``) produced by the model.

    Raises:
        RuntimeError: if the CLI is not installed, times out, exits non-zero,
            or produces output that cannot be decoded into a JSON object.
    """
    try:
        res = subprocess.run(
            ["claude", "-p", "--model", "sonnet", "--output-format", "json"],
            input=prompt, capture_output=True, text=True,
            timeout=timeout_s, check=False,
        )
    except FileNotFoundError as e:
        raise RuntimeError("claude CLI not found on PATH") from e
    except subprocess.TimeoutExpired as e:
        raise RuntimeError(f"claude subprocess timed out after {timeout_s}s") from e
    if res.returncode != 0:
        raise RuntimeError(f"claude exit {res.returncode}: {res.stderr[:300]}")

    try:
        env = json.loads(res.stdout)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"unparseable claude envelope: {e} :: {res.stdout[:300]}") from e
    if not isinstance(env, dict):
        raise RuntimeError(
            f"unexpected claude envelope type {type(env).__name__}: {res.stdout[:300]}"
        )
    # NOTE(review): the CLI's JSON envelope is expected to carry an `is_error`
    # flag — confirm against the installed CLI version. A missing key is harmless.
    if env.get("is_error"):
        raise RuntimeError(f"claude reported an error: {str(env.get('result'))[:300]}")

    txt = env.get("result") or env.get("response") or env.get("content") or ""
    if not isinstance(txt, str):
        raise RuntimeError(f"unexpected claude payload type {type(txt).__name__}")
    # Drop ```json ... ``` fences the model may add despite the instructions.
    txt = re.sub(r"^```(?:json)?\s*|\s*```$", "", txt.strip(), flags=re.MULTILINE).strip()

    try:
        parsed = json.loads(txt)
    except json.JSONDecodeError:
        # Tolerate commentary around the object: retry on the embedded {...} span.
        m = re.search(r"\{.*?\"narrative_summary\".*\}", txt, flags=re.DOTALL)
        if not m:
            raise RuntimeError(f"no JSON object in claude output: {txt[:300]}")
        try:
            parsed = json.loads(m.group(0))
        except json.JSONDecodeError as e:
            raise RuntimeError(
                f"malformed JSON object in claude output: {e} :: {txt[:300]}"
            ) from e
    # Callers use .get() on the result, so anything but an object is a failure.
    if not isinstance(parsed, dict):
        raise RuntimeError(
            f"claude output is not a JSON object ({type(parsed).__name__}): {txt[:300]}"
        )
    return parsed


# Closing front-matter fence: a line made up solely of '---' (trailing blanks tolerated).
_FRONTMATTER_CLOSE = re.compile(r"^---[ \t]*$", re.MULTILINE)


def load_md(path: Path) -> tuple[dict, str]:
    """Split a markdown file into its YAML front matter and its body.

    Args:
        path: Markdown file to read (decoded as UTF-8).

    Returns:
        ``(front_matter, body)``. When the file does not start with ``---`` the
        front matter is ``{}`` and the body is the whole file unchanged;
        otherwise the body is the text after the closing fence with leading
        newlines removed.

    Raises:
        ValueError: if the opening ``---`` has no closing ``---`` line, or the
            front matter is not a YAML mapping. Refusing such files keeps a
            later write from saving a mangled page.
    """
    raw = path.read_text(encoding="utf-8")
    if not raw.startswith("---"):
        return {}, raw
    # Search from offset 3 so the opening fence itself is never matched, and
    # require the fence to start a line so a '---' inside a value is ignored.
    closing = _FRONTMATTER_CLOSE.search(raw, 3)
    if closing is None:
        raise ValueError(f"unterminated front matter in {path}")
    fm = yaml.safe_load(raw[3:closing.start()].strip()) or {}
    if not isinstance(fm, dict):
        raise ValueError(f"front matter in {path} is not a mapping")
    body = raw[closing.end():].lstrip("\n")
    return fm, body


def write_md(path: Path, fm: dict, body: str) -> None:
    """Write ``fm`` as a YAML front-matter block followed by ``body`` to ``path``.

    Keys keep their insertion order and non-ASCII text is written as-is. A blank
    line separates the closing fence from the body unless the body already
    starts with a newline. The file is written as UTF-8.
    """
    front_matter = yaml.dump(
        fm,
        default_flow_style=False,
        sort_keys=False,
        allow_unicode=True,
    )
    pieces = ["---\n", front_matter, "---\n"]
    if not body.startswith("\n"):
        pieces.append("\n")
    pieces.append(body)
    path.write_text("".join(pieces), encoding="utf-8")


def fetch_top_entities(conn, min_mentions: int, limit: int, classes: list[str] | None,
                       require_status_none: bool):
    """Return the most-mentioned entities from ``public.entities``.

    Args:
        conn: Open database connection exposing ``cursor()``.
        min_mentions: Lower bound (inclusive) on ``total_mentions``.
        limit: Maximum number of rows to return.
        classes: When non-empty, restrict rows to these ``entity_class`` values.
        require_status_none: Accepted for interface compatibility; this
            function does not read it.

    Returns:
        Rows of ``(entity_pk, entity_class, entity_id, canonical_name, aliases,
        total_mentions, documents_count)`` ordered by ``total_mentions``
        descending; a NULL ``aliases`` column comes back as an empty array.
    """
    base_query = """
        SELECT entity_pk, entity_class, entity_id, canonical_name,
               COALESCE(aliases, ARRAY[]::text[]) AS aliases,
               total_mentions, documents_count
        FROM public.entities
        WHERE total_mentions >= %s
    """
    fragments = [base_query]
    bind_values: list = [min_mentions]
    if classes:
        fragments.append(" AND entity_class = ANY(%s)")
        bind_values.append(classes)
    fragments.append(" ORDER BY total_mentions DESC LIMIT %s")
    bind_values.append(limit)
    query = "".join(fragments)
    with conn.cursor() as cursor:
        cursor.execute(query, bind_values)
        return cursor.fetchall()


def fetch_snippets(conn, entity_pk: int, k: int = 10, max_chars: int = 600) -> list[str]:
    """Return prompt-ready snippets from the K longest chunks mentioning an entity.

    Chunks are ranked by the longer of ``content_pt`` / ``content_en``. For each
    chunk the Portuguese text is used when it has non-blank content, otherwise
    the English text; chunks with no usable text are dropped, so fewer than
    ``k`` snippets may come back.

    Args:
        conn: Open database connection exposing ``cursor()``.
        entity_pk: Primary key of the entity in ``public.entity_mentions``.
        k: Maximum number of chunks to fetch.
        max_chars: Per-snippet character cap that keeps the prompt compact.

    Returns:
        Lines of the form ``- (<doc_id> p<page> · <type>) <text>``.
    """
    sql = """
        SELECT c.content_pt, c.content_en, c.doc_id, c.page, c.type
        FROM public.entity_mentions em
        JOIN public.chunks c ON c.chunk_pk = em.chunk_pk
        WHERE em.entity_pk = %s
        ORDER BY GREATEST(COALESCE(LENGTH(c.content_pt),0), COALESCE(LENGTH(c.content_en),0)) DESC
        LIMIT %s
    """
    with conn.cursor() as cur:
        cur.execute(sql, (entity_pk, k))
        rows = cur.fetchall()
    out = []
    for pt, en, doc, page, typ in rows:
        # Strip each language before choosing, so a whitespace-only content_pt
        # does not hide usable content_en.
        body = (pt or "").strip() or (en or "").strip()
        if not body:
            continue
        out.append(f"- ({doc} p{page} · {typ}) {body[:max_chars]}")
    return out


def resolve_path(entity_class: str, entity_id: str) -> Path:
    """Return the markdown path ``ENTITIES_BASE/<folder>/<entity_id>.md`` for an entity.

    Raises:
        ValueError: if ``entity_class`` has no folder in ``CLASS_FOLDER``.
    """
    folder = CLASS_FOLDER.get(entity_class)
    if folder:
        return ENTITIES_BASE / folder / f"{entity_id}.md"
    raise ValueError(f"unknown entity_class {entity_class}")


def synthesise_one(conn, row, dry_run: bool, verbose: bool) -> str:
    """Generate and store the bilingual narrative summary for a single entity.

    Args:
        conn: Open database connection used to fetch snippets.
        row: One row as returned by ``fetch_top_entities``.
        dry_run: Build the prompt but neither call the model nor write the file.
        verbose: Print a progress line before calling the model.

    Returns:
        A status message: ``"ok"`` (or ``"ok (dry — ...)"``) on success,
        ``"skipped (...)"`` when the page is missing, already summarised, or has
        no snippets, and ``"empty/short output (...)"`` when either summary is
        shorter than 40 characters.
    """
    (entity_pk, entity_class, entity_id, canonical_name,
     aliases, total_mentions, documents_count) = row

    md_path = resolve_path(entity_class, entity_id)
    if not md_path.exists():
        return "skipped (file missing)"

    frontmatter, page_body = load_md(md_path)
    current_status = frontmatter.get("summary_status")
    # Pages that already carry a summary of any provenance are never overwritten.
    if current_status in ("synthesized", "curated", "red_teamed"):
        return f"skipped (already {current_status})"

    snippet_lines = fetch_snippets(conn, entity_pk, k=10)
    if not snippet_lines:
        return "skipped (no snippets)"
    snippet_count = len(snippet_lines)

    # At most eight aliases go into the prompt; an em dash stands in for "none".
    alias_text = ", ".join((aliases or [])[:8]) or "—"
    prompt = PROMPT_TEMPLATE.format(
        entity_class=entity_class,
        name=canonical_name,
        aliases=alias_text,
        total_mentions=total_mentions,
        documents_count=documents_count,
        n_snippets=snippet_count,
        snippets="\n".join(snippet_lines),
    )

    if dry_run:
        return f"ok (dry — {snippet_count} snippets, {len(prompt)} chars prompt)"
    if verbose:
        print(f"    → calling sonnet ({snippet_count} snippets, {len(prompt)} chars)...", flush=True)

    reply = call_sonnet(prompt)
    summary_en = (reply.get("narrative_summary") or "").strip()
    summary_pt = (reply.get("narrative_summary_pt_br") or "").strip()
    if min(len(summary_en), len(summary_pt)) < 40:
        return f"empty/short output (en={len(summary_en)}, pt={len(summary_pt)})"

    frontmatter.update({
        "narrative_summary": summary_en,
        "narrative_summary_pt_br": summary_pt,
        "summary_status": "synthesized",
        "summary_confidence": "medium",
        "last_lint": utc_iso(),
        # Refresh canonical mention counts from DB so the wiki agrees with retrieval
        "total_mentions": int(total_mentions),
        "documents_count": int(documents_count),
    })
    write_md(md_path, frontmatter, page_body)
    return "ok"


def main() -> int:
    """Command-line entry point: summarise the top entities one at a time.

    Parses the CLI flags, loads candidate entities from the database, runs
    ``synthesise_one`` on each sequentially, prints a per-entity status line and
    a final tally, and appends a run record to ``LOG_PATH`` when at least one
    entity was synthesised outside dry-run mode.

    Returns:
        ``1`` when no database URL is configured, otherwise ``0``.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--min-mentions", type=int, default=20)
    p.add_argument("--limit", type=int, default=200)
    p.add_argument("--classes", default=None,
                   help="comma-separated subset (e.g. 'person,organization,location')")
    p.add_argument("--dry-run", action="store_true")
    p.add_argument("--verbose", action="store_true")
    p.add_argument("--sleep", type=float, default=0.5,
                   help="seconds between calls (respect Max 20x rate)")
    args = p.parse_args()

    if not DATABASE_URL:
        sys.stderr.write("DATABASE_URL not set\n")
        return 1

    classes = None
    if args.classes:
        # Ignore empty items (e.g. a trailing comma) instead of filtering on "".
        classes = [c.strip() for c in args.classes.split(",") if c.strip()]
        if not classes:
            p.error("--classes contained no class names")

    # Only the host/database part is printed, never the credentials.
    print(f"connecting → {DATABASE_URL.split('@')[-1]}")
    # The script only issues SELECTs. Autocommit keeps one failed query from
    # aborting the transaction for every later entity, and avoids holding a
    # transaction open while waiting on slow model calls.
    with psycopg.connect(DATABASE_URL, autocommit=True) as conn:
        rows = fetch_top_entities(conn, args.min_mentions, args.limit, classes, require_status_none=True)
        print(f"candidates: {len(rows)} (min_mentions={args.min_mentions}, limit={args.limit})")
        done = 0
        skipped = 0
        errors = 0
        for i, row in enumerate(rows, 1):
            _, entity_class, entity_id, canonical_name, _, total_mentions, _ = row
            label = f"[{i:>3}/{len(rows)}] {entity_class}/{entity_id} ({total_mentions}m) — {canonical_name[:40]}"
            # Throttle unless we know the model was not called for this entity.
            throttle = True
            try:
                msg = synthesise_one(conn, row, args.dry_run, args.verbose)
            except Exception as e:
                # Top-level boundary: report and move on so one bad entity
                # cannot abort a multi-hour run.
                errors += 1
                print(f"  ✗ {label}  — ERROR: {e}", flush=True)
            else:
                if msg.startswith("ok"):
                    done += 1
                    print(f"  ✓ {label}  — {msg}", flush=True)
                else:
                    skipped += 1
                    print(f"  · {label}  — {msg}", flush=True)
                    # "skipped (...)" results return before any model call.
                    throttle = not msg.startswith("skipped")
            # Failures are throttled too, so a rate-limit error is not
            # immediately followed by another request.
            if throttle and not args.dry_run and args.sleep > 0:
                time.sleep(args.sleep)

    print(f"\ndone={done} skipped={skipped} errors={errors}")

    if not args.dry_run and done > 0:
        with LOG_PATH.open("a", encoding="utf-8") as f:
            f.write(
                f"\n## {utc_iso()} · SYNTHESIZE_ENTITY_SUMMARIES\n"
                f"- script: scripts/synthesize/20_entity_summary.py\n"
                f"- min_mentions: {args.min_mentions}\n"
                f"- limit: {args.limit}\n"
                f"- synthesised: {done}\n"
                f"- skipped: {skipped}\n"
                f"- errors: {errors}\n"
                f"- model: claude-sonnet (via CLAUDE_CODE_OAUTH_TOKEN)\n"
            )
    return 0


if __name__ == "__main__":
    sys.exit(main())
-												ship: synthesize 158 entities, AG-UI artifacts, chat persistence, auth flow

Fase 3 onda 2 — entity synthesis at scale:
- scripts/synthesize/20_entity_summary.py: queries DB for entities with
  total_mentions ≥ threshold + top-K verbatim chunk snippets via
  entity_mentions JOIN, prompts Sonnet (Holmes-Watson voice, bilingual),
  writes narrative_summary EN+PT-BR + summary_status=synthesized.
  Ran on 187 candidates (mentions ≥ 20) → 158 OK · 1 err · 29 skipped (no
  snippets). Combined with anchor curation: 20 curated + 158 synthesized
  = 178 entities with real narrative (vs 0 a day ago).

Fase 4 — chat with typed artifacts + persistence:
- lib/chat/agui.ts: AG-UI v1 typed Artifact union (citation, crop_image,
  entity_card, evidence_card, hypothesis_card, case_card, navigation_offer)
  alongside the existing event types.
- lib/chat/tools.ts + openrouter.ts: hybrid_search emits up to 6
  citation + crop_image artifacts per query. Provider collects them and
  returns in done.artifacts so the route can persist.
- api/sessions/[id]/messages: persist artifacts to messages.citations.
- components/chat-bubble.tsx: ArtifactCard renders inline cards (citation,
  crop_image, entity_card, navigation_offer) for streamed and persisted
  messages. activeId now persisted in localStorage so navigation between
  pages keeps the same conversation. New sessions are lazy (only when user
  has zero). loadMessages hydrates tools + artifacts from server. CRUD UI:
  rename (✎) + archive (🗑) buttons per session in the list.

Home search:
- doc-list-filters: input now fires hybrid_search (rerank=0 for speed)
  in parallel with the local title filter; chunk hits render above the doc
  grid with snippet + score + classification.
- api/search/hybrid: accept ?rerank=0 to skip the cross-encoder (1.3s vs 60s).

Auth flow:
- infra: SMTP_HOST=mail.spacemail.com:587 + DMARC published; mail now lands
  in inbox. GOTRUE_MAILER_AUTOCONFIRM=false (real email verification).
- kong.yml: proxy /auth/callback on api.disclosure.top → web:3000 so PKCE
  email links don't 404 at the gateway.
- web/app/auth/callback: handle both ?code= (OAuth) and ?token=&type=
  (PKCE); redirect to the public site host before verifyOtp so the session
  cookie lands on the right domain.

Audit deliverables:
- .nirvana/outputs/disclosure-bureau/.../systems-atelier/: 5 docs (code
  analysis, tech debt, discovery brief, system arch, 5 ADRs) authored by
  sa-principal that produced this roadmap. Kept in-tree for traceability.

											
										
										
											2026-05-18 06:52:59 +00:00
+								#!/usr/bin/env python3
 								"""
 _entity_summary.py — Synthesise narrative_summary EN+PT-BR for entities with
 								total_mentions >= threshold, via Claude Code OAuth subprocess (Sonnet).
 								Strategy per entity:
 . Query DB for top-K verbatim chunk snippets that mention the entity
 								     (joined via public.entity_mentions + public.chunks). K=10 default.
 . Build a Holmes-Watson voice prompt with the entity's canonical_name,
 								     class, alias list, and the verbatim snippets.
 . Call `claude -p --model sonnet --output-format json` → JSON with
 								     narrative_summary + narrative_summary_pt_br.
 . Update wiki/entities/<class>/<id>.md frontmatter:
 								     - narrative_summary, narrative_summary_pt_br
 								     - summary_status: 'synthesized'
 								     - summary_confidence: 'medium'
 								     - last_lint: now()
 								Idempotent: entities with summary_status in {'synthesized','curated','red_teamed'}
 								are skipped. Re-run safely advances any new ones.
 								Throttle: 1 entity at a time (sequential). Max 20x plan: 5h window.
 								Usage:
 								  ./20_entity_summary.py --min-mentions 20 --limit 200      # top entities
 								  ./20_entity_summary.py --classes person,organization      # subset
 								  ./20_entity_summary.py --dry-run --limit 5                # preview
 								"""
 								from __future__ import annotations
 								import argparse
 								import json
 								import os
 								import re
 								import subprocess
 								import sys
 								import time
 								from datetime import datetime, timezone
 								from pathlib import Path
 								try:
 								    import yaml
 								    import psycopg
 								except ImportError as e:
 								    sys.stderr.write(f"pip3 install pyyaml psycopg[binary]  # missing: {e}\n")
 								    sys.exit(1)
 								UFO_ROOT = Path(__file__).resolve().parents[2]
 								ENTITIES_BASE = UFO_ROOT / "wiki" / "entities"
 								LOG_PATH = UFO_ROOT / "wiki" / "log.md"
 								DATABASE_URL = os.getenv("DATABASE_URL") or os.getenv("SUPABASE_DB_URL")
 								# Map DB entity_class → filesystem folder
 								CLASS_FOLDER = {
 								    "person": "people",
 								    "organization": "organizations",
 								    "location": "locations",
 								    "event": "events",
 								    "uap_object": "uap-objects",
 								    "vehicle": "vehicles",
 								    "operation": "operations",
 								    "concept": "concepts",
 								}
 								def utc_iso() -> str:
 								    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
 								PROMPT_TEMPLATE = """You are writing an encyclopedic entry for an investigative UAP/UFO wiki ("The Disclosure Bureau"). Voice rules:
 								- Holmes–Watson narrator: precise, fact-dense, no hype, no breathless language.
 								- Open with what this entity is and how it figures in the corpus. Cite who/where/when. Optionally mention notable patterns across the snippets.
 								- 3–6 sentences. No editorial speculation beyond what the snippets support.
 								- Original-language verbatim quotes stay as-is; the EN summary is in English, the PT-BR summary in Brazilian Portuguese (with full UTF-8 accents).
 								- If snippets contradict each other or are sparse, say so plainly.
 								- NEVER include placeholder text like "Will be enriched in Phase N", "[REDACTED]", or markdown headings — pure prose only.
 								ENTITY:
 								- Class: {entity_class}
 								- Canonical name: {name}
 								- Aliases: {aliases}
 								- Total mentions across corpus: {total_mentions}
 								- Documents that mention it: {documents_count}
 								TOP {n_snippets} VERBATIM SNIPPETS FROM THE CORPUS:
 								{snippets}
 								OUTPUT (STRICT JSON, no markdown fences, no commentary):
 								{{
 								  "narrative_summary": "<EN, 3-6 sentences>",
 								  "narrative_summary_pt_br": "<PT-BR brasileiro, 3-6 sentences>"
 								}}"""
 								def call_sonnet(prompt: str, timeout_s: int = 180) -> dict:
 								    """claude -p --model sonnet --output-format json subprocess."""
 								    try:
 								        res = subprocess.run(
 								            ["claude", "-p", "--model", "sonnet", "--output-format", "json"],
 								            input=prompt, capture_output=True, text=True,
 								            timeout=timeout_s, check=False,
 								        )
 								    except subprocess.TimeoutExpired:
 								        raise RuntimeError(f"claude subprocess timed out after {timeout_s}s")
 								    if res.returncode != 0:
 								        raise RuntimeError(f"claude exit {res.returncode}: {res.stderr[:300]}")
 								    try:
 								        env = json.loads(res.stdout)
 								    except json.JSONDecodeError as e:
 								        raise RuntimeError(f"unparseable claude envelope: {e} :: {res.stdout[:300]}")
 								    txt = env.get("result") or env.get("response") or env.get("content") or ""
 								    txt = re.sub(r"^```(?:json)?\s*|\s*```$", "", txt.strip(), flags=re.MULTILINE).strip()
 								    try:
 								        return json.loads(txt)
 								    except json.JSONDecodeError:
 								        m = re.search(r"\{.*?\"narrative_summary\".*\}", txt, flags=re.DOTALL)
 								        if not m:
 								            raise RuntimeError(f"no JSON object in claude output: {txt[:300]}")
 								        return json.loads(m.group(0))
 								def load_md(path: Path) -> tuple[dict, str]:
 								    raw = path.read_text(encoding="utf-8")
 								    if not raw.startswith("---"):
 								        return {}, raw
 								    end = raw.find("---", 4)
 								    fm = yaml.safe_load(raw[3:end].strip()) or {}
 								    body = raw[end + 3:].lstrip("\n")
 								    return fm, body
 								def write_md(path: Path, fm: dict, body: str) -> None:
 								    yaml_str = yaml.dump(fm, allow_unicode=True, sort_keys=False, default_flow_style=False)
 								    sep = "" if body.startswith("\n") else "\n"
 								    path.write_text(f"---\n{yaml_str}---\n{sep}{body}", encoding="utf-8")
 								def fetch_top_entities(conn, min_mentions: int, limit: int, classes: list[str] | None,
 								                       require_status_none: bool):
 								    sql = """
 								        SELECT entity_pk, entity_class, entity_id, canonical_name,
 								               COALESCE(aliases, ARRAY[]::text[]) AS aliases,
 								               total_mentions, documents_count
 								        FROM public.entities
 								        WHERE total_mentions >= %s
 								    """
 								    params: list = [min_mentions]
 								    if classes:
 								        sql += " AND entity_class = ANY(%s)"
 								        params.append(classes)
 								    sql += " ORDER BY total_mentions DESC LIMIT %s"
 								    params.append(limit)
 								    with conn.cursor() as cur:
 								        cur.execute(sql, params)
 								        return cur.fetchall()
 								def fetch_snippets(conn, entity_pk: int, k: int = 10) -> list[str]:
 								    """Top-K longest chunks (by content length) mentioning the entity."""
 								    sql = """
 								        SELECT c.content_pt, c.content_en, c.doc_id, c.page, c.type
 								        FROM public.entity_mentions em
 								        JOIN public.chunks c ON c.chunk_pk = em.chunk_pk
 								        WHERE em.entity_pk = %s
 								        ORDER BY GREATEST(COALESCE(LENGTH(c.content_pt),0), COALESCE(LENGTH(c.content_en),0)) DESC
 								        LIMIT %s
 								    """
 								    with conn.cursor() as cur:
 								        cur.execute(sql, (entity_pk, k))
 								        rows = cur.fetchall()
 								    out = []
 								    for pt, en, doc, page, typ in rows:
 								        body = (pt or en or "").strip()
 								        if not body:
 								            continue
 								        # Cap each snippet so the prompt stays compact
 								        body = body[:600]
 								        out.append(f"- ({doc} p{page} · {typ}) {body}")
 								    return out
 								def resolve_path(entity_class: str, entity_id: str) -> Path:
 								    folder = CLASS_FOLDER.get(entity_class)
 								    if not folder:
 								        raise ValueError(f"unknown entity_class {entity_class}")
 								    return ENTITIES_BASE / folder / f"{entity_id}.md"
 								def synthesise_one(conn, row, dry_run: bool, verbose: bool) -> str:
 								    entity_pk, entity_class, entity_id, canonical_name, aliases, total_mentions, documents_count = row
 								    path = resolve_path(entity_class, entity_id)
 								    if not path.exists():
 								        return "skipped (file missing)"
 								    fm, body = load_md(path)
 								    status = fm.get("summary_status")
 								    if status in ("synthesized", "curated", "red_teamed"):
 								        return f"skipped (already {status})"
 								    snippets = fetch_snippets(conn, entity_pk, k=10)
 								    if not snippets:
 								        return "skipped (no snippets)"
 								    prompt = PROMPT_TEMPLATE.format(
 								        entity_class=entity_class,
 								        name=canonical_name,
 								        aliases=", ".join((aliases or [])[:8]) or "—",
 								        total_mentions=total_mentions,
 								        documents_count=documents_count,
 								        n_snippets=len(snippets),
 								        snippets="\n".join(snippets),
 								    )
 								    if dry_run:
 								        return f"ok (dry — {len(snippets)} snippets, {len(prompt)} chars prompt)"
 								    if verbose:
 								        print(f"    → calling sonnet ({len(snippets)} snippets, {len(prompt)} chars)...", flush=True)
 								    out = call_sonnet(prompt)
 								    narr_en = (out.get("narrative_summary") or "").strip()
 								    narr_pt = (out.get("narrative_summary_pt_br") or "").strip()
 								    if len(narr_en) < 40 or len(narr_pt) < 40:
 								        return f"empty/short output (en={len(narr_en)}, pt={len(narr_pt)})"
 								    fm["narrative_summary"] = narr_en
 								    fm["narrative_summary_pt_br"] = narr_pt
 								    fm["summary_status"] = "synthesized"
 								    fm["summary_confidence"] = "medium"
 								    fm["last_lint"] = utc_iso()
 								    # Refresh canonical mention counts from DB so the wiki agrees with retrieval
 								    fm["total_mentions"] = int(total_mentions)
 								    fm["documents_count"] = int(documents_count)
 								    write_md(path, fm, body)
 								    return "ok"
 								def main() -> int:
 								    p = argparse.ArgumentParser()
 								    p.add_argument("--min-mentions", type=int, default=20)
 								    p.add_argument("--limit", type=int, default=200)
 								    p.add_argument("--classes", default=None,
 								                   help="comma-separated subset (e.g. 'person,organization,location')")
 								    p.add_argument("--dry-run", action="store_true")
 								    p.add_argument("--verbose", action="store_true")
 								    p.add_argument("--sleep", type=float, default=0.5,
 								                   help="seconds between calls (respect Max 20x rate)")
 								    args = p.parse_args()
 								    if not DATABASE_URL:
 								        sys.stderr.write("DATABASE_URL not set\n")
 								        return 1
 								    classes = [c.strip() for c in args.classes.split(",")] if args.classes else None
 								    print(f"connecting → {DATABASE_URL.split('@')[-1]}")
 								    with psycopg.connect(DATABASE_URL) as conn:
 								        rows = fetch_top_entities(conn, args.min_mentions, args.limit, classes, require_status_none=True)
 								        print(f"candidates: {len(rows)} (min_mentions={args.min_mentions}, limit={args.limit})")
 								        done = 0
 								        skipped = 0
 								        errors = 0
 								        for i, row in enumerate(rows, 1):
 								            entity_pk, entity_class, entity_id, canonical_name, _, total_mentions, _ = row
 								            label = f"[{i:>3}/{len(rows)}] {entity_class}/{entity_id} ({total_mentions}m) — {canonical_name[:40]}"
 								            try:
 								                msg = synthesise_one(conn, row, args.dry_run, args.verbose)
 								            except Exception as e:
 								                errors += 1
 								                print(f"  ✗ {label}  — ERROR: {e}", flush=True)
 								                continue
 								            if msg.startswith("ok"):
 								                done += 1
 								                print(f"  ✓ {label}  — {msg}", flush=True)
 								            else:
 								                skipped += 1
 								                print(f"  · {label}  — {msg}", flush=True)
 								            if not args.dry_run and args.sleep > 0:
 								                time.sleep(args.sleep)
 								    print(f"\ndone={done} skipped={skipped} errors={errors}")
 								    if not args.dry_run and done > 0:
 								        with LOG_PATH.open("a", encoding="utf-8") as f:
 								            f.write(
 								                f"\n## {utc_iso()} · SYNTHESIZE_ENTITY_SUMMARIES\n"
 								                f"- script: scripts/synthesize/20_entity_summary.py\n"
 								                f"- min_mentions: {args.min_mentions}\n"
 								                f"- limit: {args.limit}\n"
 								                f"- synthesised: {done}\n"
 								                f"- skipped: {skipped}\n"
 								                f"- errors: {errors}\n"
 								                f"- model: claude-sonnet (via CLAUDE_CODE_OAUTH_TOKEN)\n"
 								            )
 								    return 0
 								if __name__ == "__main__":
 								    sys.exit(main())