#!/usr/bin/env python3 """ 01_anchor_events.py — Seed the 20 anchor UAP events with curated bilingual narrative summaries via Claude Code OAuth subprocess (Sonnet). Anchor list comes from ADR-003 (Phase 0). Each event: - Gets/creates wiki/entities/events/EV--.md - Frontmatter: summary_status=curated, summary_confidence=high, narrative_summary=, narrative_summary_pt_br= - Body untouched if file already exists with manual edits. Idempotent: re-run skips events where summary_status == 'curated'. Usage: ./01_anchor_events.py # all anchors ./01_anchor_events.py --only roswell # one event (substring match) ./01_anchor_events.py --dry-run # print prompt + would-write, no LLM call """ from __future__ import annotations import argparse import json import re import subprocess import sys from datetime import datetime, timezone from pathlib import Path try: import yaml except ImportError: sys.stderr.write("pip3 install pyyaml\n") sys.exit(1) UFO_ROOT = Path(__file__).resolve().parents[2] EVENTS_DIR = UFO_ROOT / "wiki" / "entities" / "events" LOG_PATH = UFO_ROOT / "wiki" / "log.md" # Each tuple = (event_id, canonical_name_en, date_start, primary_location, event_class, observers_hint) # date_start uses YYYY-MM-DD when known, YYYY when only year, YYYY-MM-XX if no day. 
ANCHOR_EVENTS = [
    # Tuple layout:
    #   (event_id, canonical_name_en, date_start, primary_location,
    #    event_class, observers_hint)
    # NOTE: partial dates in date_start are truncated ("1944", "1952-09");
    # the "XX" placeholders appear only inside the event ids.
    ("EV-1897-04-17-aurora-airship-crash",
     "Aurora Airship Crash",
     "1897-04-17",
     "Aurora, Texas, USA",
     "uap-encounter",
     "Local townspeople and newspaper reporters (Dallas Morning News, 17 abr 1897)"),
    ("EV-1944-XX-XX-foo-fighters-european-theater",
     "Foo Fighters (European Theater)",
     "1944",
     "Western Front, Europe",
     "uap-encounter",
     "Allied bomber crews of the 415th Night Fighter Squadron and others"),
    ("EV-1947-06-21-maury-island-incident",
     "Maury Island Incident",
     "1947-06-21",
     "Puget Sound, Washington, USA",
     "uap-encounter",
     "Harold Dahl, Fred Crisman, Kenneth Arnold (later investigator)"),
    ("EV-1947-06-24-kenneth-arnold-mount-rainier",
     "Kenneth Arnold Mount Rainier Sighting",
     "1947-06-24",
     "Mount Rainier, Washington, USA",
     "uap-encounter",
     "Kenneth Arnold, civilian pilot"),
    ("EV-1947-07-08-roswell-incident",
     "Roswell Incident",
     "1947-07-08",
     "Roswell, New Mexico, USA",
     "uap-encounter",
     "USAAF 509th Bombardment Group, William Brazel, Major Jesse Marcel"),
    ("EV-1948-01-07-mantell-crash",
     "Mantell UFO Incident",
     "1948-01-07",
     "Franklin, Kentucky, USA",
     "uap-related-fatality",
     "Captain Thomas Mantell, Kentucky Air National Guard"),
    ("EV-1948-07-24-chiles-whitted-encounter",
     "Chiles-Whitted UFO Encounter",
     "1948-07-24",
     "Montgomery, Alabama, USA",
     "uap-encounter",
     "Eastern Airlines pilots Clarence Chiles and John Whitted"),
    ("EV-1952-09-XX-operation-mainbrace-sightings",
     "Operation Mainbrace UAP Sightings",
     "1952-09",
     "North Atlantic / Scandinavia",
     "uap-encounter",
     "NATO naval forces, RAF and USAF crews"),
    ("EV-1959-06-26-father-gill-papua-encounter",
     "Father Gill Papua Encounter",
     "1959-06-26",
     "Boianai, Papua New Guinea",
     "uap-encounter",
     "Reverend William Gill and 37 mission staff and locals"),
    ("EV-1964-04-24-lonnie-zamora-socorro",
     "Lonnie Zamora Socorro Landing",
     "1964-04-24",
     "Socorro, New Mexico, USA",
     "uap-encounter",
     "Police Sergeant Lonnie Zamora"),
    ("EV-1966-04-06-westall-school-encounter",
     "Westall School Encounter",
     "1966-04-06",
     "Clayton South, Victoria, Australia",
     "uap-encounter",
     "Over 200 students and teachers of Westall High School"),
    ("EV-1975-11-05-travis-walton-abduction",
     "Travis Walton Abduction",
     "1975-11-05",
     "Apache–Sitgreaves National Forest, Arizona, USA",
     "uap-abduction-claim",
     "Travis Walton and logging crew of six"),
    ("EV-1977-09-XX-operacao-prato",
     "Operação Prato",
     "1977-09",
     "Ilha de Colares, Pará, Brasil",
     "uap-encounter",
     "Força Aérea Brasileira (FAB), Captain Uyrangê Hollanda and local residents"),
    ("EV-1980-12-27-rendlesham-forest-incident",
     "Rendlesham Forest Incident",
     "1980-12-27",
     "Rendlesham Forest, Suffolk, UK (RAF Woodbridge / RAF Bentwaters)",
     "uap-encounter",
     "USAF personnel including Lt Col Charles Halt, Sgt Jim Penniston, John Burroughs"),
    ("EV-1980-12-29-cash-landrum-incident",
     "Cash-Landrum Incident",
     "1980-12-29",
     "Dayton, Texas, USA",
     "uap-related-injury",
     "Betty Cash, Vickie Landrum, Colby Landrum"),
    # NOTE(review): this is the only id containing a non-ASCII character
    # ("são"); the 1977 id uses the ASCII slug "operacao". The id doubles as
    # the file name, so it is left unchanged here — confirm before renaming.
    ("EV-1986-05-19-são-paulo-night-of-the-ufos",
     "São Paulo Noite Oficial dos OVNIs",
     "1986-05-19",
     "Costa do Brasil / São José dos Campos, SP",
     "uap-encounter",
     "FAB pilots flying Mirage III and F-5E intercepts, Brigadeiro Octavio Moreira Lima briefing"),
    ("EV-1989-11-XX-belgian-wave",
     "Belgian UFO Wave",
     "1989-11",
     "Belgium (multiple sites)",
     "uap-encounter",
     "Belgian Air Force, gendarmerie and over 13,500 civilian witnesses"),
    ("EV-1997-03-13-phoenix-lights",
     "Phoenix Lights",
     "1997-03-13",
     "Phoenix, Arizona, USA (and southern Arizona)",
     "uap-encounter",
     "Thousands of civilians, Governor Fife Symington (later)"),
    ("EV-2004-11-14-nimitz-tic-tac",
     "Nimitz Tic Tac Incident",
     "2004-11-14",
     "USS Nimitz Carrier Strike Group, off San Diego coast",
     "uap-encounter",
     "USN F/A-18F crews Cdr David Fravor, Lt Cdr Jim Slaight, Lt Cdr Chad Underwood; USS Princeton radar (Senior Chief Kevin Day)"),
    ("EV-2017-12-16-aatip-disclosure",
     "AATIP Public Disclosure",
     "2017-12-16",
     "New York / Washington, D.C., USA",
     "uap-disclosure-event",
     "New York Times reporting; Luis Elizondo, Harry Reid, Robert Bigelow"),
]

# Voice rules (Holmes–Watson, fact-dense, no hype).
PROMPT_TEMPLATE = """You are writing a curated encyclopedic event card for an investigative UAP/UFO wiki ("The Disclosure Bureau").

Voice rules:
- Holmes–Watson narrator: precise, fact-dense, no hype, no breathless language.
- Open with what happened, where, when. Then who observed it. Then what made it remarkable. Optionally, what the official record / later investigations concluded.
- 3–6 sentences. No editorial speculation beyond what is well-documented.
- Use the *original language for verbatim quotes*; otherwise English for the EN summary and Brazilian Portuguese (pt-br with full UTF-8 accents) for the PT-BR summary. Do NOT translate already-Portuguese proper names ("Operação Prato" stays as-is in EN too).
- Avoid the words "alegadamente"/"allegedly" unless it's genuinely contested. Be honest about uncertainty when warranted.
- Never include sentences like "Will be enriched in Phase N" or any placeholder — this is the final text.

EVENT TO DOCUMENT:
- ID: {event_id}
- Canonical name: {name}
- Date: {date}
- Primary location: {location}
- Class: {cls}
- Known observers / parties: {observers}

OUTPUT (STRICT JSON, no markdown fences, no commentary):
{{
  "narrative_summary": "",
  "narrative_summary_pt_br": ""
}}"""

# Closing front-matter delimiter: a line holding only "---" (trailing
# blanks / CR tolerated). Anchoring to a whole line keeps a "---" that occurs
# inside a YAML value from being mistaken for the end of the front matter.
_FRONT_MATTER_CLOSE = re.compile(r"^---[ \t\r]*$", re.MULTILINE)


def utc_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def extract_summary_json(txt: str) -> dict:
    """Pull the summary JSON object out of the model's reply text.

    Markdown code fences are stripped first. If the remaining text is a JSON
    object it is returned directly; otherwise every ``{`` is tried as the
    start of a JSON value and the first decoded object that has a
    ``narrative_summary`` key wins. Decoding with ``raw_decode`` (instead of
    a regex that stops at the first ``}``) keeps braces inside the summary
    text from truncating the object.

    Raises:
        RuntimeError: no suitable JSON object could be found.
    """
    cleaned = re.sub(
        r"^```(?:json)?\s*|\s*```$", "", txt.strip(), flags=re.MULTILINE
    ).strip()

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed

    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", cleaned):
        try:
            candidate, _ = decoder.raw_decode(cleaned, brace.start())
        except json.JSONDecodeError:
            continue
        if isinstance(candidate, dict) and "narrative_summary" in candidate:
            return candidate

    raise RuntimeError(f"no JSON object in claude output: {cleaned[:300]}")


def call_sonnet(prompt: str, dry_run: bool = False) -> dict:
    """Spawn `claude -p` subprocess (uses CLAUDE_CODE_OAUTH_TOKEN env) and return parsed JSON.

    Args:
        prompt: full prompt text, sent to the subprocess on stdin.
        dry_run: when true, no subprocess is started and a fixed placeholder
            dict is returned.

    Returns:
        The JSON object found in the reply (see ``extract_summary_json``).

    Raises:
        RuntimeError: the CLI could not be started, timed out, exited
            non-zero, or produced output that could not be parsed.
    """
    if dry_run:
        return {"narrative_summary": "[dry-run placeholder]",
                "narrative_summary_pt_br": "[dry-run placeholder]"}

    try:
        res = subprocess.run(
            ["claude", "-p", "--model", "sonnet", "--output-format", "json"],
            input=prompt,
            capture_output=True,
            text=True,
            timeout=180,
            check=False,
        )
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("claude subprocess timed out after 180s") from e
    except OSError as e:
        # e.g. the `claude` executable is not on PATH.
        raise RuntimeError(f"could not launch claude subprocess: {e}") from e

    if res.returncode != 0:
        raise RuntimeError(f"claude exit {res.returncode}: {res.stderr[:300]}")

    # claude --output-format json returns wrapped envelope; extract `result`.
    try:
        env = json.loads(res.stdout)
    except json.JSONDecodeError as e:
        raise RuntimeError(
            f"unparseable claude stdout: {e} :: {res.stdout[:300]}"
        ) from e
    if not isinstance(env, dict):
        raise RuntimeError(
            f"unexpected claude envelope ({type(env).__name__}): {res.stdout[:300]}"
        )

    txt = env.get("result") or env.get("response") or env.get("content") or ""
    if not isinstance(txt, str):
        raise RuntimeError(
            f"unexpected claude result type ({type(txt).__name__}): {res.stdout[:300]}"
        )
    return extract_summary_json(txt)


def load_yaml_body(path: Path) -> tuple[dict, str]:
    """Split a markdown file into (front matter dict, body text).

    A file that does not start with ``---`` is returned as ``({}, raw)``.
    Otherwise the front matter runs up to the next line that consists only of
    ``---`` and the body is everything after it, minus leading newlines.

    Raises:
        ValueError: the front matter is never closed, is not valid YAML, or
            is not a mapping. Raising (rather than guessing) keeps a
            malformed file from being overwritten by the caller.
    """
    raw = path.read_text(encoding="utf-8")
    if not raw.startswith("---"):
        return {}, raw

    # Start after the opening marker; "^" still only matches at real line
    # starts, so the opening "---" itself is never picked up.
    closing = _FRONT_MATTER_CLOSE.search(raw, 3)
    if closing is None:
        raise ValueError(f"unterminated YAML front matter in {path}")

    try:
        fm = yaml.safe_load(raw[3:closing.start()].strip()) or {}
    except yaml.YAMLError as e:
        raise ValueError(f"invalid YAML front matter in {path}: {e}") from e
    if not isinstance(fm, dict):
        raise ValueError(f"YAML front matter in {path} is not a mapping")

    body = raw[closing.end():].lstrip("\n")
    return fm, body


def write_yaml_body(path: Path, fm: dict, body: str) -> None:
    """Write ``fm`` as YAML front matter followed by ``body`` to ``path``.

    Keys keep their insertion order and non-ASCII text is written as-is.
    A newline is inserted after the closing ``---`` unless the body already
    starts with one.
    """
    yaml_str = yaml.dump(fm, allow_unicode=True, sort_keys=False,
                         default_flow_style=False)
    sep = "" if body.startswith("\n") else "\n"
    path.write_text(f"---\n{yaml_str}---\n{sep}{body}", encoding="utf-8")


def _clean_text(value: object) -> str:
    """Return ``value`` stripped when it is a string, otherwise ``""``."""
    return value.strip() if isinstance(value, str) else ""


def _build_frontmatter(event_id: str, name: str, date: str, location: str,
                       cls: str, existing_fm: dict,
                       narr_en: str, narr_pt: str) -> dict:
    """Assemble the curated front matter, reusing values already on disk.

    Keys not managed by this script are appended unchanged so manual
    additions to an existing file survive the rewrite.
    """
    now = utc_iso()
    fm = {
        "schema_version": "0.1.0",
        "type": "entity",
        "entity_class": "event",
        "event_id": event_id,
        "canonical_name": name,
        "aliases": existing_fm.get("aliases") or [name],
        "event_class": cls,
        "date_start": date,
        "date_end": existing_fm.get("date_end") or date,
        "date_confidence": "high",
        "primary_location": location,
        "observers": existing_fm.get("observers") or [],
        "uap_objects": existing_fm.get("uap_objects") or [],
        "documented_in": existing_fm.get("documented_in") or [],
        "total_mentions": existing_fm.get("total_mentions") or 0,
        "documents_count": existing_fm.get("documents_count") or 0,
        "narrative_summary": narr_en,
        "narrative_summary_pt_br": narr_pt,
        "summary_status": "curated",
        "summary_confidence": "high",
        "enrichment_status": existing_fm.get("enrichment_status") or "none",
        "external_sources": existing_fm.get("external_sources") or [],
        "last_ingest": existing_fm.get("last_ingest") or now,
        "last_lint": now,
        "wiki_version": "0.1.0",
    }
    for key, value in existing_fm.items():
        fm.setdefault(key, value)
    return fm


def upsert_anchor(event_id: str, name: str, date: str, location: str, cls: str,
                  observers: str, dry_run: bool, only: str | None) -> tuple[str, bool]:
    """Create or refresh the wiki file for one anchor event.

    Args:
        event_id: event id; also the file stem under ``EVENTS_DIR``.
        name: canonical English name.
        date: value stored as ``date_start``.
        location: value stored as ``primary_location``.
        cls: value stored as ``event_class``.
        observers: free-text hint, used only in the prompt.
        dry_run: print the prompt and the target path; no LLM call, no write.
        only: optional case-insensitive substring filter matched against
            ``"<event_id> <name>"``.

    Returns:
        ``(message, ok)``. ``ok`` is true only when the event was curated
        (or would have been, in dry-run mode).

    Raises:
        RuntimeError: propagated from ``call_sonnet``.
        ValueError: propagated from ``load_yaml_body``.
    """
    if only and only.lower() not in (event_id.lower() + " " + name.lower()):
        return ("skipped (not matched by --only)", False)

    path = EVENTS_DIR / f"{event_id}.md"
    existing_fm: dict = {}
    existing_body: str = ""
    if path.exists():
        existing_fm, existing_body = load_yaml_body(path)
        if existing_fm.get("summary_status") == "curated":
            return ("skipped (already curated)", False)

    prompt = PROMPT_TEMPLATE.format(
        event_id=event_id, name=name, date=date, location=location,
        cls=cls, observers=observers,
    )
    if dry_run:
        # The usage text promises "print prompt + would-write" for --dry-run.
        print(f"  [dry-run] prompt for {event_id}:\n{prompt}\n", flush=True)
    else:
        print(f"  → calling sonnet for {event_id} ...", flush=True)

    out = call_sonnet(prompt, dry_run=dry_run)
    narr_en = _clean_text(out.get("narrative_summary"))
    narr_pt = _clean_text(out.get("narrative_summary_pt_br"))
    if not narr_en or not narr_pt:
        return (f"empty output (en={len(narr_en)}, pt={len(narr_pt)})", False)

    # Build/refresh frontmatter
    fm = _build_frontmatter(event_id, name, date, location, cls,
                            existing_fm, narr_en, narr_pt)

    # An existing non-blank body is kept verbatim; only new or empty files
    # get the generated two-language body.
    body = existing_body if existing_body.strip() else (
        f"# {name}\n\n## Description (EN)\n\n{narr_en}\n\n## Descrição (PT-BR)\n\n{narr_pt}\n"
    )

    if dry_run:
        print(f"  [dry-run] would write {path}", flush=True)
        return ("ok (dry)", True)

    EVENTS_DIR.mkdir(parents=True, exist_ok=True)
    write_yaml_body(path, fm, body)
    return ("ok", True)


def main() -> int:
    """Run the curation over every anchor event and log the outcome.

    A failure on one event is reported and the loop continues, so events
    already written still get their log entry.

    Returns:
        0 when no event failed, 1 otherwise.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--only", default=None, help="filter anchor events by substring match")
    p.add_argument("--dry-run", action="store_true")
    args = p.parse_args()

    print(f"Anchor events: {len(ANCHOR_EVENTS)}")
    done = 0
    failed = 0
    for ev in ANCHOR_EVENTS:
        try:
            msg, ok = upsert_anchor(*ev, dry_run=args.dry_run, only=args.only)
        except (RuntimeError, ValueError, OSError) as e:
            failed += 1
            print(f"  ✗ {ev[0]} — error: {e}")
            continue
        sign = "✓" if ok else "·"
        print(f"  {sign} {ev[0]} — {msg}")
        if ok:
            done += 1

    if not args.dry_run and done > 0:
        LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
        with LOG_PATH.open("a", encoding="utf-8") as f:
            f.write(
                f"\n## {utc_iso()} · CURATE_ANCHOR_EVENTS\n"
                f"- script: scripts/synthesize/01_anchor_events.py\n"
                f"- curated: {done}/{len(ANCHOR_EVENTS)}\n"
                f"- model: claude-sonnet (via CLAUDE_CODE_OAUTH_TOKEN)\n"
            )
    print(f"\nCurated: {done}/{len(ANCHOR_EVENTS)}")
    if failed:
        print(f"Failed: {failed}/{len(ANCHOR_EVENTS)}")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())