274 lines
13 KiB
Python
274 lines
13 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
"""
|
|||
|
|
01_anchor_events.py — Seed the 20 anchor UAP events with curated bilingual
|
|||
|
|
narrative summaries via Claude Code OAuth subprocess (Sonnet).
|
|||
|
|
|
|||
|
|
Anchor list comes from ADR-003 (Phase 0). Each event:
|
|||
|
|
- Gets/creates wiki/entities/events/EV-<date>-<slug>.md
|
|||
|
|
- Frontmatter: summary_status=curated, summary_confidence=high,
|
|||
|
|
narrative_summary=<EN>, narrative_summary_pt_br=<PT-BR>
|
|||
|
|
- Body untouched if file already exists with manual edits.
|
|||
|
|
|
|||
|
|
Idempotent: re-run skips events where summary_status == 'curated'.
|
|||
|
|
|
|||
|
|
Usage:
|
|||
|
|
./01_anchor_events.py # all anchors
|
|||
|
|
./01_anchor_events.py --only roswell # one event (substring match)
|
|||
|
|
./01_anchor_events.py --dry-run # print prompt + would-write, no LLM call
|
|||
|
|
"""
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import argparse
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
import subprocess
|
|||
|
|
import sys
|
|||
|
|
from datetime import datetime, timezone
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
import yaml
|
|||
|
|
except ImportError:
|
|||
|
|
sys.stderr.write("pip3 install pyyaml\n")
|
|||
|
|
sys.exit(1)
|
|||
|
|
|
|||
|
|
# Filesystem layout. UFO_ROOT is the grandparent of the directory that contains
# this script (parents[0] is the script's own directory).
UFO_ROOT = Path(__file__).resolve().parents[2]
# Directory holding one Markdown file per event, named <event_id>.md.
EVENTS_DIR = UFO_ROOT / "wiki" / "entities" / "events"
# Append-only run log; main() adds one section per non-dry run that curated something.
LOG_PATH = UFO_ROOT / "wiki" / "log.md"
|
|||
|
|
|
|||
|
|
# Each tuple = (event_id, canonical_name_en, date_start, primary_location, event_class, observers_hint)
# The field order matches the positional parameters of upsert_anchor(), which
# main() calls as upsert_anchor(*ev, ...).
# date_start uses YYYY-MM-DD when known, YYYY-MM when the day is unknown, and YYYY
# when only the year is known. The "XX" placeholders appear only inside event_id.
# event_id doubles as the output file name (<event_id>.md).
ANCHOR_EVENTS = [
    ("EV-1897-04-17-aurora-airship-crash", "Aurora Airship Crash", "1897-04-17",
     "Aurora, Texas, USA", "uap-encounter",
     "Local townspeople and newspaper reporters (Dallas Morning News, 17 abr 1897)"),
    ("EV-1944-XX-XX-foo-fighters-european-theater", "Foo Fighters (European Theater)", "1944",
     "Western Front, Europe", "uap-encounter",
     "Allied bomber crews of the 415th Night Fighter Squadron and others"),
    ("EV-1947-06-21-maury-island-incident", "Maury Island Incident", "1947-06-21",
     "Puget Sound, Washington, USA", "uap-encounter",
     "Harold Dahl, Fred Crisman, Kenneth Arnold (later investigator)"),
    ("EV-1947-06-24-kenneth-arnold-mount-rainier", "Kenneth Arnold Mount Rainier Sighting", "1947-06-24",
     "Mount Rainier, Washington, USA", "uap-encounter",
     "Kenneth Arnold, civilian pilot"),
    ("EV-1947-07-08-roswell-incident", "Roswell Incident", "1947-07-08",
     "Roswell, New Mexico, USA", "uap-encounter",
     "USAAF 509th Bombardment Group, William Brazel, Major Jesse Marcel"),
    ("EV-1948-01-07-mantell-crash", "Mantell UFO Incident", "1948-01-07",
     "Franklin, Kentucky, USA", "uap-related-fatality",
     "Captain Thomas Mantell, Kentucky Air National Guard"),
    ("EV-1948-07-24-chiles-whitted-encounter", "Chiles-Whitted UFO Encounter", "1948-07-24",
     "Montgomery, Alabama, USA", "uap-encounter",
     "Eastern Airlines pilots Clarence Chiles and John Whitted"),
    ("EV-1952-09-XX-operation-mainbrace-sightings", "Operation Mainbrace UAP Sightings", "1952-09",
     "North Atlantic / Scandinavia", "uap-encounter",
     "NATO naval forces, RAF and USAF crews"),
    ("EV-1959-06-26-father-gill-papua-encounter", "Father Gill Papua Encounter", "1959-06-26",
     "Boianai, Papua New Guinea", "uap-encounter",
     "Reverend William Gill and 37 mission staff and locals"),
    ("EV-1964-04-24-lonnie-zamora-socorro", "Lonnie Zamora Socorro Landing", "1964-04-24",
     "Socorro, New Mexico, USA", "uap-encounter",
     "Police Sergeant Lonnie Zamora"),
    ("EV-1966-04-06-westall-school-encounter", "Westall School Encounter", "1966-04-06",
     "Clayton South, Victoria, Australia", "uap-encounter",
     "Over 200 students and teachers of Westall High School"),
    ("EV-1975-11-05-travis-walton-abduction", "Travis Walton Abduction", "1975-11-05",
     "Apache–Sitgreaves National Forest, Arizona, USA", "uap-abduction-claim",
     "Travis Walton and logging crew of six"),
    ("EV-1977-09-XX-operacao-prato", "Operação Prato", "1977-09",
     "Ilha de Colares, Pará, Brasil", "uap-encounter",
     "Força Aérea Brasileira (FAB), Captain Uyrangê Hollanda and local residents"),
    ("EV-1980-12-27-rendlesham-forest-incident", "Rendlesham Forest Incident", "1980-12-27",
     "Rendlesham Forest, Suffolk, UK (RAF Woodbridge / RAF Bentwaters)", "uap-encounter",
     "USAF personnel including Lt Col Charles Halt, Sgt Jim Penniston, John Burroughs"),
    ("EV-1980-12-29-cash-landrum-incident", "Cash-Landrum Incident", "1980-12-29",
     "Dayton, Texas, USA", "uap-related-injury",
     "Betty Cash, Vickie Landrum, Colby Landrum"),
    # NOTE(review): this ID keeps the non-ASCII "são" in the slug (and therefore in
    # the file name), whereas "operacao-prato" above is ASCII-folded. Confirm which
    # convention is intended before renaming; changing the ID changes the file path.
    ("EV-1986-05-19-são-paulo-night-of-the-ufos", "São Paulo Noite Oficial dos OVNIs", "1986-05-19",
     "Costa do Brasil / São José dos Campos, SP", "uap-encounter",
     "FAB pilots flying Mirage III and F-5E intercepts, Brigadeiro Octavio Moreira Lima briefing"),
    ("EV-1989-11-XX-belgian-wave", "Belgian UFO Wave", "1989-11",
     "Belgium (multiple sites)", "uap-encounter",
     "Belgian Air Force, gendarmerie and over 13,500 civilian witnesses"),
    ("EV-1997-03-13-phoenix-lights", "Phoenix Lights", "1997-03-13",
     "Phoenix, Arizona, USA (and southern Arizona)", "uap-encounter",
     "Thousands of civilians, Governor Fife Symington (later)"),
    ("EV-2004-11-14-nimitz-tic-tac", "Nimitz Tic Tac Incident", "2004-11-14",
     "USS Nimitz Carrier Strike Group, off San Diego coast", "uap-encounter",
     "USN F/A-18F crews Cdr David Fravor, Lt Cdr Jim Slaight, Lt Cdr Chad Underwood; USS Princeton radar (Senior Chief Kevin Day)"),
    ("EV-2017-12-16-aatip-disclosure", "AATIP Public Disclosure", "2017-12-16",
     "New York / Washington, D.C., USA", "uap-disclosure-event",
     "New York Times reporting; Luis Elizondo, Harry Reid, Robert Bigelow"),
]
|
|||
|
|
|
|||
|
|
# Voice rules (Holmes–Watson, fact-dense, no hype).
# Filled with str.format() in upsert_anchor() using the fields event_id, name,
# date, location, cls and observers. The braces of the JSON example are doubled
# so that format() emits them literally instead of treating them as fields.
PROMPT_TEMPLATE = """You are writing a curated encyclopedic event card for an investigative UAP/UFO wiki ("The Disclosure Bureau"). Voice rules:

- Holmes–Watson narrator: precise, fact-dense, no hype, no breathless language.
- Open with what happened, where, when. Then who observed it. Then what made it remarkable. Optionally, what the official record / later investigations concluded.
- 3–6 sentences. No editorial speculation beyond what is well-documented.
- Use the *original language for verbatim quotes*; otherwise English for the EN summary and Brazilian Portuguese (pt-br with full UTF-8 accents) for the PT-BR summary. Do NOT translate already-Portuguese proper names ("Operação Prato" stays as-is in EN too).
- Avoid the words "alegadamente"/"allegedly" unless it's genuinely contested. Be honest about uncertainty when warranted.
- Never include sentences like "Will be enriched in Phase N" or any placeholder — this is the final text.

EVENT TO DOCUMENT:
- ID: {event_id}
- Canonical name: {name}
- Date: {date}
- Primary location: {location}
- Class: {cls}
- Known observers / parties: {observers}

OUTPUT (STRICT JSON, no markdown fences, no commentary):
{{
"narrative_summary": "<EN, 3-6 sentences>",
"narrative_summary_pt_br": "<PT-BR brasileiro, 3-6 sentences>"
}}"""
|
|||
|
|
|
|||
|
|
|
|||
|
|
def utc_iso() -> str:
    """Return the current UTC time as a second-resolution ISO-8601 string ending in "Z"."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _extract_json_object(text: str) -> dict:
    """Return the first JSON object in *text* that carries a "narrative_summary" key.

    Every "{" is tried as a possible start and decoded with a real JSON parser,
    so braces or quotes inside the summaries cannot truncate the match.

    Raises:
        RuntimeError: if no such object can be decoded from *text*.
    """
    decoder = json.JSONDecoder()
    pos = text.find("{")
    while pos != -1:
        try:
            candidate, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "narrative_summary" in candidate:
            return candidate
        pos = text.find("{", pos + 1)
    raise RuntimeError(f"no JSON object in claude output: {text[:300]}")


def call_sonnet(prompt: str, dry_run: bool = False) -> dict:
    """Run `claude -p` with *prompt* on stdin and return the model's JSON answer as a dict.

    The CLI is expected to authenticate on its own (the original note mentions the
    CLAUDE_CODE_OAUTH_TOKEN environment variable); nothing is configured here.

    Args:
        prompt: Full prompt text sent on the subprocess's stdin.
        dry_run: When true, no subprocess is started and a placeholder dict is returned.

    Returns:
        The decoded JSON object produced by the model.

    Raises:
        RuntimeError: the CLI is missing, times out, exits non-zero, prints an
            envelope that is not a JSON object, or its answer contains no usable
            JSON object.
    """
    if dry_run:
        return {"narrative_summary": "[dry-run placeholder]", "narrative_summary_pt_br": "[dry-run placeholder]"}

    try:
        res = subprocess.run(
            ["claude", "-p", "--model", "sonnet", "--output-format", "json"],
            input=prompt,
            capture_output=True,
            text=True,
            # The prompt and the answer contain accented characters; do not
            # depend on the locale's default encoding.
            encoding="utf-8",
            timeout=180,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError("claude subprocess timed out after 180s") from exc
    except FileNotFoundError as exc:
        raise RuntimeError("claude CLI not found on PATH") from exc

    if res.returncode != 0:
        raise RuntimeError(f"claude exit {res.returncode}: {res.stderr[:300]}")

    # claude --output-format json returns wrapped envelope; extract `result`.
    try:
        env = json.loads(res.stdout)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"unparseable claude stdout: {exc} :: {res.stdout[:300]}") from exc
    if not isinstance(env, dict):
        raise RuntimeError(f"unexpected claude envelope type {type(env).__name__}: {res.stdout[:300]}")

    txt = env.get("result") or env.get("response") or env.get("content") or ""
    if not isinstance(txt, str):
        raise RuntimeError(f"unexpected claude result type {type(txt).__name__}")

    # Strip one wrapping code fence, anchored to the very start and end of the
    # text so fences that appear mid-text are left alone.
    txt = re.sub(r"\A```(?:json)?\s*|\s*```\Z", "", txt.strip()).strip()

    # Try a direct parse; otherwise look for an embedded object.
    try:
        parsed = json.loads(txt)
    except json.JSONDecodeError:
        return _extract_json_object(txt)
    if isinstance(parsed, dict):
        return parsed
    # Valid JSON but not an object (list, string, ...): look inside the text
    # instead, which raises RuntimeError if nothing matches.
    return _extract_json_object(txt)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def load_yaml_body(path: Path) -> tuple[dict, str]:
    """Split a Markdown file into its YAML frontmatter mapping and its body.

    Frontmatter is recognised only when the first line is exactly `---` and a
    later line is exactly `---`. Fences are matched on whole lines so a `---`
    embedded in a YAML value does not end the frontmatter early.

    Args:
        path: File to read (UTF-8).

    Returns:
        `(frontmatter, body)`. When the file has no frontmatter, or the opening
        fence is never closed, the result is `({}, <entire file text>)` so no
        text is lost. Leading newlines of the body are stripped.

    Raises:
        ValueError: if the frontmatter parses to something other than a mapping.
    """
    raw = path.read_text(encoding="utf-8")
    lines = raw.splitlines(keepends=True)
    if not lines or lines[0].rstrip() != "---":
        return {}, raw

    # Find the closing fence; rstrip() only, so an indented "---" inside a
    # block scalar is not mistaken for it.
    closing = next(
        (idx for idx in range(1, len(lines)) if lines[idx].rstrip() == "---"),
        None,
    )
    if closing is None:
        # Unterminated frontmatter: treat the whole file as body rather than
        # slicing with a -1 offset.
        return {}, raw

    fm = yaml.safe_load("".join(lines[1:closing])) or {}
    if not isinstance(fm, dict):
        raise ValueError(f"frontmatter of {path} is not a mapping: {type(fm).__name__}")
    body = "".join(lines[closing + 1:]).lstrip("\n")
    return fm, body
|
|||
|
|
|
|||
|
|
|
|||
|
|
def write_yaml_body(path: Path, fm: dict, body: str) -> None:
    """Write *fm* as a `---`-fenced YAML frontmatter block followed by *body*.

    Keys are emitted in insertion order, in block style, with non-ASCII
    characters left unescaped. One newline separates the closing fence from
    the body unless the body already starts with one.
    """
    front = yaml.dump(fm, allow_unicode=True, sort_keys=False, default_flow_style=False)
    pieces = ["---\n", front, "---\n"]
    if not body.startswith("\n"):
        pieces.append("\n")
    pieces.append(body)
    path.write_text("".join(pieces), encoding="utf-8")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def _as_text(value: object) -> str:
    """Return *value* stripped when it is a string, otherwise an empty string."""
    return value.strip() if isinstance(value, str) else ""


def upsert_anchor(event_id: str, name: str, date: str, location: str, cls: str, observers: str,
                  dry_run: bool, only: str | None) -> tuple[str, bool]:
    """Create or refresh the wiki file for one anchor event with curated summaries.

    Args:
        event_id: Event identifier; also the output file name (`<event_id>.md`).
        name: Canonical event name.
        date: Start date string, stored as `date_start`.
        location: Primary location.
        cls: Event class, stored as `event_class`.
        observers: Free-text observer hint; used only inside the prompt.
        dry_run: When true, print the prompt and the target path, call no model
            and write nothing.
        only: Optional case-insensitive substring filter matched against
            `event_id` and `name`.

    Returns:
        `(message, ok)` where `ok` is true only when the event was curated
        (or would have been, in dry-run mode).

    Raises:
        RuntimeError: propagated from `call_sonnet` when the model call fails.
    """
    if only and only.lower() not in (event_id.lower() + " " + name.lower()):
        return ("skipped (not matched by --only)", False)

    path = EVENTS_DIR / f"{event_id}.md"
    existing_fm: dict = {}
    existing_body: str = ""
    if path.exists():
        existing_fm, existing_body = load_yaml_body(path)
        if existing_fm.get("summary_status") == "curated":
            return ("skipped (already curated)", False)

    prompt = PROMPT_TEMPLATE.format(
        event_id=event_id, name=name, date=date, location=location, cls=cls, observers=observers,
    )

    if dry_run:
        # The usage text promises "print prompt + would-write, no LLM call".
        print(f" → [dry-run] prompt for {event_id}:\n{prompt}", flush=True)
        print(f" → [dry-run] would write {path}", flush=True)
        return ("ok (dry)", True)

    print(f" → calling sonnet for {event_id} ...", flush=True)
    out = call_sonnet(prompt)
    if not isinstance(out, dict):
        return (f"unexpected output type {type(out).__name__}", False)
    # The model may return null or a non-string value; treat those as empty.
    narr_en = _as_text(out.get("narrative_summary"))
    narr_pt = _as_text(out.get("narrative_summary_pt_br"))
    if not narr_en or not narr_pt:
        return (f"empty output (en={len(narr_en)}, pt={len(narr_pt)})", False)

    # Build/refresh frontmatter
    fm = {
        "schema_version": "0.1.0",
        "type": "entity",
        "entity_class": "event",
        "event_id": event_id,
        "canonical_name": name,
        "aliases": existing_fm.get("aliases") or [name],
        "event_class": cls,
        "date_start": date,
        "date_end": existing_fm.get("date_end") or date,
        "date_confidence": "high",
        "primary_location": location,
        "observers": existing_fm.get("observers") or [],
        "uap_objects": existing_fm.get("uap_objects") or [],
        "documented_in": existing_fm.get("documented_in") or [],
        "total_mentions": existing_fm.get("total_mentions") or 0,
        "documents_count": existing_fm.get("documents_count") or 0,
        "narrative_summary": narr_en,
        "narrative_summary_pt_br": narr_pt,
        "summary_status": "curated",
        "summary_confidence": "high",
        "enrichment_status": existing_fm.get("enrichment_status") or "none",
        "external_sources": existing_fm.get("external_sources") or [],
        "last_ingest": existing_fm.get("last_ingest") or utc_iso(),
        "last_lint": utc_iso(),
        "wiki_version": "0.1.0",
    }
    # Keep any extra keys that were already in the file (appended after the
    # schema keys) so manual frontmatter additions are not lost on rewrite.
    for key, value in existing_fm.items():
        fm.setdefault(key, value)

    # An existing non-empty body is kept verbatim; otherwise a default one is generated.
    body = existing_body if existing_body.strip() else (
        f"# {name}\n\n## Description (EN)\n\n{narr_en}\n\n## Descrição (PT-BR)\n\n{narr_pt}\n"
    )

    EVENTS_DIR.mkdir(parents=True, exist_ok=True)
    write_yaml_body(path, fm, body)
    return ("ok", True)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main() -> int:
    """Process every anchor event and append a summary entry to the wiki log.

    Command-line options:
        --only TEXT   process only events whose ID or name contains TEXT.
        --dry-run     make no model calls and write nothing.

    A failure on one event is reported on stderr and does not stop the rest,
    so events curated earlier in the run still get their log entry.

    Returns:
        Process exit code: 0 when no event raised an error, 1 otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--only", default=None, help="filter anchor events by substring match")
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()

    print(f"Anchor events: {len(ANCHOR_EVENTS)}")
    done = 0
    failed = 0
    for ev in ANCHOR_EVENTS:
        try:
            msg, ok = upsert_anchor(*ev, dry_run=args.dry_run, only=args.only)
        except Exception as exc:  # top-level boundary: report and keep going
            failed += 1
            print(f" ✗ {ev[0]} — error: {exc}", file=sys.stderr, flush=True)
            continue
        sign = "✓" if ok else "·"
        print(f" {sign} {ev[0]} — {msg}")
        if ok:
            done += 1

    if not args.dry_run and done > 0:
        with LOG_PATH.open("a", encoding="utf-8") as f:
            f.write(
                f"\n## {utc_iso()} · CURATE_ANCHOR_EVENTS\n"
                f"- script: scripts/synthesize/01_anchor_events.py\n"
                f"- curated: {done}/{len(ANCHOR_EVENTS)}\n"
                f"- model: claude-sonnet (via CLAUDE_CODE_OAUTH_TOKEN)\n"
            )
    print(f"\nCurated: {done}/{len(ANCHOR_EVENTS)}")
    return 1 if failed else 0
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Script entry point: run main() and use its return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())
|