disclosure-bureau/scripts/synthesize/01_anchor_events.py
guto 4459bd17e4 phase-0: kill stubs, ship 20 curated anchor events, configure SMTP
- scripts/03-dedup-entities.py: stop emitting placeholder narrative ("Stub. Will
  be enriched in Phase 7"); write summary_status=none + null fields instead.
- scripts/maintain/41_strip_stubs.py: idempotent migration that cleaned the
  22,096 entity .md files (now zero stub strings in wiki/).
- scripts/synthesize/01_anchor_events.py: curated 20 anchor UAP events
  (Roswell, Nimitz Tic-Tac, Phoenix Lights, Operação Prato, AATIP, etc.) with
  bilingual Holmes-Watson narrative via claude -p --model sonnet
  (CLAUDE_CODE_OAUTH_TOKEN). All summary_status=curated, confidence=high.
- web/api/timeline + timeline-view: filter narrative-less events by default,
  render "curado" badge for hand-vetted ones, drop the date display alone.
- CLAUDE-schema-full.md: document the summary_status enum and the four states.
- docker-compose.yml: SMTP_HOST=mail.spacemail.com configured;
  GOTRUE_MAILER_AUTOCONFIRM flipped to false (real email confirmation working).
- .nirvana/outputs/.../systems-atelier/: 5 deliverables of the architecture
  audit that produced this roadmap.
2026-05-18 00:44:17 -03:00

273 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
01_anchor_events.py — Seed the 20 anchor UAP events with curated bilingual
narrative summaries via Claude Code OAuth subprocess (Sonnet).
Anchor list comes from ADR-003 (Phase 0). Each event:
- Gets/creates wiki/entities/events/EV-<date>-<slug>.md
- Frontmatter: summary_status=curated, summary_confidence=high,
narrative_summary=<EN>, narrative_summary_pt_br=<PT-BR>
- Body untouched if file already exists with manual edits.
Idempotent: re-run skips events where summary_status == 'curated'.
Usage:
./01_anchor_events.py # all anchors
./01_anchor_events.py --only roswell # one event (substring match)
./01_anchor_events.py --dry-run # print prompt + would-write, no LLM call
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
try:
import yaml
except ImportError:
sys.stderr.write("pip3 install pyyaml\n")
sys.exit(1)
# Project root: two directory levels above the folder that holds this script.
UFO_ROOT = Path(__file__).resolve().parents[2]
# Directory holding one Markdown file per event entity.
EVENTS_DIR = UFO_ROOT.joinpath("wiki", "entities", "events")
# Append-only run log; main() adds an entry after each non-dry run that wrote files.
LOG_PATH = UFO_ROOT.joinpath("wiki", "log.md")
# Each tuple = (event_id, canonical_name_en, date_start, primary_location, event_class, observers_hint)
# date_start uses YYYY-MM-DD when the day is known, YYYY-MM when only the month
# is known, and YYYY when only the year is known. The event_id (which is also
# the file name stem) pads the unknown parts with "XX" instead.
# observers_hint is free text that is only interpolated into the LLM prompt.
ANCHOR_EVENTS = [
    ("EV-1897-04-17-aurora-airship-crash", "Aurora Airship Crash", "1897-04-17",
     "Aurora, Texas, USA", "uap-encounter",
     "Local townspeople and newspaper reporters (Dallas Morning News, 17 abr 1897)"),
    ("EV-1944-XX-XX-foo-fighters-european-theater", "Foo Fighters (European Theater)", "1944",
     "Western Front, Europe", "uap-encounter",
     "Allied bomber crews of the 415th Night Fighter Squadron and others"),
    ("EV-1947-06-21-maury-island-incident", "Maury Island Incident", "1947-06-21",
     "Puget Sound, Washington, USA", "uap-encounter",
     "Harold Dahl, Fred Crisman, Kenneth Arnold (later investigator)"),
    ("EV-1947-06-24-kenneth-arnold-mount-rainier", "Kenneth Arnold Mount Rainier Sighting", "1947-06-24",
     "Mount Rainier, Washington, USA", "uap-encounter",
     "Kenneth Arnold, civilian pilot"),
    ("EV-1947-07-08-roswell-incident", "Roswell Incident", "1947-07-08",
     "Roswell, New Mexico, USA", "uap-encounter",
     "USAAF 509th Bombardment Group, William Brazel, Major Jesse Marcel"),
    ("EV-1948-01-07-mantell-crash", "Mantell UFO Incident", "1948-01-07",
     "Franklin, Kentucky, USA", "uap-related-fatality",
     "Captain Thomas Mantell, Kentucky Air National Guard"),
    ("EV-1948-07-24-chiles-whitted-encounter", "Chiles-Whitted UFO Encounter", "1948-07-24",
     "Montgomery, Alabama, USA", "uap-encounter",
     "Eastern Airlines pilots Clarence Chiles and John Whitted"),
    ("EV-1952-09-XX-operation-mainbrace-sightings", "Operation Mainbrace UAP Sightings", "1952-09",
     "North Atlantic / Scandinavia", "uap-encounter",
     "NATO naval forces, RAF and USAF crews"),
    ("EV-1959-06-26-father-gill-papua-encounter", "Father Gill Papua Encounter", "1959-06-26",
     "Boianai, Papua New Guinea", "uap-encounter",
     "Reverend William Gill and 37 mission staff and locals"),
    ("EV-1964-04-24-lonnie-zamora-socorro", "Lonnie Zamora Socorro Landing", "1964-04-24",
     "Socorro, New Mexico, USA", "uap-encounter",
     "Police Sergeant Lonnie Zamora"),
    ("EV-1966-04-06-westall-school-encounter", "Westall School Encounter", "1966-04-06",
     "Clayton South, Victoria, Australia", "uap-encounter",
     "Over 200 students and teachers of Westall High School"),
    # The forest name had lost its separator ("ApacheSitgreaves"); restored.
    ("EV-1975-11-05-travis-walton-abduction", "Travis Walton Abduction", "1975-11-05",
     "Apache-Sitgreaves National Forest, Arizona, USA", "uap-abduction-claim",
     "Travis Walton and logging crew of six"),
    ("EV-1977-09-XX-operacao-prato", "Operação Prato", "1977-09",
     "Ilha de Colares, Pará, Brasil", "uap-encounter",
     "Força Aérea Brasileira (FAB), Captain Uyrangê Hollanda and local residents"),
    ("EV-1980-12-27-rendlesham-forest-incident", "Rendlesham Forest Incident", "1980-12-27",
     "Rendlesham Forest, Suffolk, UK (RAF Woodbridge / RAF Bentwaters)", "uap-encounter",
     "USAF personnel including Lt Col Charles Halt, Sgt Jim Penniston, John Burroughs"),
    ("EV-1980-12-29-cash-landrum-incident", "Cash-Landrum Incident", "1980-12-29",
     "Dayton, Texas, USA", "uap-related-injury",
     "Betty Cash, Vickie Landrum, Colby Landrum"),
    # NOTE(review): this slug keeps the non-ASCII "ã" while "operacao-prato"
    # above is ASCII-folded. Left unchanged because the id doubles as the
    # on-disk file name — confirm before normalizing.
    ("EV-1986-05-19-são-paulo-night-of-the-ufos", "São Paulo Noite Oficial dos OVNIs", "1986-05-19",
     "Costa do Brasil / São José dos Campos, SP", "uap-encounter",
     "FAB pilots flying Mirage III and F-5E intercepts, Brigadeiro Octavio Moreira Lima briefing"),
    ("EV-1989-11-XX-belgian-wave", "Belgian UFO Wave", "1989-11",
     "Belgium (multiple sites)", "uap-encounter",
     "Belgian Air Force, gendarmerie and over 13,500 civilian witnesses"),
    ("EV-1997-03-13-phoenix-lights", "Phoenix Lights", "1997-03-13",
     "Phoenix, Arizona, USA (and southern Arizona)", "uap-encounter",
     "Thousands of civilians, Governor Fife Symington (later)"),
    ("EV-2004-11-14-nimitz-tic-tac", "Nimitz Tic Tac Incident", "2004-11-14",
     "USS Nimitz Carrier Strike Group, off San Diego coast", "uap-encounter",
     "USN F/A-18F crews Cdr David Fravor, Lt Cdr Jim Slaight, Lt Cdr Chad Underwood; USS Princeton radar (Senior Chief Kevin Day)"),
    ("EV-2017-12-16-aatip-disclosure", "AATIP Public Disclosure", "2017-12-16",
     "New York / Washington, D.C., USA", "uap-disclosure-event",
     "New York Times reporting; Luis Elizondo, Harry Reid, Robert Bigelow"),
]
# Voice rules (Holmes-Watson, fact-dense, no hype).
# The template is rendered with str.format(), so the literal braces of the JSON
# skeleton at the end are doubled. The sentence budget in the voice rules must
# agree with the "3-6 sentences" stated in the output skeleton.
PROMPT_TEMPLATE = """You are writing a curated encyclopedic event card for an investigative UAP/UFO wiki ("The Disclosure Bureau"). Voice rules:
- Holmes-Watson narrator: precise, fact-dense, no hype, no breathless language.
- Open with what happened, where, when. Then who observed it. Then what made it remarkable. Optionally, what the official record / later investigations concluded.
- 3-6 sentences. No editorial speculation beyond what is well-documented.
- Use the *original language for verbatim quotes*; otherwise English for the EN summary and Brazilian Portuguese (pt-br with full UTF-8 accents) for the PT-BR summary. Do NOT translate already-Portuguese proper names ("Operação Prato" stays as-is in EN too).
- Avoid the words "alegadamente"/"allegedly" unless it's genuinely contested. Be honest about uncertainty when warranted.
- Never include sentences like "Will be enriched in Phase N" or any placeholder — this is the final text.
EVENT TO DOCUMENT:
- ID: {event_id}
- Canonical name: {name}
- Date: {date}
- Primary location: {location}
- Class: {cls}
- Known observers / parties: {observers}
OUTPUT (STRICT JSON, no markdown fences, no commentary):
{{
"narrative_summary": "<EN, 3-6 sentences>",
"narrative_summary_pt_br": "<PT-BR brasileiro, 3-6 sentences>"
}}"""
def utc_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    moment = datetime.now(tz=timezone.utc)
    return f"{moment:%Y-%m-%dT%H:%M:%SZ}"
def _extract_json_object(text: str) -> dict:
    """Pull the JSON object carrying the narrative fields out of model text."""
    # Strip code fences if any.
    cleaned = re.sub(r"^```(?:json)?\s*|\s*```$", "", text.strip(), flags=re.MULTILINE).strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    # The model wrapped the object in commentary: try a real JSON decode at each
    # "{" instead of a regex, so a "}" inside a narrative cannot truncate it.
    decoder = json.JSONDecoder()
    start = cleaned.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(cleaned[start:])
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "narrative_summary" in candidate:
            return candidate
        start = cleaned.find("{", start + 1)
    raise RuntimeError(f"no JSON object in claude output: {cleaned[:300]}")


def call_sonnet(prompt: str, dry_run: bool = False, timeout: int = 180) -> dict:
    """Spawn `claude -p` subprocess (uses CLAUDE_CODE_OAUTH_TOKEN env) and return parsed JSON.

    Args:
        prompt: Full prompt text, sent to the subprocess on stdin.
        dry_run: When true, skip the subprocess and return placeholder text
            for both narrative fields.
        timeout: Seconds to wait for the subprocess before giving up.

    Returns:
        The JSON object produced by the model, as a dict.

    Raises:
        RuntimeError: If the CLI is missing, times out, exits non-zero, or
            its output does not contain a usable JSON object.
    """
    if dry_run:
        return {"narrative_summary": "[dry-run placeholder]", "narrative_summary_pt_br": "[dry-run placeholder]"}
    try:
        res = subprocess.run(
            ["claude", "-p", "--model", "sonnet", "--output-format", "json"],
            input=prompt,
            capture_output=True,
            text=True,
            # Prompts and answers carry accented text; do not depend on the locale.
            encoding="utf-8",
            timeout=timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as err:
        raise RuntimeError(f"claude subprocess timed out after {timeout}s") from err
    except FileNotFoundError as err:
        raise RuntimeError("claude CLI not found on PATH") from err
    if res.returncode != 0:
        raise RuntimeError(f"claude exit {res.returncode}: {res.stderr[:300]}")
    # claude --output-format json returns wrapped envelope; extract `result`.
    try:
        env = json.loads(res.stdout)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"unparseable claude stdout: {e} :: {res.stdout[:300]}") from e
    if not isinstance(env, dict):
        raise RuntimeError(f"unexpected claude envelope: {res.stdout[:300]}")
    txt = env.get("result") or env.get("response") or env.get("content") or ""
    if not isinstance(txt, str):
        raise RuntimeError(f"unexpected claude payload type: {type(txt).__name__}")
    return _extract_json_object(txt)
def load_yaml_body(path: Path) -> tuple[dict, str]:
    """Split a Markdown file into its YAML frontmatter and the remaining body.

    Args:
        path: File to read (UTF-8).

    Returns:
        ``(frontmatter, body)``. When the file does not start with ``---`` or
        has no closing ``---`` line, the frontmatter is ``{}`` and the body is
        the whole file, unchanged.

    Raises:
        ValueError: If the frontmatter parses to something other than a mapping.
    """
    raw = path.read_text(encoding="utf-8")
    if not raw.startswith("---"):
        return {}, raw
    # The closing fence must sit on a line of its own; a plain substring search
    # would also match "---" inside a YAML value, and returns -1 (corrupting
    # both slices) when the fence is missing.
    closing = re.compile(r"^---[ \t\r]*$", re.MULTILINE).search(raw, 3)
    if closing is None:
        return {}, raw
    fm = yaml.safe_load(raw[3:closing.start()].strip()) or {}
    if not isinstance(fm, dict):
        raise ValueError(f"frontmatter of {path} is not a YAML mapping")
    body = raw[closing.end():].lstrip("\n")
    return fm, body
def write_yaml_body(path: Path, fm: dict, body: str) -> None:
    """Write ``fm`` as a YAML frontmatter block followed by ``body`` to ``path`` (UTF-8).

    Keys keep their insertion order and non-ASCII text is written unescaped.
    One blank line separates the closing fence from the body; it is added
    only when the body does not already begin with a newline.
    """
    front = yaml.dump(fm, allow_unicode=True, sort_keys=False, default_flow_style=False)
    pieces = ["---\n", front, "---\n"]
    if not body.startswith("\n"):
        pieces.append("\n")
    pieces.append(body)
    path.write_text("".join(pieces), encoding="utf-8")
def upsert_anchor(event_id: str, name: str, date: str, location: str, cls: str, observers: str,
dry_run: bool, only: str | None) -> tuple[str, bool]:
if only and only.lower() not in (event_id.lower() + " " + name.lower()):
return ("skipped (not matched by --only)", False)
path = EVENTS_DIR / f"{event_id}.md"
existing_fm: dict = {}
existing_body: str = ""
if path.exists():
existing_fm, existing_body = load_yaml_body(path)
if existing_fm.get("summary_status") == "curated":
return ("skipped (already curated)", False)
prompt = PROMPT_TEMPLATE.format(
event_id=event_id, name=name, date=date, location=location, cls=cls, observers=observers,
)
print(f" → calling sonnet for {event_id} ...", flush=True)
out = call_sonnet(prompt, dry_run=dry_run)
narr_en = (out.get("narrative_summary") or "").strip()
narr_pt = (out.get("narrative_summary_pt_br") or "").strip()
if not narr_en or not narr_pt:
return (f"empty output (en={len(narr_en)}, pt={len(narr_pt)})", False)
# Build/refresh frontmatter
fm = {
"schema_version": "0.1.0",
"type": "entity",
"entity_class": "event",
"event_id": event_id,
"canonical_name": name,
"aliases": existing_fm.get("aliases") or [name],
"event_class": cls,
"date_start": date,
"date_end": existing_fm.get("date_end") or date,
"date_confidence": "high",
"primary_location": location,
"observers": existing_fm.get("observers") or [],
"uap_objects": existing_fm.get("uap_objects") or [],
"documented_in": existing_fm.get("documented_in") or [],
"total_mentions": existing_fm.get("total_mentions") or 0,
"documents_count": existing_fm.get("documents_count") or 0,
"narrative_summary": narr_en,
"narrative_summary_pt_br": narr_pt,
"summary_status": "curated",
"summary_confidence": "high",
"enrichment_status": existing_fm.get("enrichment_status") or "none",
"external_sources": existing_fm.get("external_sources") or [],
"last_ingest": existing_fm.get("last_ingest") or utc_iso(),
"last_lint": utc_iso(),
"wiki_version": "0.1.0",
}
body = existing_body if existing_body.strip() else (
f"# {name}\n\n## Description (EN)\n\n{narr_en}\n\n## Descrição (PT-BR)\n\n{narr_pt}\n"
)
if dry_run:
return ("ok (dry)", True)
EVENTS_DIR.mkdir(parents=True, exist_ok=True)
write_yaml_body(path, fm, body)
return ("ok", True)
def main() -> int:
    """Run the anchor-event curation over ANCHOR_EVENTS and report the outcome.

    Command-line flags: ``--only SUBSTR`` restricts the run to matching events,
    ``--dry-run`` makes no LLM call and writes nothing.

    Returns:
        Process exit code: 0 when no event raised an error, 1 otherwise.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--only", default=None, help="filter anchor events by substring match")
    p.add_argument("--dry-run", action="store_true")
    args = p.parse_args()
    print(f"Anchor events: {len(ANCHOR_EVENTS)}")
    done = 0
    failed = 0
    for ev in ANCHOR_EVENTS:
        # One failing event must not abort the run: files already written for
        # earlier events still need their log entry below.
        try:
            msg, ok = upsert_anchor(*ev, dry_run=args.dry_run, only=args.only)
        except (RuntimeError, ValueError, OSError, yaml.YAMLError) as err:
            msg, ok = f"FAILED: {err}", False
            failed += 1
            sign = "✗"
        else:
            sign = "✓" if ok else "·"
        print(f" {sign} {ev[0]} — {msg}")
        if ok:
            done += 1
    if not args.dry_run and done > 0:
        with LOG_PATH.open("a", encoding="utf-8") as f:
            f.write(
                f"\n## {utc_iso()} · CURATE_ANCHOR_EVENTS\n"
                f"- script: scripts/synthesize/01_anchor_events.py\n"
                f"- curated: {done}/{len(ANCHOR_EVENTS)}\n"
                f"- model: claude-sonnet (via CLAUDE_CODE_OAUTH_TOKEN)\n"
            )
    print(f"\nCurated: {done}/{len(ANCHOR_EVENTS)}")
    if failed:
        print(f"Failed: {failed}/{len(ANCHOR_EVENTS)}")
    return 1 if failed else 0


if __name__ == "__main__":
    sys.exit(main())