- scripts/03-dedup-entities.py: stop emitting placeholder narrative ("Stub. Will
be enriched in Phase 7"); write summary_status=none + null fields instead.
- scripts/maintain/41_strip_stubs.py: idempotent migration that cleaned the
22,096 entity .md files (now zero stub strings in wiki/).
- scripts/synthesize/01_anchor_events.py: curated 20 anchor UAP events
(Roswell, Nimitz Tic-Tac, Phoenix Lights, Operação Prato, AATIP, etc.) with
bilingual Holmes-Watson narrative via claude -p --model sonnet
(CLAUDE_CODE_OAUTH_TOKEN). All summary_status=curated, confidence=high.
- web/api/timeline + timeline-view: filter narrative-less events by default,
render "curado" badge for hand-vetted ones, drop the date display alone.
- CLAUDE-schema-full.md: document the summary_status enum and the four states.
- docker-compose.yml: SMTP_HOST=mail.spacemail.com configured;
GOTRUE_MAILER_AUTOCONFIRM flipped to false (real email confirmation working).
- .nirvana/outputs/.../systems-atelier/: 5 deliverables of the architecture
audit that produced this roadmap.
273 lines
13 KiB
Python
273 lines
13 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
01_anchor_events.py — Seed the 20 anchor UAP events with curated bilingual
|
||
narrative summaries via Claude Code OAuth subprocess (Sonnet).
|
||
|
||
Anchor list comes from ADR-003 (Phase 0). Each event:
|
||
- Gets/creates wiki/entities/events/EV-<date>-<slug>.md
|
||
- Frontmatter: summary_status=curated, summary_confidence=high,
|
||
narrative_summary=<EN>, narrative_summary_pt_br=<PT-BR>
|
||
- Body untouched if file already exists with manual edits.
|
||
|
||
Idempotent: re-run skips events where summary_status == 'curated'.
|
||
|
||
Usage:
|
||
./01_anchor_events.py # all anchors
|
||
./01_anchor_events.py --only roswell # one event (substring match)
|
||
./01_anchor_events.py --dry-run # print prompt + would-write, no LLM call
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import re
|
||
import subprocess
|
||
import sys
|
||
from datetime import datetime, timezone
|
||
from pathlib import Path
|
||
|
||
try:
|
||
import yaml
|
||
except ImportError:
|
||
sys.stderr.write("pip3 install pyyaml\n")
|
||
sys.exit(1)
|
||
|
||
# Project root. This script is logged as scripts/synthesize/01_anchor_events.py,
# so parents[2] of its resolved path is two levels above its own directory.
UFO_ROOT = Path(__file__).resolve().parents[2]
# Directory that holds one Markdown file per event entity (<event_id>.md).
EVENTS_DIR = UFO_ROOT / "wiki" / "entities" / "events"
# Wiki log file; main() appends a CURATE_ANCHOR_EVENTS entry to it after a run
# that curated at least one event.
LOG_PATH = UFO_ROOT / "wiki" / "log.md"
|
||
|
||
# Each tuple = (event_id, canonical_name_en, date_start, primary_location, event_class, observers_hint)
# date_start uses YYYY-MM-DD when the full date is known, YYYY-MM when the day
# is unknown, and YYYY when only the year is known. The event_id slug pads the
# unknown parts with "XX" instead (e.g. "EV-1952-09-XX-...", "EV-1944-XX-XX-...").
# The tuple order matches the positional parameters of upsert_anchor(), which
# main() calls as upsert_anchor(*ev, ...). event_id is also the file name stem.
# NOTE(review): the 1986 slug keeps the "ã" ("são-paulo") while the 1977 slug is
# ASCII-folded ("operacao-prato") — confirm which convention the wiki expects
# before renaming, since changing an ID changes the target file name.
ANCHOR_EVENTS = [
    ("EV-1897-04-17-aurora-airship-crash", "Aurora Airship Crash", "1897-04-17",
     "Aurora, Texas, USA", "uap-encounter",
     "Local townspeople and newspaper reporters (Dallas Morning News, 17 abr 1897)"),
    ("EV-1944-XX-XX-foo-fighters-european-theater", "Foo Fighters (European Theater)", "1944",
     "Western Front, Europe", "uap-encounter",
     "Allied bomber crews of the 415th Night Fighter Squadron and others"),
    ("EV-1947-06-21-maury-island-incident", "Maury Island Incident", "1947-06-21",
     "Puget Sound, Washington, USA", "uap-encounter",
     "Harold Dahl, Fred Crisman, Kenneth Arnold (later investigator)"),
    ("EV-1947-06-24-kenneth-arnold-mount-rainier", "Kenneth Arnold Mount Rainier Sighting", "1947-06-24",
     "Mount Rainier, Washington, USA", "uap-encounter",
     "Kenneth Arnold, civilian pilot"),
    ("EV-1947-07-08-roswell-incident", "Roswell Incident", "1947-07-08",
     "Roswell, New Mexico, USA", "uap-encounter",
     "USAAF 509th Bombardment Group, William Brazel, Major Jesse Marcel"),
    ("EV-1948-01-07-mantell-crash", "Mantell UFO Incident", "1948-01-07",
     "Franklin, Kentucky, USA", "uap-related-fatality",
     "Captain Thomas Mantell, Kentucky Air National Guard"),
    ("EV-1948-07-24-chiles-whitted-encounter", "Chiles-Whitted UFO Encounter", "1948-07-24",
     "Montgomery, Alabama, USA", "uap-encounter",
     "Eastern Airlines pilots Clarence Chiles and John Whitted"),
    ("EV-1952-09-XX-operation-mainbrace-sightings", "Operation Mainbrace UAP Sightings", "1952-09",
     "North Atlantic / Scandinavia", "uap-encounter",
     "NATO naval forces, RAF and USAF crews"),
    ("EV-1959-06-26-father-gill-papua-encounter", "Father Gill Papua Encounter", "1959-06-26",
     "Boianai, Papua New Guinea", "uap-encounter",
     "Reverend William Gill and 37 mission staff and locals"),
    ("EV-1964-04-24-lonnie-zamora-socorro", "Lonnie Zamora Socorro Landing", "1964-04-24",
     "Socorro, New Mexico, USA", "uap-encounter",
     "Police Sergeant Lonnie Zamora"),
    ("EV-1966-04-06-westall-school-encounter", "Westall School Encounter", "1966-04-06",
     "Clayton South, Victoria, Australia", "uap-encounter",
     "Over 200 students and teachers of Westall High School"),
    ("EV-1975-11-05-travis-walton-abduction", "Travis Walton Abduction", "1975-11-05",
     "Apache–Sitgreaves National Forest, Arizona, USA", "uap-abduction-claim",
     "Travis Walton and logging crew of six"),
    ("EV-1977-09-XX-operacao-prato", "Operação Prato", "1977-09",
     "Ilha de Colares, Pará, Brasil", "uap-encounter",
     "Força Aérea Brasileira (FAB), Captain Uyrangê Hollanda and local residents"),
    ("EV-1980-12-27-rendlesham-forest-incident", "Rendlesham Forest Incident", "1980-12-27",
     "Rendlesham Forest, Suffolk, UK (RAF Woodbridge / RAF Bentwaters)", "uap-encounter",
     "USAF personnel including Lt Col Charles Halt, Sgt Jim Penniston, John Burroughs"),
    ("EV-1980-12-29-cash-landrum-incident", "Cash-Landrum Incident", "1980-12-29",
     "Dayton, Texas, USA", "uap-related-injury",
     "Betty Cash, Vickie Landrum, Colby Landrum"),
    ("EV-1986-05-19-são-paulo-night-of-the-ufos", "São Paulo Noite Oficial dos OVNIs", "1986-05-19",
     "Costa do Brasil / São José dos Campos, SP", "uap-encounter",
     "FAB pilots flying Mirage III and F-5E intercepts, Brigadeiro Octavio Moreira Lima briefing"),
    ("EV-1989-11-XX-belgian-wave", "Belgian UFO Wave", "1989-11",
     "Belgium (multiple sites)", "uap-encounter",
     "Belgian Air Force, gendarmerie and over 13,500 civilian witnesses"),
    ("EV-1997-03-13-phoenix-lights", "Phoenix Lights", "1997-03-13",
     "Phoenix, Arizona, USA (and southern Arizona)", "uap-encounter",
     "Thousands of civilians, Governor Fife Symington (later)"),
    ("EV-2004-11-14-nimitz-tic-tac", "Nimitz Tic Tac Incident", "2004-11-14",
     "USS Nimitz Carrier Strike Group, off San Diego coast", "uap-encounter",
     "USN F/A-18F crews Cdr David Fravor, Lt Cdr Jim Slaight, Lt Cdr Chad Underwood; USS Princeton radar (Senior Chief Kevin Day)"),
    ("EV-2017-12-16-aatip-disclosure", "AATIP Public Disclosure", "2017-12-16",
     "New York / Washington, D.C., USA", "uap-disclosure-event",
     "New York Times reporting; Luis Elizondo, Harry Reid, Robert Bigelow"),
]
|
||
|
||
# Voice rules (Holmes–Watson, fact-dense, no hype).
|
||
PROMPT_TEMPLATE = """You are writing a curated encyclopedic event card for an investigative UAP/UFO wiki ("The Disclosure Bureau"). Voice rules:
|
||
|
||
- Holmes–Watson narrator: precise, fact-dense, no hype, no breathless language.
|
||
- Open with what happened, where, when. Then who observed it. Then what made it remarkable. Optionally, what the official record / later investigations concluded.
|
||
- 3–6 sentences. No editorial speculation beyond what is well-documented.
|
||
- Use the *original language for verbatim quotes*; otherwise English for the EN summary and Brazilian Portuguese (pt-br with full UTF-8 accents) for the PT-BR summary. Do NOT translate already-Portuguese proper names ("Operação Prato" stays as-is in EN too).
|
||
- Avoid the words "alegadamente"/"allegedly" unless it's genuinely contested. Be honest about uncertainty when warranted.
|
||
- Never include sentences like "Will be enriched in Phase N" or any placeholder — this is the final text.
|
||
|
||
EVENT TO DOCUMENT:
|
||
- ID: {event_id}
|
||
- Canonical name: {name}
|
||
- Date: {date}
|
||
- Primary location: {location}
|
||
- Class: {cls}
|
||
- Known observers / parties: {observers}
|
||
|
||
OUTPUT (STRICT JSON, no markdown fences, no commentary):
|
||
{{
|
||
"narrative_summary": "<EN, 3-6 sentences>",
|
||
"narrative_summary_pt_br": "<PT-BR brasileiro, 3-6 sentences>"
|
||
}}"""
|
||
|
||
|
||
def utc_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.strftime(stamp_format)
|
||
|
||
|
||
def _find_summary_object(text: str) -> "dict | None":
    """Return the first JSON object in *text* that has a "narrative_summary" key, or None."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode understands nesting and braces inside string values,
            # which a "{...}" regular expression cannot do reliably.
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict) and "narrative_summary" in candidate:
            return candidate
        start = text.find("{", start + 1)
    return None


def call_sonnet(prompt: str, dry_run: bool = False) -> dict:
    """Ask Claude Sonnet for the summaries and return the parsed JSON object.

    Spawns ``claude -p --model sonnet --output-format json`` with *prompt* on
    stdin (the CLI uses the CLAUDE_CODE_OAUTH_TOKEN environment variable),
    unwraps the CLI's JSON envelope and parses the model's reply.

    Args:
        prompt: Complete prompt text sent to the CLI on stdin.
        dry_run: When true, no subprocess is started and placeholder
            summaries are returned instead.

    Returns:
        The JSON object produced by the model. The prompt asks for the keys
        ``narrative_summary`` and ``narrative_summary_pt_br``; callers must
        still check that they are present and non-empty.

    Raises:
        RuntimeError: The CLI could not be launched, timed out, exited
            non-zero, reported an error, or its output did not contain a
            JSON object.
    """
    if dry_run:
        return {"narrative_summary": "[dry-run placeholder]", "narrative_summary_pt_br": "[dry-run placeholder]"}
    try:
        res = subprocess.run(
            ["claude", "-p", "--model", "sonnet", "--output-format", "json"],
            input=prompt,
            capture_output=True,
            text=True,
            # The prompt and the reply contain accented characters; do not
            # depend on the locale's default encoding.
            encoding="utf-8",
            timeout=180,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        raise RuntimeError("claude subprocess timed out after 180s") from exc
    except OSError as exc:
        # Typically FileNotFoundError when the claude binary is not on PATH.
        raise RuntimeError(f"could not launch claude CLI: {exc}") from exc
    if res.returncode != 0:
        raise RuntimeError(f"claude exit {res.returncode}: {res.stderr[:300]}")

    # claude --output-format json returns a wrapped envelope; extract `result`.
    try:
        env = json.loads(res.stdout)
    except json.JSONDecodeError as exc:
        raise RuntimeError(f"unparseable claude stdout: {exc} :: {res.stdout[:300]}") from exc
    if not isinstance(env, dict):
        raise RuntimeError(f"unexpected claude envelope ({type(env).__name__}): {res.stdout[:300]}")
    txt = env.get("result") or env.get("response") or env.get("content") or ""
    if env.get("is_error"):
        raise RuntimeError(f"claude reported an error: {str(txt)[:300]}")
    if not isinstance(txt, str):
        raise RuntimeError(f"unexpected claude result type ({type(txt).__name__}): {str(txt)[:300]}")

    # Strip a code fence only where it wraps the whole reply (start/end of the
    # text), never in the middle of it.
    txt = re.sub(r"\A```(?:json)?\s*|\s*```\Z", "", txt.strip()).strip()

    # Try a direct parse; otherwise look for an embedded JSON object.
    try:
        parsed = json.loads(txt)
    except json.JSONDecodeError:
        parsed = None
    if not isinstance(parsed, dict):
        parsed = _find_summary_object(txt)
    if parsed is None:
        raise RuntimeError(f"no JSON object in claude output: {txt[:300]}")
    return parsed
|
||
|
||
|
||
def load_yaml_body(path: Path) -> tuple[dict, str]:
    """Split a Markdown file into its YAML front matter and its body.

    Args:
        path: File to read (decoded as UTF-8).

    Returns:
        ``(front_matter, body)``. When the file does not start with ``---``
        or has no closing ``---`` line, the front matter is ``{}`` and the
        body is the whole file content. Otherwise the body is the text after
        the closing fence with leading newlines removed.

    Raises:
        ValueError: The front matter parses to something other than a
            mapping (for example a bare string or a list).
    """
    raw = path.read_text(encoding="utf-8")
    if not raw.startswith("---"):
        return {}, raw
    # The closing fence must be a line of its own: a "---" inside a YAML value
    # must not terminate the front matter. With MULTILINE, "^" only matches at
    # real line starts, so searching from offset 3 skips the opening fence.
    closing = re.compile(r"^---[ \t]*\r?$", re.MULTILINE).search(raw, 3)
    if closing is None:
        # Unterminated front matter: treat the file as having none rather than
        # slicing with a -1 offset.
        return {}, raw
    fm = yaml.safe_load(raw[3:closing.start()].strip()) or {}
    if not isinstance(fm, dict):
        raise ValueError(f"{path}: front matter is not a YAML mapping ({type(fm).__name__})")
    body = raw[closing.end():].lstrip("\n")
    return fm, body
|
||
|
||
|
||
def write_yaml_body(path: Path, fm: dict, body: str) -> None:
    """Write *fm* as YAML front matter followed by *body* to *path* (UTF-8).

    The front matter keeps the insertion order of *fm*, is emitted in block
    style with non-ASCII characters unescaped, and is separated from the body
    by exactly one blank line unless the body already begins with a newline.
    """
    front_matter = yaml.dump(fm, allow_unicode=True, sort_keys=False, default_flow_style=False)
    pieces = ["---\n", front_matter, "---\n"]
    if not body.startswith("\n"):
        pieces.append("\n")
    pieces.append(body)
    path.write_text("".join(pieces), encoding="utf-8")
|
||
|
||
|
||
def upsert_anchor(event_id: str, name: str, date: str, location: str, cls: str, observers: str,
|
||
dry_run: bool, only: str | None) -> tuple[str, bool]:
|
||
if only and only.lower() not in (event_id.lower() + " " + name.lower()):
|
||
return ("skipped (not matched by --only)", False)
|
||
|
||
path = EVENTS_DIR / f"{event_id}.md"
|
||
existing_fm: dict = {}
|
||
existing_body: str = ""
|
||
if path.exists():
|
||
existing_fm, existing_body = load_yaml_body(path)
|
||
if existing_fm.get("summary_status") == "curated":
|
||
return ("skipped (already curated)", False)
|
||
|
||
prompt = PROMPT_TEMPLATE.format(
|
||
event_id=event_id, name=name, date=date, location=location, cls=cls, observers=observers,
|
||
)
|
||
print(f" → calling sonnet for {event_id} ...", flush=True)
|
||
out = call_sonnet(prompt, dry_run=dry_run)
|
||
narr_en = (out.get("narrative_summary") or "").strip()
|
||
narr_pt = (out.get("narrative_summary_pt_br") or "").strip()
|
||
if not narr_en or not narr_pt:
|
||
return (f"empty output (en={len(narr_en)}, pt={len(narr_pt)})", False)
|
||
|
||
# Build/refresh frontmatter
|
||
fm = {
|
||
"schema_version": "0.1.0",
|
||
"type": "entity",
|
||
"entity_class": "event",
|
||
"event_id": event_id,
|
||
"canonical_name": name,
|
||
"aliases": existing_fm.get("aliases") or [name],
|
||
"event_class": cls,
|
||
"date_start": date,
|
||
"date_end": existing_fm.get("date_end") or date,
|
||
"date_confidence": "high",
|
||
"primary_location": location,
|
||
"observers": existing_fm.get("observers") or [],
|
||
"uap_objects": existing_fm.get("uap_objects") or [],
|
||
"documented_in": existing_fm.get("documented_in") or [],
|
||
"total_mentions": existing_fm.get("total_mentions") or 0,
|
||
"documents_count": existing_fm.get("documents_count") or 0,
|
||
"narrative_summary": narr_en,
|
||
"narrative_summary_pt_br": narr_pt,
|
||
"summary_status": "curated",
|
||
"summary_confidence": "high",
|
||
"enrichment_status": existing_fm.get("enrichment_status") or "none",
|
||
"external_sources": existing_fm.get("external_sources") or [],
|
||
"last_ingest": existing_fm.get("last_ingest") or utc_iso(),
|
||
"last_lint": utc_iso(),
|
||
"wiki_version": "0.1.0",
|
||
}
|
||
body = existing_body if existing_body.strip() else (
|
||
f"# {name}\n\n## Description (EN)\n\n{narr_en}\n\n## Descrição (PT-BR)\n\n{narr_pt}\n"
|
||
)
|
||
if dry_run:
|
||
return ("ok (dry)", True)
|
||
EVENTS_DIR.mkdir(parents=True, exist_ok=True)
|
||
write_yaml_body(path, fm, body)
|
||
return ("ok", True)
|
||
|
||
|
||
def main() -> int:
    """Curate every anchor event (or the --only subset) and log the run.

    Each event is processed independently: a failure for one event is
    reported and the loop continues, so events that were already written are
    still recorded in the wiki log.

    Returns:
        Process exit status: 0 when no event failed, 1 otherwise.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--only", default=None, help="filter anchor events by substring match")
    p.add_argument("--dry-run", action="store_true")
    args = p.parse_args()

    print(f"Anchor events: {len(ANCHOR_EVENTS)}")
    done = 0
    failed = 0
    for ev in ANCHOR_EVENTS:
        try:
            msg, ok = upsert_anchor(*ev, dry_run=args.dry_run, only=args.only)
        except (RuntimeError, ValueError, OSError, yaml.YAMLError) as exc:
            # One bad LLM reply or unreadable file must not abort the run and
            # lose the log entry for events that were already written.
            msg, ok = f"FAILED: {exc}", False
            failed += 1
        sign = "✓" if ok else "·"
        print(f"  {sign} {ev[0]} — {msg}")
        if ok:
            done += 1

    if not args.dry_run and done > 0:
        with LOG_PATH.open("a", encoding="utf-8") as f:
            f.write(
                f"\n## {utc_iso()} · CURATE_ANCHOR_EVENTS\n"
                f"- script: scripts/synthesize/01_anchor_events.py\n"
                f"- curated: {done}/{len(ANCHOR_EVENTS)}\n"
                f"- model: claude-sonnet (via CLAUDE_CODE_OAUTH_TOKEN)\n"
            )
    print(f"\nCurated: {done}/{len(ANCHOR_EVENTS)}")
    if failed:
        print(f"Failed: {failed}/{len(ANCHOR_EVENTS)}", file=sys.stderr)
        return 1
    return 0
|
||
|
||
|
||
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|