462 lines
20 KiB
Python
Executable file
462 lines
20 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""
|
|
11-generate-case-images.py — Generate "case images" (Nano Banana + Codex) per entity
|
|
|
|
For each completed video (wiki/videos/<id>.md) OR document (wiki/documents/<id>.md),
|
|
generate TWO conceptual images (plus an investigation diagram for videos that have extracted frames) representing the case, using the executive_summary
|
|
and UAP observation fields as the prompt seed:
|
|
|
|
processing/case-images/<entity-id>/case-nanobanana.png
|
|
processing/case-images/<entity-id>/case-codex.png
|
|
|
|
These are "what the case might look like" reproductions — NOT evidence, NOT
|
|
real-data reconstructions. They are speculative visualizations for the chat UI
|
|
to display alongside citations (the future Sherlock chat app).
|
|
|
|
All output is tagged `synthetic: true` in the entity markdown and gets a
|
|
`case_images_warnings` block.
|
|
|
|
Usage:
|
|
./11-generate-case-images.py --kind videos # process all wiki/videos/*.md
|
|
./11-generate-case-images.py --kind documents # process all wiki/documents/*.md
|
|
./11-generate-case-images.py --kind both # both
|
|
./11-generate-case-images.py --entity-id dod-111689005 # single entity (video or doc)
|
|
./11-generate-case-images.py --skip-codex # only Nano Banana (cheaper)
|
|
./11-generate-case-images.py --force # re-generate
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
sys.stderr.write("Missing pyyaml. Run: pip3 install pyyaml\n")
|
|
sys.exit(1)
|
|
|
|
try:
|
|
from google import genai
|
|
from PIL import Image as PILImage
|
|
from io import BytesIO
|
|
except ImportError:
|
|
sys.stderr.write("Missing google-genai or pillow. Run: pip3 install google-genai pillow\n")
|
|
sys.exit(1)
|
|
|
|
|
|
# Project root. Defaults to the original checkout location; set the UFO_ROOT
# environment variable to run the pipeline against a different checkout.
UFO_ROOT = Path(os.environ.get("UFO_ROOT") or "/Users/guto/ufo")

# Model id used for direct SDK calls (multi-image generation).
NANO_BANANA_MODEL = "gemini-3-pro-image-preview"

# Entity markdown sources.
WIKI_VIDEOS_DIR = UFO_ROOT / "wiki" / "videos"
WIKI_DOCS_DIR = UFO_ROOT / "wiki" / "documents"

# Generated images go to CASE_IMAGES_DIR/<entity-id>/; extracted video frames
# are read from FRAMES_DIR/<video-id>/.
CASE_IMAGES_DIR = UFO_ROOT / "processing" / "case-images"
FRAMES_DIR = UFO_ROOT / "processing" / "uap-frames"

# Append-only run log.
LOG_PATH = UFO_ROOT / "wiki" / "log.md"

# Skill script invoked through `uv run` for zero- or single-image generation.
NANO_BANANA_SCRIPT = Path.home() / ".claude" / "skills" / "nano-banana-pro" / "scripts" / "generate_image.py"
|
|
|
|
|
|
def find_best_frame(video_id: str) -> Path | None:
    """Pick a single representative frame jpg for a video.

    Looks in ``FRAMES_DIR/<video_id>`` and returns the first file (in name
    order) whose name contains the highest-priority marker; if no marker
    matches, the first jpg in name order is returned. Returns ``None`` when
    the directory is missing or holds no ``*.jpg`` files.
    """
    frame_dir = FRAMES_DIR / video_id
    if not frame_dir.exists():
        return None

    jpgs = sorted(frame_dir.glob("*.jpg"))
    if not jpgs:
        return None

    # Markers are tried in priority order.
    # NOTE(review): "-mi.jpg" / "-firs.jpg" / "-las.jpg" look like truncated
    # "-mid" / "-first" / "-last" labels — confirm against the names the
    # frame extractor actually writes.
    markers = ("-mi.jpg", "-firs.jpg", "-las.jpg", "-sample")
    match = next(
        (jpg for marker in markers for jpg in jpgs if marker in jpg.name),
        None,
    )
    return jpgs[0] if match is None else match
|
|
|
|
|
|
def find_all_frames(video_id: str, max_n: int = 5) -> list[Path]:
    """Return up to ``max_n`` frames spanning the UAP timeline, in time order.

    Frames are read from ``FRAMES_DIR/<video_id>/*.jpg`` and ordered by the
    timestamp encoded in the filename (``frame-MM-SS_NN-label.jpg``); names
    that do not match sort first (timestamp 0), ties keep name order.

    When more than ``max_n`` frames exist they are subsampled evenly with
    both endpoints included, so the first and the last frame are always
    part of the selection. ``max_n == 1`` yields the first frame only.

    Returns an empty list when ``max_n <= 0``, the directory is missing, or
    it contains no jpg files.
    """
    if max_n <= 0:
        # Previously this reached a division by zero in the subsampling step.
        return []

    frame_dir = FRAMES_DIR / video_id
    if not frame_dir.exists():
        return []

    stamp = re.compile(r"frame-(\d+)-(\d+)_(\d+)")

    def timestamp(p: Path) -> float:
        m = stamp.match(p.name)
        if not m:
            return 0
        minutes, seconds, hundredths = (int(g) for g in m.groups())
        return minutes * 60 + seconds + hundredths / 100

    # Name-sort first so that the (stable) time sort breaks ties by name.
    frames = sorted(sorted(frame_dir.glob("*.jpg")), key=timestamp)
    total = len(frames)
    if total <= max_n:
        return frames
    if max_n == 1:
        return [frames[0]]

    # Spread max_n indices across [0, total - 1] inclusive. Because
    # total > max_n, consecutive indices are always distinct.
    last = total - 1
    return [frames[i * last // (max_n - 1)] for i in range(max_n)]
|
|
|
|
|
|
def read_md(path: Path) -> tuple[dict, str]:
    """Split a markdown file into ``(frontmatter, body)``.

    The frontmatter is the YAML between a leading ``---`` and the next line
    that consists only of ``---``. The body has its leading newlines
    stripped.

    Returns ``({}, full_text)`` when the file has no frontmatter or the
    closing delimiter is missing, and ``({}, body)`` when the YAML is
    invalid or is not a mapping (callers rely on ``dict.get``).
    """
    text = path.read_text(encoding="utf-8")
    if not text.startswith("---"):
        return {}, text

    # The closing delimiter must sit on a line of its own; a plain substring
    # search would also stop at a "---" embedded in a YAML value.
    closing = re.compile(r"\n---[ \t]*(?=\n|\Z)").search(text, 3)
    if closing is None:
        return {}, text

    front = text[3:closing.start()].strip()
    body = text[closing.end():].lstrip("\n")
    try:
        data = yaml.safe_load(front)
    except yaml.YAMLError:
        return {}, body
    return (data if isinstance(data, dict) else {}), body
|
|
|
|
|
|
def build_case_prompt(fm: dict, kind: str, has_reference_frame: bool) -> str:
    """Build the descriptive image-generation prompt for one entity.

    Args:
        fm: Parsed frontmatter of the entity markdown.
        kind: ``"video"`` reads the video fields (``video_id``,
            ``executive_summary_en``, ``uap_observation_fields``,
            ``overview_*``, ``sherlock_observations``); any other value is
            treated as a document (``canonical_title``/``doc_id``,
            ``executive_summary``, ``document_class``, ``war_gov``).
        has_reference_frame: When true the prompt tells the model to enhance
            the attached frame; otherwise it asks for a text-only conceptual
            scene.

    Returns:
        The full prompt text.

    Frontmatter comes from hand- or machine-written YAML, so null values and
    unexpectedly typed fields are tolerated and rendered as "unknown"/empty
    instead of raising.
    """

    def as_dict(value):
        return value if isinstance(value, dict) else {}

    def as_list(value):
        if isinstance(value, list):
            return value
        # A lone scalar string is treated as a one-element list.
        return [value] if isinstance(value, str) and value else []

    def as_text(value):
        return str(value) if value else ""

    if kind == "video":
        title = fm.get("video_id", "unknown")
        summary = as_text(fm.get("executive_summary_en")).strip()
        uap = as_dict(fm.get("uap_observation_fields"))
        overview = {
            k.replace("overview_", ""): v
            for k, v in fm.items()
            if isinstance(k, str) and k.startswith("overview_")
        }
        location_hint = as_dict(uap.get("coordinates")).get("raw_text") or ""
        sherlock = as_list(fm.get("sherlock_observations"))
        sherlock_summary = "; ".join(
            as_text(as_dict(o).get("observation"))[:120] for o in sherlock[:3]
        )
    else:  # document
        title = fm.get("canonical_title") or fm.get("doc_id", "unknown")
        summary = as_text(fm.get("executive_summary")).strip()
        uap = {}
        war_gov = as_dict(fm.get("war_gov"))
        overview = {
            "primary_subject": fm.get("document_class") or "",
            "incident_date": war_gov.get("incident_date_official") or "",
            "incident_location": war_gov.get("incident_location_official") or "",
        }
        location_hint = overview["incident_location"]
        sherlock_summary = ""

    shape = uap.get("shape") or "unknown"
    color = uap.get("color") or "unknown"
    altitude = uap.get("altitude_ft") or "unknown"
    speed = uap.get("speed_kts") or "unknown"
    maneuvers = (
        ", ".join(str(m) for m in as_list(uap.get("maneuver_descriptors")))
        or "no specific maneuvers reported"
    )
    sensors = as_list(uap.get("sensor_observations"))
    # Only the first sensor observation is named in the prompt.
    sensor_str = as_dict(sensors[0]).get("sensor", "unknown sensor") if sensors else "unknown sensor"

    if has_reference_frame:
        intro = """USE THE ATTACHED REFERENCE FRAME as your visual starting point. This is an actual frame extracted from the original UAP video at a moment when the UAP is visible. Enhance and re-interpret this exact scene cinematically while keeping ALL the real visual elements: same camera angle, same terrain/sensor view, same UAP position, same scale, same lighting conditions of the IR/FLIR/visible sensor.

The output should look like a CINEMATIC VERSION of the same moment captured in the frame — same scene, same UAP, but rendered with higher production value and atmospheric depth. DO NOT change the location of the UAP. DO NOT invent buildings, terrain, or atmosphere that aren't in the reference frame."""
    else:
        intro = """Create a photorealistic conceptual reproduction of a UAP/UFO incident scene from a U.S. Department of War declassified case."""

    return f"""{intro}

CASE METADATA:
- title: {title}
- narrative: {summary[:600]}
- location: {location_hint or 'unknown'}
- primary subject: {overview.get('primary_subject', '')}
- camera vantage: {overview.get('camera_perspective', 'aerial')}
- sensor depicted: {sensor_str}

UAP CHARACTERISTICS:
- shape: {shape}
- color: {color}
- altitude: {altitude}
- speed: {speed}
- maneuvers: {maneuvers}

KEY OBSERVATIONS: {sherlock_summary[:400]}

ABSOLUTE RULES:
- Do NOT add any HUD telemetry text, altitude readouts, headings, coordinates, callsigns, or date/time stamps. These would be fabricated.
- Do NOT add classification banners with specific levels (SECRET, NOFORN, etc).
- Do NOT add ANY text at all.
- Cinematic photorealism, IMAX documentary aesthetic, somber investigative mood.
- 16:9 aspect ratio.

This is a CONCEPTUAL VISUALIZATION — artistic interpretation, not evidence."""
|
|
|
|
|
|
def build_diagram_prompt(fm: dict) -> str:
    """Build the "Sherlock investigation board" annotation prompt.

    The prompt assumes a reference frame is attached to the request. It
    embeds up to four ``sherlock_observations`` (lens + first 100 chars) and
    up to two ``anomalies_detected`` (kind + first 80 chars) from the
    frontmatter ``fm``. Null or unexpectedly typed entries are rendered as
    empty text instead of raising.
    """

    def as_dict(value):
        return value if isinstance(value, dict) else {}

    def as_list(value):
        return value if isinstance(value, list) else []

    def as_text(value):
        return str(value) if value else ""

    observations = [as_dict(o) for o in as_list(fm.get("sherlock_observations"))[:4]]
    sherlock_text = " | ".join(
        f"[{o.get('detective_lens', '?')}] {as_text(o.get('observation'))[:100]}"
        for o in observations
    )
    anomalies = [as_dict(a) for a in as_list(fm.get("anomalies_detected"))[:2]]
    anomaly_text = " | ".join(
        f"{a.get('kind', '?')}: {as_text(a.get('description'))[:80]}"
        for a in anomalies
    )
    return f"""USE THE ATTACHED REFERENCE FRAME from the UAP video. Transform it into a Sherlock Holmes investigative diagram board. Keep the underlying scene (slightly brightened for legibility), and overlay handwritten-style red-pen detective annotations.

CONTEXT FROM ANALYSIS:
- Sherlock observations: {sherlock_text[:500]}
- Anomalies detected: {anomaly_text[:200]}

OVERLAY ANNOTATIONS (hand-drawn in red pen on transparent overlay):
- ◯ "UAP TARGET" circled around the most likely UAP position with arrow pointing to it
- ◯ "TRACKING LOCK" or "CROSSHAIR" if a tracking marker is visible in the frame
- ◯ "OBSERVED FROM" labeling the camera vantage (cockpit, ground, etc.)
- A dashed yellow-highlighter arrow showing the inferred motion direction with label "APPROXIMATE FLIGHT PATH"
- Bottom-left annotation in small red text summarizing ONE key observation (e.g. "IR signature: linear motion, no visible exhaust — anomalous")
- Top-right small annotation: "SOURCE: DOD VIDEO, DECLASSIFIED"

STYLE:
- Annotations look hand-drawn, slightly imperfect, like a real detective pinned the photo on a corkboard and circled clues
- The base scene from the frame stays intact (don't replace it, just annotate)
- Corkboard pins in the corners
- Slight grungy texture overlay
- Forensic investigation board / vintage detective work

ABSOLUTE RULES:
- Do NOT invent specific telemetry numbers (altitude, heading, coords, timestamps)
- All annotations are INTERPRETATIONS of what's visible, not data extracted from HUD
- Do NOT remove or alter the actual scene content"""
|
|
|
|
|
|
# Gemini SDK client, created on first use and then reused.
_gemini_client = None


def _get_gemini_client():
    """Return the shared ``genai.Client``, constructing it on the first call.

    The API key is taken from ``GEMINI_API_KEY``, falling back to
    ``GOOGLE_API_KEY``.

    Raises:
        RuntimeError: if neither environment variable holds a value.
    """
    global _gemini_client
    if _gemini_client is not None:
        return _gemini_client

    key = os.environ.get("GEMINI_API_KEY")
    if not key:
        key = os.environ.get("GOOGLE_API_KEY")
    if not key:
        raise RuntimeError("GEMINI_API_KEY / GOOGLE_API_KEY not set")

    _gemini_client = genai.Client(api_key=key)
    return _gemini_client
|
|
|
|
|
|
def call_nano_banana(prompt: str, out_path: Path, input_images: list[Path] | None = None, resolution: str = "2K") -> bool:
    """Generate one image with Nano Banana Pro and save it to ``out_path``.

    With zero or one reference image the skill script (``NANO_BANANA_SCRIPT``)
    is run through ``uv run``. With two or more reference images the Gemini
    SDK is called directly, since this path exists to send multiple images
    in one request.

    Args:
        prompt: Text prompt.
        out_path: Destination PNG.
        input_images: Optional reference images; paths that do not exist are
            skipped on the SDK path.
        resolution: Passed to the skill script only. The SDK path does not
            forward it.

    Returns:
        True when an image was written, False on any failure. Failures are
        reported on stderr and never raised, so one failed generation cannot
        abort the remaining steps for an entity.
    """
    # Zero or one image: delegate to the skill script (it handles resolution etc.).
    if not input_images or len(input_images) <= 1:
        cmd = [
            "uv", "run", str(NANO_BANANA_SCRIPT),
            "--prompt", prompt,
            "--filename", str(out_path),
            "--resolution", resolution,
        ]
        if input_images:
            cmd.extend(["--input-image", str(input_images[0])])
        try:
            res = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
        except (subprocess.TimeoutExpired, OSError) as e:
            # Timeout, or `uv` missing / not executable.
            sys.stderr.write(f" ✗ Nano Banana (skill) failed: {e}\n")
            return False
        if res.returncode != 0:
            sys.stderr.write(f" ✗ Nano Banana (skill) failed: {res.stderr[-400:]}\n")
            return False
        return out_path.exists() and out_path.stat().st_size > 0

    # Multi-image path: direct SDK call.
    opened = []
    try:
        client = _get_gemini_client()
        for p in input_images:
            if p.exists():
                opened.append(PILImage.open(p))
        response = client.models.generate_content(
            model=NANO_BANANA_MODEL,
            contents=[*opened, prompt],
        )
        # The first part carrying inline image bytes is the generated image.
        for part in response.candidates[0].content.parts:
            inline = getattr(part, "inline_data", None)
            if inline and inline.data:
                out_path.parent.mkdir(parents=True, exist_ok=True)
                with PILImage.open(BytesIO(inline.data)) as img:
                    img.save(out_path, "PNG")
                return True
        sys.stderr.write(" ✗ Nano Banana: no image in response\n")
        return False
    except Exception as e:
        # Best-effort boundary: SDK, network and decoding errors all end up here.
        sys.stderr.write(f" ✗ Nano Banana (SDK multi-image) failed: {e}\n")
        return False
    finally:
        # PIL keeps the underlying files open until closed explicitly.
        for img in opened:
            img.close()
|
|
|
|
|
|
def call_codex(prompt: str, out_path: Path, input_images: list[Path] | None = None) -> bool:
    """Ask the ``codex`` CLI to generate one image and save it as ``out_path``.

    Existing reference frames are copied next to ``out_path`` (the directory
    Codex runs in) and listed by file name in the instruction, so Codex can
    use them for image-to-image generation. When no frame is available the
    instruction asks for plain text-to-image generation instead of pointing
    at reference frames that do not exist.

    Returns:
        True when Codex exited successfully and ``out_path`` is a non-empty
        file; False otherwise. Failures are reported on stderr, not raised.
    """
    import shutil

    out_path.parent.mkdir(parents=True, exist_ok=True)

    ref_section = ""
    if input_images:
        existing_frames = []
        for p in input_images:
            if p.exists():
                local = out_path.parent / p.name
                if not local.exists():
                    shutil.copy(p, local)
                existing_frames.append(p.name)
        if existing_frames:
            file_list = ", ".join(f"'{n}'" for n in existing_frames)
            ref_section = f"""

REFERENCE FRAMES (in order of timeline): {file_list}.
These are real frames extracted from the original UAP video at different timestamps.
USE THEM as visual input for gpt-image-1's image edit/composition endpoint.
The UAP appears in these frames — preserve its position, scale, and the scene composition.
Use the multiple frames to understand UAP motion / trajectory and convey a coherent moment in the cinematic output.
"""

    if ref_section:
        technique = "Use gpt-image-1's image edit (image-to-image) capability with the reference frame(s) above. Combine them as multi-image input if your tool supports it; otherwise pick the most representative one."
    else:
        technique = "No reference frames are available for this case; use gpt-image-1's text-to-image generation."

    codex_instruction = f"""Generate ONE high-quality image and save it to '{out_path.name}' in the current directory.{ref_section}

PROMPT:
{prompt}

{technique} Output resolution at least 1024x1024. Save only ONE PNG with the exact filename '{out_path.name}'. Confirm the filename after saving."""
    cmd = [
        "codex", "exec",
        "--skip-git-repo-check",
        "--sandbox", "workspace-write",
        "--cd", str(out_path.parent),
        codex_instruction,
    ]
    try:
        res = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    except (subprocess.TimeoutExpired, OSError) as e:
        # Timeout, or the `codex` binary is missing / not executable.
        sys.stderr.write(f" ✗ Codex failed: {e}\n")
        return False
    if res.returncode != 0:
        sys.stderr.write(f" ✗ Codex failed: {res.stderr[-400:]}\n")
        return False
    return out_path.exists() and out_path.stat().st_size > 0
|
|
|
|
|
|
def append_case_image_refs(md_path: Path, nano_path: Path | None, codex_path: Path | None, diagram_path: Path | None, ref_frame: Path | None):
    """Record the generated images in the entity's frontmatter and rewrite the file.

    For every image path that exists on disk an entry is added under
    ``case_images`` (keys ``nano_banana``, ``codex``,
    ``investigation_diagram``). ``case_images_warnings`` and
    ``case_images_generated_at`` (UTC) are set alongside. Nothing is written
    when none of the images exist.

    Paths are stored relative to ``UFO_ROOT`` when possible. ``ref_frame`` is
    the frame recorded as ``reference_frame`` for every entry; ``None`` means
    the images were generated from text only, which is reflected in the
    warnings.
    """

    def rel(p: Path) -> str:
        try:
            return str(p.relative_to(UFO_ROOT))
        except ValueError:
            # Not under UFO_ROOT: keep the path as given rather than fail.
            return str(p)

    def entry(image: Path, model: str, **extra) -> dict:
        return {
            "path": rel(image),
            "model": model,
            "synthetic": True,
            "factual_data_extraction": "NONE",
            "reference_frame": rel(ref_frame) if ref_frame else None,
            **extra,
        }

    fm, body = read_md(md_path)

    case_images = {}
    if nano_path and nano_path.exists():
        case_images["nano_banana"] = entry(nano_path, "gemini-3-pro-image")
    if codex_path and codex_path.exists():
        case_images["codex"] = entry(codex_path, "gpt-image-1")
    if diagram_path and diagram_path.exists():
        case_images["investigation_diagram"] = entry(
            diagram_path,
            "gemini-3-pro-image",
            annotation_style="sherlock-holmes-investigation-board",
        )
    if not case_images:
        return

    if not fm and md_path.read_text(encoding="utf-8").startswith("---"):
        # The file has a frontmatter block that could not be parsed (or is
        # empty). Rewriting would replace it with only the image keys, so
        # leave the file untouched.
        sys.stderr.write(f" ✗ frontmatter of {md_path.name} not parseable; case_images not recorded\n")
        return

    if ref_frame:
        provenance = "AI-enhanced from a real video frame; UAP position and scene composition come from the frame, but rendering and any annotations are interpretive."
    else:
        # Text-only generation: claiming a real-frame origin would be false.
        provenance = "Generated from case text only — no video frame was used as a reference; every visual detail is interpretive."

    fm["case_images"] = case_images
    fm["case_images_warnings"] = [
        "Conceptual visualizations only — not evidence.",
        "Do NOT extract numerical claims (altitude, coords, timestamps) from these images.",
        provenance,
    ]
    fm["case_images_generated_at"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    yaml_str = yaml.dump(fm, allow_unicode=True, sort_keys=False, default_flow_style=False)
    new = f"---\n{yaml_str}---\n\n{body}" if not body.startswith("\n") else f"---\n{yaml_str}---\n{body}"
    md_path.write_text(new, encoding="utf-8")
|
|
|
|
|
|
def process_entity(md_path: Path, kind: str, force: bool, skip_codex: bool, skip_nano: bool):
    """Generate the case images for one entity and record them in its markdown.

    Args:
        md_path: Entity markdown file.
        kind: ``"video"`` or ``"document"``; selects the id field
            (``video_id`` / ``doc_id``) and whether reference frames are used.
        force: Re-generate images that already exist.
        skip_codex: Do not run the Codex generator.
        skip_nano: Do not run Nano Banana (case image and diagram).

    Outputs go to ``CASE_IMAGES_DIR/<entity-id>/``. Whatever images exist on
    disk afterwards are recorded in the frontmatter, even when one of the
    generators raised; the exception is then re-raised to the caller.
    """
    fm, _ = read_md(md_path)
    raw_id = fm.get("video_id") if kind == "video" else fm.get("doc_id")
    if not raw_id:
        sys.stderr.write(f" ✗ no entity id in {md_path.name}\n")
        return
    # YAML parses purely numeric ids as int; paths need a string.
    entity_id = str(raw_id)

    out_dir = CASE_IMAGES_DIR / entity_id
    out_dir.mkdir(parents=True, exist_ok=True)
    nano_out = out_dir / "case-nanobanana.png"
    codex_out = out_dir / "case-codex.png"
    diagram_out = out_dir / "investigation-diagram.png"

    # Collect frames as references (videos only). Multi-image lets the model
    # understand motion/trajectory across the UAP timeline.
    ref_frames = find_all_frames(entity_id, max_n=5) if kind == "video" else []
    # The middle frame is what gets recorded as `reference_frame` metadata.
    primary_frame = ref_frames[len(ref_frames) // 2] if ref_frames else None

    print(f"\n=== {entity_id} ({kind}) ===", flush=True)
    if ref_frames:
        print(f" reference frames ({len(ref_frames)}): {[p.name for p in ref_frames]}", flush=True)
    else:
        print(f" (no reference frames — text-only generation)", flush=True)

    case_prompt = build_case_prompt(fm, kind, has_reference_frame=bool(ref_frames))

    try:
        if not skip_nano and (force or not nano_out.exists()):
            print(f" → Nano Banana (case, {len(ref_frames)} frames)…", flush=True)
            if call_nano_banana(case_prompt, nano_out, input_images=ref_frames):
                print(f" ✓ {nano_out.relative_to(UFO_ROOT)}", flush=True)

        if not skip_codex and (force or not codex_out.exists()):
            print(f" → Codex (case, {len(ref_frames)} frames)…", flush=True)
            if call_codex(case_prompt, codex_out, input_images=ref_frames):
                print(f" ✓ {codex_out.relative_to(UFO_ROOT)}", flush=True)

        # Investigation diagram — Nano Banana, multi-image (videos only)
        if ref_frames and not skip_nano and (force or not diagram_out.exists()):
            diagram_prompt = build_diagram_prompt(fm)
            print(f" → Nano Banana (investigation diagram, {len(ref_frames)} frames)…", flush=True)
            if call_nano_banana(diagram_prompt, diagram_out, input_images=ref_frames):
                print(f" ✓ {diagram_out.relative_to(UFO_ROOT)}", flush=True)
    finally:
        # Record whatever was produced, even if a later generator failed.
        append_case_image_refs(md_path, nano_out, codex_out, diagram_out, primary_frame)
|
|
|
|
|
|
def collect_entities(kind: str, entity_id: str | None) -> list[tuple[Path, str]]:
|
|
out = []
|
|
if kind in ("videos", "both"):
|
|
for p in sorted(WIKI_VIDEOS_DIR.glob("*.md")):
|
|
if entity_id and p.stem != entity_id:
|
|
continue
|
|
out.append((p, "video"))
|
|
if kind in ("documents", "both"):
|
|
for p in sorted(WIKI_DOCS_DIR.glob("*.md")):
|
|
if entity_id and p.stem != entity_id:
|
|
continue
|
|
out.append((p, "document"))
|
|
return out
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, process every selected entity, append a log entry.

    Exits with status 2 when Nano Banana is enabled but no API key is
    configured. A failure on one entity is reported on stderr and does not
    stop the remaining entities; the number of failures is written to the
    log entry.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--kind", choices=["videos", "documents", "both"], default="videos")
    ap.add_argument("--entity-id", help="single entity (video_id or doc_id)")
    ap.add_argument("--skip-nano", action="store_true", help="skip Nano Banana")
    ap.add_argument("--skip-codex", action="store_true", help="skip Codex")
    ap.add_argument("--force", action="store_true", help="re-generate even if exists")
    args = ap.parse_args()

    # Accept the same two variables the SDK client accepts.
    # NOTE(review): the skill script run through `uv` may read only
    # GEMINI_API_KEY — confirm.
    api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
    if not api_key and not args.skip_nano:
        sys.stderr.write("GEMINI_API_KEY / GOOGLE_API_KEY not set (needed for Nano Banana)\n")
        sys.exit(2)

    entities = collect_entities(args.kind, args.entity_id)
    print(f"Processing {len(entities)} entit(y/ies)…")
    failures = 0
    for md_path, kind in entities:
        try:
            process_entity(md_path, kind, args.force, args.skip_codex, args.skip_nano)
        except Exception as e:
            # Top-level boundary: report and move on to the next entity.
            failures += 1
            sys.stderr.write(f"FATAL on {md_path.name}: {e}\n")

    # Log
    if entities:
        with open(LOG_PATH, "a", encoding="utf-8") as fh:
            fh.write(
                f"\n## {datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')} — CASE IMAGES (Phase 4.6)\n"
                f"- operator: archivist + case-writer\n- script: scripts/11-generate-case-images.py\n"
                f"- kind: {args.kind}\n- entities: {len(entities)}\n"
                f"- skip_nano: {args.skip_nano}\n- skip_codex: {args.skip_codex}\n"
                f"- failures: {failures}\n"
            )
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|