disclosure-bureau/scripts/09-extract-uap-frames.py

234 lines
8 KiB
Python
Executable file

#!/usr/bin/env python3
"""
09-extract-uap-frames.py — Extract key UAP frames from videos via ffmpeg
For each video analyzed by 08-video-analysis.py, read the Gemini JSON output
and extract still frames at the moments where the UAP is visible:
- first_visible_at (UAP enters frame)
- midpoint (visual peak)
- last_visible_at (UAP exits frame)
- additional samples every 1s within the visible window
Frames are written to /Users/guto/ufo/processing/uap-frames/<video-id>/
as JPEG at high quality (q=2). Filenames encode the timestamp:
frame-00-00-first.jpg
frame-00-02-mid.jpg
frame-00-04-last.jpg
frame-00-01-sample-00.jpg
...
The frame paths are appended back to the video's frontmatter under
`uap_frames` for traceability.
Usage:
./09-extract-uap-frames.py # all analyzed videos
./09-extract-uap-frames.py --video-id dod-111689005 # single video
./09-extract-uap-frames.py --force # re-extract
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
try:
import yaml
except ImportError:
sys.stderr.write("Missing pyyaml. Run: pip3 install pyyaml\n")
sys.exit(1)
# Root of the local archive; every directory below is resolved against it.
UFO_ROOT = Path("/Users/guto/ufo")
# Source videos; find_video_path() globs this directory for *.mp4 files.
VIDEOS_DIR = UFO_ROOT / "raw" / "videos"
# Per-video analysis JSON (<video-id>.json) read by process_video().
ANALYSIS_DIR = UFO_ROOT / "processing" / "video-analysis"
# Output root; frames are written to <FRAMES_DIR>/<video-id>/.
FRAMES_DIR = UFO_ROOT / "processing" / "uap-frames"
# Markdown pages (<video-id>.md) whose frontmatter receives `uap_frames`.
WIKI_VIDEOS_DIR = UFO_ROOT / "wiki" / "videos"
def parse_timestamp(ts: str | float | int | None) -> float | None:
    """Convert a timestamp into seconds.

    Accepts ``'ss'``, ``'mm:ss'`` or ``'h:mm:ss'`` strings (only the seconds
    field may carry a fraction) and plain numbers, which are taken as seconds
    so that a JSON value such as ``2.5`` or ``0`` is not lost.

    Returns:
        The offset in seconds, or ``None`` when the value is missing,
        malformed, negative or not finite.
    """
    if ts is None or isinstance(ts, bool):
        return None
    try:
        if isinstance(ts, (int, float)):
            seconds = float(ts)
        elif isinstance(ts, str):
            parts = ts.strip().split(":")
            if not 1 <= len(parts) <= 3:
                return None
            # Last field is seconds; the ones before it are minutes, then hours.
            seconds = float(parts[-1])
            for weight, field in zip((60, 3600), reversed(parts[:-1])):
                seconds += int(field) * weight
        else:
            return None
    except (ValueError, OverflowError):
        return None
    # Rejects NaN, +/-inf and negative offsets: none of them is a usable seek
    # position, and an infinite bound would make window sampling never finish.
    if not 0.0 <= seconds < float("inf"):
        return None
    return seconds
def find_video_path(video_id: str) -> Path | None:
    """Locate the raw ``.mp4`` in ``VIDEOS_DIR`` that corresponds to *video_id*.

    Each file stem is lower-cased and every run of characters outside
    ``a-z0-9`` is collapsed to a single hyphen (leading/trailing hyphens
    removed). A file matches when that slug, or the slug prefixed with
    ``vid-``, equals *video_id*.

    Returns:
        The first matching path yielded by the glob, or ``None`` if no file
        matches.
    """
    non_slug_chars = re.compile(r"[^a-z0-9]+")
    for candidate in VIDEOS_DIR.glob("*.mp4"):
        normalized = non_slug_chars.sub("-", candidate.stem.lower()).strip("-")
        if video_id in (normalized, f"vid-{normalized}"):
            return candidate
    return None
def extract_frame(video_path: Path, timestamp_s: float, out_path: Path) -> bool:
    """Extract a single JPEG frame at the given timestamp using ffmpeg.

    Args:
        video_path: Source video handed to ffmpeg via ``-i``.
        timestamp_s: Seek position in seconds (passed to ``-ss`` before ``-i``).
        out_path: Destination JPEG; its parent directory is created if needed.

    Returns:
        ``True`` when ffmpeg exited with status 0 and left a non-empty file at
        *out_path*; ``False`` otherwise, including when ffmpeg cannot be
        started at all. Failures are reported on stderr, never raised.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # Remove any frame left by an earlier run first: ffmpeg may exit 0 without
    # writing a frame (e.g. when seeking past the end of the stream), and a
    # stale file would then satisfy the existence check below.
    try:
        out_path.unlink()
    except OSError:
        pass  # nothing to remove, or not removable; the checks below decide
    cmd = [
        "ffmpeg",
        "-y",  # overwrite
        "-loglevel", "error",
        "-ss", f"{timestamp_s:.3f}",
        "-i", str(video_path),
        "-frames:v", "1",
        "-q:v", "2",  # high quality JPEG
        str(out_path),
    ]
    try:
        res = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace", check=False
        )
    except OSError as exc:
        # ffmpeg missing or not executable: report it like any other failed
        # frame instead of aborting the whole batch with a traceback.
        sys.stderr.write(f" ✗ could not run ffmpeg for {timestamp_s:.2f}s: {exc}\n")
        return False
    if res.returncode != 0 or not out_path.exists() or out_path.stat().st_size == 0:
        sys.stderr.write(f" ✗ ffmpeg failed for {timestamp_s:.2f}s: {res.stderr[:200]}\n")
        return False
    return True
def collect_extraction_points(analysis: dict) -> list[tuple[float, str]]:
    """Return the ``(timestamp_seconds, label)`` pairs to extract for one video.

    Reads ``uap_observation_fields.first_visible_at`` / ``last_visible_at``
    from *analysis*:

    * Neither parses: fall back to up to five samples spread evenly over
      ``video_overview.duration_seconds`` (labels ``sample-00`` ...); an
      unknown, non-numeric or non-positive duration yields an empty list.
    * Otherwise: ``first`` is always emitted, ``mid`` when it lies more than
      0.4 s after ``first``, ``last`` when the window is longer than 0.6 s,
      plus one ``sample-NN`` per second inside the window, skipping positions
      within 0.5 s of ``mid`` or closer than 0.2 s to ``last``.

    A missing ``first`` defaults to 0.0 and a missing ``last`` to
    ``first + 1.0``; a window that is empty or reversed is widened to 0.5 s.
    """
    analysis = analysis or {}
    uap = analysis.get("uap_observation_fields") or {}
    first = parse_timestamp(uap.get("first_visible_at", ""))
    last = parse_timestamp(uap.get("last_visible_at", ""))

    points: list[tuple[float, str]] = []
    if first is None and last is None:
        # No UAP timestamps — sample evenly across the whole clip.
        overview = analysis.get("video_overview") or {}
        try:
            duration = float(overview.get("duration_seconds") or 0)
        except (TypeError, ValueError):
            duration = 0.0
        # The chained comparison also rejects NaN and infinity.
        if 0 < duration < float("inf"):
            count = min(5, int(duration) + 1)
            # Divide by the number of samples actually taken so that clips
            # shorter than 4 s are covered end to end, not just at the start.
            points.extend(
                (duration * (i + 0.5) / count, f"sample-{i:02d}")
                for i in range(count)
            )
        return points

    first = first if first is not None else 0.0
    last = last if last is not None else first + 1.0
    if last <= first:
        last = first + 0.5

    # Always include first; mid and last only when far enough from first.
    points.append((first, "first"))
    mid = (first + last) / 2
    if abs(mid - first) > 0.4:
        points.append((mid, "mid"))
    if last - first > 0.6:
        points.append((last, "last"))

    # Sample every ~1s within the window.
    cur = first + 1.0
    sample_idx = 0
    while cur < last - 0.2:
        # Avoid duplicating mid.
        if abs(cur - mid) > 0.5:
            points.append((cur, f"sample-{sample_idx:02d}"))
            sample_idx += 1
        cur += 1.0
    return points
def format_filename(t: float, label: str) -> str:
    """Build the frame file name ``frame-MM-SS[-fff]-label.jpg``.

    Args:
        t: Timestamp in seconds; rounded to whole milliseconds, and negative
            values are clamped to zero.
        label: Point label (``first``, ``mid``, ``last``, ``sample-NN``),
            kept verbatim.

    Returns:
        The file name. The ``-fff`` millisecond field is present only when the
        rounded timestamp is not a whole second.
    """
    # Work in integer milliseconds so rounding can never produce a seconds
    # field of "60" (e.g. 59.9996 s becomes 01-00, not 00-60).
    total_ms = max(0, round(t * 1000))
    minutes, rem_ms = divmod(total_ms, 60_000)
    seconds, millis = divmod(rem_ms, 1000)
    fraction = f"-{millis:03d}" if millis else ""
    return f"frame-{minutes:02d}-{seconds:02d}{fraction}-{label}.jpg"
def process_video(video_id: str, force: bool = False) -> dict:
    """Extract the UAP frames for one analyzed video.

    Args:
        video_id: Stem of the analysis file ``ANALYSIS_DIR/<video_id>.json``.
        force: Re-extract even when the frame directory already holds JPEGs.

    Returns:
        ``{"video_id", "status", "frames"}`` where ``frames`` lists paths
        relative to ``UFO_ROOT`` and ``status`` is one of:

        * ``no-analysis`` — the analysis JSON does not exist;
        * ``invalid-analysis`` — the JSON could not be read or parsed;
        * ``no-source-video`` — no raw video maps to *video_id*;
        * ``skipped-existing`` — frames already present and *force* is off;
        * ``no-extraction-points`` — the analysis yields no timestamps;
        * ``extraction-failed`` — every ffmpeg call failed;
        * ``ok`` — at least one frame was written.
    """
    json_path = ANALYSIS_DIR / f"{video_id}.json"
    if not json_path.exists():
        return {"video_id": video_id, "status": "no-analysis", "frames": []}
    try:
        data = json.loads(json_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError; one
        # bad file must not abort the rest of the batch.
        sys.stderr.write(f" ✗ cannot read analysis {json_path.name}: {exc}\n")
        return {"video_id": video_id, "status": "invalid-analysis", "frames": []}
    # `"analysis": null` or a non-object payload is treated as "no data".
    analysis = data.get("analysis") if isinstance(data, dict) else None
    if not isinstance(analysis, dict):
        analysis = {}

    video_path = find_video_path(video_id)
    if not video_path:
        return {"video_id": video_id, "status": "no-source-video", "frames": []}

    frames_subdir = FRAMES_DIR / video_id
    if frames_subdir.exists() and not force and any(frames_subdir.glob("*.jpg")):
        existing = [str(p.relative_to(UFO_ROOT)) for p in sorted(frames_subdir.glob("*.jpg"))]
        return {"video_id": video_id, "status": "skipped-existing", "frames": existing}

    points = collect_extraction_points(analysis)
    if not points:
        return {"video_id": video_id, "status": "no-extraction-points", "frames": []}

    frames_subdir.mkdir(parents=True, exist_ok=True)
    extracted = []
    for t, label in points:
        fname = format_filename(t, label)
        out = frames_subdir / fname
        if extract_frame(video_path, t, out):
            extracted.append(str(out.relative_to(UFO_ROOT)))
            print(f"{video_id} @ {t:6.2f}s [{label:8}] → {fname}", flush=True)

    if not extracted:
        # Do not report success with nothing written: the caller updates the
        # wiki frontmatter on "ok" and would record an empty frame list.
        return {"video_id": video_id, "status": "extraction-failed", "frames": []}
    return {"video_id": video_id, "status": "ok", "frames": extracted}
def append_frames_to_md(video_id: str, frames: list[str]):
    """Add `uap_frames` list to the wiki/videos/<video-id>.md frontmatter.

    Sets ``uap_frames`` to *frames* and ``uap_frames_extracted_at`` to the
    current UTC time, then rewrites the file with the re-serialized
    frontmatter followed by the original body.

    The file is left untouched when it does not exist, does not start with a
    ``---`` frontmatter block, has no closing ``---`` line, or when the
    frontmatter is not valid YAML or not a mapping.
    """
    md_path = WIKI_VIDEOS_DIR / f"{video_id}.md"
    if not md_path.exists():
        return
    content = md_path.read_text(encoding="utf-8")
    if not content.startswith("---"):
        return
    # The closing delimiter must be a line consisting only of `---`. A plain
    # substring search would also stop at a "---" inside a frontmatter value,
    # truncating the YAML and pushing the remainder into the body.
    closing = re.search(r"\n---[ \t]*(?:\r?\n|\Z)", content)
    if closing is None:
        return
    try:
        fm = yaml.safe_load(content[3:closing.start()].strip()) or {}
    except yaml.YAMLError:
        return
    if not isinstance(fm, dict):
        # A scalar or list cannot take the new keys; leave the file alone.
        return
    body = content[closing.end():].lstrip("\n")
    fm["uap_frames"] = frames
    fm["uap_frames_extracted_at"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    yaml_str = yaml.dump(fm, allow_unicode=True, sort_keys=False, default_flow_style=False)
    new = f"---\n{yaml_str}---\n\n{body}"
    md_path.write_text(new, encoding="utf-8")
def main():
    """Command-line entry point.

    Processes the single video given with ``--video-id``, or otherwise every
    ``*.json`` stem found in ``ANALYSIS_DIR`` in sorted order. For each video
    whose processing status is ``ok`` the wiki frontmatter is updated; for any
    other status only the status is printed.
    """
    parser = argparse.ArgumentParser(description="Extract UAP frames from analyzed videos via ffmpeg.")
    scope = parser.add_mutually_exclusive_group()
    scope.add_argument("--video-id", help="single video id (e.g. dod-111689005)")
    scope.add_argument("--all", action="store_true", help="all analyzed videos (default)")
    parser.add_argument("--force", action="store_true", help="re-extract even if frames exist")
    args = parser.parse_args()

    targets = (
        [args.video_id]
        if args.video_id
        else sorted(p.stem for p in ANALYSIS_DIR.glob("*.json"))
    )
    print(f"Processing {len(targets)} video(s)…")

    for vid in targets:
        res = process_video(vid, force=args.force)
        if res["status"] != "ok":
            print(f"{vid}: {res['status']}")
            continue
        append_frames_to_md(vid, res["frames"])
        print(f"{vid}: {len(res['frames'])} frames extracted, md updated")


if __name__ == "__main__":
    main()