#!/usr/bin/env bash
# auto-resume-batch.sh — repeatedly tries to (re)start the batch rebuild
# until it stays alive, which is interpreted as API quota being available again.
#
# Each attempt:
#   1. If a 28-batch-rebuild-all.py process is already running, sleep and retry.
#   2. Count the raw/*--subagent entries ("archived" count).
#   3. Launch scripts/28-batch-rebuild-all.py --workers 2 in the background.
#   4. Watch it for up to PROBE_WINDOW seconds:
#        - still running afterwards -> treated as real work; this script
#          exits 0 and leaves the batch running in the background.
#        - exited within the window -> treated as an early abort (likely
#          quota). If the archived count grew, relaunch after 5s; otherwise
#          sleep SLEEP_BETWEEN seconds before the next attempt.
#
# Exit status:
#   0  batch survived the probe window and was left running
#   1  setup error (bad UFO_ROOT) or MAX_ATTEMPTS exhausted
#
# Environment:
#   UFO_ROOT  project root (default: /Users/guto/ufo)
#
# Usage:
#   nohup ./scripts/29-auto-resume-batch.sh > /tmp/auto-resume.log 2>&1 &

# No -e on purpose: probe failures are expected and handled explicitly below.
set -uo pipefail

UFO_ROOT="${UFO_ROOT:-/Users/guto/ufo}"
# shellcheck disable=SC2034  # not referenced in this script; kept from the original
LOG_DIR="$UFO_ROOT/raw/_batch-rebuild"
readonly SLEEP_BETWEEN=1800 # 30min between probes
readonly MAX_ATTEMPTS=24    # 24 × 30min = 12h ceiling
readonly PROBE_WINDOW=90    # seconds the batch must survive to count as "working"

# Print a UTC-timestamped message on stdout.
log() {
  printf '[%s] %s\n' "$(date -u +%H:%M:%SZ)" "$*"
}

# Print the number of entries matching $UFO_ROOT/raw/*--subagent.
# Uses a glob instead of parsing `ls`; an unmatched glob stays literal and
# fails both tests, so the result is 0. -L keeps dangling symlinks counted,
# as `ls -d` would.
count_archived() {
  local entry
  local total=0
  for entry in "$UFO_ROOT"/raw/*--subagent; do
    if [[ -e "$entry" || -L "$entry" ]]; then
      total=$((total + 1))
    fi
  done
  printf '%s\n' "$total"
}

# Fail fast: without this a bad root would look like 12h of "quota throttled".
if [[ ! -d "$UFO_ROOT" ]]; then
  log "UFO_ROOT is not a directory: $UFO_ROOT"
  exit 1
fi

attempt=0
while ((attempt < MAX_ATTEMPTS)); do
  attempt=$((attempt + 1))
  log "attempt $attempt/$MAX_ATTEMPTS — probing batch"

  # Check if anything is already running — bail early
  if pgrep -f "28-batch-rebuild-all.py" >/dev/null; then
    log "batch already running, sleeping ${SLEEP_BETWEEN}s and re-checking"
    sleep "$SLEEP_BETWEEN"
    continue
  fi

  # Snapshot current archive count
  before=$(count_archived)
  log " archived before: $before"

  # Kick off batch (will early-abort if quota still throttled)
  if ! cd "$UFO_ROOT"; then
    log "cannot cd to $UFO_ROOT"
    exit 1
  fi
  python3 scripts/28-batch-rebuild-all.py --workers 2 \
    >"/tmp/batch-rebuild-auto-$attempt.log" 2>&1 &
  PID=$!
  log " started python orchestrator PID=$PID"

  # Wait for either:
  #   - process exits (early-abort or done)
  #   - PROBE_WINDOW seconds elapsed without exit (it is doing real work)
  for ((elapsed = 0; elapsed < PROBE_WINDOW; elapsed++)); do
    if ! kill -0 "$PID" 2>/dev/null; then
      break
    fi
    sleep 1
  done

  if kill -0 "$PID" 2>/dev/null; then
    # Still running after the window → real work, leave it alone and exit
    log " ✓ batch is making real progress (still running after ${PROBE_WINDOW}s)"
    log " auto-resume exits; full batch continues in background"
    log " monitor: tail -f /tmp/batch-rebuild-auto-$attempt.log"
    exit 0
  fi

  # Process exited within the window. Reap it so its exit status lands in
  # the log; the status is informational only and does not change the flow.
  status=0
  wait "$PID" || status=$?
  log " orchestrator exit status: $status"

  after=$(count_archived)
  delta=$((after - before))
  log " process exited fast (likely quota); archived delta: $delta"

  if ((delta > 0)); then
    log " ✓ some docs were processed — re-launching immediately"
    sleep 5
    continue
  fi

  log " 💤 quota still throttled; sleeping ${SLEEP_BETWEEN}s"
  sleep "$SLEEP_BETWEEN"
done

log "max attempts reached, giving up. re-run manually."
exit 1