disclosure-bureau/scripts/29-auto-resume-batch.sh

79 lines
2.5 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
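# auto-resume-batch.sh — retries the full batch rebuild every 30min until the
# Anthropic quota is back. There is no separate probe call: each attempt
# launches scripts/28-batch-rebuild-all.py itself (which is expected to
# early-abort while the quota is still throttled) and watches it for 90s.
#
# Stops itself when:
# - the batch is still running after 90s (interpreted as quota back); the
#   batch keeps running in the background and this script exits 0
# - MAX_ATTEMPTS attempts have been used up (exit 1)
# NOTE: successful completion (summary.json with successes >= queue_size) is
# not checked by this script; a batch that finishes within 90s is handled the
# same way as a quota abort.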
#
# Usage:
# nohup ./scripts/29-auto-resume-batch.sh > /tmp/auto-resume.log 2>&1 &
# -u: unset variables are errors; pipefail: a pipeline fails if any stage
# fails. -e is deliberately not set: the script inspects command results
# itself.
set -uo pipefail

# Configuration. Each value can be overridden from the environment; the
# defaults are the values this script has always used.
readonly UFO_ROOT="${UFO_ROOT:-/Users/guto/ufo}"
# NOTE(review): LOG_DIR is not referenced anywhere else in the visible script.
readonly LOG_DIR="$UFO_ROOT/raw/_batch-rebuild"
readonly SLEEP_BETWEEN="${SLEEP_BETWEEN:-1800}" # seconds; default 30min between probes
readonly MAX_ATTEMPTS="${MAX_ATTEMPTS:-24}"     # default 24 × 30min = 12h ceiling
# log MESSAGE...
# Print one line to stdout: the current UTC time as [HH:MM:SSZ], a space, then
# all arguments joined via "$*" (first character of IFS, a space by default).
log() {
  local stamp
  stamp=$(date -u +%H:%M:%SZ)
  printf '[%s] %s\n' "$stamp" "$*"
}
# Main retry loop. Each attempt launches the batch orchestrator in the
# background and watches it for up to 90s:
#   - still alive after 90s -> treated as real work; this script exits 0 and
#     leaves the batch running in the background
#   - exited within 90s     -> treated as a quota abort (or a fast finish);
#     relaunch after 5s if any new archive entries appeared, otherwise sleep
#     SLEEP_BETWEEN seconds before the next attempt
# Exits 1 once MAX_ATTEMPTS attempts have been used.

# count_archived — print how many paths match "$UFO_ROOT"/raw/*--subagent
# (0 when nothing matches). The body is a subshell so the nullglob setting
# does not leak into the rest of the script.
count_archived() (
  shopt -s nullglob
  matches=("$UFO_ROOT"/raw/*--subagent)
  printf '%s\n' "${#matches[@]}"
)

attempt=0
while (( attempt < MAX_ATTEMPTS )); do
  attempt=$((attempt + 1))
  log "attempt $attempt/$MAX_ATTEMPTS — probing batch"

  # An orchestrator is already running — do not start a second one.
  # Note that this check still consumes one attempt.
  if pgrep -f "28-batch-rebuild-all.py" >/dev/null; then
    log "batch already running, sleeping ${SLEEP_BETWEEN}s and re-checking"
    sleep "$SLEEP_BETWEEN"
    continue
  fi

  # Snapshot current archive count
  before=$(count_archived)
  log " archived before: $before"

  # Kick off batch (will early-abort if quota still throttled).
  # The script path below is relative, so a failed cd must not be ignored.
  cd "$UFO_ROOT" || { log "cannot cd to $UFO_ROOT, aborting"; exit 1; }
  python3 scripts/28-batch-rebuild-all.py --workers 2 \
    > "/tmp/batch-rebuild-auto-$attempt.log" 2>&1 &
  PID=$!
  log " started python orchestrator PID=$PID"

  # Wait for either:
  # - process exits (early-abort or done)
  # - 90s elapsed without exit (means it's actually running real work)
  for (( waited = 0; waited < 90; waited++ )); do
    kill -0 "$PID" 2>/dev/null || break
    sleep 1
  done

  if kill -0 "$PID" 2>/dev/null; then
    # Still running after 90s → real work, leave it alone and exit auto-resume
    log " ✓ batch is making real progress (still running after 90s)"
    log " auto-resume exits; full batch continues in background"
    log " monitor: tail -f /tmp/batch-rebuild-auto-$attempt.log"
    exit 0
  fi

  # Process exited within 90s — must have hit quota or completed.
  # Reap it so its exit status is available for diagnosis; the status is only
  # logged and does not influence the retry decision.
  rc=0
  wait "$PID" || rc=$?
  after=$(count_archived)
  delta=$((after - before))
  log " process exited fast (likely quota); archived delta: $delta"
  log " orchestrator exit status: $rc"

  if (( delta > 0 )); then
    log " ✓ some docs were processed — re-launching immediately"
    sleep 5
    continue
  fi

  log " 💤 quota still throttled; sleeping ${SLEEP_BETWEEN}s"
  sleep "$SLEEP_BETWEEN"
done

log "max attempts reached, giving up. re-run manually."
exit 1