#!/usr/bin/env bash
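# auto-resume-batch.sh — polls Anthropic quota every 30min, using a launch of
# the batch itself (scripts/28-batch-rebuild-all.py) as the probe.
# When the launched batch bails out quickly (it early-aborts while quota is
# still throttled), the script sleeps and probes again; otherwise the batch is
# left running as the real run.
#
# Stops itself when:
#   - the batch is still running 90s after launch (interpreted as quota back);
#     the batch continues in the background and this script exits 0
#   - MAX_ATTEMPTS probes have been made without that happening (exits 1)
#
# NOTE: successful completion (summary.json with successes >= queue_size) is
# not checked by this script. A batch that exits within 90s without archiving
# anything new is treated the same as a quota abort.
#
# Usage:
#   nohup ./scripts/29-auto-resume-batch.sh > /tmp/auto-resume.log 2>&1 &
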
# Shell options: unset variables are errors and a pipeline fails if any stage
# fails. errexit (-e) is intentionally NOT enabled, so commands whose failure
# matters must be checked explicitly.
set -uo pipefail

# Configuration. UFO_ROOT, SLEEP_BETWEEN and MAX_ATTEMPTS may be overridden
# from the environment; the defaults are the values this script has always
# used.
readonly UFO_ROOT="${UFO_ROOT:-/Users/guto/ufo}"
# shellcheck disable=SC2034  # not read anywhere in this script; kept as-is
readonly LOG_DIR="$UFO_ROOT/raw/_batch-rebuild"
readonly SLEEP_BETWEEN="${SLEEP_BETWEEN:-1800}"  # seconds; 30min between probes
readonly MAX_ATTEMPTS="${MAX_ATTEMPTS:-24}"      # 24 × 30min = 12h ceiling

# log MESSAGE...
# Writes one line to stdout: the current UTC time as [HH:MM:SSZ], a space,
# then all arguments joined into a single string.
log() {
  local stamp
  stamp=$(date -u +%H:%M:%SZ)
  printf '[%s] %s\n' "$stamp" "$*"
}

# ---------------------------------------------------------------------------
# Probe loop
#
# Each attempt launches the batch orchestrator in the background and watches
# it for up to PROBE_WINDOW seconds:
#   - still alive after the window -> treated as real work; this script exits
#     0 and leaves the batch running in the background
#   - exited inside the window     -> treated as an early abort; retry after
#     5s if the archive count grew, otherwise after SLEEP_BETWEEN seconds
# After MAX_ATTEMPTS attempts the script gives up with exit status 1.
# An attempt that finds a batch already running also counts against
# MAX_ATTEMPTS.
# ---------------------------------------------------------------------------

BATCH_SCRIPT="scripts/28-batch-rebuild-all.py"  # path relative to UFO_ROOT
PROBE_WINDOW=90                                 # seconds to watch a new launch

# Print the number of "$UFO_ROOT/raw/*--subagent" entries that exist right now.
# Counts glob matches directly instead of parsing `ls` output; an unmatched
# glob (left as the literal pattern) fails the existence test and yields 0.
count_archived() {
  local entry count=0
  for entry in "$UFO_ROOT"/raw/*--subagent; do
    if [[ -e "$entry" || -L "$entry" ]]; then
      count=$((count + 1))
    fi
  done
  printf '%s\n' "$count"
}

# Fail fast on a broken setup. Without these checks python would exit
# immediately on every attempt and each failure would be misread as
# "quota still throttled" until MAX_ATTEMPTS is exhausted.
if ! cd "$UFO_ROOT"; then
  log "cannot cd to $UFO_ROOT, giving up."
  exit 1
fi
if [[ ! -f "$BATCH_SCRIPT" ]]; then
  log "missing $UFO_ROOT/$BATCH_SCRIPT, giving up."
  exit 1
fi

attempt=0
while (( attempt < MAX_ATTEMPTS )); do
  attempt=$((attempt + 1))
  log "attempt $attempt/$MAX_ATTEMPTS — probing batch"

  # Check if anything is already running — bail early
  if pgrep -f "${BATCH_SCRIPT##*/}" >/dev/null; then
    log "batch already running, sleeping ${SLEEP_BETWEEN}s and re-checking"
    sleep "$SLEEP_BETWEEN"
    continue
  fi

  # Snapshot current archive count
  before=$(count_archived)
  log " archived before: $before"

  # Kick off batch (will early-abort if quota still throttled)
  attempt_log="/tmp/batch-rebuild-auto-$attempt.log"
  python3 "$BATCH_SCRIPT" --workers 2 >"$attempt_log" 2>&1 &
  batch_pid=$!
  log " started python orchestrator PID=$batch_pid"

  # Wait for either:
  #   - process exits (early-abort or done)
  #   - PROBE_WINDOW seconds elapsed without exit (it's running real work)
  for ((tick = 0; tick < PROBE_WINDOW; tick++)); do
    kill -0 "$batch_pid" 2>/dev/null || break
    sleep 1
  done

  if kill -0 "$batch_pid" 2>/dev/null; then
    # Still running after the window → real work, leave it alone and exit
    log " ✓ batch is making real progress (still running after ${PROBE_WINDOW}s)"
    log " auto-resume exits; full batch continues in background"
    log " monitor: tail -f $attempt_log"
    exit 0
  fi

  # Process exited within the window — must have hit quota or completed.
  # Reap it so its exit status is on record in the log.
  wait "$batch_pid"
  batch_status=$?

  after=$(count_archived)
  delta=$((after - before))
  log " process exited fast (likely quota); archived delta: $delta"
  log " orchestrator exit status: $batch_status (details: $attempt_log)"

  if (( delta > 0 )); then
    log " ✓ some docs were processed — re-launching immediately"
    sleep 5
    continue
  fi

  log " 💤 quota still throttled; sleeping ${SLEEP_BETWEEN}s"
  sleep "$SLEEP_BETWEEN"
done

log "max attempts reached, giving up. re-run manually."
exit 1