#!/usr/bin/env bash
# Generate the clean LLM reading version for every document, in parallel.
#
#   - One doc per `claude -p` (Sonnet) via 40_reading_version.py
#   - Skips docs that already have reading.md (idempotent — safe to re-run)
#   - mkdir-based per-doc lock prevents two workers racing the same doc
#   - WORKERS parallel workers (default 2)
#
# Run:
#   ./run_reading_parallel.sh                   # all docs, 2 workers
#   WORKERS=3 ./run_reading_parallel.sh         # 3 workers
#   ./run_reading_parallel.sh DOC1 DOC2         # specific docs only
#   UFO=/other/root ./run_reading_parallel.sh   # non-default project root
#
# Exit status: 0 when every dispatched doc was generated, skipped or locked;
# non-zero when at least one worker reported a failure.

# No `-e`: a failing doc is reported and the rest of the batch still runs.
set -uo pipefail

# Project root; overridable from the environment, default kept as before.
UFO="${UFO:-/Users/guto/ufo}"
RAW="$UFO/raw"
GEN="$UFO/scripts/synthesize/40_reading_version.py"
WORKERS="${WORKERS:-2}"

# Reject a malformed WORKERS early instead of letting xargs choke on it.
case "$WORKERS" in
  '' | *[!0-9]*)
    printf 'WORKERS must be a non-negative integer, got: %s\n' "$WORKERS" >&2
    exit 2
    ;;
esac

# Doc ids come from the command line, or from every "<id>--subagent"
# directory under $RAW that contains an _index.json.
if [ "$#" -gt 0 ]; then
  DOCS=("$@")
else
  DOCS=()
  for d in "$RAW"/*--subagent; do
    # Also filters out the literal pattern left behind by an unmatched glob.
    [ -f "$d/_index.json" ] || continue
    name=${d##*/}
    DOCS+=("${name%--subagent}")
  done
fi

echo "=== reading-version generator ==="
echo "  docs queued: ${#DOCS[@]}"
echo "  workers:     $WORKERS"
echo ""

#######################################
# Generate reading.md for a single document.
# Globals:   RAW (read), GEN (read)
# Arguments: $1 - document id (directory is "$RAW/<id>--subagent")
# Outputs:   one-line progress markers on stdout
#            ([SKIP], [LOCK], [BEGIN], [OK], [FAIL]); generator output goes
#            to "<dir>/_reading.log"
# Returns:   0 when the doc was generated, skipped, or is locked by another
#            worker; 1 when the lock could not be created or the generator
#            exited non-zero
#######################################
process_one() {
  local doc_id="$1"
  local sub="$RAW/${doc_id}--subagent"
  local out="$sub/reading.md"
  local log="$sub/_reading.log"
  local lock="$sub/.reading.lock"
  local quoted_lock started

  if [ -f "$out" ]; then
    echo "[SKIP] $doc_id (already has reading.md)"
    return 0
  fi

  # mkdir is atomic, so whoever creates the directory owns the doc.
  if ! mkdir "$lock" 2>/dev/null; then
    if [ -d "$lock" ]; then
      echo "[LOCK] $doc_id (another worker)"
      return 0
    fi
    # mkdir failed for another reason (missing doc directory, permissions).
    echo "[FAIL] $doc_id — cannot create lock $lock"
    return 1
  fi

  # Release the lock even if the shell exits mid-run. The path is expanded
  # now and shell-quoted with %q so quotes or spaces in it cannot corrupt
  # the trap command.
  printf -v quoted_lock '%q' "$lock"
  # shellcheck disable=SC2064  # early expansion is intentional
  trap "rmdir -- $quoted_lock 2>/dev/null || true" EXIT

  started=$(date +%s)
  echo "[BEGIN] $doc_id"

  local rc=0
  if python3 "$GEN" "$doc_id" > "$log" 2>&1; then
    echo "[OK]    $doc_id ($(($(date +%s) - started))s)"
  else
    echo "[FAIL]  $doc_id ($(($(date +%s) - started))s) — see $log"
    rc=1
  fi

  rmdir -- "$lock" 2>/dev/null || true
  trap - EXIT
  return "$rc"
}

# Workers are fresh `bash -c` processes: they need the function and its globals.
export -f process_one
export RAW GEN

status=0
if [ "${#DOCS[@]}" -eq 0 ]; then
  echo "  (no documents to process)"
else
  # NUL-delimited hand-off: ids reach the worker verbatim, without xargs'
  # quote/backslash processing. A failing worker makes xargs exit non-zero
  # but does not stop the remaining docs.
  printf '%s\0' "${DOCS[@]}" \
    | xargs -0 -n 1 -P "$WORKERS" bash -c 'process_one "$1"' _ \
    || status=$?
fi

echo ""
echo "=== Done. reading.md count: ==="
count=0
for f in "$RAW"/*--subagent/reading.md; do
  if [ -e "$f" ]; then
    count=$((count + 1))
  fi
done
echo "$count"

# Deliberately the last command: its result becomes the script's exit
# status, so callers can detect that at least one doc failed.
[ "$status" -eq 0 ]