#!/usr/bin/env bash
# Generate the clean LLM reading version for every document, in parallel.
#
#   - One doc per `claude -p` (Sonnet) via 40_reading_version.py
#   - Skips docs that already have reading.md (idempotent — safe to re-run)
#   - mkdir-based per-doc lock prevents two workers racing the same doc
#   - WORKERS parallel workers (default 2)
#
# Run:
#   ./run_reading_parallel.sh                  # all docs, 2 workers
#   WORKERS=3 ./run_reading_parallel.sh        # 3 workers
#   ./run_reading_parallel.sh DOC1 DOC2        # specific docs only
# -u: expanding an unset variable is an error.
# pipefail: a pipeline's status is that of its last failing stage.
# -e is not enabled, so a failing command does not abort the script.
set -uo pipefail

# Project layout.
UFO="/Users/guto/ufo"
RAW="$UFO/raw"                                       # holds one "<doc>--subagent" dir per document
GEN="$UFO/scripts/synthesize/40_reading_version.py"  # generator invoked once per document
WORKERS="${WORKERS:-2}"                              # parallelism; override via environment

# Fail early with a clear message rather than letting xargs reject -P later.
if ! [[ "$WORKERS" =~ ^[0-9]+$ ]]; then
  echo "WORKERS must be a non-negative integer, got: $WORKERS" >&2
  exit 2
fi

# Build the work list: explicit doc ids from the command line, otherwise every
# "<doc>--subagent" directory under $RAW that contains an _index.json.
if [[ "$#" -gt 0 ]]; then
  DOCS=("$@")
else
  DOCS=()
  for d in "$RAW"/*--subagent; do
    # Also skips the literal, unexpanded pattern when the glob matches nothing.
    [[ -f "$d/_index.json" ]] || continue
    d="${d##*/}"                # strip the leading directories
    DOCS+=("${d%--subagent}")   # strip the suffix to get the doc id
  done
fi

echo "=== reading-version generator ==="
echo " docs queued: ${#DOCS[@]}"
echo " workers: $WORKERS"
echo ""

#######################################
# Generate reading.md for a single document by running the generator script.
# Globals:
#   RAW (read) - directory holding the "<doc>--subagent" directories
#   GEN (read) - path of the generator script passed to python3
# Arguments:
#   $1 - document id (directory name without the "--subagent" suffix)
# Outputs:
#   One status line per step on stdout: [SKIP], [LOCK], [BEGIN], [OK], [FAIL].
#   Generator stdout/stderr is written to <doc dir>/_reading.log.
# Returns:
#   0 when the doc was generated, skipped, or is locked by another worker;
#   1 when the doc directory is missing or the generator exits non-zero.
#######################################
process_one() {
  local doc_id="$1"
  local sub="$RAW/$doc_id--subagent"
  local out="$sub/reading.md"
  local log="$sub/_reading.log"
  local lock="$sub/.reading.lock"
  local t0
  local rc=0

  if [[ -f "$out" ]]; then
    echo "[SKIP] $doc_id (already has reading.md)"
    return 0
  fi

  # Without this check a mistyped doc id makes mkdir fail below and would be
  # misreported as lock contention.
  if [[ ! -d "$sub" ]]; then
    echo "[FAIL] $doc_id (missing $sub)"
    return 1
  fi

  # mkdir either creates the directory or fails, so it doubles as the lock.
  if ! mkdir "$lock" 2>/dev/null; then
    echo "[LOCK] $doc_id (another worker)"
    return 0
  fi
  # Release the lock if the shell exits before the explicit rmdir below.
  # %q quoting keeps the trap command intact for any characters in the path.
  trap "rmdir $(printf '%q' "$lock") 2>/dev/null || true" EXIT

  if [[ -f "$out" ]]; then
    # Another run finished this doc between the first check and the lock.
    echo "[SKIP] $doc_id (already has reading.md)"
  else
    t0=$(date +%s)
    echo "[BEGIN] $doc_id"
    if python3 "$GEN" "$doc_id" > "$log" 2>&1; then
      echo "[OK] $doc_id ($(($(date +%s) - t0))s)"
    else
      rc=1
      echo "[FAIL] $doc_id ($(($(date +%s) - t0))s) — see $log"
    fi
  fi

  rmdir "$lock" 2>/dev/null || true
  trap - EXIT
  return "$rc"
}

# Make the worker function and the paths it reads available to the child bash
# processes that xargs starts.
export -f process_one
export RAW GEN

# Fan out: one `bash -c` per doc id, at most $WORKERS running at once.
# Ids are NUL-delimited so xargs passes each one through verbatim (no quote,
# backslash or whitespace interpretation). The length guard avoids expanding
# an empty array, which is an "unbound variable" error under `set -u` on
# bash < 4.4, and avoids feeding xargs a single empty item.
if [[ "${#DOCS[@]}" -gt 0 ]]; then
  printf '%s\0' "${DOCS[@]}" \
    | xargs -0 -n 1 -P "$WORKERS" bash -c 'process_one "$@"' _ \
    || echo "warning: one or more workers exited non-zero (xargs status $?)" >&2
fi

echo ""
echo "=== Done. reading.md count: ==="
# Count via the glob instead of parsing ls output.
reading_count=0
for reading_file in "$RAW"/*--subagent/reading.md; do
  # An unmatched glob stays literal; -e filters that case out.
  if [[ -e "$reading_file" ]]; then
    reading_count=$((reading_count + 1))
  fi
done
echo "$reading_count"