#!/usr/bin/env bash
#
# 99-finalize-pipeline.sh — Chains phases 3-retry → 4 → 4.8 → 5 → 7 → 8 → 6 → 9
# after Phase 3 (vision Haiku) has finished.
#
# The execution order deliberately differs from the numeric order: Phase 6
# (enrichment, the heaviest step) runs after Phases 7 and 8, which do not
# need it, and Phase 9 (lint) runs last so it sees the enriched entities.
#
# Each phase is idempotent: re-running is safe.
#
# The chain is best-effort: a failing phase is recorded and reported in the
# final summary, but never stops the following phases.
#
# Single append-only log (default /tmp/ufo-finalize.log), one banner per phase.
#
# Optional environment overrides (defaults preserve the original behavior):
#   UFO_ROOT          project root            (default: /Users/guto/ufo)
#   UFO_FINALIZE_LOG  log file, appended to   (default: /tmp/ufo-finalize.log)
#   UFO_PYTHON        Python interpreter      (default: python3)

# No `-e` on purpose: phases are allowed to fail without aborting the chain.
set -uo pipefail

ROOT="${UFO_ROOT:-/Users/guto/ufo}"
LOG="${UFO_FINALIZE_LOG:-/tmp/ufo-finalize.log}"
PY="${UFO_PYTHON:-python3}"

RULE="================================================================"

# Phases that returned non-zero, kept as a counter plus a "; "-joined string
# (rather than an array) so this stays safe under `set -u` on bash 3.2.
failed_count=0
failed_names=""

cd "$ROOT" || {
  printf 'error: cannot cd to %s\n' "$ROOT" >&2
  exit 1
}

# Prints the current UTC time as an ISO-8601 timestamp.
utc_now() {
  date -u +%Y-%m-%dT%H:%M:%SZ
}

# Writes its arguments as one line to stdout and appends it to $LOG.
say() {
  printf '%s\n' "$*" | tee -a "$LOG"
}

# Reads stdin and prints the number of lines, without wc's padding.
count_lines() {
  wc -l | tr -d ' '
}

#######################################
# Runs one pipeline phase with a banner, teeing its output into the log.
# Globals:   LOG (appended to), RULE (read)
# Arguments: $1 - human-readable phase name; $2... - command to run
# Outputs:   banner, the command's stdout+stderr and "=== rc=N" to stdout
#            and to $LOG
# Returns:   the exit status of the command (not of tee)
#######################################
phase() {
  local name="$1"
  shift
  say ""
  say "$RULE"
  say "=== $(utc_now) — $name"
  say "$RULE"
  "$@" 2>&1 | tee -a "$LOG"
  # Must be read immediately: any later command overwrites PIPESTATUS.
  local rc=${PIPESTATUS[0]}
  say "=== rc=$rc"
  return "$rc"
}

#######################################
# Best-effort wrapper around phase(): records a failure instead of
# propagating it, so the remaining phases still run.
# Globals:   failed_count, failed_names (written)
# Arguments: same as phase()
# Returns:   always 0
#######################################
try_phase() {
  if ! phase "$@"; then
    failed_count=$((failed_count + 1))
    failed_names="${failed_names:+$failed_names; }$1"
  fi
  return 0
}

# Start banner goes to the log only, not to the terminal.
{
  printf '\n'
  printf '%s\n' "$RULE"
  printf '%s\n' "==== FINALIZE PIPELINE STARTED $(utc_now) ===="
  printf '%s\n' "$RULE"
} >> "$LOG"

# --- Phase 3 pass3 — last safety net (idempotent, processes only failures) ---
try_phase "Phase 3 pass3 (final retry)" \
  "$PY" scripts/02-vision-page.py --all --workers 3

# --- Phase 4 — Aggregate pages into document.md ---
try_phase "Phase 4 — build documents" \
  "$PY" scripts/14-build-document-md.py

# --- Phase 4.8 retry — table CSV extraction (one had failed JSON parse) ---
try_phase "Phase 4.8 — retry remaining table CSVs" \
  "$PY" scripts/16-extract-table-csv.py

# --- Phase 5 — Entity dedup / upsert ---
try_phase "Phase 5 — entity dedup" \
  "$PY" scripts/03-dedup-entities.py

# --- Phase 7 — Crop bboxes (needs page.md but not enrichment) ---
try_phase "Phase 7 — crop bboxes" \
  "$PY" scripts/05-crop-bboxes.py

# --- Phase 8 — Graph export (after entity stubs exist) ---
try_phase "Phase 8 — graph export" \
  "$PY" scripts/06-graph-export.py

# --- Phase 6 — Enrichment (heaviest, runs after dedup creates entity stubs) ---
try_phase "Phase 6 — enrichment (deep tier only, 3 workers)" \
  "$PY" scripts/17-enrich-entities.py --all --tier deep --workers 3

# --- Phase 9 — Lint (LAST: rebuilds mentioned_in[] after enrichment) ---
try_phase "Phase 9 — lint + backlink rebuild" \
  "$PY" scripts/04-lint.py

# --- Final stats ---
say ""
say "$RULE"
say "==== FINALIZE PIPELINE FINISHED $(utc_now) ===="
say "$RULE"

# Missing directories are expected to count as 0, hence the 2>/dev/null.
PAGES=$(find "$ROOT/wiki/pages" -name "p*.md" 2>/dev/null | count_lines)
# Direct, non-hidden children only — the same set a plain `ls` would list,
# without parsing ls output. (-mindepth/-maxdepth: GNU and BSD find.)
DOCS=$(find "$ROOT/wiki/documents/" -mindepth 1 -maxdepth 1 ! -name '.*' \
  2>/dev/null | count_lines)
ENTITIES=$(find "$ROOT/wiki/entities" -name "*.md" 2>/dev/null | count_lines)
# find batches the file list for grep, so a large wiki cannot overflow the
# argument list (which the old glob did silently behind 2>/dev/null).
# Two -e patterns replace the non-POSIX "\|" alternation.
ENRICHED=$(find "$ROOT/wiki/entities/" -mindepth 2 -maxdepth 2 -name '*.md' \
  -exec grep -l -e 'enrichment_status: deep' -e 'enrichment_status: shallow' \
  {} + 2>/dev/null | count_lines)
TABLES=$(find "$ROOT/wiki/tables/" -mindepth 1 -maxdepth 1 ! -name '.*' \
  2>/dev/null | count_lines)

say "pages: $PAGES · documents: $DOCS · entities: $ENTITIES (enriched: $ENRICHED) · tables: $TABLES"

# Surface swallowed failures; the exit status intentionally stays best-effort.
if ((failed_count > 0)); then
  say "failed phases ($failed_count): $failed_names"
fi