#!/usr/bin/env bash
#
# 99-finalize-pipeline.sh — Chains phases 3-retry → 4 → 4.8 → 5 → 7 → 8 → 6 → 9
# once Phase 3 (vision Haiku) has finished.
#
# Each phase is idempotent: re-running is safe.
#
# Single log at /tmp/ufo-finalize.log, with a per-phase banner, append-only.

# Strict-ish mode: unset variables are errors and a pipeline reports the
# failure of any stage. errexit (-e) is not enabled here.
set -uo pipefail

# Project checkout the relative scripts/ and wiki/ paths are resolved against.
ROOT="/Users/guto/ufo"
# Append-only log shared by every phase of this run.
LOG="/tmp/ufo-finalize.log"
# Interpreter used to launch the phase scripts.
PY="python3"

# Every phase uses paths relative to $ROOT, so refuse to continue without it.
cd "$ROOT" || {
  printf 'error: cannot cd to %s\n' "$ROOT" >&2
  exit 1
}

#######################################
# Run one pipeline phase, mirroring its banner and output to stdout and $LOG.
# Globals:   LOG (read) - file that receives the appended output
# Arguments: $1   - phase label shown in the banner
#            $2.. - command (and its arguments) to execute
# Outputs:   a blank line, a ruled banner with a UTC timestamp and the label,
#            the command's combined stdout/stderr, then "=== rc=N"
# Returns:   the exit status of the command itself (not of tee)
#######################################
phase() {
  local name="$1"
  shift
  local rule="================================================================"
  local rc

  {
    printf '\n%s\n' "$rule"
    printf '=== %s — %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$name"
    printf '%s\n' "$rule"
  } | tee -a "$LOG"

  "$@" 2>&1 | tee -a "$LOG"
  # Must be read right after the pipeline: index 0 is the command, 1 is tee,
  # and any later command overwrites PIPESTATUS.
  rc=${PIPESTATUS[0]}

  printf '=== rc=%s\n' "$rc" | tee -a "$LOG"
  return "$rc"
}

# Mark the start of this run in the log only (nothing is echoed to stdout).
# The log is append-only, so the banner separates this run from earlier ones.
{
  printf '\n'
  printf '%s\n' "================================================================"
  printf '==== FINALIZE PIPELINE STARTED %s ====\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  printf '%s\n' "================================================================"
} >> "$LOG"

# Phase chain. Every step is best-effort: `|| true` discards the phase's
# status so a failure never stops the phases after it (the status is still
# written to the log by phase()). Note the execution order: phases 7 and 8
# run before the heavier phase 6, and phase 9 runs last.

# --- Phase 3 pass3 — last safety net (idempotent, processes only failures) ---
phase "Phase 3 pass3 (final retry)" \
  "$PY" scripts/02-vision-page.py --all --workers 3 || true

# --- Phase 4 — Aggregate pages into document.md ---
phase "Phase 4 — build documents" \
  "$PY" scripts/14-build-document-md.py || true

# --- Phase 4.8 retry — table CSV extraction (one had failed JSON parse) ---
phase "Phase 4.8 — retry remaining table CSVs" \
  "$PY" scripts/16-extract-table-csv.py || true

# --- Phase 5 — Entity dedup / upsert ---
phase "Phase 5 — entity dedup" \
  "$PY" scripts/03-dedup-entities.py || true

# --- Phase 7 — Crop bboxes (needs page.md but not enrichment) ---
phase "Phase 7 — crop bboxes" \
  "$PY" scripts/05-crop-bboxes.py || true

# --- Phase 8 — Graph export (after entity stubs exist) ---
phase "Phase 8 — graph export" \
  "$PY" scripts/06-graph-export.py || true

# --- Phase 6 — Enrichment (heaviest, runs after dedup creates entity stubs) ---
phase "Phase 6 — enrichment (deep tier only, 3 workers)" \
  "$PY" scripts/17-enrich-entities.py --all --tier deep --workers 3 || true

# --- Phase 9 — Lint (LAST: rebuilds mentioned_in[] after enrichment) ---
phase "Phase 9 — lint + backlink rebuild" \
  "$PY" scripts/04-lint.py || true

# --- Final stats ---

# Print the number of non-hidden entries directly inside directory $1.
# Prints 0 when the directory is empty or does not exist. Runs in a subshell
# so the nullglob setting does not leak into the rest of the script.
count_entries() (
  shopt -s nullglob
  set -- "$1"/*
  printf '%s\n' "$#"
)

# Print how many paths under directory $1 (recursive) match name pattern $2.
# Prints 0 when the directory does not exist.
count_files() {
  find "$1" -name "$2" 2>/dev/null | wc -l | tr -d ' '
}

# Print how many *.md files under directory $1 (recursive) contain an
# "enrichment_status: deep" or "enrichment_status: shallow" line.
# Uses find -exec so a large tree cannot overflow the argument list, and
# scans the same file set that count_files "$1" '*.md' counts.
count_enriched() {
  find "$1" -name '*.md' \
    -exec grep -lE 'enrichment_status: (deep|shallow)' {} + 2>/dev/null \
    | wc -l | tr -d ' '
}

{
  printf '\n'
  printf '%s\n' "================================================================"
  printf '==== FINALIZE PIPELINE FINISHED %s ====\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)"
  printf '%s\n' "================================================================"
} | tee -a "$LOG"

PAGES=$(count_files "$ROOT/wiki/pages" 'p*.md')
DOCS=$(count_entries "$ROOT/wiki/documents")
ENTITIES=$(count_files "$ROOT/wiki/entities" '*.md')
ENRICHED=$(count_enriched "$ROOT/wiki/entities")
TABLES=$(count_entries "$ROOT/wiki/tables")

printf 'pages: %s · documents: %s · entities: %s (enriched: %s) · tables: %s\n' \
  "$PAGES" "$DOCS" "$ENTITIES" "$ENRICHED" "$TABLES" | tee -a "$LOG"