diff --git a/scripts/synthesize/32_reprocess_missing_pages.py b/scripts/synthesize/32_reprocess_missing_pages.py index 37ad996..7c26ab7 100644 --- a/scripts/synthesize/32_reprocess_missing_pages.py +++ b/scripts/synthesize/32_reprocess_missing_pages.py @@ -321,6 +321,10 @@ def process_one_page(doc_id: str, page_num: int) -> tuple[bool, int]: except Exception as e: print(f" [ERR ] {doc_id} p{page_num:03d} — integrate: {e}", flush=True) return (False, 0) + # Keep total_pages in sync with the real max page (recovered pages extend it) + max_page = max((c.get("page", 0) for c in idx.get("chunks") or []), default=0) + if max_page > idx.get("total_pages", 0): + idx["total_pages"] = max_page idx_path.write_text(json.dumps(idx, indent=2, ensure_ascii=False), encoding="utf-8") print(f" [OK ] {doc_id} p{page_num:03d} — {n} chunks", flush=True) return (True, n)