From d5f6e6030a754082b90e8c26773a327b8b3a7418 Mon Sep 17 00:00:00 2001 From: guto Date: Mon, 18 May 2026 11:45:40 -0300 Subject: [PATCH] fix png-numbering: re-convert 34 zero-based docs + crop fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 34 of 116 docs were generated with 0-based PNG numbering (p-000.png … p-008.png) but the Sonnet chunks reference 1-based page numbers in their YAML frontmatter (page: 9 means the 9th sheet of paper). The /api/crop handler built p-009.png and got a 500, the browser's Next/Image surfaced a 400, and the chunk rendered as a black box on screen. Fixes: - web/app/api/crop/route.ts: try p-NNN.png first, fall back to p-(NNN-1).png if the 1-based file is missing. Cheap insurance for any doc that comes in with the old convention. - scripts/01-convert-pdfs.sh: previously printf '%03d' "$num" with a zero-padded $num (e.g. "008") raised "invalid number" because Bash parsed it as octal. Wrap with $((10#$num)) to force decimal; the failing printf was silently corrupting page sequences and producing gaps like p-001 ... p-008, p-011 (missing p-009/p-010). - All 34 affected docs re-converted from PDFs with the patched script; every directory now has continuous 1-based PNGs. - /processing/png/ rsync'd to VPS, web redeployed. Smoke: /api/crop?doc=doc-341-…&page=9&… now returns 200 image/webp instead of 500. Tested in browser: chunk c0026 (diagram, p9) renders the real engineering diagram. 
--- scripts/01-convert-pdfs.sh | 4 +++- web/app/api/crop/route.ts | 13 +++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/scripts/01-convert-pdfs.sh b/scripts/01-convert-pdfs.sh index 05679fd..74e6408 100755 --- a/scripts/01-convert-pdfs.sh +++ b/scripts/01-convert-pdfs.sh @@ -115,7 +115,9 @@ convert_one_pdf() { num=$(printf '%s' "$bn" | sed -E 's/^p-([0-9]+)\.png$/\1/') if [[ "$num" =~ ^[0-9]+$ ]]; then local padded - padded=$(printf '%03d' "$num") + # Force decimal interpretation: leading zeros would make bash + # treat "008" as invalid octal under printf '%03d'. + padded=$(printf '%03d' "$((10#$num))") local new_name="p-$padded.png" if [[ "$bn" != "$new_name" ]]; then mv "$f" "$png_dir/$new_name" diff --git a/web/app/api/crop/route.ts b/web/app/api/crop/route.ts index 23d43d9..098d088 100644 --- a/web/app/api/crop/route.ts +++ b/web/app/api/crop/route.ts @@ -57,7 +57,12 @@ export async function GET(req: NextRequest) { const format = (u.searchParams.get("format") ?? "webp").toLowerCase(); const tight = u.searchParams.get("tight") !== "0"; - // Resolve source PNG + // Resolve source PNG. + // Two conventions exist in the corpus: + // 1-based: page=1 → p-001.png (most docs) + // 0-based: page=1 → p-000.png (~34 docs converted with pdftoppm -f 0) + // Try the 1-based path first; if the file doesn't exist, fall back to + // 0-based (page - 1). Idempotent — if both exist, 1-based wins. 
let pngPath: string; if (pngParam) { if (pngParam.includes("..")) return badRequest("png param: invalid path"); @@ -71,8 +76,12 @@ export async function GET(req: NextRequest) { pageNum = parseInt(pageStr, 10); } if (!Number.isFinite(pageNum) || pageNum < 1) return badRequest("bad page"); + const docDir = path.join(PROCESSING, "png", doc); + const oneBased = path.join(docDir, `p-${String(pageNum).padStart(3, "0")}.png`); + const zeroBased = path.join(docDir, `p-${String(pageNum - 1).padStart(3, "0")}.png`); + const { existsSync } = await import("node:fs"); + pngPath = existsSync(oneBased) ? oneBased : (existsSync(zeroBased) ? zeroBased : oneBased); pageStr = `p-${String(pageNum).padStart(3, "0")}`; - pngPath = path.join(PROCESSING, "png", doc, `${pageStr}.png`); } let buf: Buffer;