diff --git a/scripts/01-convert-pdfs.sh b/scripts/01-convert-pdfs.sh index 05679fd..74e6408 100755 --- a/scripts/01-convert-pdfs.sh +++ b/scripts/01-convert-pdfs.sh @@ -115,7 +115,9 @@ convert_one_pdf() { num=$(printf '%s' "$bn" | sed -E 's/^p-([0-9]+)\.png$/\1/') if [[ "$num" =~ ^[0-9]+$ ]]; then local padded - padded=$(printf '%03d' "$num") + # Force decimal interpretation: leading zeros would make bash + # treat "008" as invalid octal under printf '%03d'. + padded=$(printf '%03d' "$((10#$num))") local new_name="p-$padded.png" if [[ "$bn" != "$new_name" ]]; then mv "$f" "$png_dir/$new_name" diff --git a/web/app/api/crop/route.ts b/web/app/api/crop/route.ts index 23d43d9..098d088 100644 --- a/web/app/api/crop/route.ts +++ b/web/app/api/crop/route.ts @@ -57,7 +57,12 @@ export async function GET(req: NextRequest) { const format = (u.searchParams.get("format") ?? "webp").toLowerCase(); const tight = u.searchParams.get("tight") !== "0"; - // Resolve source PNG + // Resolve source PNG. + // Two conventions exist in the corpus: + // 1-based: page=1 → p-001.png (most docs) + // 0-based: page=1 → p-000.png (~34 docs converted with pdftoppm -f 0) + // Try the 1-based path first; if the file doesn't exist, fall back to + // 0-based (page - 1). Idempotent — if both exist, 1-based wins. let pngPath: string; if (pngParam) { if (pngParam.includes("..")) return badRequest("png param: invalid path"); @@ -71,8 +76,12 @@ export async function GET(req: NextRequest) { pageNum = parseInt(pageStr, 10); } if (!Number.isFinite(pageNum) || pageNum < 1) return badRequest("bad page"); + const docDir = path.join(PROCESSING, "png", doc); + const oneBased = path.join(docDir, `p-${String(pageNum).padStart(3, "0")}.png`); + const zeroBased = path.join(docDir, `p-${String(pageNum - 1).padStart(3, "0")}.png`); + const { existsSync } = await import("node:fs"); + pngPath = existsSync(oneBased) ? oneBased : (existsSync(zeroBased) ? zeroBased : oneBased); pageStr = `p-${String(pageNum).padStart(3, "0")}`; - pngPath = path.join(PROCESSING, "png", doc, `${pageStr}.png`); } let buf: Buffer;