fix png-numbering: re-convert 34 zero-based docs + crop fallback

34 of 116 docs were generated with 0-based PNG numbering (p-000.png …
p-008.png) but the Sonnet chunks reference 1-based page numbers in their
YAML frontmatter (page: 9 means the 9th sheet of paper). The /api/crop
handler built the path p-009.png, which did not exist, and returned a
500; the browser's Next/Image surfaced a 400, and the chunk rendered as
a black box on screen.

Fixes:
- web/app/api/crop/route.ts: try p-NNN.png first, fall back to
  p-(NNN-1).png if the 1-based file is missing. Cheap insurance for any
  doc that comes in with the old convention.
- scripts/01-convert-pdfs.sh: previously printf '%03d' "$num" was given
  a zero-padded $num (e.g. "008") and raised "invalid number" because
  Bash parses a number with a leading zero as octal. Wrap with
  $((10#$num)) to force decimal — this was silently corrupting page
  sequences and producing gaps like p-001 ... p-008, p-011 (missing
  p-009/p-010).
- All 34 affected docs re-converted from PDFs with the patched script;
  every directory now has continuous 1-based PNGs.
- /processing/png/ rsync'd to VPS, web redeployed.

Smoke: /api/crop?doc=doc-341-…&page=9&… now returns 200 image/webp
instead of 500. Tested in browser: chunk c0026 (diagram, p9) renders
the real engineering diagram.
This commit is contained in:
guto 2026-05-18 11:45:40 -03:00
parent 7d13f93393
commit d5f6e6030a
2 changed files with 14 additions and 3 deletions

View file

@ -115,7 +115,9 @@ convert_one_pdf() {
num=$(printf '%s' "$bn" | sed -E 's/^p-([0-9]+)\.png$/\1/')
if [[ "$num" =~ ^[0-9]+$ ]]; then
local padded
padded=$(printf '%03d' "$num")
# Force decimal interpretation: leading zeros would make bash
# treat "008" as invalid octal under printf '%03d'.
padded=$(printf '%03d' "$((10#$num))")
local new_name="p-$padded.png"
if [[ "$bn" != "$new_name" ]]; then
mv "$f" "$png_dir/$new_name"

View file

@ -57,7 +57,12 @@ export async function GET(req: NextRequest) {
const format = (u.searchParams.get("format") ?? "webp").toLowerCase();
const tight = u.searchParams.get("tight") !== "0";
// Resolve source PNG
// Resolve source PNG.
// Two conventions exist in the corpus:
// 1-based: page=1 → p-001.png (most docs)
// 0-based: page=1 → p-000.png (~34 docs converted with pdftoppm -f 0)
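// Try the 1-based path first; if the file doesn't exist, fall back to
// 0-based (page - 1). If both files exist, the 1-based one always wins.
// NOTE(review): in a contiguously numbered 0-based doc every page except
// the last also has a file at the 1-based name, so only the final page
// actually takes the fallback — confirm no 0-based docs remain.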
let pngPath: string;
if (pngParam) {
if (pngParam.includes("..")) return badRequest("png param: invalid path");
@ -71,8 +76,12 @@ export async function GET(req: NextRequest) {
pageNum = parseInt(pageStr, 10);
}
if (!Number.isFinite(pageNum) || pageNum < 1) return badRequest("bad page");
const docDir = path.join(PROCESSING, "png", doc);
const oneBased = path.join(docDir, `p-${String(pageNum).padStart(3, "0")}.png`);
const zeroBased = path.join(docDir, `p-${String(pageNum - 1).padStart(3, "0")}.png`);
const { existsSync } = await import("node:fs");
pngPath = existsSync(oneBased) ? oneBased : (existsSync(zeroBased) ? zeroBased : oneBased);
pageStr = `p-${String(pageNum).padStart(3, "0")}`;
pngPath = path.join(PROCESSING, "png", doc, `${pageStr}.png`);
}
let buf: Buffer;