fix png-numbering: re-convert 34 zero-based docs + crop fallback
34 of 116 docs were generated with 0-based PNG numbering (p-000.png … p-008.png), but the Sonnet chunks reference 1-based page numbers in their YAML frontmatter (page: 9 means the 9th sheet of paper). The /api/crop handler built the path p-009.png, which did not exist, and returned a 500; the browser's Next/Image surfaced that as a 400, and the chunk rendered as a black box on screen.

Fixes:
- web/app/api/crop/route.ts: try p-NNN.png first, and fall back to p-(NNN-1).png if the 1-based file is missing. Cheap insurance for any doc that comes in with the old convention.
- scripts/01-convert-pdfs.sh: previously, printf '%03d' "$num" with a zero-padded $num (e.g. "008") raised "invalid number" because Bash parsed the leading-zero value as octal. Wrap it as $((10#$num)) to force decimal — the bug was silently corrupting page sequences and producing gaps like p-001 ... p-008, p-011 (missing p-009/p-010).
- All 34 affected docs re-converted from PDFs with the patched script; every directory now has continuous 1-based PNGs.
- /processing/png/ rsync'd to VPS, web redeployed.

Smoke: /api/crop?doc=doc-341-…&page=9&… now returns 200 image/webp instead of 500. Tested in browser: chunk c0026 (diagram, p9) renders the real engineering diagram.
This commit is contained in:
parent
7d13f93393
commit
d5f6e6030a
2 changed files with 14 additions and 3 deletions
|
|
@ -115,7 +115,9 @@ convert_one_pdf() {
|
|||
num=$(printf '%s' "$bn" | sed -E 's/^p-([0-9]+)\.png$/\1/')
|
||||
if [[ "$num" =~ ^[0-9]+$ ]]; then
|
||||
local padded
|
||||
padded=$(printf '%03d' "$num")
|
||||
# Force decimal interpretation: leading zeros would make bash
|
||||
# treat "008" as invalid octal under printf '%03d'.
|
||||
padded=$(printf '%03d' "$((10#$num))")
|
||||
local new_name="p-$padded.png"
|
||||
if [[ "$bn" != "$new_name" ]]; then
|
||||
mv "$f" "$png_dir/$new_name"
|
||||
|
|
|
|||
|
|
@ -57,7 +57,12 @@ export async function GET(req: NextRequest) {
|
|||
const format = (u.searchParams.get("format") ?? "webp").toLowerCase();
|
||||
const tight = u.searchParams.get("tight") !== "0";
|
||||
|
||||
// Resolve source PNG
|
||||
// Resolve source PNG.
|
||||
// Two conventions exist in the corpus:
|
||||
// 1-based: page=1 → p-001.png (most docs)
|
||||
// 0-based: page=1 → p-000.png (~34 docs converted with pdftoppm -f 0)
|
||||
// Try the 1-based path first; if the file doesn't exist, fall back to
|
||||
// 0-based (page - 1). Deterministic precedence — if both exist, 1-based wins.
|
||||
let pngPath: string;
|
||||
if (pngParam) {
|
||||
if (pngParam.includes("..")) return badRequest("png param: invalid path");
|
||||
|
|
@ -71,8 +76,12 @@ export async function GET(req: NextRequest) {
|
|||
pageNum = parseInt(pageStr, 10);
|
||||
}
|
||||
if (!Number.isFinite(pageNum) || pageNum < 1) return badRequest("bad page");
|
||||
const docDir = path.join(PROCESSING, "png", doc);
|
||||
const oneBased = path.join(docDir, `p-${String(pageNum).padStart(3, "0")}.png`);
|
||||
const zeroBased = path.join(docDir, `p-${String(pageNum - 1).padStart(3, "0")}.png`);
|
||||
const { existsSync } = await import("node:fs");
|
||||
pngPath = existsSync(oneBased) ? oneBased : (existsSync(zeroBased) ? zeroBased : oneBased);
|
||||
pageStr = `p-${String(pageNum).padStart(3, "0")}`;
|
||||
pngPath = path.join(PROCESSING, "png", doc, `${pageStr}.png`);
|
||||
}
|
||||
|
||||
let buf: Buffer;
|
||||
|
|
|
|||
Loading…
Reference in a new issue