disclosure-bureau/web/app/api/crop/route.ts

203 lines
8.3 KiB
TypeScript
Raw Permalink Normal View History

/**
* /api/crop On-demand bbox crop of a page PNG, sized + cached.
*
* Inputs (querystring):
* doc doc-id (or use png= absolute path inside UFO_ROOT)
* page page number (1-indexed) OR p001 / p-001
* x,y,w,h bbox in normalized [0..1]
* w_px output width in px (default 480, clamped to 64..1600)
* pad relative padding 0..0.05 (default 0.005)
* format png | webp | jpeg (default webp)
* tight 1|0 auto-tighten to dark-pixel content in and around the declared bbox
* (default 1; only tight=0 disables it — this route never inspects a type, so
* callers should pass tight=0 for text-like regions where margins matter).
*
* Sonnet's bboxes are ~1.43x bigger than the actual feature on average, so we
* post-process: find the tight content bbox inside the declared region and crop
* to that with a small margin. Falls back to the declared bbox if the content
* scan finds nothing meaningful.
*
* Responses carry a one-year, immutable Cache-Control header (a crop is treated
* as a pure function of its inputs). Next.js Image component can then layer on
* top for further format/size optimization.
*/
import { NextRequest } from "next/server";
import path from "node:path";
import sharp from "sharp";
import { PROCESSING } from "@/lib/wiki";
// Next.js route segment config: run this handler on the Node.js runtime
// (presumably required by the sharp import above — TODO confirm).
export const runtime = "nodejs";
// Next.js route segment config: evaluate the handler on every request instead
// of letting it be rendered statically.
export const dynamic = "force-dynamic";
/**
 * Restricts a number to the closed interval [0, 1].
 *
 * @param n - Value to clamp; may be NaN or infinite.
 * @returns 0 for NaN and for +/-Infinity, otherwise `n` limited to [0, 1].
 */
function clamp01(n: number): number {
  return Number.isFinite(n) ? Math.min(1, Math.max(0, n)) : 0;
}
/**
 * Builds an HTTP 400 response whose JSON body is `{ "error": msg }`.
 *
 * @param msg - Human-readable description of what was wrong with the request.
 * @returns A `Response` with status 400 and a JSON content type.
 */
function badRequest(msg: string) {
  const body = JSON.stringify({ error: msg });
  const headers = { "content-type": "application/json" };
  return new Response(body, { status: 400, headers });
}
/** Greyscale values strictly below this count as "dark" content. */
const CROP_DARK_THRESHOLD = 200;
/** Fraction of the bbox size searched beyond each edge when tightening. */
const CROP_SEARCH_MARGIN = 0.5;
/** Max distance (as a fraction of bbox size) between tight and declared centres. */
const CROP_CENTER_SLACK = 0.75;
/** Tight content must cover more than this fraction of the searched area. */
const CROP_MIN_AREA_RATIO = 0.01;

/** Response content type for each supported output format. */
const CROP_MIME = {
  png: "image/png",
  jpeg: "image/jpeg",
  webp: "image/webp",
} as const;

/** Pixel rectangle in page coordinates, as accepted by sharp's `extract`. */
interface CropRect {
  left: number;
  top: number;
  width: number;
  height: number;
}

/** Inclusive pixel bounds of dark content inside a scanned region. */
interface DarkBounds {
  minX: number;
  minY: number;
  maxX: number;
  maxY: number;
}

/** Clamps `value` to [lo, hi]; returns `fallback` when `value` is not finite. */
function clampOrDefault(value: number, lo: number, hi: number, fallback: number): number {
  return Number.isFinite(value) ? Math.min(Math.max(value, lo), hi) : fallback;
}

/** True when `candidate` is `root` itself or is located beneath it. */
function isWithinDir(root: string, candidate: string): boolean {
  const rel = path.relative(root, candidate);
  return rel !== ".." && !rel.startsWith(`..${path.sep}`) && !path.isAbsolute(rel);
}

/**
 * Finds the bounding box of all pixels darker than `threshold` in a raw buffer.
 * Only the first channel of each pixel is read, so an extra alpha channel in
 * the raw output does not skew the column/row arithmetic.
 */
function scanDarkBounds(
  data: Uint8Array,
  width: number,
  height: number,
  channels: number,
  threshold: number,
): DarkBounds | null {
  let minX = width;
  let minY = height;
  let maxX = -1;
  let maxY = -1;
  for (let py = 0; py < height; py++) {
    const rowOff = py * width * channels;
    for (let px = 0; px < width; px++) {
      if ((data[rowOff + px * channels] ?? 255) < threshold) {
        if (px < minX) minX = px;
        if (px > maxX) maxX = px;
        if (py < minY) minY = py;
        if (py > maxY) maxY = py;
      }
    }
  }
  return maxX >= 0 ? { minX, minY, maxX, maxY } : null;
}

/**
 * Searches an expanded area around the declared bbox for dark content and
 * returns a crop rectangle hugging that content, or `null` when the content is
 * missing, too small to be meaningful, or centred too far from the declared
 * bbox (which would mean we latched onto a neighbouring feature).
 */
async function tightenCrop(
  pngPath: string,
  pageW: number,
  pageH: number,
  x: number,
  y: number,
  w: number,
  h: number,
): Promise<CropRect | null> {
  const sx0 = Math.floor(Math.max(0, x - w * CROP_SEARCH_MARGIN) * pageW);
  const sy0 = Math.floor(Math.max(0, y - h * CROP_SEARCH_MARGIN) * pageH);
  const sx1 = Math.ceil(Math.min(1, x + w + w * CROP_SEARCH_MARGIN) * pageW);
  const sy1 = Math.ceil(Math.min(1, y + h + h * CROP_SEARCH_MARGIN) * pageH);

  const { data, info } = await sharp(pngPath)
    .extract({ left: sx0, top: sy0, width: sx1 - sx0, height: sy1 - sy0 })
    .greyscale()
    .raw()
    .toBuffer({ resolveWithObject: true });

  const bounds = scanDarkBounds(data, info.width, info.height, info.channels, CROP_DARK_THRESHOLD);
  if (!bounds) return null;

  // Page-pixel coordinates; right/bottom are exclusive (max index + 1) so the
  // last dark column/row is part of the box.
  const left = sx0 + bounds.minX;
  const top = sy0 + bounds.minY;
  const right = sx0 + bounds.maxX + 1;
  const bottom = sy0 + bounds.maxY + 1;
  const boxW = right - left;
  const boxH = bottom - top;

  // The tight box must stay centred near the declared bbox.
  const nearDeclared =
    Math.abs((left + right) / 2 - (x + w / 2) * pageW) <= w * pageW * CROP_CENTER_SLACK &&
    Math.abs((top + bottom) / 2 - (y + h / 2) * pageH) <= h * pageH * CROP_CENTER_SLACK;
  // Reject specks: content must cover a minimum share of the searched area.
  const bigEnough = boxW * boxH > info.width * info.height * CROP_MIN_AREA_RATIO;
  if (!nearDeclared || !bigEnough) return null;

  const marginPx = Math.max(6, Math.round(Math.min(boxW, boxH) * 0.06));
  const cropLeft = Math.max(0, left - marginPx);
  const cropTop = Math.max(0, top - marginPx);
  const cropRight = Math.min(pageW, right + marginPx);
  const cropBottom = Math.min(pageH, bottom + marginPx);
  return {
    left: cropLeft,
    top: cropTop,
    width: cropRight - cropLeft,
    height: cropBottom - cropTop,
  };
}

/**
 * GET /api/crop — crops a normalized bbox out of a page PNG and returns it as
 * png, jpeg or webp (webp for any other `format` value).
 *
 * Query parameters: `doc` + `page` (number, `pNNN` or `p-NNN`) or `png`
 * (path below the parent of PROCESSING), `x`/`y`/`w`/`h` in [0..1], `w_px`
 * (64..1600, default 480), `pad` (0..0.05, default 0.005), `format`, `tight`.
 * Non-numeric `w_px` / `pad` fall back to their defaults.
 *
 * @param req - Incoming request; only its URL query string is read.
 * @returns 200 with the encoded image; 400 JSON for invalid parameters;
 *          500 when the source image cannot be read or cropped.
 */
export async function GET(req: NextRequest): Promise<Response> {
  const params = new URL(req.url).searchParams;
  const doc = params.get("doc")?.trim() ?? "";
  const pngParam = params.get("png")?.trim() ?? "";
  const pageStr = params.get("page")?.trim() ?? "";
  const x = clamp01(parseFloat(params.get("x") ?? ""));
  const y = clamp01(parseFloat(params.get("y") ?? ""));
  const w = clamp01(parseFloat(params.get("w") ?? ""));
  const h = clamp01(parseFloat(params.get("h") ?? ""));
  if (w <= 0 || h <= 0) return badRequest("bbox w/h must be > 0");

  const w_px = clampOrDefault(parseInt(params.get("w_px") ?? "480", 10), 64, 1600, 480);
  const pad = clampOrDefault(parseFloat(params.get("pad") ?? "0.005"), 0, 0.05, 0.005);
  const requestedFormat = (params.get("format") ?? "webp").toLowerCase();
  const format: keyof typeof CROP_MIME =
    requestedFormat === "png"
      ? "png"
      : requestedFormat === "jpeg" || requestedFormat === "jpg"
        ? "jpeg"
        : "webp";
  const tight = params.get("tight") !== "0";

  // Resolve source PNG.
  // Two conventions exist in the corpus:
  //   1-based: page=1 → p-001.png (most docs)
  //   0-based: page=1 → p-000.png (~34 docs converted with pdftoppm -f 0)
  // Try the 1-based path first; if the file doesn't exist, fall back to
  // 0-based (page - 1). If both exist, 1-based wins.
  let pngPath: string;
  if (pngParam) {
    if (pngParam.includes("..")) return badRequest("png param: invalid path");
    pngPath = path.join(PROCESSING, "..", pngParam.replace(/^\/+/, ""));
  } else {
    if (!doc) return badRequest("doc or png required");
    // Accept "7", "p007" and "p-007"; anything else goes through parseInt as before.
    const prefixed = /^p-?(\d+)$/i.exec(pageStr);
    const pageNum = parseInt(prefixed?.[1] ?? pageStr, 10);
    if (!Number.isFinite(pageNum) || pageNum < 1) return badRequest("bad page");

    const pngRoot = path.join(PROCESSING, "png");
    const docDir = path.join(pngRoot, doc);
    // `doc` is untrusted: refuse ids that climb out of the PNG root.
    if (!isWithinDir(pngRoot, docDir)) return badRequest("doc param: invalid path");

    const oneBased = path.join(docDir, `p-${String(pageNum).padStart(3, "0")}.png`);
    const zeroBased = path.join(docDir, `p-${String(pageNum - 1).padStart(3, "0")}.png`);
    const { existsSync } = await import("node:fs");
    pngPath = existsSync(oneBased) || !existsSync(zeroBased) ? oneBased : zeroBased;
  }

  let buf: Buffer;
  try {
    const img = sharp(pngPath);
    const meta = await img.metadata();
    const W = meta.width ?? 0;
    const H = meta.height ?? 0;
    if (!W || !H) return new Response("source image has no dims", { status: 500 });

    // Declared bbox plus padding, kept inside the page. The origin is capped at
    // W-1 / H-1 so the minimum 1px extent can never run past the image edge.
    const left = Math.min(W - 1, Math.max(0, Math.floor((x - pad) * W)));
    const top = Math.min(H - 1, Math.max(0, Math.floor((y - pad) * H)));
    const right = Math.min(W, Math.ceil((x + w + pad) * W));
    const bottom = Math.min(H, Math.ceil((y + h + pad) * H));
    let rect: CropRect = {
      left,
      top,
      width: Math.max(1, right - left),
      height: Math.max(1, bottom - top),
    };

    // Auto-tighten + auto-recenter: declared bboxes tend to be oversized and
    // shifted, so look for the actual dark content around them.
    if (tight) {
      try {
        rect = (await tightenCrop(pngPath, W, H, x, y, w, h)) ?? rect;
      } catch {
        /* deliberate best-effort: fall through to the declared bbox */
      }
    }

    let pipeline = img.extract(rect);
    if (rect.width > w_px) {
      pipeline = pipeline.resize({ width: w_px, withoutEnlargement: true });
    }
    if (format === "png") buf = await pipeline.png({ compressionLevel: 9 }).toBuffer();
    else if (format === "jpeg") buf = await pipeline.jpeg({ quality: 84 }).toBuffer();
    else buf = await pipeline.webp({ quality: 86 }).toBuffer();
  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e);
    return new Response(JSON.stringify({ error: "crop_failed", message }), {
      status: 500,
      headers: { "content-type": "application/json" },
    });
  }

  return new Response(new Uint8Array(buf), {
    status: 200,
    headers: {
      "content-type": CROP_MIME[format],
      // Crops are pure function of inputs → cache aggressively
      "cache-control": "public, max-age=31536000, immutable",
    },
  });
}