disclosure-bureau/scripts/00c-download-missing.js

164 lines
6.8 KiB
JavaScript

/**
* 00c-download-missing.js — Programmatic download (fetch+blob) via the war.gov UI
*
* Improved over the previous version:
* - Uses fetch() to grab the asset from the same origin (browser cookies +
* same-origin policy → Akamai accepts).
* - Creates a Blob and triggers `<a download="filename">` to control the
* filename exactly (no "(1)" duplicates).
* - Derives the asset URL from the modal thumbnail's src pattern
* (`/thumbnail/foo.jpg` → `/foo.pdf|.mp4|.jpg`); the modal's download
* button is not read.
* - Does NOT detect files that already exist on disk (a page script cannot
* see the download folder) — keep only the missing records in TARGETS below.
*
* USAGE (Chrome on https://www.war.gov/UFO/Release-NN/):
* 1. Set Chrome download folder to /Users/guto/ufo/raw/ (Settings → Downloads).
* Videos land in the same folder — the script does not sort them; MOVE them
* manually to /Users/guto/ufo/raw/videos/ after this finishes.
* 2. Open DevTools → Console.
* 3. Paste this whole file. Press Enter.
* 4. Chrome prompts "Allow multiple downloads" → click **Allow**.
* 5. Wait ~30s (1s between downloads). Files land in Downloads folder.
*/
(async function downloadMissing() {
// ============================================================
// TARGETS — the record ids this run will try to download.
// Each entry is matched against the `data-record-id` attribute of a
// `button.record-row` element on the listing page.
//
// PDFs still missing (Release-01, verified 2026-05-13).
// The 28 .VID videos are already in /Users/guto/ufo/raw/videos/ from
// a prior bulk download — they share file names like DOD_111688723.mp4.
// ============================================================
const TARGETS = [
"record-140", // NASA-UAP-D003 GEMINI 7 TRANSCRIPT 1965
"record-154", // STATE CABLE 003 TBILISI GEORGIA
"record-155", // STATE CABLE 004 ASHGABAT TURKMENISTAN
"record-156", // STATE CABLE 005 MEXICO
];
// Announce the batch size up front so the console shows what to expect.
console.log(`[dl] ${TARGETS.length} records to download`);
// ----------------------------------------------------------------------
/** Returns a promise that resolves (with no value) after about `ms` milliseconds. */
const sleep = (ms) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
/**
 * Repeatedly calls `predicate` until it returns a truthy value.
 *
 * @param {() => any} predicate - Checked immediately, then every `interval` ms.
 * @param {{timeout?: number, interval?: number}} [opts] - `timeout` (default
 *   4000 ms) bounds the total wait; `interval` (default 50 ms) is the delay
 *   between checks.
 * @returns {Promise<any>} Resolves with the first truthy value returned by
 *   `predicate`. Rejects with `Error("timeout")` once the timeout has elapsed,
 *   or with whatever `predicate` throws.
 */
function pollUntil(predicate, opts = {}) {
  const timeout = opts.timeout ?? 4000;
  const interval = opts.interval ?? 50;
  return new Promise((resolve, reject) => {
    const start = Date.now();
    const tick = () => {
      let value;
      try {
        value = predicate();
      } catch (err) {
        // Later ticks run inside a setTimeout callback, where a throw would
        // escape the promise entirely and leave the caller waiting forever.
        reject(err);
        return;
      }
      if (value) {
        resolve(value);
        return;
      }
      if (Date.now() - start > timeout) {
        reject(new Error("timeout"));
        return;
      }
      setTimeout(tick, interval);
    };
    tick();
  });
}
/**
 * Looks for the row button of `recordId`, paging forward through the listing
 * until it is found.
 *
 * Starts from whatever page is currently shown and inspects at most 25 pages.
 *
 * @param {string} recordId - Value expected in the row's `data-record-id`.
 * @returns {Promise<Element|null>} The matching row button, or null when the
 *   "next" control is missing/disabled or the page budget is used up.
 */
async function findRowOnAllPages(recordId) {
  const rowSelector = `button.record-row[data-record-id="${recordId}"]`;
  const firstRowId = () => document.querySelector("button.record-row")?.dataset.recordId;

  let pagesLeft = 25;
  while (pagesLeft > 0) {
    pagesLeft -= 1;

    const match = document.querySelector(rowSelector);
    if (match) {
      return match;
    }

    const nextBtn = document.querySelector("button.pagination-next");
    const exhausted =
      !nextBtn || nextBtn.disabled || nextBtn.getAttribute("aria-disabled") === "true";
    if (exhausted) {
      return null;
    }

    // Remember the first row so we can tell when the next page has rendered.
    const previousFirst = firstRowId();
    nextBtn.click();
    try {
      await pollUntil(() => {
        const currentFirst = firstRowId();
        return currentFirst && currentFirst !== previousFirst ? currentFirst : null;
      });
    } catch {
      // Best effort: a timeout here is deliberately ignored and the loop goes on.
    }
    await sleep(150);
  }
  return null;
}
/**
 * Clicks the first pagination button whose trimmed label is "1", then pauses
 * 400 ms. Does nothing when no such button exists.
 *
 * @returns {Promise<void>}
 */
async function goToFirstPage() {
  for (const button of document.querySelectorAll(".pagination-button")) {
    if (button.innerText.trim() === "1") {
      button.click();
      await sleep(400);
      return;
    }
  }
}
/**
 * Builds the download filename from a thumbnail URL.
 *
 * `.../thumbnail/foo.jpg` → `foo` + `extHint`. A trailing `?query` or `#hash`
 * on the thumbnail URL is ignored, and percent-escapes in the name are decoded
 * when they are well-formed.
 *
 * @param {string} thumbUrl - Thumbnail URL taken from the modal image.
 * @param {string} extHint - Extension to append, including the leading dot.
 * @returns {string|null} The filename, or null when the URL does not contain a
 *   `/thumbnail/<name>.<ext>` segment.
 */
function buildFilenameFromThumb(thumbUrl, extHint) {
  // Allow (and discard) a query string / fragment after the extension.
  const m = thumbUrl.match(/\/thumbnail\/([^?#]+)\.[a-z]+(?:[?#].*)?$/i);
  if (!m) return null;
  let base;
  try {
    base = decodeURIComponent(m[1]);
  } catch {
    // A stray "%" makes decodeURIComponent throw URIError; keep the raw name.
    base = m[1];
  }
  return `${base}${extHint}`;
}
/**
 * Derives the asset URL from a thumbnail URL: drops the first `/thumbnail/`
 * path segment and swaps the file extension for `extHint`.
 *
 * Only the path part is rewritten; a `?query` / `#hash` tail is carried over
 * unchanged so it cannot prevent the extension swap.
 *
 * @param {string} thumbUrl - Thumbnail URL taken from the modal image.
 * @param {string} extHint - Replacement extension, including the leading dot.
 * @returns {string} The rewritten URL.
 */
function buildAssetUrlFromThumb(thumbUrl, extHint) {
  const tailStart = thumbUrl.search(/[?#]/);
  const path = tailStart === -1 ? thumbUrl : thumbUrl.slice(0, tailStart);
  const tail = tailStart === -1 ? "" : thumbUrl.slice(tailStart);
  const assetPath = path
    .replace("/thumbnail/", "/")
    // Function replacer: insert extHint literally, without `$` pattern expansion.
    .replace(/\.[a-z]+$/i, () => extHint);
  return `${assetPath}${tail}`;
}
/**
 * Clicks the record modal's close control (if one is present) and then waits
 * `settleMs` so the page can tear the modal down before the next row opens.
 */
async function closeRecordModal(settleMs) {
  const closeBtn = document.querySelector(".record-modal-close, [data-record-modal-close]");
  if (closeBtn) closeBtn.click();
  await sleep(settleMs);
}

/**
 * Reads the modal's "Document Type" fact and turns it into a file extension.
 * Falls back to ".pdf" when the fact is missing or empty; maps ".vid" → ".mp4"
 * and ".img" → ".jpg" (a guess about what the site actually serves).
 */
function readAssetExtension(modal) {
  const docTypeEl = Array.from(modal.querySelectorAll(".record-modal-fact dd"))
    .find((dd) => dd.previousElementSibling?.innerText?.trim() === "Document Type");
  const raw = docTypeEl?.innerText.trim().replace(/[\[\]]/g, "").toLowerCase();
  if (!raw) return ".pdf";
  const ext = raw.startsWith(".") ? raw : `.${raw}`;
  if (ext === ".vid") return ".mp4";
  if (ext === ".img") return ".jpg";
  return ext;
}

/**
 * Downloads the asset behind one record.
 *
 * Finds the record's row, opens its modal, infers the asset URL and filename
 * from the modal thumbnail, fetches the asset with the page's credentials and
 * saves it through a temporary `<a download>` link.
 *
 * @param {string} recordId - Record id as used in `data-record-id`.
 * @returns {Promise<boolean>} true when the download was triggered; false when
 *   the row or modal could not be found, no thumbnail was available, or the
 *   fetch failed.
 */
async function downloadOne(recordId) {
  const row = await findRowOnAllPages(recordId);
  if (!row) {
    console.warn(`${recordId}: row not found`);
    return false;
  }
  row.click();

  let modal;
  try {
    modal = await pollUntil(() => document.querySelector(".record-modal-shell"));
  } catch {
    console.warn(`${recordId}: modal didn't open`);
    return false;
  }

  // From here on the modal is open: always close it, whatever the outcome, so
  // the next record does not pick up a stale modal.
  try {
    const ext = readAssetExtension(modal);

    // Prefer the main image explicitly. A combined selector list would return
    // whichever matching element comes first in document order.
    const img = modal.querySelector("#record-main-image") ?? modal.querySelector("img");
    const thumb = img?.src;
    if (!thumb) {
      console.warn(`${recordId}: no thumbnail src — cannot infer URL`);
      return false;
    }

    const assetUrl = buildAssetUrlFromThumb(thumb, ext);
    const filename = buildFilenameFromThumb(thumb, ext) || `${recordId}${ext}`;
    console.log(`${recordId}: fetching "${filename}" from ${assetUrl}`);

    try {
      const res = await fetch(assetUrl, { credentials: "include", referrer: location.href });
      if (!res.ok) throw new Error(`HTTP ${res.status}`);
      const blob = await res.blob();
      const url = URL.createObjectURL(blob);
      const a = document.createElement("a");
      a.href = url;
      a.download = filename;
      document.body.appendChild(a);
      a.click();
      a.remove();
      // Release the object URL a little later, after the click has been handled.
      setTimeout(() => URL.revokeObjectURL(url), 5000);
      console.log(`${recordId}: ${filename} (${(blob.size / 1024 / 1024).toFixed(2)} MB)`);
    } catch (e) {
      console.warn(`${recordId}: fetch failed — ${e.message}`);
      return false;
    }
    return true;
  } finally {
    await closeRecordModal(800);
  }
}
// Driver: rewind the listing to page 1, then process TARGETS one at a time,
// pausing 500 ms after each record, and finish with a summary.
await goToFirstPage();
const failedIds = [];
let succeeded = 0;
for (const recordId of TARGETS) {
  const downloaded = await downloadOne(recordId);
  if (downloaded) {
    succeeded += 1;
  } else {
    failedIds.push(recordId);
  }
  await sleep(500);
}
console.log(`\n[dl] DONE — ok=${succeeded}, failed=${failedIds.length}`);
if (failedIds.length) {
  console.log("failed:", failedIds);
}
console.log("Move videos from Downloads/ → /Users/guto/ufo/raw/videos/ when done.");
})();