164 lines
6.8 KiB
JavaScript
164 lines
6.8 KiB
JavaScript
/**
 * 00c-download-missing.js — Programmatic download (fetch + blob) via the war.gov UI
 *
 * Improved over the previous version:
 *   - Uses fetch() to grab the asset from the same origin (browser cookies +
 *     same-origin policy → Akamai accepts).
 *   - Creates a Blob and triggers `<a download="filename">` to control the
 *     filename exactly (no "(1)" duplicates).
 *   - Derives the asset URL from the modal's thumbnail src pattern
 *     (`/thumbnail/foo.jpg` → `/foo.pdf|.mp4|.jpg`).
 *   - Downloads only the records listed in TARGETS below. The script cannot
 *     see the download folder, so remove entries from that list for files
 *     you already have.
 *
 * USAGE (Chrome on https://www.war.gov/UFO/Release-NN/):
 *   1. Set Chrome download folder to /Users/guto/ufo/raw/ (Settings → Downloads).
 *      Videos land in the same folder; MOVE them manually to
 *      /Users/guto/ufo/raw/videos/ after this finishes.
 *   2. Open DevTools → Console.
 *   3. Paste this whole file. Press Enter.
 *   4. Chrome prompts "Allow multiple downloads" → click **Allow**.
 *   5. Wait ~30s (about 1s between downloads). Files land in the configured
 *      download folder.
 */
|
|
|
|
(async function downloadMissing() {
|
|
// ============================================================
// Records whose PDFs are still missing (Release-01, verified 2026-05-13).
// Each entry is [data-record-id, human-readable title]; only the id is used.
// The 28 .VID videos are not listed: they are already in
// /Users/guto/ufo/raw/videos/ from a prior bulk download and share file
// names like DOD_111688723.mp4.
// ============================================================
const MISSING_RECORDS = [
  ["record-140", "NASA-UAP-D003 GEMINI 7 TRANSCRIPT 1965"],
  ["record-154", "STATE CABLE 003 TBILISI GEORGIA"],
  ["record-155", "STATE CABLE 004 ASHGABAT TURKMENISTAN"],
  ["record-156", "STATE CABLE 005 MEXICO"],
];
const TARGETS = MISSING_RECORDS.map(([recordId]) => recordId);
console.log(`[dl] ${TARGETS.length} records to download`);
|
|
|
|
// ----------------------------------------------------------------------
|
|
|
|
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
|
|
/**
 * Calls `predicate` repeatedly until it returns a truthy value.
 *
 * The first call happens synchronously; later calls are scheduled with
 * `setTimeout` every `interval` milliseconds.
 *
 * @param {() => any} predicate - Probe to evaluate on every tick.
 * @param {{timeout?: number, interval?: number}} [opts]
 *   `timeout` (default 4000 ms) is the elapsed time after which polling gives
 *   up; `interval` (default 50 ms) is the delay between ticks.
 * @returns {Promise<any>} Resolves with the first truthy value returned by
 *   `predicate`. Rejects with `Error("timeout")` once more than `timeout` ms
 *   have elapsed, or with whatever `predicate` throws.
 */
function pollUntil(predicate, opts = {}) {
  const timeout = opts?.timeout ?? 4000;
  const interval = opts?.interval ?? 50;
  return new Promise((resolve, reject) => {
    const start = Date.now();
    const tick = () => {
      let value;
      try {
        value = predicate();
      } catch (err) {
        // Ticks after the first run inside a timer callback, where a throw
        // would escape the promise and leave it pending forever.
        reject(err);
        return;
      }
      if (value) {
        resolve(value);
        return;
      }
      if (Date.now() - start > timeout) {
        reject(new Error("timeout"));
        return;
      }
      setTimeout(tick, interval);
    };
    tick();
  });
}
|
|
|
|
/**
 * Looks for the row button of `recordId`, starting on the page currently
 * shown and clicking "next" until it is found.
 *
 * Pagination only moves forward, so rows on pages before the current one are
 * never seen.
 *
 * @param {string} recordId - Expected value of the row's `data-record-id`.
 * @returns {Promise<Element|null>} The matching `button.record-row`, or
 *   `null` when the next-page button is missing/disabled or the page limit
 *   is reached.
 */
async function findRowOnAllPages(recordId) {
  const MAX_PAGES = 25;
  // Escape the id so quotes or backslashes cannot break the selector.
  const rowSelector = `button.record-row[data-record-id="${CSS.escape(recordId)}"]`;
  const firstRowId = () => document.querySelector("button.record-row")?.dataset.recordId;

  for (let page = 0; page < MAX_PAGES; page++) {
    const row = document.querySelector(rowSelector);
    if (row) return row;

    const next = document.querySelector("button.pagination-next");
    if (!next || next.disabled || next.getAttribute("aria-disabled") === "true") return null;

    // The first row's id is used as the "page has changed" signal.
    const before = firstRowId();
    next.click();
    try {
      await pollUntil(() => {
        const current = firstRowId();
        return current && current !== before ? current : null;
      });
    } catch (err) {
      // Best-effort: keep paging, but report the stall instead of hiding it.
      console.warn(` … ${recordId}: page did not change after "next" (${err.message}) — continuing`);
    }
    await sleep(150);
  }
  return null;
}
|
|
|
|
/**
 * Clicks the first `.pagination-button` whose trimmed text is "1" and then
 * waits 400 ms. Does nothing when no such button exists.
 *
 * @returns {Promise<void>}
 */
async function goToFirstPage() {
  for (const button of document.querySelectorAll(".pagination-button")) {
    if (button.innerText.trim() !== "1") continue;
    button.click();
    await sleep(400);
    return;
  }
}
|
|
|
|
/**
 * Derives a download filename from a thumbnail URL:
 * `.../thumbnail/foo.jpg` → `foo` + `extHint`.
 *
 * A trailing query string or fragment is ignored, and the thumbnail's own
 * extension may contain digits. The base name is percent-decoded; if it
 * contains a malformed escape the raw text is kept instead of throwing.
 *
 * @param {string} thumbUrl - Thumbnail URL taken from the modal image.
 * @param {string} extHint - Extension to append, including the leading dot.
 * @returns {string|null} The filename, or `null` when the URL has no
 *   `/thumbnail/<name>.<ext>` part.
 */
function buildFilenameFromThumb(thumbUrl, extHint) {
  const m = String(thumbUrl ?? "").match(/\/thumbnail\/([^?#]+)\.[a-z0-9]+(?:[?#].*)?$/i);
  if (!m) return null;
  let base = m[1];
  try {
    base = decodeURIComponent(base);
  } catch {
    // Malformed %-escape (URIError): fall back to the undecoded name.
  }
  return `${base}${extHint}`;
}
|
|
|
|
/**
 * Turns a thumbnail URL into the URL of the full asset by dropping the
 * first `/thumbnail/` path segment and swapping the file extension.
 *
 * Any query string or fragment is kept as-is and excluded from the
 * extension swap, so `…/thumbnail/foo.jpg?v=3` becomes `…/foo.pdf?v=3`.
 *
 * @param {string} thumbUrl - Thumbnail URL taken from the modal image.
 * @param {string} extHint - Replacement extension, including the leading dot.
 * @returns {string} The derived asset URL. If the path has no trailing
 *   extension, only the `/thumbnail/` segment is removed.
 */
function buildAssetUrlFromThumb(thumbUrl, extHint) {
  const cut = thumbUrl.search(/[?#]/);
  const path = cut === -1 ? thumbUrl : thumbUrl.slice(0, cut);
  const suffix = cut === -1 ? "" : thumbUrl.slice(cut);
  // Function replacer: `extHint` is inserted literally, with no `$` patterns.
  const assetPath = path.replace("/thumbnail/", "/").replace(/\.[a-z0-9]+$/i, () => extHint);
  return `${assetPath}${suffix}`;
}
|
|
|
|
/**
 * Downloads the asset behind one record.
 *
 * Steps: locate and click the record's row, wait for the modal, work out the
 * file extension from the modal's "Document Type" fact (default `.pdf`),
 * derive the asset URL and filename from the modal's thumbnail, fetch the
 * asset and hand it to the browser through a temporary `<a download>` link.
 *
 * @param {string} recordId - Value of the row's `data-record-id` attribute.
 * @returns {Promise<boolean>} `true` when the download was triggered;
 *   `false` when the row was not found, the modal did not open, the modal
 *   has no thumbnail, or the fetch failed (each case logs a warning).
 */
async function downloadOne(recordId) {
  // Single place for "click the modal's close control if there is one".
  const closeModal = () => {
    document.querySelector(".record-modal-close, [data-record-modal-close]")?.click();
  };

  const row = await findRowOnAllPages(recordId);
  if (!row) {
    console.warn(` ✗ ${recordId}: row not found`);
    return false;
  }
  row.click();

  // NOTE(review): if the site keeps `.record-modal-shell` in the DOM while it
  // is hidden, this resolves immediately with the previous record's content —
  // confirm against the live page.
  let modal;
  try {
    modal = await pollUntil(() => document.querySelector(".record-modal-shell"));
  } catch {
    console.warn(` ✗ ${recordId}: modal didn't open`);
    return false;
  }

  // Extension: taken from the <dd> whose preceding sibling reads "Document Type".
  let ext = ".pdf";
  const docTypeEl = Array.from(modal.querySelectorAll(".record-modal-fact dd"))
    .find((dd) => dd.previousElementSibling?.innerText?.trim() === "Document Type");
  const rawType = docTypeEl?.innerText?.replace(/[\[\]]/g, "").trim().toLowerCase() ?? "";
  // An empty value would otherwise produce the bogus extension ".".
  if (rawType && rawType !== ".") {
    ext = rawType.startsWith(".") ? rawType : `.${rawType}`;
    // Normalize uncommon: .vid → .mp4 (guess; site serves mp4 for videos), .img → .jpg
    if (ext === ".vid") ext = ".mp4";
    if (ext === ".img") ext = ".jpg";
  }

  const thumb = modal.querySelector("#record-main-image, img")?.src;
  if (!thumb) {
    console.warn(` ✗ ${recordId}: no thumbnail src — cannot infer URL`);
    closeModal();
    await sleep(300);
    return false;
  }

  const assetUrl = buildAssetUrlFromThumb(thumb, ext);
  const filename = buildFilenameFromThumb(thumb, ext) || `${recordId}${ext}`;
  console.log(` ↓ ${recordId}: fetching "${filename}" from ${assetUrl}`);

  let downloaded = false;
  try {
    const res = await fetch(assetUrl, { credentials: "include", referrer: location.href });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const blob = await res.blob();
    const url = URL.createObjectURL(blob);
    // A temporary link with a `download` attribute sets the saved filename.
    const a = document.createElement("a");
    a.href = url;
    a.download = filename;
    document.body.appendChild(a);
    a.click();
    a.remove();
    // Release the blob URL later so the browser has time to start the download.
    setTimeout(() => URL.revokeObjectURL(url), 5000);
    console.log(` ✓ ${recordId}: ${filename} (${(blob.size/1024/1024).toFixed(2)} MB)`);
    downloaded = true;
  } catch (e) {
    console.warn(` ✗ ${recordId}: fetch failed — ${e.message}`);
  }

  closeModal();
  // Pause only after a successful download, before moving to the next record.
  if (downloaded) await sleep(800);
  return downloaded;
}
|
|
|
|
// Main loop: handle every target in order. A failed record is recorded and
// the run continues with the next one.
let ok = 0;
const fail = [];
for (const id of TARGETS) {
  // The row search only pages forward, so start from page 1 for every target.
  // Otherwise one missing record leaves the list on its last page and every
  // later target is reported as not found.
  await goToFirstPage();
  const success = await downloadOne(id);
  if (success) {
    ok++;
  } else {
    fail.push(id);
  }
  await sleep(500);
}
console.log(`\n[dl] DONE — ok=${ok}, failed=${fail.length}`);
if (fail.length) console.log("failed:", fail);
console.log("Move videos from Downloads/ → /Users/guto/ufo/raw/videos/ when done.");
|
|
})();
|