/**
 * sitemap.xml — dynamic, regenerated per request.
 *
 * Aggregates:
 *   - Static pages (home, bureau, sub-pages)
 *   - Every declassified document (/d/<id>)
 *   - Every case report (/c/<slug>)
 *   - Entities (/e/<class-folder>/<id>), at most 500 per class so the
 *     sitemap doesn't balloon past Google's 50k-URL limit. Entities with
 *     an AI-generated or curated summary are listed first and get a
 *     lastModified from summary_generated_at, which helps crawlers
 *     re-index when we re-enrich; any remaining slots go to the
 *     most-mentioned entities that have no summary yet.
 *
 * Per Next.js the file must export a default function returning a flat
 * MetadataRoute.Sitemap array. changeFrequency/priority are only hints:
 * some crawlers honour them, others (notably Google) ignore them.
 */
import type { MetadataRoute } from "next";
import { listDocuments } from "@/lib/wiki";
import { pgQuery } from "@/lib/retrieval/db";
import { readdir } from "node:fs/promises";
import path from "node:path";

// Route segment config. Without these, Next.js statically generates the
// sitemap at build time when the DB is unreachable from the build
// container — which is why we were getting only 9 static URLs in
// production.
// NOTE(review): `force-dynamic` asks for rendering on every request, so the
// hourly `revalidate` below presumably has no effect while both are set —
// confirm against the Next.js route segment config docs and keep only the
// one that matches the intended caching behaviour.
export const dynamic = "force-dynamic";
export const revalidate = 3600;

/**
 * Returns `raw` trimmed of surrounding whitespace, or `fallback` when the
 * value is unset or blank. Unlike `??`, this treats an empty or
 * whitespace-only environment variable the same as a missing one.
 */
function envOrDefault(raw: string | undefined, fallback: string): string {
  const trimmed = raw?.trim() ?? "";
  return trimmed === "" ? fallback : trimmed;
}

/**
 * Origin prepended to every sitemap path. Trailing slashes are stripped so
 * that `${SITE_URL}/d/...` never contains a double slash when the env var is
 * configured as e.g. "https://example.com/".
 */
const SITE_URL = envOrDefault(
  process.env.NEXT_PUBLIC_SITE_URL?.trim().replace(/\/+$/, ""),
  "https://disclosure.top",
);

/** Directory whose `reports/` sub-folder holds the case report markdown files. */
const CASE_ROOT = envOrDefault(process.env.CASE_ROOT, "/data/ufo/case");

/** A single entry of the array that `sitemap()` returns to Next.js. */
type Url = MetadataRoute.Sitemap[number];

/**
 * Maps an `entity_class` value (as matched against `public.entities`) to the
 * URL folder used for that class in `/e/<folder>/<entity_id>`. Only the
 * classes listed here are queried and emitted by `sitemap()`.
 */
const ENTITY_FOLDER_BY_CLASS: Record<string, string> = {
  event: "events",
  person: "people",
  uap_object: "uap-objects",
  location: "locations",
  organization: "organizations",
};

/** Google's documented ceiling on the number of URLs in one sitemap file. */
const MAX_SITEMAP_URLS = 50000;

/** True when `err` is a Node fs error reporting that the path does not exist. */
function isMissingPathError(err: unknown): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    (err as { code?: unknown }).code === "ENOENT"
  );
}

/** Logs why a sitemap section was left out, so outages are visible in server logs. */
function logSkippedSection(section: string, err: unknown): void {
  const reason = err instanceof Error ? err.message : String(err);
  console.warn(`[sitemap] skipping ${section}: ${reason}`);
}

/**
 * Parses a timestamp, returning `fallback` when it is missing or unparseable.
 * `Date.prototype.toISOString` throws a RangeError on an Invalid Date, so a
 * single malformed timestamp could otherwise fail the whole sitemap when it
 * is serialized.
 */
function parseDateOr(raw: string | null, fallback: Date): Date {
  if (raw === null) return fallback;
  const parsed = new Date(raw);
  return Number.isNaN(parsed.getTime()) ? fallback : parsed;
}

/**
 * Builds the sitemap: static pages, then documents, case reports and
 * entities, in that order.
 *
 * Each dynamic section is best-effort: if its data source fails, that section
 * is skipped (and the failure logged) while the rest of the sitemap is still
 * returned. This function itself never rejects because of a data-source
 * error.
 *
 * @returns A flat list of sitemap entries with absolute URLs.
 */
export default async function sitemap(): Promise<MetadataRoute.Sitemap> {
  const out: Url[] = [];
  const now = new Date();

  // 1. Top-level pages
  const STATIC_PAGES = [
    { url: "/", priority: 1.0, changeFrequency: "daily" as const },
    { url: "/bureau", priority: 0.9, changeFrequency: "weekly" as const },
    { url: "/sightings", priority: 0.9, changeFrequency: "weekly" as const },
    { url: "/witnesses", priority: 0.8, changeFrequency: "weekly" as const },
    { url: "/objects", priority: 0.8, changeFrequency: "weekly" as const },
    { url: "/locations", priority: 0.8, changeFrequency: "weekly" as const },
    { url: "/operations", priority: 0.8, changeFrequency: "weekly" as const },
    { url: "/documents", priority: 0.8, changeFrequency: "weekly" as const },
    { url: "/search", priority: 0.5, changeFrequency: "monthly" as const },
  ];
  for (const p of STATIC_PAGES) {
    out.push({
      url: `${SITE_URL}${p.url}`,
      lastModified: now,
      changeFrequency: p.changeFrequency,
      priority: p.priority,
    });
  }

  // 2. Documents
  try {
    const docIds = await listDocuments();
    for (const id of docIds) {
      out.push({
        url: `${SITE_URL}/d/${id}`,
        lastModified: now,
        changeFrequency: "monthly",
        priority: 0.7,
      });
    }
  } catch (err: unknown) {
    logSkippedSection("documents", err);
  }

  // 3. Case reports — read filesystem for /c/[slug]
  try {
    const dir = path.join(CASE_ROOT, "reports");
    const files = await readdir(dir);
    for (const f of files) {
      if (!f.endsWith(".md")) continue;
      const slug = f.slice(0, -".md".length);
      out.push({
        // The slug comes from a file name, which may contain spaces or
        // non-ASCII characters; encode it so the entry is a valid URL.
        url: `${SITE_URL}/c/${encodeURIComponent(slug)}`,
        lastModified: now,
        changeFrequency: "monthly",
        priority: 0.95,
      });
    }
  } catch (err: unknown) {
    // A missing reports directory just means there are no case files yet;
    // anything else (permissions, I/O) is worth surfacing.
    if (!isMissingPathError(err)) logSkippedSection("case reports", err);
  }

  // 4. Entities — surface those with summaries first (high priority), plus
  // the top by mention count, up to 500 per class in total. Cap per class
  // avoids blowing past sitemap size limits (Google: 50k urls, 50MB).
  for (const [klass, folder] of Object.entries(ENTITY_FOLDER_BY_CLASS)) {
    try {
      const rows = await pgQuery<{
        entity_id: string;
        summary_generated_at: string | null;
        summary_status: string | null;
        total_mentions: number;
      }>(
        `SELECT entity_id, summary_generated_at, summary_status, total_mentions
           FROM public.entities
          WHERE entity_class = $1
            AND total_mentions >= 1
          ORDER BY (summary_status IN ('ai_generated','curated')) DESC,
                   total_mentions DESC, entity_id ASC
          LIMIT 500`,
        [klass],
      );
      for (const r of rows) {
        const hasSummary = r.summary_status === "ai_generated" || r.summary_status === "curated";
        out.push({
          url: `${SITE_URL}/e/${folder}/${r.entity_id}`,
          lastModified: parseDateOr(r.summary_generated_at, now),
          changeFrequency: "monthly",
          priority: hasSummary ? 0.7 : 0.4,
        });
      }
    } catch (err: unknown) {
      // Skip only this class; the remaining classes are still attempted.
      logSkippedSection(`entities of class "${klass}"`, err);
    }
  }

  // Documents and case reports are not capped, so make an oversized sitemap
  // visible instead of letting crawlers reject it without anyone noticing.
  if (out.length > MAX_SITEMAP_URLS) {
    console.warn(
      `[sitemap] ${out.length} URLs exceeds the ${MAX_SITEMAP_URLS}-URL limit for a single sitemap`,
    );
  }

  return out;
}