/**
 * Renders a full document — concatenates per-page OCR + interleaves images,
 * tables, and entity highlights based on each page's frontmatter.
 *
 * Returns a structured representation that the React view renders directly.
 */
import fs from "node:fs/promises";
import path from "node:path";
import { PROCESSING, readPage, readOcr, readTable, type MdFile } from "./wiki";
import { readMatches, type EntityMatch } from "./entity-index";
import type {
  AnyFrontmatter, BBox, ImageDetected, TableDetected,
  Redaction, SignatureObserved,
} from "./fm-types";

/**
 * An image region detected on a page (built from a frontmatter
 * `images_detected` entry that carries a bbox).
 */
export interface InlineImage {
  /** Discriminant for the {@link Inline} union. */
  kind: "image";
  /** Sort key: `bbox.y`, or 0 when the bbox has no `y`. */
  bboxY: number;
  /** Bounding box of the image as recorded in the page frontmatter. */
  bbox: BBox;
  /** URL of the full-page PNG; the loader does not produce a per-image crop. */
  src: string;
  /** Caption text, taken from the entry's `caption_ocr`. */
  caption?: string;
  /** Taken from the entry's `image_type`. */
  imageType?: string;
}
/**
 * A table region detected on a page (built from a frontmatter
 * `tables_detected` entry that carries a bbox).
 */
export interface InlineTable {
  /** Discriminant for the {@link Inline} union. */
  kind: "table";
  /** Sort key: `bbox.y`, or 0 when the bbox has no `y`. */
  bboxY: number;
  /** Bounding box of the table as recorded in the page frontmatter. */
  bbox: BBox;
  /** Taken from the entry's `table_id`, when present. */
  tableId?: string;
  /** Parsed cells from `readTable(table_id)`; undefined when there is no id or no CSV. */
  csv?: string[][];
  /** Taken from the entry's `row_count_estimate`. */
  rowEstimate?: number;
  /** Taken from the entry's `col_count_estimate`. */
  colEstimate?: number;
  /** Taken from the entry's `headers_summary`. */
  headersSummary?: string;
  fallbackCropY: number;     // y to derive crop from page PNG if no CSV (same value as bboxY)
  /** Id of the document the table belongs to. */
  docId: string;
  /** Page number parsed from the page stem. */
  pageNum: number;
}
/**
 * A redacted region on a page (built from a frontmatter `redactions` entry
 * that carries a bbox).
 */
export interface InlineRedaction {
  /** Discriminant for the {@link Inline} union. */
  kind: "redaction";
  /** Sort key: `bbox.y`, or 0 when the bbox has no `y`. */
  bboxY: number;
  /** Bounding box of the redaction as recorded in the page frontmatter. */
  bbox: BBox;
  /** Taken from the entry's `code`. */
  code?: string;
  /** Taken from the entry's `description`. */
  description?: string;
  /** Id of the document the redaction belongs to. */
  docId: string;
  /** Page number parsed from the page stem. */
  pageNum: number;
}
/**
 * A signature region on a page (built from a frontmatter
 * `signatures_observed` entry that carries a bbox).
 */
export interface InlineSignature {
  /** Discriminant for the {@link Inline} union. */
  kind: "signature";
  /** Sort key: `bbox.y`, or 0 when the bbox has no `y`. */
  bboxY: number;
  /** Bounding box of the signature as recorded in the page frontmatter. */
  bbox: BBox;
  /** Taken from the entry's `signer_inferred`; a nullish value becomes undefined. */
  signer?: string;
  /** Id of the document the signature belongs to. */
  docId: string;
  /** Page number parsed from the page stem. */
  pageNum: number;
}
/** Any block positioned within a page; discriminate on `kind`. */
export type Inline = InlineImage | InlineTable | InlineRedaction | InlineSignature;

/** One page of a document, ready for the view: OCR text, matches and inline blocks. */
export interface RenderedPage {
  pageStem: string;            // "p007"
  pageNum: number;             // integer parsed from pageStem
  pageId: string;              // "doc-id/p007"
  pngUrl: string;              // URL of the full-page PNG
  ocr: string;                 // raw layout-preserved text ("" when no OCR was read)
  matches: EntityMatch[];      // result of readMatches for this page
  inline: Inline[];            // images + tables + redactions + signatures, sorted by bboxY
  visionEn?: string;           // frontmatter vision_description, when it is a string
  visionPt?: string;           // frontmatter vision_description_pt_br, when it is a string
  pageType?: string;           // frontmatter page_type, when it is a string
  classification?: string;     // frontmatter highest_classification, when it is a string
  contentClassification?: string[]; // frontmatter content_classification, when it is an array
  redactionsCount: number;     // length of frontmatter redactions (includes entries without a bbox)
  signaturesCount: number;     // length of frontmatter signatures_observed (includes entries without a bbox)
}

/** A whole document: its own frontmatter/body plus every page that could be loaded. */
export interface RenderedDoc {
  docId: string;
  canonicalTitle: string;      // frontmatter canonical_title, falling back to docId when absent
  pageCount: number;           // number of page files found; may exceed pages.length if a page fails to load
  pages: RenderedPage[];       // loaded pages, ordered by page stem
  frontmatter: AnyFrontmatter; // document-level frontmatter
  documentBody: string;        // markdown body from documents/<id>.md
}

/**
 * Loads a document and all of its pages and assembles them into a
 * {@link RenderedDoc}.
 *
 * Reads `documents/<docId>.md` for the document-level frontmatter and body,
 * then every `pNNN.md` file under `pages/<docId>/`. For each page it collects
 * the OCR text, entity matches, and the inline blocks (images, tables,
 * redactions, signatures) listed in the page frontmatter. Entries without a
 * `bbox` are skipped; the remaining blocks are sorted by ascending `bbox.y`.
 *
 * @param docId - Document id; used verbatim as a path segment and in URLs.
 * @returns The assembled document, or `null` when the document file cannot be
 *   read/parsed or the pages directory cannot be listed.
 */
export async function loadFullDocument(docId: string): Promise<RenderedDoc | null> {
  // NOTE(review): docId is interpolated into filesystem paths unvalidated. If it
  // can originate from a request, confirm callers reject path separators / "..".
  const docPath = path.join(PROCESSING, "..", "wiki", "documents", `${docId}.md`);
  let docMd: MdFile;
  try {
    const raw = await fs.readFile(docPath, "utf-8");
    const matter = (await import("gray-matter")).default(raw);
    docMd = { fm: matter.data as AnyFrontmatter, body: matter.content };
  } catch {
    // Best-effort: any failure here (missing file, parse error) is reported
    // to the caller as "no such document".
    return null;
  }

  const pagesDir = path.join(PROCESSING, "..", "wiki", "pages", docId);
  let pageStems: string[];
  try {
    const files = await fs.readdir(pagesDir);
    // Only three-digit page files ("p007.md"); a plain lexicographic sort is
    // therefore also the numeric page order.
    pageStems = files
      .filter((f) => /^p\d{3}\.md$/.test(f))
      .map((f) => f.replace(/\.md$/, ""))
      .sort();
  } catch {
    // A document without a readable pages directory is treated as missing.
    return null;
  }

  // Pages are loaded concurrently; Promise.all keeps them in pageStems order.
  const pages = await Promise.all(
    pageStems.map(async (stem): Promise<RenderedPage | null> => {
      const md = await readPage(docId, stem);
      if (!md) return null;
      const fm = md.fm as AnyFrontmatter;
      const pageNum = parseInt(stem.replace("p", ""), 10);
      const padded = String(pageNum).padStart(3, "0");
      // Single source for the page PNG URL: used both as the page image and as
      // the `src` of every inline image on the page.
      const pngUrl = `/api/static/processing/png/${docId}/p-${padded}.png`;
      const ocr = (await readOcr(docId, pageNum)) ?? "";
      const matches = await readMatches(docId, stem);

      const inline: Inline[] = [];

      for (const im of (fm.images_detected ?? []) as ImageDetected[]) {
        if (!im.bbox) continue;
        // Images point at the full-page PNG. Per the original note,
        // local_image_index is not used as a URL key because the thumbnail
        // component (FmBboxThumb) crops via CSS from the bbox.
        inline.push({
          kind: "image",
          bboxY: im.bbox.y ?? 0,
          bbox: im.bbox,
          src: pngUrl,
          caption: im.caption_ocr,
          imageType: im.image_type,
        });
      }
      for (const t of (fm.tables_detected ?? []) as TableDetected[]) {
        if (!t.bbox) continue;
        // CSV is optional: only looked up when the table has an id, and left
        // undefined when readTable yields no csv.
        let csv: string[][] | undefined;
        if (t.table_id) {
          const { csv: c } = await readTable(t.table_id);
          if (c) csv = c;
        }
        inline.push({
          kind: "table",
          bboxY: t.bbox.y ?? 0,
          bbox: t.bbox,
          tableId: t.table_id,
          csv,
          rowEstimate: t.row_count_estimate,
          colEstimate: t.col_count_estimate,
          headersSummary: t.headers_summary,
          fallbackCropY: t.bbox.y ?? 0,
          docId,
          pageNum,
        });
      }
      for (const r of (fm.redactions ?? []) as Redaction[]) {
        if (!r.bbox) continue;
        inline.push({
          kind: "redaction",
          bboxY: r.bbox.y ?? 0,
          bbox: r.bbox,
          code: r.code,
          description: r.description,
          docId,
          pageNum,
        });
      }
      for (const s of (fm.signatures_observed ?? []) as SignatureObserved[]) {
        if (!s.bbox) continue;
        inline.push({
          kind: "signature",
          bboxY: s.bbox.y ?? 0,
          bbox: s.bbox,
          signer: s.signer_inferred ?? undefined,
          docId,
          pageNum,
        });
      }
      // Top-to-bottom order; blocks with equal y keep their insertion order
      // (images, tables, redactions, signatures).
      inline.sort((a, b) => a.bboxY - b.bboxY);

      return {
        pageStem: stem,
        pageNum,
        pageId: `${docId}/${stem}`,
        pngUrl,
        ocr,
        matches,
        inline,
        visionEn: typeof fm.vision_description === "string" ? fm.vision_description : undefined,
        visionPt: typeof fm.vision_description_pt_br === "string" ? fm.vision_description_pt_br : undefined,
        pageType: typeof fm.page_type === "string" ? fm.page_type : undefined,
        classification: typeof fm.highest_classification === "string" ? fm.highest_classification : undefined,
        contentClassification: Array.isArray(fm.content_classification) ? (fm.content_classification as string[]) : undefined,
        // Counts cover every frontmatter entry, including ones without a bbox
        // that were skipped for `inline`.
        redactionsCount: Array.isArray(fm.redactions) ? fm.redactions.length : 0,
        signaturesCount: Array.isArray(fm.signatures_observed) ? fm.signatures_observed.length : 0,
      };
    }),
  );

  // Guarded like the per-page string fields above: a non-string title in the
  // frontmatter falls back to the doc id instead of leaking through a cast.
  const title = docMd.fm.canonical_title;

  return {
    docId,
    canonicalTitle: typeof title === "string" ? title : docId,
    // Number of page files found; pages that failed to load are dropped from
    // `pages` below, so this can be larger than pages.length.
    pageCount: pageStems.length,
    pages: pages.filter((p): p is RenderedPage => p !== null),
    frontmatter: docMd.fm,
    documentBody: docMd.body,
  };
}

/**
 * Splits OCR text into `nBlocks` consecutive segments of (nearly) equal line
 * count, as a line-based approximation of vertical position. Used to
 * interleave inline blocks at their bbox.y.
 *
 * Lines are distributed proportionally, so segment sizes differ by at most
 * one line and no segment is left empty while there are at least `nBlocks`
 * lines. When there are fewer lines than blocks, some segments are empty
 * strings.
 *
 * @param ocr - Raw OCR text; split on "\n".
 * @param nBlocks - Desired number of segments. Fractions are floored; values
 *   that are non-finite or floor to 1 or less yield a single segment holding
 *   the whole text.
 * @returns Exactly `nBlocks` segments (after flooring), in document order.
 */
export function splitOcrIntoSegments(ocr: string, nBlocks: number): string[] {
  // Normalise the count so NaN/Infinity/fractions cannot drop lines or loop forever.
  const count = Number.isFinite(nBlocks) ? Math.floor(nBlocks) : 1;
  if (count <= 1) return [ocr];

  // split() always yields at least one element, so no empty-array guard is needed.
  const lines = ocr.split("\n");
  const total = lines.length;

  const segments: string[] = [];
  for (let i = 0; i < count; i++) {
    // Proportional boundaries: segment i covers [ceil(i*total/count), ceil((i+1)*total/count)).
    // The final boundary is exactly `total`, so every line lands in a segment.
    const start = Math.ceil((i * total) / count);
    const end = Math.ceil(((i + 1) * total) / count);
    segments.push(lines.slice(start, end).join("\n"));
  }
  return segments;
}