/**
 * Data layer for the Disclosure Bureau wiki.
 *
 * Reads markdown files directly from `<UFO_ROOT>/wiki` and
 * `<UFO_ROOT>/processing` (default root: /Users/guto/ufo, overridable through
 * the `UFO_ROOT` environment variable).
 * No database — markdown IS the database (Karpathy LLM-wiki pattern).
 *
 * NOTE(review): document/page/entity ids are interpolated into file paths
 * without validation. If any of them can originate from untrusted input
 * (e.g. URL parameters), callers should reject ids containing path
 * separators or `..` before calling into this module — confirm against callers.
 */
import fs from "node:fs/promises";
import path from "node:path";
import matter from "gray-matter";

/** Root directory holding the `wiki/` and `processing/` trees. */
export const UFO_ROOT = process.env.UFO_ROOT ?? "/Users/guto/ufo";
/** Directory containing the markdown wiki. */
export const WIKI = path.join(UFO_ROOT, "wiki");
/** Directory containing processing artifacts (OCR text, table CSVs). */
export const PROCESSING = path.join(UFO_ROOT, "processing");

/** Folder names under `wiki/entities/`, one per entity class. */
export type EntityClass =
  | "people"
  | "organizations"
  | "locations"
  | "events"
  | "uap-objects"
  | "vehicles"
  | "operations"
  | "concepts";

/** Parsed front matter; values are untyped and must be narrowed by callers. */
export interface Frontmatter {
  [key: string]: unknown;
}

/** A markdown file split into its front matter and body. */
export interface MdFile {
  fm: Frontmatter;
  body: string;
}

/**
 * Reads a markdown file and splits it into front matter and body.
 *
 * @param absPath Path of the markdown file to read.
 * @returns The parsed front matter and the remaining content.
 * @throws If the file cannot be read or `gray-matter` fails to parse it.
 */
export async function readMd(absPath: string): Promise<MdFile> {
  const raw = await fs.readFile(absPath, "utf-8");
  const parsed = matter(raw);
  return { fm: parsed.data as Frontmatter, body: parsed.content };
}

/**
 * Like {@link readMd}, but returns `null` instead of throwing.
 *
 * Any failure (missing file, unreadable file, parse error) maps to `null`.
 */
export async function safeReadMd(absPath: string): Promise<MdFile | null> {
  try {
    return await readMd(absPath);
  } catch {
    return null;
  }
}

/**
 * Lists the ids of all documents, i.e. the `.md` file names in
 * `wiki/documents` with the extension removed, sorted.
 *
 * @returns The sorted ids, or an empty array when the directory cannot be read.
 */
export async function listDocuments(): Promise<string[]> {
  const docsDir = path.join(WIKI, "documents");
  try {
    const files = await fs.readdir(docsDir);
    return files
      .filter((f) => f.endsWith(".md"))
      .map((f) => f.replace(/\.md$/, ""))
      .sort();
  } catch {
    return [];
  }
}

/** Reads `wiki/documents/<docId>.md`; `null` when it cannot be read. */
export async function readDocument(docId: string): Promise<MdFile | null> {
  return safeReadMd(path.join(WIKI, "documents", `${docId}.md`));
}

/**
 * Lists the page stems of a document: files in `wiki/pages/<docId>` named
 * `p` followed by exactly three digits and `.md`, with the extension
 * removed, sorted.
 *
 * @returns The sorted stems, or an empty array when the directory cannot be read.
 */
export async function listPages(docId: string): Promise<string[]> {
  const dir = path.join(WIKI, "pages", docId);
  try {
    const files = await fs.readdir(dir);
    return files
      .filter((f) => /^p\d{3}\.md$/.test(f))
      .map((f) => f.replace(/\.md$/, ""))
      .sort();
  } catch {
    return [];
  }
}

/** Reads `wiki/pages/<docId>/<pageStem>.md`; `null` when it cannot be read. */
export async function readPage(docId: string, pageStem: string): Promise<MdFile | null> {
  return safeReadMd(path.join(WIKI, "pages", docId, `${pageStem}.md`));
}

/**
 * Reads the OCR text of one page from
 * `processing/ocr/<docId>/p-<NNN>.txt`, where `NNN` is `pageNum`
 * left-padded with zeros to at least three characters.
 *
 * @returns The file content, or `null` when it cannot be read.
 */
export async function readOcr(docId: string, pageNum: number): Promise<string | null> {
  const padded = String(pageNum).padStart(3, "0");
  const ocrPath = path.join(PROCESSING, "ocr", docId, `p-${padded}.txt`);
  try {
    return await fs.readFile(ocrPath, "utf-8");
  } catch {
    return null;
  }
}

/** Reads `wiki/entities/<cls>/<id>.md`; `null` when it cannot be read. */
export async function readEntity(cls: EntityClass, id: string): Promise<MdFile | null> {
  return safeReadMd(path.join(WIKI, "entities", cls, `${id}.md`));
}

/** Reads `wiki/videos/<id>.md`; `null` when it cannot be read. */
export async function readVideo(id: string): Promise<MdFile | null> {
  return safeReadMd(path.join(WIKI, "videos", `${id}.md`));
}

/** Reads `wiki/images-direct/<id>.md`; `null` when it cannot be read. */
export async function readImageDirect(id: string): Promise<MdFile | null> {
  return safeReadMd(path.join(WIKI, "images-direct", `${id}.md`));
}

/**
 * Reads a table's markdown page (`wiki/tables/<tableId>.md`) together with
 * its CSV data (`processing/tables/<tableId>.csv`).
 *
 * The two reads are independent and run in parallel; each side is `null`
 * when its file cannot be read.
 */
export async function readTable(
  tableId: string,
): Promise<{ md: MdFile | null; csv: string[][] | null }> {
  const [md, csv] = await Promise.all([
    safeReadMd(path.join(WIKI, "tables", `${tableId}.md`)),
    safeReadCsv(path.join(PROCESSING, "tables", `${tableId}.csv`)),
  ]);
  return { md, csv };
}

/** Reads and parses a CSV file; best-effort, `null` when it cannot be read. */
async function safeReadCsv(absPath: string): Promise<string[][] | null> {
  try {
    return parseCsv(await fs.readFile(absPath, "utf-8"));
  } catch {
    // A table without CSV data is expected; report it as absent.
    return null;
  }
}

/**
 * Minimal CSV parser with quoted-field support.
 *
 * - Fields are separated by `,` and rows by `\n`; `\r` outside quotes is dropped.
 * - Inside a quoted field, `""` yields a literal `"`, and commas/newlines are
 *   kept verbatim.
 * - An empty line yields a row with a single empty field.
 * - A final row without a trailing newline is emitted as long as the line
 *   contained anything at all, including a lone quoted-empty field (`""`).
 */
function parseCsv(text: string): string[][] {
  const rows: string[][] = [];
  let row: string[] = [];
  let field = "";
  let inQuotes = false;
  // True once the current line consumed anything besides a line terminator.
  // Needed so that an unterminated last line holding only `""` is not lost.
  let lineStarted = false;

  for (let i = 0; i < text.length; i++) {
    const c = text[i];

    if (inQuotes) {
      if (c === '"' && text[i + 1] === '"') {
        // Escaped quote: emit one `"` and skip its pair.
        field += '"';
        i++;
      } else if (c === '"') {
        inQuotes = false;
      } else {
        field += c;
      }
      continue;
    }

    switch (c) {
      case '"':
        inQuotes = true;
        lineStarted = true;
        break;
      case ",":
        row.push(field);
        field = "";
        lineStarted = true;
        break;
      case "\n":
        row.push(field);
        rows.push(row);
        row = [];
        field = "";
        lineStarted = false;
        break;
      case "\r":
        // Ignored so that CRLF input parses like LF input.
        break;
      default:
        field += c;
        lineStarted = true;
    }
  }

  if (lineStarted) {
    row.push(field);
    rows.push(row);
  }
  return rows;
}

/**
 * Extracts the entity class from a path containing
 * `entities/<class>/` (either `/` or `\` is accepted as the separator).
 *
 * @returns The class folder name, or `null` when the path has no such segment.
 */
export function entityClassFromPath(filePath: string): EntityClass | null {
  const m = filePath.match(
    /entities[\\/](people|organizations|locations|events|uap-objects|vehicles|operations|concepts)[\\/]/,
  );
  return m ? (m[1] as EntityClass) : null;
}

/**
 * Accepted class keys (singular, plural and underscore/hyphen variants)
 * mapped to their entity folder. A `Map` is used instead of an object
 * literal so that keys such as "constructor" or "__proto__" cannot resolve
 * to inherited `Object.prototype` members.
 */
const CLASS_KEY_TO_FOLDER: ReadonlyMap<string, EntityClass> = new Map<string, EntityClass>([
  ["person", "people"],
  ["people", "people"],
  ["organization", "organizations"],
  ["organizations", "organizations"],
  ["location", "locations"],
  ["locations", "locations"],
  ["event", "events"],
  ["events", "events"],
  ["uap_object", "uap-objects"],
  ["uap-object", "uap-objects"],
  ["uap-objects", "uap-objects"],
  ["vehicle", "vehicles"],
  ["vehicles", "vehicles"],
  ["operation", "operations"],
  ["operations", "operations"],
  ["concept", "concepts"],
  ["concepts", "concepts"],
]);

/**
 * Maps a class key to its entity folder name.
 *
 * @returns The folder name, or `null` for any key that is not in the table.
 */
export function classKeyToFolder(key: string): EntityClass | null {
  return CLASS_KEY_TO_FOLDER.get(key) ?? null;
}