disclosure-bureau/web/lib/wiki.ts

183 lines
4.9 KiB
TypeScript
Raw Permalink Normal View History

/**
* Data layer for the Disclosure Bureau wiki.
*
* Reads markdown files directly from <UFO_ROOT>/wiki, plus OCR text and CSV
* tables from <UFO_ROOT>/processing (UFO_ROOT defaults to /Users/guto/ufo).
* No database — markdown IS the database (Karpathy LLM-wiki pattern).
*/
import fs from "node:fs/promises";
import path from "node:path";
import matter from "gray-matter";
// Value of the UFO_ROOT environment variable, if set.
const envRoot = process.env.UFO_ROOT;

/** Root directory of the archive; the UFO_ROOT env var overrides the built-in default. */
export const UFO_ROOT = envRoot ?? "/Users/guto/ufo";

/** Directory holding the wiki content, directly under the root. */
export const WIKI = path.join(UFO_ROOT, "wiki");

/** Directory holding processing artifacts, directly under the root. */
export const PROCESSING = path.join(UFO_ROOT, "processing");
/**
 * The closed set of entity categories known to the wiki.
 *
 * Each value doubles as a directory name: entity files are read from
 * `entities/<EntityClass>/<id>.md` under the wiki root.
 */
export type EntityClass =
| "people"
| "organizations"
| "locations"
| "events"
| "uap-objects"
| "vehicles"
| "operations"
| "concepts";
/**
 * Front-matter data of a markdown file as an open key/value bag.
 *
 * Values are typed `unknown`, so callers must narrow a field before using it.
 */
export interface Frontmatter {
[key: string]: unknown;
}
/** A markdown file split into its front matter and its remaining content. */
export interface MdFile {
// Parsed front-matter data.
fm: Frontmatter;
// Markdown content with the front-matter block removed.
body: string;
}
/**
 * Reads a markdown file as UTF-8 and splits it with gray-matter.
 *
 * @param absPath Path of the file to read.
 * @returns The front-matter data (`fm`) and the remaining content (`body`).
 * @throws Rejects if the file cannot be read or gray-matter fails to parse it.
 */
export async function readMd(absPath: string): Promise<MdFile> {
  const { data, content } = matter(await fs.readFile(absPath, "utf-8"));
  return { fm: data as Frontmatter, body: content };
}
/**
 * Non-throwing variant of {@link readMd}.
 *
 * @param absPath Path of the markdown file to read.
 * @returns The parsed file, or `null` if `readMd` failed for any reason.
 */
export async function safeReadMd(absPath: string): Promise<MdFile | null> {
  let file: MdFile | null = null;
  try {
    file = await readMd(absPath);
  } catch {
    // Best-effort read: every failure is reported to the caller as `null`.
  }
  return file;
}
/**
 * Lists the ids of all documents in the wiki.
 *
 * An id is the file name of a `*.md` entry in `<WIKI>/documents` with the
 * extension removed.
 *
 * @returns The ids in default `Array.prototype.sort` order, or an empty array
 *          if the directory cannot be read.
 */
export async function listDocuments(): Promise<string[]> {
  const docsDir = path.join(WIKI, "documents");

  let entries: string[];
  try {
    entries = await fs.readdir(docsDir);
  } catch {
    // A missing or unreadable directory is treated as "no documents".
    return [];
  }

  const ids: string[] = [];
  for (const name of entries) {
    if (name.endsWith(".md")) {
      // Drop the trailing ".md" (3 characters).
      ids.push(name.slice(0, -3));
    }
  }
  return ids.sort();
}
/**
 * Loads `<WIKI>/documents/<docId>.md`.
 *
 * NOTE(review): `docId` is joined into the path as-is; confirm callers never
 * pass user-controlled values containing `..` segments.
 *
 * @param docId Document id (file name without the `.md` extension).
 * @returns The parsed file, or `null` if it could not be read.
 */
export async function readDocument(docId: string): Promise<MdFile | null> {
  const file = path.join(WIKI, "documents", `${docId}.md`);
  return safeReadMd(file);
}
/**
 * Lists the page stems available for a document.
 *
 * Only entries of `<WIKI>/pages/<docId>` named `p` + exactly three digits +
 * `.md` are considered; the returned stems have the extension removed.
 *
 * @param docId Document id, used as the directory name under `pages`.
 * @returns The stems in default `Array.prototype.sort` order, or an empty
 *          array if the directory cannot be read.
 */
export async function listPages(docId: string): Promise<string[]> {
  const pagesDir = path.join(WIKI, "pages", docId);
  const pageFilePattern = /^p\d{3}\.md$/;

  let entries: string[];
  try {
    entries = await fs.readdir(pagesDir);
  } catch {
    // A missing or unreadable directory is treated as "no pages".
    return [];
  }

  const stems: string[] = [];
  for (const name of entries) {
    if (pageFilePattern.test(name)) {
      // Drop the trailing ".md" (3 characters).
      stems.push(name.slice(0, -3));
    }
  }
  return stems.sort();
}
/**
 * Loads `<WIKI>/pages/<docId>/<pageStem>.md`.
 *
 * @param docId Document id, used as the directory name under `pages`.
 * @param pageStem Page file name without the `.md` extension.
 * @returns The parsed file, or `null` if it could not be read.
 */
export async function readPage(docId: string, pageStem: string): Promise<MdFile | null> {
  const pageFile = `${pageStem}.md`;
  const fullPath = path.join(WIKI, "pages", docId, pageFile);
  return safeReadMd(fullPath);
}
/**
 * Reads the OCR text of one page from
 * `<PROCESSING>/ocr/<docId>/p-<NNN>.txt`, where `NNN` is `pageNum`
 * left-padded with zeros to at least three characters.
 *
 * @param docId Document id, used as the directory name under `ocr`.
 * @param pageNum Page number used to build the file name.
 * @returns The file content as UTF-8 text, or `null` if it could not be read.
 */
export async function readOcr(docId: string, pageNum: number): Promise<string | null> {
  const fileName = `p-${String(pageNum).padStart(3, "0")}.txt`;
  const ocrPath = path.join(PROCESSING, "ocr", docId, fileName);

  let text: string | null;
  try {
    text = await fs.readFile(ocrPath, "utf-8");
  } catch {
    // Any read failure is reported as "no OCR available".
    text = null;
  }
  return text;
}
/**
 * Loads `<WIKI>/entities/<cls>/<id>.md`.
 *
 * NOTE(review): `id` is joined into the path as-is; confirm callers never
 * pass user-controlled values containing `..` segments.
 *
 * @param cls Entity class; used verbatim as the directory name.
 * @param id Entity id (file name without the `.md` extension).
 * @returns The parsed file, or `null` if it could not be read.
 */
export async function readEntity(cls: EntityClass, id: string): Promise<MdFile | null> {
  const entityFile = `${id}.md`;
  const fullPath = path.join(WIKI, "entities", cls, entityFile);
  return safeReadMd(fullPath);
}
/**
 * Loads `<WIKI>/videos/<id>.md`.
 *
 * @param id Video id (file name without the `.md` extension).
 * @returns The parsed file, or `null` if it could not be read.
 */
export async function readVideo(id: string): Promise<MdFile | null> {
  const fullPath = path.join(WIKI, "videos", `${id}.md`);
  return safeReadMd(fullPath);
}
/**
 * Loads `<WIKI>/images-direct/<id>.md`.
 *
 * @param id Image id (file name without the `.md` extension).
 * @returns The parsed file, or `null` if it could not be read.
 */
export async function readImageDirect(id: string): Promise<MdFile | null> {
  const fullPath = path.join(WIKI, "images-direct", `${id}.md`);
  return safeReadMd(fullPath);
}
/**
 * Loads a table's markdown page and its CSV data.
 *
 * The markdown comes from `<WIKI>/tables/<tableId>.md` and the CSV from
 * `<PROCESSING>/tables/<tableId>.csv`. The two reads are independent, so they
 * run concurrently.
 *
 * @param tableId Table id (file name without extension).
 * @returns `md` is the parsed markdown or `null` if it could not be read;
 *          `csv` is the parsed rows or `null` if the CSV could not be read.
 */
export async function readTable(tableId: string): Promise<{ md: MdFile | null; csv: string[][] | null }> {
  const mdPath = path.join(WIKI, "tables", `${tableId}.md`);
  const csvPath = path.join(PROCESSING, "tables", `${tableId}.csv`);

  const readCsv = async (): Promise<string[][] | null> => {
    try {
      return parseCsv(await fs.readFile(csvPath, "utf-8"));
    } catch {
      // The CSV is optional: a missing or unreadable file is reported as `null`.
      return null;
    }
  };

  // Neither promise can reject (both map failures to `null`), so Promise.all is safe.
  const [md, csv] = await Promise.all([safeReadMd(mdPath), readCsv()]);
  return { md, csv };
}
/**
 * Minimal CSV parser with quoted-field support.
 *
 * Rules:
 * - Fields are separated by `,` and rows by `\n`; `\r` outside quotes is dropped.
 * - Inside double quotes, `,`, `\r` and `\n` are literal and `""` is an escaped quote.
 * - A leading UTF-8 byte-order mark is ignored so it does not end up in the first cell.
 * - A final row without a trailing newline is still emitted, including one that
 *   consists solely of a quoted empty field (`""`).
 * - An empty line produces a row with a single empty field.
 *
 * @param text Raw CSV text.
 * @returns The rows, each an array of field strings.
 */
function parseCsv(text: string): string[][] {
  const rows: string[][] = [];
  let row: string[] = [];
  let field = "";
  let inQuotes = false;
  // Set once the current field has seen a quote, so that a quoted empty field
  // at end of input is distinguishable from "no field at all".
  let sawQuote = false;

  // Skip a leading BOM (U+FEFF); it is not removed when a file is decoded as UTF-8.
  const start = text.charCodeAt(0) === 0xfeff ? 1 : 0;

  for (let i = start; i < text.length; i++) {
    const c = text[i];

    if (inQuotes) {
      if (c === '"') {
        if (text[i + 1] === '"') {
          // Doubled quote inside a quoted field is an escaped literal quote.
          field += '"';
          i++;
        } else {
          inQuotes = false;
        }
      } else {
        field += c;
      }
      continue;
    }

    switch (c) {
      case '"':
        inQuotes = true;
        sawQuote = true;
        break;
      case ",":
        row.push(field);
        field = "";
        sawQuote = false;
        break;
      case "\n":
        row.push(field);
        rows.push(row);
        row = [];
        field = "";
        sawQuote = false;
        break;
      case "\r":
        // Dropped so that CRLF line endings behave like LF.
        break;
      default:
        field += c;
    }
  }

  // Flush the last row when the input does not end with a newline.
  if (field.length > 0 || row.length > 0 || sawQuote) {
    row.push(field);
    rows.push(row);
  }
  return rows;
}
/**
 * Extracts the entity class from a file path containing an
 * `entities/<class>/` segment.
 *
 * Both `/` and `\` are accepted as separators, so paths produced by
 * `path.join` on Windows are recognised as well.
 *
 * @param filePath Path to inspect.
 * @returns The matched entity class, or `null` if the path has no
 *          `entities/<known class>/` segment.
 */
export function entityClassFromPath(filePath: string): EntityClass | null {
  const m = filePath.match(
    /entities[\\/](people|organizations|locations|events|uap-objects|vehicles|operations|concepts)[\\/]/,
  );
  // The capture group can only contain one of the EntityClass literals.
  return m ? (m[1] as EntityClass) : null;
}
// Accepted class keys (singular, plural, and underscore/hyphen variants) mapped
// to their entity folder. A Map is used instead of an object literal so that
// keys such as "constructor" or "__proto__" cannot resolve to inherited
// Object.prototype members.
const CLASS_KEY_TO_FOLDER: ReadonlyMap<string, EntityClass> = new Map<string, EntityClass>([
  ["person", "people"],
  ["people", "people"],
  ["organization", "organizations"],
  ["organizations", "organizations"],
  ["location", "locations"],
  ["locations", "locations"],
  ["event", "events"],
  ["events", "events"],
  ["uap_object", "uap-objects"],
  ["uap-object", "uap-objects"],
  ["uap-objects", "uap-objects"],
  ["vehicle", "vehicles"],
  ["vehicles", "vehicles"],
  ["operation", "operations"],
  ["operations", "operations"],
  ["concept", "concepts"],
  ["concepts", "concepts"],
]);

/**
 * Maps a class key to the entity folder name it refers to.
 *
 * The lookup is exact and case-sensitive.
 *
 * @param key Class key, e.g. `"person"`, `"uap_object"`, or `"events"`.
 * @returns The matching entity class, or `null` for an unknown key.
 */
export function classKeyToFolder(key: string): EntityClass | null {
  return CLASS_KEY_TO_FOLDER.get(key) ?? null;
}