183 lines
4.9 KiB
TypeScript
183 lines
4.9 KiB
TypeScript
|
|
/**
 * Data layer for the Disclosure Bureau wiki.
 *
 * Reads markdown files directly from `<UFO_ROOT>/wiki` and `<UFO_ROOT>/processing`,
 * where UFO_ROOT comes from the `UFO_ROOT` environment variable and defaults to
 * /Users/guto/ufo.
 * No database — markdown IS the database (Karpathy LLM-wiki pattern).
 */
|
||
|
|
import fs from "node:fs/promises";
|
||
|
|
import path from "node:path";
|
||
|
|
import matter from "gray-matter";
|
||
|
|
|
||
|
|
/** Root used when the `UFO_ROOT` environment variable is not set. */
const DEFAULT_UFO_ROOT = "/Users/guto/ufo";

/** Base directory of the data checkout; overridable through `process.env.UFO_ROOT`. */
export const UFO_ROOT = process.env.UFO_ROOT ?? DEFAULT_UFO_ROOT;

/** `<UFO_ROOT>/wiki` — the markdown tree read by the `read*` / `list*` helpers. */
export const WIKI = path.join(UFO_ROOT, "wiki");

/** `<UFO_ROOT>/processing` — holds the OCR text and table CSV files. */
export const PROCESSING = path.join(UFO_ROOT, "processing");
|
||
|
|
|
||
|
|
/**
 * Kinds of wiki entity. Each member is also the folder name used under
 * `<WIKI>/entities/` when an entity file is looked up.
 */
export type EntityClass =
  | "people" | "organizations" | "locations" | "events"
  | "uap-objects" | "vehicles" | "operations" | "concepts";
|
||
|
|
|
||
|
|
/**
 * Front-matter data of a markdown file. Keys are arbitrary and values are
 * `unknown`, so callers must narrow a value before using it.
 */
export interface Frontmatter {
  [field: string]: unknown;
}
|
||
|
|
|
||
|
|
/** A markdown file split into its front matter and the remaining content. */
export interface MdFile {
  /** Data parsed from the front-matter block. */
  fm: Frontmatter;
  /** The file content with the front-matter block removed. */
  body: string;
}
|
||
|
|
|
||
|
|
/**
 * Reads a markdown file as UTF-8 and splits it with gray-matter.
 *
 * @param absPath Path of the file to read.
 * @returns The parsed front matter (`fm`) and the remaining content (`body`).
 * @throws Rejects when the file cannot be read or gray-matter fails to parse it.
 */
export async function readMd(absPath: string): Promise<MdFile> {
  const source = await fs.readFile(absPath, "utf-8");
  const { data, content } = matter(source);
  return { fm: data as Frontmatter, body: content };
}
|
||
|
|
|
||
|
|
/**
 * Best-effort variant of `readMd`: any failure (missing file, unreadable file,
 * parse error) is reported as `null` instead of a rejection.
 *
 * @param absPath Path of the file to read.
 * @returns The parsed file, or `null` when reading or parsing failed.
 */
export async function safeReadMd(absPath: string): Promise<MdFile | null> {
  return readMd(absPath).catch(() => null);
}
|
||
|
|
|
||
|
|
/**
 * Lists the ids of all documents, i.e. the `.md` file names found in
 * `<WIKI>/documents` with the extension removed.
 *
 * @returns Sorted document ids; an empty array when the directory cannot be read.
 */
export async function listDocuments(): Promise<string[]> {
  let entries: string[];
  try {
    entries = await fs.readdir(path.join(WIKI, "documents"));
  } catch {
    // A missing or unreadable directory is treated as "no documents".
    return [];
  }

  const ids: string[] = [];
  for (const entry of entries) {
    if (entry.endsWith(".md")) {
      ids.push(entry.replace(/\.md$/, ""));
    }
  }
  ids.sort();
  return ids;
}
|
||
|
|
|
||
|
|
/**
 * Loads `<WIKI>/documents/<docId>.md`.
 *
 * NOTE(review): `docId` is placed into the path as-is; if it can originate from
 * a request, confirm the caller rejects path separators and `..`.
 *
 * @param docId Document id (file name without `.md`).
 * @returns The parsed file, or `null` when it cannot be read.
 */
export async function readDocument(docId: string): Promise<MdFile | null> {
  const target = path.join(WIKI, "documents", `${docId}.md`);
  return safeReadMd(target);
}
|
||
|
|
|
||
|
|
/**
 * Lists the page stems (`p` followed by exactly three digits) available for a
 * document under `<WIKI>/pages/<docId>`.
 *
 * NOTE(review): the three-digit pattern ignores any page file numbered 1000 or
 * above — confirm that is intended.
 *
 * @param docId Document id (sub-folder of `pages`).
 * @returns Sorted page stems such as `p001`; an empty array when the folder cannot be read.
 */
export async function listPages(docId: string): Promise<string[]> {
  let entries: string[];
  try {
    entries = await fs.readdir(path.join(WIKI, "pages", docId));
  } catch {
    // A missing or unreadable folder is treated as "no pages".
    return [];
  }

  const stems: string[] = [];
  for (const entry of entries) {
    if (/^p\d{3}\.md$/.test(entry)) {
      stems.push(entry.replace(/\.md$/, ""));
    }
  }
  stems.sort();
  return stems;
}
|
||
|
|
|
||
|
|
/**
 * Loads `<WIKI>/pages/<docId>/<pageStem>.md`.
 *
 * @param docId Document id (sub-folder of `pages`).
 * @param pageStem Page file name without `.md`.
 * @returns The parsed file, or `null` when it cannot be read.
 */
export async function readPage(docId: string, pageStem: string): Promise<MdFile | null> {
  const target = path.join(WIKI, "pages", docId, `${pageStem}.md`);
  return safeReadMd(target);
}
|
||
|
|
|
||
|
|
/**
 * Reads the OCR text of one page from `<PROCESSING>/ocr/<docId>/p-NNN.txt`,
 * where NNN is `pageNum` left-padded with zeros to at least three characters.
 *
 * @param docId Document id (sub-folder of `ocr`).
 * @param pageNum Page number used to build the file name.
 * @returns The file content, or `null` when the file cannot be read.
 */
export async function readOcr(docId: string, pageNum: number): Promise<string | null> {
  const fileName = `p-${String(pageNum).padStart(3, "0")}.txt`;
  const target = path.join(PROCESSING, "ocr", docId, fileName);
  return fs.readFile(target, "utf-8").catch(() => null);
}
|
||
|
|
|
||
|
|
/**
 * Loads `<WIKI>/entities/<cls>/<id>.md`.
 *
 * @param cls Entity class, used as the folder name.
 * @param id Entity id (file name without `.md`).
 * @returns The parsed file, or `null` when it cannot be read.
 */
export async function readEntity(cls: EntityClass, id: string): Promise<MdFile | null> {
  const target = path.join(WIKI, "entities", cls, `${id}.md`);
  return safeReadMd(target);
}
|
||
|
|
|
||
|
|
/**
 * Loads `<WIKI>/videos/<id>.md`.
 *
 * @param id Video id (file name without `.md`).
 * @returns The parsed file, or `null` when it cannot be read.
 */
export async function readVideo(id: string): Promise<MdFile | null> {
  const target = path.join(WIKI, "videos", `${id}.md`);
  return safeReadMd(target);
}
|
||
|
|
|
||
|
|
/**
 * Loads `<WIKI>/images-direct/<id>.md`.
 *
 * @param id Image id (file name without `.md`).
 * @returns The parsed file, or `null` when it cannot be read.
 */
export async function readImageDirect(id: string): Promise<MdFile | null> {
  const target = path.join(WIKI, "images-direct", `${id}.md`);
  return safeReadMd(target);
}
|
||
|
|
|
||
|
|
/**
 * Loads a table's markdown page (`<WIKI>/tables/<tableId>.md`) together with
 * its raw data (`<PROCESSING>/tables/<tableId>.csv`). The two parts are
 * independent: either may be `null` while the other is present.
 *
 * @param tableId Table id (file name without extension).
 * @returns `md` — the parsed markdown file or `null`; `csv` — the parsed rows,
 *   or `null` when the CSV file cannot be read or parsed.
 */
export async function readTable(tableId: string): Promise<{ md: MdFile | null; csv: string[][] | null }> {
  const loadCsv = async (): Promise<string[][] | null> => {
    try {
      const text = await fs.readFile(path.join(PROCESSING, "tables", `${tableId}.csv`), "utf-8");
      return parseCsv(text);
    } catch {
      // The CSV is optional: any failure simply yields no data.
      return null;
    }
  };

  const md = await safeReadMd(path.join(WIKI, "tables", `${tableId}.md`));
  const csv = await loadCsv();
  return { md, csv };
}
|
||
|
|
|
||
|
|
/**
 * Minimal CSV parser with quoted-field support.
 *
 * Rules implemented:
 * - fields are separated by `,` and records by `\n`; `\r` outside quotes is ignored,
 *   so CRLF input works;
 * - inside a quoted field, `""` yields a literal `"`, and commas and newlines are data;
 * - a leading byte-order mark (U+FEFF) is skipped so it does not end up in the first cell;
 * - the final record does not need a trailing newline, including when it is just `""`.
 *
 * @param text Raw CSV text.
 * @returns One string array per record; an empty array for empty input.
 */
function parseCsv(text: string): string[][] {
  const rows: string[][] = [];
  let row: string[] = [];
  let field = "";
  let inQuotes = false;
  // Set once the current field has seen an opening quote, so that an empty
  // quoted field at the very end of the input is still emitted.
  let fieldWasQuoted = false;

  // Reading a file as UTF-8 keeps a BOM as U+FEFF at index 0; skip it.
  const start = text.charCodeAt(0) === 0xfeff ? 1 : 0;

  for (let i = start; i < text.length; i++) {
    const c = text.charAt(i);

    if (inQuotes) {
      if (c !== '"') {
        field += c;
      } else if (text.charAt(i + 1) === '"') {
        // Doubled quote inside a quoted field is an escaped quote.
        field += '"';
        i++;
      } else {
        inQuotes = false;
      }
      continue;
    }

    switch (c) {
      case '"':
        inQuotes = true;
        fieldWasQuoted = true;
        break;
      case ",":
        row.push(field);
        field = "";
        fieldWasQuoted = false;
        break;
      case "\n":
        row.push(field);
        rows.push(row);
        row = [];
        field = "";
        fieldWasQuoted = false;
        break;
      case "\r":
        // Ignored: the "\n" of a CRLF pair terminates the record.
        break;
      default:
        field += c;
    }
  }

  // Flush the last record when the input does not end with a newline.
  if (field.length > 0 || row.length > 0 || fieldWasQuoted) {
    row.push(field);
    rows.push(row);
  }
  return rows;
}
|
||
|
|
|
||
|
|
/**
 * Extracts the entity class from a file path containing an
 * `entities/<class>/` segment.
 *
 * Both `/` and `\` are accepted as separators, because paths produced by
 * `path.join` use `\` on Windows.
 *
 * @param filePath Path to inspect (absolute or relative).
 * @returns The entity class named by the path, or `null` when the path has no
 *   `entities/<known class>/` segment.
 */
export function entityClassFromPath(filePath: string): EntityClass | null {
  const m = filePath.match(
    /entities[\\/](people|organizations|locations|events|uap-objects|vehicles|operations|concepts)[\\/]/,
  );
  return m ? (m[1] as EntityClass) : null;
}
|
||
|
|
|
||
|
|
/**
 * Accepted class keys (singular, plural, and the `uap_object` / `uap-object`
 * spellings) mapped to their entity folder. A `Map` is used instead of an
 * object literal so that inherited names such as "constructor" or "__proto__"
 * can never be returned as a match.
 */
const CLASS_KEY_TO_FOLDER: ReadonlyMap<string, EntityClass> = new Map<string, EntityClass>([
  ["person", "people"],
  ["people", "people"],
  ["organization", "organizations"],
  ["organizations", "organizations"],
  ["location", "locations"],
  ["locations", "locations"],
  ["event", "events"],
  ["events", "events"],
  ["uap_object", "uap-objects"],
  ["uap-object", "uap-objects"],
  ["uap-objects", "uap-objects"],
  ["vehicle", "vehicles"],
  ["vehicles", "vehicles"],
  ["operation", "operations"],
  ["operations", "operations"],
  ["concept", "concepts"],
  ["concepts", "concepts"],
]);

/**
 * Maps a class key to the entity folder name.
 *
 * @param key Class key; matched exactly (case-sensitive, no trimming).
 * @returns The matching entity class, or `null` for an unknown key.
 */
export function classKeyToFolder(key: string): EntityClass | null {
  return CLASS_KEY_TO_FOLDER.get(key) ?? null;
}
|