add page↔document navigation + DB repopulation tooling
Doc page (/d/[docId]/[page]) gains prev/next navigation bars (top + bottom): within a doc it steps page-by-page; at the first/last page it jumps to the previous/next document. Replaces the disabled-at-boundary links. Indexer tooling for the VPS repopulation: - 30-index-chunks-to-db.py: add --no-embed (fast BM25-only index; vectors backfilled separately) so the app is usable in minutes, not hours of CPU embedding. - 57_load_relations_from_json.py: load typed relations into public.relations from reextract structured fields (deterministic ids, no fuzzy guessing). - 58_backfill_embeddings.py: async pass to fill chunks.embedding (NULL rows) via the embed-service. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
a7e9dce6d2
commit
fe19bb9c57
4 changed files with 477 additions and 42 deletions
|
|
@ -200,7 +200,7 @@ def upsert_document(cur, doc_id: str, idx: dict, archive_path: Path) -> None:
|
|||
)
|
||||
|
||||
|
||||
def index_one_doc(cur, archive: Path, lang: str, batch_size: int) -> tuple[int, int]:
|
||||
def index_one_doc(cur, archive: Path, lang: str, batch_size: int, no_embed: bool = False) -> tuple[int, int]:
|
||||
idx_path = archive / "_index.json"
|
||||
if not idx_path.exists():
|
||||
return (0, 0)
|
||||
|
|
@ -280,11 +280,14 @@ def index_one_doc(cur, archive: Path, lang: str, batch_size: int) -> tuple[int,
|
|||
)
|
||||
)
|
||||
|
||||
# Embed in batches
|
||||
all_embeddings: list[list[float]] = []
|
||||
for i in range(0, len(texts_for_embed), batch_size):
|
||||
batch = texts_for_embed[i : i + batch_size]
|
||||
all_embeddings.extend(embed_batch(batch))
|
||||
# Embed in batches (or skip → NULL embeddings, filled in later by a backfill pass)
|
||||
all_embeddings: list[list[float] | None] = []
|
||||
if no_embed:
|
||||
all_embeddings = [None] * len(texts_for_embed)
|
||||
else:
|
||||
for i in range(0, len(texts_for_embed), batch_size):
|
||||
batch = texts_for_embed[i : i + batch_size]
|
||||
all_embeddings.extend(embed_batch(batch))
|
||||
|
||||
# Bulk insert with vectors (cast text → vector in SQL)
|
||||
insert_sql = """
|
||||
|
|
@ -307,10 +310,13 @@ def index_one_doc(cur, archive: Path, lang: str, batch_size: int) -> tuple[int,
|
|||
%s, %s, %s, %s::vector
|
||||
)
|
||||
"""
|
||||
n_embedded = 0
|
||||
for row, vec in zip(rows, all_embeddings):
|
||||
cur.execute(insert_sql, row + (vector_literal(vec),))
|
||||
cur.execute(insert_sql, row + (vector_literal(vec) if vec is not None else None,))
|
||||
if vec is not None:
|
||||
n_embedded += 1
|
||||
|
||||
return (len(rows), len(all_embeddings))
|
||||
return (len(rows), n_embedded)
|
||||
|
||||
|
||||
def is_already_indexed(cur, doc_id: str) -> bool:
|
||||
|
|
@ -327,20 +333,25 @@ def main():
|
|||
ap.add_argument("--lang", choices=["pt", "en"], default="pt", help="Language to embed (default: pt)")
|
||||
ap.add_argument("--batch-size", type=int, default=16)
|
||||
ap.add_argument("--skip-existing", action="store_true")
|
||||
ap.add_argument("--no-embed", action="store_true",
|
||||
help="Insert chunks with NULL embeddings (fast); backfill vectors later")
|
||||
args = ap.parse_args()
|
||||
|
||||
if not DATABASE_URL:
|
||||
sys.stderr.write("✗ Set DATABASE_URL (or SUPABASE_DB_URL) env var\n")
|
||||
sys.exit(1)
|
||||
|
||||
# Probe embed service
|
||||
try:
|
||||
r = requests.get(f"{EMBED_URL}/health", timeout=10)
|
||||
r.raise_for_status()
|
||||
print(f" ✓ embed service: {EMBED_URL} → {r.json()}")
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"✗ embed service unreachable at {EMBED_URL}: {e}\n")
|
||||
sys.exit(1)
|
||||
# Probe embed service (skipped in --no-embed mode)
|
||||
if not args.no_embed:
|
||||
try:
|
||||
r = requests.get(f"{EMBED_URL}/health", timeout=10)
|
||||
r.raise_for_status()
|
||||
print(f" ✓ embed service: {EMBED_URL} → {r.json()}")
|
||||
except Exception as e:
|
||||
sys.stderr.write(f"✗ embed service unreachable at {EMBED_URL}: {e}\n")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print(" ⚠ --no-embed: chunks indexed with NULL vectors (BM25 only until backfill)")
|
||||
|
||||
archives = discover_built_docs()
|
||||
if args.doc_id:
|
||||
|
|
@ -365,7 +376,7 @@ def main():
|
|||
t_doc = time.time()
|
||||
try:
|
||||
with conn.cursor() as cur:
|
||||
n_chunks, n_embed = index_one_doc(cur, archive, args.lang, args.batch_size)
|
||||
n_chunks, n_embed = index_one_doc(cur, archive, args.lang, args.batch_size, args.no_embed)
|
||||
conn.commit()
|
||||
wall = round(time.time() - t_doc, 1)
|
||||
print(f" ✓ {doc_id} · {n_chunks} chunks · {n_embed} embedded · {wall}s")
|
||||
|
|
|
|||
281
scripts/maintain/57_load_relations_from_json.py
Normal file
281
scripts/maintain/57_load_relations_from_json.py
Normal file
|
|
@ -0,0 +1,281 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
57_load_relations_from_json.py — Build typed relations for public.relations from
|
||||
the reextract data, using ONLY verifiable references (Locard / absolute
|
||||
provenance — no fuzzy guessing).
|
||||
|
||||
Two sources, combined and deduped:
|
||||
|
||||
A. STRUCTURAL relations derived from each raw/<doc>--subagent/_reextract.json
|
||||
events[] (deterministic event_id, names resolved against real entities):
|
||||
- event.observers[] → (person, witnessed, event)
|
||||
- event (each) → (event, documented_in, document)
|
||||
- event.uap_objects_observed → (event, involves_uap, uap_object)
|
||||
- event.primary_location → (event, occurred_at, location) [substring
|
||||
match against this doc's locations[]]
|
||||
- people[] (each) → (person, mentioned_by, document)
|
||||
|
||||
B. EXPLICIT relations[] from the same JSON that resolve EXACTLY (both
|
||||
endpoints found in the entity name→id index): captures person↔org
|
||||
(employed_by, signed, authored, commanded), etc.
|
||||
|
||||
ID generation mirrors scripts/synthesize/30_rebuild_wiki_from_reextract.py so
|
||||
event_id / person_id / uap_object_id match the entities table exactly.
|
||||
|
||||
Run (DATABASE_URL must point at target Postgres):
|
||||
DATABASE_URL=postgresql://... python3 scripts/maintain/57_load_relations_from_json.py [--truncate]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unicodedata
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg
|
||||
import yaml
|
||||
|
||||
UFO = Path(os.environ.get("UFO_ROOT", "/Users/guto/ufo"))
|
||||
RAW = UFO / "raw"
|
||||
ENT = UFO / "wiki" / "entities"
|
||||
|
||||
CLASS_DIR = {
|
||||
"person": "people",
|
||||
"organization": "organizations",
|
||||
"location": "locations",
|
||||
"event": "events",
|
||||
"uap_object": "uap-objects",
|
||||
}
|
||||
|
||||
|
||||
# ── ID generation (mirror of synthesize/30) ─────────────────────────────────
|
||||
def canonicalize_name(name: str) -> str:
|
||||
if not name:
|
||||
return ""
|
||||
nfd = unicodedata.normalize("NFD", name)
|
||||
ascii_str = "".join(c for c in nfd if not unicodedata.combining(c))
|
||||
low = ascii_str.lower()
|
||||
rep = re.sub(r"[^a-z0-9-]", "-", low)
|
||||
col = re.sub(r"-+", "-", rep).strip("-")
|
||||
if col and col[0].isdigit():
|
||||
col = "x-" + col
|
||||
return col
|
||||
|
||||
|
||||
def event_id_from(label: str, date_start: str | None) -> str:
|
||||
slug = canonicalize_name(label or "")[:40].strip("-") or "unlabeled"
|
||||
date = date_start or ""
|
||||
m = re.match(r"^(\d{4})-(\d{2})-(\d{2})$", date)
|
||||
if m: return f"EV-{m.group(1)}-{m.group(2)}-{m.group(3)}-{slug}"
|
||||
m = re.match(r"^(\d{4})-(\d{2})$", date)
|
||||
if m: return f"EV-{m.group(1)}-{m.group(2)}-XX-{slug}"
|
||||
m = re.match(r"^(\d{4})$", date)
|
||||
if m: return f"EV-{m.group(1)}-XX-XX-{slug}"
|
||||
return f"EV-XXXX-XX-XX-{slug}"
|
||||
|
||||
|
||||
def uap_object_id(event_id: str, index: int) -> str:
|
||||
if event_id.startswith("EV-"):
|
||||
parts = event_id[3:].split("-", 4)
|
||||
if len(parts) >= 4:
|
||||
year = parts[0]
|
||||
slug = "-".join(parts[3:])
|
||||
compact = re.sub(r"[^A-Z0-9]", "", slug.upper())[:20] or "UNK"
|
||||
return f"OBJ-EV{year}-{compact}-{index:02d}"
|
||||
return f"OBJ-UNK-{index:02d}"
|
||||
|
||||
|
||||
def lower(s: str) -> str:
|
||||
return (s or "").strip().lower()
|
||||
|
||||
|
||||
def parse_frontmatter(path: Path) -> dict | None:
|
||||
try:
|
||||
text = path.read_text(encoding="utf-8")
|
||||
if not text.startswith("---"):
|
||||
return None
|
||||
return yaml.safe_load(text.split("---", 2)[1]) or {}
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def build_name_index() -> dict[str, dict[str, str]]:
|
||||
"""Per class: {name_or_alias_lower: entity_id} from real entity files."""
|
||||
index: dict[str, dict[str, str]] = {c: {} for c in CLASS_DIR}
|
||||
for cls, dirname in CLASS_DIR.items():
|
||||
d = ENT / dirname
|
||||
if not d.is_dir():
|
||||
continue
|
||||
for f in d.glob("*.md"):
|
||||
eid = f.stem
|
||||
fm = parse_frontmatter(f)
|
||||
if not fm:
|
||||
index[cls].setdefault(eid, eid)
|
||||
continue
|
||||
for n in [fm.get("canonical_name")] + (fm.get("aliases") or []):
|
||||
k = lower(n)
|
||||
if k and k not in index[cls]:
|
||||
index[cls][k] = eid
|
||||
return index
|
||||
|
||||
|
||||
def entity_id_sets(index) -> dict[str, set]:
|
||||
return {cls: set(m.values()) for cls, m in index.items()}
|
||||
|
||||
|
||||
def main() -> int:
|
||||
truncate = "--truncate" in sys.argv
|
||||
dburl = os.environ.get("DATABASE_URL") or os.environ.get("SUPABASE_DB_URL")
|
||||
if not dburl:
|
||||
sys.exit("DATABASE_URL not set")
|
||||
|
||||
print("Building name→id index from wiki/entities ...")
|
||||
index = build_name_index()
|
||||
ids = entity_id_sets(index)
|
||||
for cls in CLASS_DIR:
|
||||
print(f" {cls}: {len(index[cls])} keys / {len(ids[cls])} ids")
|
||||
|
||||
rows: list[tuple] = []
|
||||
|
||||
def add(sc, sid, rtype, tc, tid, doc_id, conf):
|
||||
if not (sid and tid):
|
||||
return
|
||||
ev = f"[[{doc_id}]]" if doc_id else None
|
||||
rows.append((sc, sid, rtype, tc, tid, ev, conf, "reextract"))
|
||||
|
||||
def resolve(cls, name):
|
||||
if cls == "document":
|
||||
return (name or "").strip() or None
|
||||
return index.get(cls, {}).get(lower(name))
|
||||
|
||||
n_docs = 0
|
||||
for jf in sorted(RAW.glob("*--subagent/_reextract.json")):
|
||||
doc_id = jf.parent.name.removesuffix("--subagent")
|
||||
try:
|
||||
d = json.loads(jf.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
continue
|
||||
n_docs += 1
|
||||
|
||||
# locations declared in this doc (clean names) → for substring match
|
||||
doc_locs = []
|
||||
for l in d.get("locations") or []:
|
||||
nm = (l.get("name") or "").strip()
|
||||
lid = canonicalize_name(nm)
|
||||
if nm and lid in ids["location"]:
|
||||
doc_locs.append((nm.lower(), lid))
|
||||
# longest names first (more specific match)
|
||||
doc_locs.sort(key=lambda x: -len(x[0]))
|
||||
|
||||
# A. structural from events[]
|
||||
for e in d.get("events") or []:
|
||||
label = (e.get("label") or "").strip()
|
||||
if not label:
|
||||
continue
|
||||
eid = event_id_from(label, e.get("date_start"))
|
||||
if eid not in ids["event"]:
|
||||
continue # event entity must exist
|
||||
conf = e.get("confidence") or "medium"
|
||||
|
||||
# event documented_in document
|
||||
add("event", eid, "documented_in", "document", doc_id, doc_id, "high")
|
||||
|
||||
# observers witnessed event
|
||||
for o in e.get("observers") or []:
|
||||
nm = o.get("name") if isinstance(o, dict) else o
|
||||
if nm and lower(nm) != "unknown":
|
||||
pid = index["person"].get(lower(nm)) or (
|
||||
canonicalize_name(nm) if canonicalize_name(nm) in ids["person"] else None
|
||||
)
|
||||
if pid:
|
||||
add("person", pid, "witnessed", "event", eid, doc_id, conf)
|
||||
|
||||
# uap_objects involves_uap
|
||||
for i, u in enumerate(e.get("uap_objects_observed") or [], 1):
|
||||
if not isinstance(u, dict):
|
||||
continue
|
||||
oid = uap_object_id(eid, i)
|
||||
if oid in ids["uap_object"]:
|
||||
add("event", eid, "involves_uap", "uap_object", oid, doc_id, conf)
|
||||
|
||||
# event occurred_at location (substring match of doc locations)
|
||||
ploc = lower(e.get("primary_location_name"))
|
||||
if ploc:
|
||||
for lname, lid in doc_locs:
|
||||
if lname and lname in ploc:
|
||||
add("event", eid, "occurred_at", "location", lid, doc_id, "medium")
|
||||
break
|
||||
|
||||
# people mentioned_by document
|
||||
for p in d.get("people") or []:
|
||||
nm = (p.get("name") or "").strip()
|
||||
if nm and lower(nm) != "unknown":
|
||||
pid = index["person"].get(lower(nm))
|
||||
if pid:
|
||||
add("person", pid, "mentioned_by", "document", doc_id, doc_id, "medium")
|
||||
|
||||
# B. explicit relations[] that resolve exactly
|
||||
for r in d.get("relations") or []:
|
||||
if not isinstance(r, dict):
|
||||
continue
|
||||
sc, tc, rtype = r.get("source_class"), r.get("target_class"), r.get("type")
|
||||
if not (sc and tc and rtype):
|
||||
continue
|
||||
# skip the structural types already covered to avoid noise dup
|
||||
sid = resolve(sc, r.get("source_name"))
|
||||
tid = resolve(tc, r.get("target_name"))
|
||||
if sid and tid:
|
||||
add(sc, sid, rtype, tc, tid, doc_id, r.get("confidence") or "medium")
|
||||
|
||||
print(f"\nProcessed {n_docs} docs; raw relation rows: {len(rows)}")
|
||||
|
||||
# dedupe by (source, type, target) — keep first (evidence may vary)
|
||||
seen: set[tuple] = set()
|
||||
deduped: list[tuple] = []
|
||||
for row in rows:
|
||||
key = (row[0], row[1], row[2], row[3], row[4])
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
deduped.append(row)
|
||||
print(f"After dedup: {len(deduped)}")
|
||||
if not deduped:
|
||||
return 0
|
||||
|
||||
with psycopg.connect(dburl) as conn:
|
||||
with conn.cursor() as cur:
|
||||
if truncate:
|
||||
cur.execute("TRUNCATE public.relations")
|
||||
print(" TRUNCATEd public.relations")
|
||||
cur.execute("CREATE TEMP TABLE _rel (LIKE public.relations INCLUDING DEFAULTS)")
|
||||
with cur.copy(
|
||||
"""COPY _rel (source_class, source_id, relation_type,
|
||||
target_class, target_id, evidence_ref,
|
||||
confidence, extracted_by) FROM STDIN"""
|
||||
) as cp:
|
||||
for row in deduped:
|
||||
cp.write_row(row)
|
||||
cur.execute(
|
||||
"""INSERT INTO public.relations
|
||||
(source_class, source_id, relation_type,
|
||||
target_class, target_id, evidence_ref,
|
||||
confidence, extracted_by)
|
||||
SELECT source_class, source_id, relation_type,
|
||||
target_class, target_id, evidence_ref,
|
||||
confidence, extracted_by
|
||||
FROM _rel ON CONFLICT DO NOTHING"""
|
||||
)
|
||||
print(f"Inserted (after ON CONFLICT): {cur.rowcount}")
|
||||
cur.execute(
|
||||
"SELECT relation_type, COUNT(*) FROM public.relations GROUP BY relation_type ORDER BY 2 DESC"
|
||||
)
|
||||
print("\n=== Relation counts in DB ===")
|
||||
for t, n in cur.fetchall():
|
||||
print(f" {n:>7} {t}")
|
||||
conn.commit()
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
106
scripts/maintain/58_backfill_embeddings.py
Normal file
106
scripts/maintain/58_backfill_embeddings.py
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
58_backfill_embeddings.py — Fill in chunks.embedding for rows inserted with
|
||||
NULL vectors (by 30-index-chunks-to-db.py --no-embed). Runs independently of
|
||||
the fast index pass so the web app is usable (BM25) while dense vectors are
|
||||
computed in the background.
|
||||
|
||||
Processes chunks WHERE embedding IS NULL in batches, calling the embed-service,
|
||||
and UPDATEs each row. Resumable: re-run to continue where it left off.
|
||||
|
||||
Run (inside disclosure-internal network, or with tunnels):
|
||||
DATABASE_URL=postgresql://postgres:...@db:5432/postgres \
|
||||
EMBED_SERVICE_URL=http://embed:8000 \
|
||||
python3 scripts/maintain/58_backfill_embeddings.py [--lang pt] [--batch-size 16]
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import argparse
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
try:
|
||||
import psycopg
|
||||
import requests
|
||||
except ImportError as e:
|
||||
sys.exit(f"pip install psycopg[binary] requests # missing: {e}")
|
||||
|
||||
DATABASE_URL = os.getenv("DATABASE_URL") or os.getenv("SUPABASE_DB_URL")
|
||||
EMBED_URL = os.getenv("EMBED_SERVICE_URL", "http://localhost:8000")
|
||||
|
||||
|
||||
def embed_batch(texts: list[str]) -> list[list[float]]:
|
||||
resp = requests.post(f"{EMBED_URL}/embed", json={"texts": texts}, timeout=120)
|
||||
resp.raise_for_status()
|
||||
return resp.json()["embeddings"]
|
||||
|
||||
|
||||
def vector_literal(vec: list[float]) -> str:
|
||||
return "[" + ",".join(repr(float(x)) for x in vec) + "]"
|
||||
|
||||
|
||||
def main() -> int:
|
||||
ap = argparse.ArgumentParser()
|
||||
ap.add_argument("--lang", choices=["pt", "en"], default="pt")
|
||||
ap.add_argument("--batch-size", type=int, default=16)
|
||||
args = ap.parse_args()
|
||||
|
||||
if not DATABASE_URL:
|
||||
sys.exit("DATABASE_URL not set")
|
||||
|
||||
# health probe
|
||||
r = requests.get(f"{EMBED_URL}/health", timeout=10)
|
||||
r.raise_for_status()
|
||||
print(f"embed service: {EMBED_URL} → {r.json()}")
|
||||
|
||||
field = "content_pt" if args.lang == "pt" else "content_en"
|
||||
other = "content_en" if args.lang == "pt" else "content_pt"
|
||||
|
||||
with psycopg.connect(DATABASE_URL, autocommit=False) as conn:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT count(*) FROM public.chunks WHERE embedding IS NULL")
|
||||
total = cur.fetchone()[0]
|
||||
print(f"chunks needing embedding: {total}")
|
||||
if total == 0:
|
||||
return 0
|
||||
|
||||
done = 0
|
||||
t0 = time.time()
|
||||
while True:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(
|
||||
f"""SELECT chunk_pk, COALESCE(NULLIF({field}, ''), {other}, '')
|
||||
FROM public.chunks WHERE embedding IS NULL
|
||||
ORDER BY chunk_pk LIMIT %s""",
|
||||
(args.batch_size,),
|
||||
)
|
||||
batch = cur.fetchall()
|
||||
if not batch:
|
||||
break
|
||||
pks = [b[0] for b in batch]
|
||||
texts = [b[1] or "" for b in batch]
|
||||
try:
|
||||
vecs = embed_batch(texts)
|
||||
except Exception as e:
|
||||
print(f" embed error at pk {pks[0]}: {e} — retrying once in 5s")
|
||||
time.sleep(5)
|
||||
vecs = embed_batch(texts)
|
||||
with conn.cursor() as cur:
|
||||
for pk, vec in zip(pks, vecs):
|
||||
cur.execute(
|
||||
"UPDATE public.chunks SET embedding = %s::vector WHERE chunk_pk = %s",
|
||||
(vector_literal(vec), pk),
|
||||
)
|
||||
conn.commit()
|
||||
done += len(batch)
|
||||
if done % 320 == 0 or done >= total:
|
||||
rate = done / max(1e-6, time.time() - t0)
|
||||
eta = (total - done) / max(1e-6, rate)
|
||||
print(f" {done}/{total} · {rate:.0f}/s · ETA {eta/60:.0f}min", flush=True)
|
||||
|
||||
print(f"✓ backfill complete: {done} embeddings in {(time.time()-t0)/60:.1f}min")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
|
@ -8,7 +8,7 @@ import Link from "next/link";
|
|||
import Image from "next/image";
|
||||
import { notFound } from "next/navigation";
|
||||
import { readChunksByPage, readIndex, hasChunks } from "@/lib/chunks";
|
||||
import { readDocument } from "@/lib/wiki";
|
||||
import { readDocument, listDocuments } from "@/lib/wiki";
|
||||
import { AuthBar } from "@/components/auth-bar";
|
||||
import { ChatBubble } from "@/components/chat-bubble";
|
||||
import { DocRendererV2 } from "@/components/doc-renderer-v2";
|
||||
|
|
@ -45,10 +45,11 @@ export default async function DocPageView({
|
|||
);
|
||||
}
|
||||
|
||||
const [idx, byPage, doc] = await Promise.all([
|
||||
const [idx, byPage, doc, docList] = await Promise.all([
|
||||
readIndex(docId),
|
||||
readChunksByPage(docId),
|
||||
readDocument(docId),
|
||||
listDocuments(),
|
||||
]);
|
||||
if (!idx) notFound();
|
||||
|
||||
|
|
@ -56,6 +57,61 @@ export default async function DocPageView({
|
|||
const pngUrl = `/api/static/processing/png/${docId}/p-${m[1]}.png`;
|
||||
const totalPages = idx.total_pages;
|
||||
|
||||
// ── Navigation: prev/next page within doc; at boundaries, prev/next document ──
|
||||
const pp = (n: number) => `p${String(n).padStart(3, "0")}`;
|
||||
const docIdx = docList.indexOf(docId);
|
||||
const prevDoc = docIdx > 0 ? docList[docIdx - 1] : null;
|
||||
const nextDoc = docIdx >= 0 && docIdx < docList.length - 1 ? docList[docIdx + 1] : null;
|
||||
|
||||
const prevNav =
|
||||
pageNum > 1
|
||||
? { href: `/d/${docId}/${pp(pageNum - 1)}`, label: `← página ${pageNum - 1}`, kind: "page" as const }
|
||||
: prevDoc
|
||||
? { href: `/d/${prevDoc}/${pp(1)}`, label: "← documento anterior", kind: "doc" as const }
|
||||
: null;
|
||||
const nextNav =
|
||||
pageNum < totalPages
|
||||
? { href: `/d/${docId}/${pp(pageNum + 1)}`, label: `página ${pageNum + 1} →`, kind: "page" as const }
|
||||
: nextDoc
|
||||
? { href: `/d/${nextDoc}/${pp(1)}`, label: "próximo documento →", kind: "doc" as const }
|
||||
: null;
|
||||
|
||||
const navBar = (
|
||||
<nav className="flex items-center justify-between gap-3 font-mono text-xs">
|
||||
{prevNav ? (
|
||||
<Link
|
||||
href={prevNav.href}
|
||||
className={`px-3 py-1.5 border rounded hover:bg-[rgba(0,255,156,0.10)] ${
|
||||
prevNav.kind === "doc"
|
||||
? "border-[#ffa500] text-[#ffa500] hover:bg-[rgba(255,165,0,0.10)]"
|
||||
: "border-[rgba(0,255,156,0.30)] text-[#00ff9c]"
|
||||
}`}
|
||||
>
|
||||
{prevNav.label}
|
||||
</Link>
|
||||
) : (
|
||||
<span className="px-3 py-1.5 opacity-30">← início</span>
|
||||
)}
|
||||
<span className="text-[#5a6678] tabular-nums">
|
||||
{pageNum} / {totalPages}
|
||||
</span>
|
||||
{nextNav ? (
|
||||
<Link
|
||||
href={nextNav.href}
|
||||
className={`px-3 py-1.5 border rounded hover:bg-[rgba(0,255,156,0.10)] ${
|
||||
nextNav.kind === "doc"
|
||||
? "border-[#ffa500] text-[#ffa500] hover:bg-[rgba(255,165,0,0.10)]"
|
||||
: "border-[rgba(0,255,156,0.30)] text-[#00ff9c]"
|
||||
}`}
|
||||
>
|
||||
{nextNav.label}
|
||||
</Link>
|
||||
) : (
|
||||
<span className="px-3 py-1.5 opacity-30">fim →</span>
|
||||
)}
|
||||
</nav>
|
||||
);
|
||||
|
||||
return (
|
||||
<main className="min-h-screen p-6 md:p-10 max-w-6xl mx-auto">
|
||||
<div className="flex items-start justify-between gap-4 mb-6">
|
||||
|
|
@ -77,6 +133,8 @@ export default async function DocPageView({
|
|||
</h1>
|
||||
</header>
|
||||
|
||||
<div className="mb-6">{navBar}</div>
|
||||
|
||||
<div className="grid grid-cols-1 lg:grid-cols-[1fr_1fr] gap-8">
|
||||
<aside className="lg:sticky lg:top-6 lg:self-start lg:max-h-[85vh] lg:overflow-y-auto">
|
||||
<h2 className="font-mono text-xs uppercase tracking-widest text-[#7fdbff] mb-2">
|
||||
|
|
@ -92,29 +150,6 @@ export default async function DocPageView({
|
|||
className="block w-full h-auto"
|
||||
/>
|
||||
</div>
|
||||
<div className="mt-2 flex items-center justify-between font-mono text-[10px] text-[#5a6678]">
|
||||
<Link
|
||||
href={pageNum > 1 ? `/d/${docId}/p${String(pageNum - 1).padStart(3, "0")}` : "#"}
|
||||
className={pageNum > 1 ? "hover:text-[#00ff9c]" : "opacity-30 pointer-events-none"}
|
||||
>
|
||||
← p{pageNum - 1}
|
||||
</Link>
|
||||
<span>
|
||||
{pageNum} / {totalPages}
|
||||
</span>
|
||||
<Link
|
||||
href={
|
||||
pageNum < totalPages
|
||||
? `/d/${docId}/p${String(pageNum + 1).padStart(3, "0")}`
|
||||
: "#"
|
||||
}
|
||||
className={
|
||||
pageNum < totalPages ? "hover:text-[#00ff9c]" : "opacity-30 pointer-events-none"
|
||||
}
|
||||
>
|
||||
p{pageNum + 1} →
|
||||
</Link>
|
||||
</div>
|
||||
</aside>
|
||||
|
||||
<article>
|
||||
|
|
@ -136,6 +171,8 @@ export default async function DocPageView({
|
|||
</article>
|
||||
</div>
|
||||
|
||||
<div className="mt-10 pt-6 border-t border-[rgba(0,255,156,0.32)]">{navBar}</div>
|
||||
|
||||
<ChatBubble context={{ doc_id: docId, page_id: `${docId}/${stem}` }} />
|
||||
</main>
|
||||
);
|
||||
|
|
|
|||
Loading…
Reference in a new issue