disclosure-bureau/scripts/07-test-agent.py

279 lines
10 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
"""
07-test-agent.py — Minimal chat-agent CLI that validates the schema end-to-end.
Simulates one chat-bubble round trip:
1. User asks a free-text query.
2. Agent walks wiki/ + case/ and collects relevant context markdowns.
3. Calls Claude Haiku (via the claude CLI with OAuth, the same path as 02-vision-page.py)
with a system prompt that asks for STRUCTURED output:
{
"answer_en": "...",
"answer_pt_br": "...",
"citations": [
{
"kind": "page|crop|entity",
"page_id": "doc-id/pNNN", # for kind=page/crop
"entity_link": "[[loc/.../...]]", # for kind=entity
"png_url": "/static/png/doc-id/p-NNN.png",
"crop_url": "/static/crops/doc-id/CROP-ID.png", # if available
"bbox": {"x": .., "y": .., "w": .., "h": ..}, # if applicable
"snippet_en": "...",
"snippet_pt_br": "..."
}
]
}
4. Renders the JSON pretty-printed so the schema-to-UI contract is visible.
This is NOT the production agent; it's a smoke test that proves the wiki
schema carries everything the future chat UI will need (citations at page +
bbox, bilingual snippets, crop URLs).
Usage:
./07-test-agent.py "What UAP was observed in the Mediterranean?"
./07-test-agent.py --max-context 20 "How many redacted pages does D54 have?"
"""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
try:
import yaml
except ImportError:
sys.stderr.write("Missing pyyaml. Run: pip3 install pyyaml\n")
sys.exit(1)
# Corpus root and the directories derived from it.
UFO_ROOT = Path("/Users/guto/ufo")
WIKI_BASE = UFO_ROOT.joinpath("wiki")
CASE_BASE = UFO_ROOT.joinpath("case")
PNG_BASE = UFO_ROOT.joinpath("processing", "png")
CROPS_BASE = UFO_ROOT.joinpath("processing", "crops")

# Settings passed to the claude CLI invocation.
MODEL = "haiku"
MAX_TURNS = 3

# Number of context files surfaced when --max-context is not given.
DEFAULT_MAX_CONTEXT_FILES = 12

# Future server prefixes (placeholder; real server resolves these to actual paths)
PNG_URL_PREFIX = "/static/png"
CROP_URL_PREFIX = "/static/crops"
def utc_now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    iso_format = "%Y-%m-%dT%H:%M:%SZ"
    current = datetime.now(timezone.utc)
    return current.strftime(iso_format)
def read_md(path: Path) -> tuple[dict, str]:
    """Split a markdown file into its YAML frontmatter and its body.

    Args:
        path: Markdown file to read (decoded as UTF-8).

    Returns:
        ``(frontmatter, body)``. ``frontmatter`` is ``{}`` when the file does
        not start with ``---``, when the opening fence is never closed, when
        the YAML is invalid, or when it does not parse to a mapping. ``body``
        is the text after the closing fence, or the whole file when no
        frontmatter block was recognised.

    Raises:
        OSError: if the file cannot be read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    text = path.read_text(encoding="utf-8")
    if not text.startswith("---"):
        return {}, text
    # The closing fence must sit on a line of its own; a bare substring search
    # would stop at a "---" embedded inside a YAML value.
    fence = re.compile(r"^---[ \t]*\r?$", re.MULTILINE).search(text, 3)
    if fence is None:
        return {}, text
    body = text[fence.end():]
    try:
        meta = yaml.safe_load(text[3:fence.start()].strip())
    except yaml.YAMLError:
        return {}, body
    # Scalar or list frontmatter would break callers that use dict methods.
    return (meta if isinstance(meta, dict) else {}), body
def tokenize(text: str) -> set[str]:
    """Return the distinct lower-cased word tokens found in *text*.

    A token is a run of three or more ASCII letters, digits, or characters in
    the ``À``-``ſ`` range. Falsy input (``None`` or ``""``) gives an empty set.
    """
    words = re.findall(r"[a-zA-Z0-9À-ſ]{3,}", text or "")
    return set(map(str.lower, words))
def score_file(query_tokens: set[str], file_text: str, file_fm: dict) -> float:
    """Score a file by the fraction of query tokens it contains.

    The file's searchable vocabulary is the tokens of ``file_text`` plus the
    tokens of the ``canonical_name``, ``canonical_title`` and ``aliases``
    frontmatter values (a string, or a list whose string items are used).

    Returns:
        ``len(query_tokens & vocabulary) / max(1, len(query_tokens))``, or
        ``0.0`` when the file yields no tokens at all.
    """
    searchable = tokenize(file_text)
    if file_fm:
        for field in ("canonical_name", "canonical_title", "aliases"):
            value = file_fm.get(field)
            # Treat a lone string like a one-item list; skip any other type.
            candidates = [value] if isinstance(value, str) else value
            if isinstance(candidates, list):
                for candidate in candidates:
                    if isinstance(candidate, str):
                        searchable |= tokenize(candidate)
    if not searchable:
        return 0.0
    shared = query_tokens & searchable
    return len(shared) / max(1, len(query_tokens))
def gather_context(query: str, max_files: int) -> list[Path]:
    """Return the markdown files most relevant to *query*, best match first.

    Every ``*.md`` file under ``WIKI_BASE`` and ``CASE_BASE`` is scored with
    :func:`score_file`; files scoring zero are dropped. Files that cannot be
    read or parsed are skipped with a warning on stderr.

    Args:
        query: Free-text user question.
        max_files: Upper bound on the number of paths returned. Values below
            one yield an empty list.

    Returns:
        Up to ``max_files`` paths ordered by descending score; equal scores
        are ordered by path so the result is reproducible across runs.
    """
    # A negative bound would otherwise slice from the tail (scored[:-k]).
    if max_files < 1:
        return []
    q_tokens = tokenize(query)
    scored: list[tuple[float, Path]] = []
    for base in (WIKI_BASE, CASE_BASE):
        for p in base.rglob("*.md"):
            try:
                fm, body = read_md(p)
            except Exception as exc:  # best-effort scan: one bad file must not abort it
                sys.stderr.write(f"warning: skipping {p}: {exc}\n")
                continue
            # Frontmatter that is not a mapping carries no searchable fields.
            score = score_file(q_tokens, body, fm if isinstance(fm, dict) else {})
            if score > 0:
                scored.append((score, p))
    # rglob order is filesystem-dependent, so break score ties by path.
    scored.sort(key=lambda item: (-item[0], str(item[1])))
    return [p for _, p in scored[:max_files]]
def crop_url_for(image_id: str) -> str:
    """Return the served URL of the crop image named *image_id*, or ``""``.

    The crop's document directory is not recoverable from the id itself, so
    ``CROPS_BASE`` is searched recursively for ``<image_id>.png``.

    Args:
        image_id: Crop identifier such as ``IMG-DOCSHORT-pNNN-NN``
            (``TBL-...`` and ``SIG-...`` ids use the same layout).

    Returns:
        ``CROP_URL_PREFIX`` joined with the crop's path relative to
        ``CROPS_BASE`` using forward slashes, or ``""`` when the id is not a
        plain file-name stem or no such file exists.
    """
    # The id originates from model output: refuse anything that is not a plain
    # file-name stem, so it can neither act as a glob pattern nor walk paths.
    if not isinstance(image_id, str) or not image_id:
        return ""
    if any(ch in "*?[]/\\" for ch in image_id):
        return ""
    # Smallest path wins so duplicate names resolve the same way on every run.
    match = min(CROPS_BASE.rglob(f"{image_id}.png"), default=None)
    if match is None:
        return ""
    return f"{CROP_URL_PREFIX}/{match.relative_to(CROPS_BASE).as_posix()}"
def page_url_for(page_id: str) -> str:
    """Return the served PNG URL for *page_id*, or ``""`` if it is malformed.

    Args:
        page_id: Identifier of the form ``<doc-id>/pNNN`` where ``NNN`` is
            exactly three ASCII digits. Surrounding whitespace is ignored.

    Returns:
        ``<PNG_URL_PREFIX>/<doc-id>/p-NNN.png``, or ``""`` when *page_id* is
        not a string or does not have the expected form.
    """
    # Model output may carry null or another non-string value here.
    if not isinstance(page_id, str):
        return ""
    # [0-9] rather than \d: \d also accepts non-ASCII Unicode digits.
    found = re.fullmatch(r"(.+)/p([0-9]{3})", page_id.strip())
    if found is None:
        return ""
    doc_id, num = found.groups()
    return f"{PNG_URL_PREFIX}/{doc_id}/p-{num}.png"
def build_system_prompt() -> str:
    """Return the fixed system prompt that requests the structured JSON answer.

    The prompt describes the corpus, the exact JSON shape (bilingual answer
    plus ``page`` and ``entity`` citations) and the citation rules.
    """
    prompt = """You are a research assistant for the war.gov/ufo UAP/UFO document corpus.
The user asks a question. You receive a set of markdown files from a curated wiki (Karpathy-style LLM wiki) plus case-investigation artifacts. Each file's frontmatter carries strict provenance: doc_id, page_id, bbox coordinates, classifications, etc. Body text is bilingual (EN + PT-BR).
Your output MUST be a single JSON object with this exact shape (no markdown fence, no commentary, no preamble):
{
"answer_en": "2-5 sentence English answer grounded in the provided files. Every factual claim must be traceable to a citation below.",
"answer_pt_br": "Same answer translated to Brazilian Portuguese (pt-br). Use Brazilian vocabulary and spelling.",
"citations": [
{
"kind": "page",
"page_id": "doc-id/pNNN",
"snippet_en": "short verbatim or near-verbatim excerpt supporting the claim (English)",
"snippet_pt_br": "same in Brazilian Portuguese",
"bbox": null
},
{
"kind": "entity",
"entity_link": "[[loc/aegean-sea-off-santorini-greece]] or similar wiki-link",
"snippet_en": "...",
"snippet_pt_br": "..."
}
]
}
Rules:
- ONLY cite files that you were given. Do not invent page_ids or entity links.
- snippet_en and snippet_pt_br must be SHORT (1-2 sentences each).
- Brazilian Portuguese only for *_pt_br fields. Preserve UTF-8 accents.
- Verbatim quotes FROM the source documents stay in their original language (English) inside snippets only the surrounding narrative is translated.
- If no file supports an answer, return: {"answer_en":"Insufficient evidence in corpus.","answer_pt_br":"Evidências insuficientes no corpus.","citations":[]}
- Output ONLY the JSON. No fence."""
    return prompt
def call_claude(user_prompt: str, system_prompt: str) -> dict:
    """Run one non-interactive ``claude`` CLI call and parse its JSON answer.

    Args:
        user_prompt: Prompt passed as the CLI's positional argument.
        system_prompt: Text passed via ``--append-system-prompt``.

    Returns:
        ``{"parsed": <answer object>, "meta": {...}}`` where ``parsed`` is the
        JSON object decoded from the CLI's ``result`` field and ``meta`` holds
        ``duration_ms``, ``total_cost_usd`` and ``session_id`` as reported by
        the CLI (``None`` when the CLI did not report them).

    Raises:
        RuntimeError: if the CLI cannot be started, times out, exits non-zero,
            prints nothing, reports an error, or returns output that is not
            the expected JSON object.
    """
    cmd = [
        "claude",
        "-p",
        "--model", MODEL,
        "--output-format", "json",
        "--max-turns", str(MAX_TURNS),
        "--allowedTools", "Read",
        "--add-dir", str(UFO_ROOT),
        "--append-system-prompt", system_prompt,
        "--",  # everything after this is the prompt, never an option
        user_prompt,
    ]
    try:
        res = subprocess.run(cmd, capture_output=True, text=True, timeout=600, check=False)
    except FileNotFoundError as e:
        raise RuntimeError("claude CLI not found on PATH") from e
    except subprocess.TimeoutExpired as e:
        raise RuntimeError(f"claude CLI timed out after {e.timeout:.0f}s") from e
    if res.returncode != 0:
        raise RuntimeError(f"claude CLI failed (rc={res.returncode}): {res.stderr[-1000:]}")
    if not res.stdout.strip():
        raise RuntimeError(f"claude CLI returned empty stdout. stderr: {res.stderr[-1000:]}")
    try:
        cli = json.loads(res.stdout)
    except json.JSONDecodeError as e:
        raise RuntimeError(
            f"claude CLI returned invalid JSON: {e}. stdout[:500]={res.stdout[:500]!r}"
        ) from e
    if not isinstance(cli, dict):
        raise RuntimeError(
            f"claude CLI returned JSON that is not an object. stdout[:500]={res.stdout[:500]!r}"
        )
    # ``result`` may be missing or null; normalise so slicing/stripping is safe.
    raw_result = cli.get("result")
    if not isinstance(raw_result, str):
        raw_result = ""
    if cli.get("is_error"):
        raise RuntimeError(f"claude reported error: {raw_result[:500]}")
    result = raw_result.strip()
    # The model is told not to fence its reply, but tolerate a ``` wrapper.
    if result.startswith("```"):
        result = re.sub(r"^```(?:json)?\s*", "", result)
        result = re.sub(r"\s*```$", "", result)
    try:
        parsed = json.loads(result)
    except json.JSONDecodeError as e:
        raise RuntimeError(
            f"model reply is not valid JSON: {e}. reply[:500]={result[:500]!r}"
        ) from e
    if not isinstance(parsed, dict):
        raise RuntimeError(f"model reply is not a JSON object. reply[:500]={result[:500]!r}")
    return {
        "parsed": parsed,
        "meta": {
            "duration_ms": cli.get("duration_ms"),
            "total_cost_usd": cli.get("total_cost_usd"),
            "session_id": cli.get("session_id"),
        },
    }
def enrich_citations(parsed: dict) -> dict:
    """Attach resolved image URLs to the citations of a model answer, in place.

    Citations with ``kind == "page"`` get ``png_url`` from
    :func:`page_url_for` (``""`` when ``page_id`` is missing or not a string).
    Citations with ``kind == "crop"`` and a non-empty string ``crop_id`` get
    ``crop_url`` from :func:`crop_url_for`. Everything else is left untouched.

    Args:
        parsed: Decoded model answer; its ``citations`` value is expected to be
            a list of dicts, but malformed shapes are tolerated because the
            content comes from model output.

    Returns:
        The same ``parsed`` object that was passed in.
    """
    if not isinstance(parsed, dict):
        return parsed
    citations = parsed.get("citations")
    # ``"citations": null`` or a non-list value has nothing to enrich.
    if not isinstance(citations, list):
        return parsed
    for cit in citations:
        if not isinstance(cit, dict):
            continue
        kind = cit.get("kind")
        if kind == "page":
            pid = cit.get("page_id")
            cit["png_url"] = page_url_for(pid) if isinstance(pid, str) else ""
        elif kind == "crop":
            crop_id = cit.get("crop_id")
            if isinstance(crop_id, str) and crop_id:
                cit["crop_url"] = crop_url_for(crop_id)
    return parsed
def main() -> None:
    """CLI entry point: gather context, query the model, print the answer.

    Parses the command line, selects context files with
    :func:`gather_context`, asks the model via :func:`call_claude`, enriches
    the citations and pretty-prints the resulting JSON. Exits with status 1
    when the model call fails.
    """
    ap = argparse.ArgumentParser(description="Minimal chat-agent smoke test for the UFO wiki.")
    ap.add_argument("query", help="user question (in English or PT-BR)")
    ap.add_argument("--max-context", type=int, default=DEFAULT_MAX_CONTEXT_FILES,
                    help=f"max number of markdown files to surface as context (default {DEFAULT_MAX_CONTEXT_FILES})")
    args = ap.parse_args()
    # Zero or negative limits would select nothing (or slice from the tail).
    if args.max_context < 1:
        ap.error("--max-context must be at least 1")

    print(f"Query: {args.query}\n", flush=True)
    print(f"Gathering context (max {args.max_context} files)...", flush=True)
    context_files = gather_context(args.query, args.max_context)
    for f in context_files:
        print(f" - {f.relative_to(UFO_ROOT)}", flush=True)
    if not context_files:
        print(" (no relevant files found)", flush=True)
        result = {
            "answer_en": "No relevant files found in the wiki.",
            "answer_pt_br": "Nenhum arquivo relevante encontrado.",
            "citations": [],
        }
        print("\n" + json.dumps(result, indent=2, ensure_ascii=False))
        return

    # Build user prompt: list of file paths for the agent to Read
    file_list = "\n".join(f"- {p.relative_to(UFO_ROOT)}" for p in context_files)
    user_prompt = (
        f"User question:\n{args.query}\n\n"
        f"Read the following files from {UFO_ROOT}/ "
        f"(use the Read tool on each one as needed):\n{file_list}\n\n"
        f"Then output the structured JSON answer per the system prompt."
    )
    print("\nCalling Haiku...", flush=True)
    try:
        out = call_claude(user_prompt, build_system_prompt())
    except Exception as e:
        sys.stderr.write(f"FATAL: {e}\n")
        sys.exit(1)
    parsed = enrich_citations(out["parsed"])

    meta = out["meta"]
    # The meta keys are always present but may hold None, so a .get() default
    # would never apply; fall back explicitly before formatting.
    cost_usd = meta.get("total_cost_usd") or 0
    latency_s = (meta.get("duration_ms") or 0) / 1000
    print(f"\n=== Agent reply (cost ${cost_usd:.4f}, "
          f"latency {latency_s:.1f}s) ===\n", flush=True)
    print(json.dumps(parsed, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    main()