disclosure-bureau/scripts/07-test-agent.py

#!/usr/bin/env python3
"""
07-test-agent.py — Minimal chat-agent CLI that validates the schema end-to-end

Simulates one chat-bubble round trip:
  1. User asks a free-text query.
  2. Agent walks wiki/ + case/ and collects the most relevant markdown files as context.
  3. Calls Claude Haiku (via claude CLI OAuth — same path as 02-vision-page.py)
     with a system prompt that asks for STRUCTURED output:

        {
          "answer_en": "...",
          "answer_pt_br": "...",
          "citations": [
            {
              "kind": "page|crop|entity",
              "page_id": "doc-id/pNNN",          # for kind=page/crop
              "entity_link": "[[loc/.../...]]",  # for kind=entity
              "png_url": "/static/png/doc-id/p-NNN.png",
              "crop_url": "/static/crops/doc-id/CROP-ID.png",  # if available
              "bbox": {"x": .., "y": .., "w": .., "h": ..},     # if applicable
              "snippet_en": "...",
              "snippet_pt_br": "..."
            }
          ]
        }

  4. Renders the JSON pretty-printed so the schema-to-UI contract is visible.

This is NOT the production agent — it's a smoke test that proves the wiki
schema carries everything the future chat UI will need (citations at page +
bbox, bilingual snippets, crop URLs).

Usage:
  ./07-test-agent.py "What UAP was observed in the Mediterranean?"
  ./07-test-agent.py --max-context 20 "How many redacted pages does D54 have?"
"""
from __future__ import annotations

import argparse
import json
import re
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path

try:
    import yaml
except ImportError:
    sys.stderr.write("Missing pyyaml. Run: pip3 install pyyaml\n")
    sys.exit(1)


# Root of the local corpus checkout. Hard-coded absolute path; it is also
# handed to the claude CLI via --add-dir and used to relativize paths in main().
UFO_ROOT = Path("/Users/guto/ufo")
# Directories scanned for *.md context files by gather_context().
WIKI_BASE = UFO_ROOT / "wiki"
CASE_BASE = UFO_ROOT / "case"
# Rendered page PNGs. NOTE(review): not referenced by the code visible in this file.
PNG_BASE = UFO_ROOT / "processing" / "png"
# Crop images; searched recursively by crop_url_for().
CROPS_BASE = UFO_ROOT / "processing" / "crops"

# Passed to the claude CLI as --model and --max-turns respectively.
MODEL = "haiku"
MAX_TURNS = 3
# Default for the --max-context command-line option.
DEFAULT_MAX_CONTEXT_FILES = 12

# Future server prefixes (placeholder; real server resolves these to actual paths)
# Used by page_url_for() and crop_url_for() to build citation URLs.
PNG_URL_PREFIX = "/static/png"
CROP_URL_PREFIX = "/static/crops"


def utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    current = datetime.now(timezone.utc)
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")


def read_md(path: Path) -> tuple[dict, str]:
    """Read a markdown file and split it into ``(frontmatter, body)``.

    The file is treated as having YAML frontmatter only when it starts with
    ``---`` and a later line consists solely of ``---`` (optionally followed
    by spaces/tabs). The body is everything after that closing ``---``.

    Args:
        path: Markdown file to read (decoded as UTF-8).

    Returns:
        A tuple ``(frontmatter, body)``. ``frontmatter`` is always a dict: it
        is empty when there is no frontmatter, when the closing delimiter is
        missing, when the YAML fails to parse, or when the YAML is not a
        mapping. In the first two cases ``body`` is the whole file text.

    Raises:
        OSError: If the file cannot be read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    text = path.read_text(encoding="utf-8")
    if not text.startswith("---"):
        return {}, text

    # The closing fence must sit on a line of its own. Searching for a bare
    # "---" substring would also match dashes inside a YAML value
    # (e.g. "title: a---b") and cut the frontmatter short.
    # Starting at pos=3 skips the opening fence; with a `pos` argument "^"
    # still only matches at real line starts.
    closing = re.compile(r"^---[ \t]*\r?$", re.MULTILINE).search(text, 3)
    if closing is None:
        return {}, text

    fence_at = closing.start()
    body = text[fence_at + 3:]
    try:
        meta = yaml.safe_load(text[3:fence_at].strip())
    except yaml.YAMLError:
        return {}, body
    # safe_load may yield None, a scalar, or a list; callers expect a mapping.
    if not isinstance(meta, dict):
        meta = {}
    return meta, body


def tokenize(text: str) -> set[str]:
    """Return the set of lower-cased word tokens found in *text*.

    A token is a run of three or more ASCII letters/digits or characters in
    the range À-ſ. Falsy input (``None``, ``""``) yields an empty set.
    """
    if not text:
        return set()
    found = re.finditer(r"[a-zA-Z0-9À-ſ]{3,}", text)
    return set(map(str.lower, (hit.group(0) for hit in found)))


def score_file(query_tokens: set[str], file_text: str, file_fm: dict) -> float:
    """Score a file against the query by simple keyword overlap.

    The file's vocabulary is the token set of its body plus the tokens of the
    ``canonical_name``, ``canonical_title`` and ``aliases`` frontmatter fields
    (each either a string or a list of strings). The score is the number of
    query tokens present in that vocabulary divided by the number of query
    tokens; it is ``0.0`` when the file has no tokens at all.
    """
    vocabulary = tokenize(file_text)

    if file_fm:
        for field in ("canonical_name", "canonical_title", "aliases"):
            value = file_fm.get(field)
            # Normalise "one string" and "list of things" to a single iterable.
            if isinstance(value, str):
                candidates = [value]
            elif isinstance(value, list):
                candidates = value
            else:
                candidates = []
            for candidate in candidates:
                if isinstance(candidate, str):
                    vocabulary |= tokenize(candidate)

    if not vocabulary:
        return 0.0

    shared = query_tokens & vocabulary
    return len(shared) / max(1, len(query_tokens))


def gather_context(query: str, max_files: int) -> list[Path]:
    """Return the markdown paths most relevant to *query*, best first.

    Every ``*.md`` file under ``WIKI_BASE`` and ``CASE_BASE`` is scored with
    ``score_file``; files with a zero score are dropped.

    Args:
        query: Free-text user question.
        max_files: Maximum number of paths to return. Zero or a negative
            value yields an empty list.

    Returns:
        At most ``max_files`` paths ordered by descending score. Ties are
        broken by path so the result does not depend on directory traversal
        order.
    """
    # A negative limit would otherwise slice off the tail of the ranking
    # (scored[:-n]) instead of returning nothing.
    if max_files <= 0:
        return []

    q_tokens = tokenize(query)
    scored: list[tuple[float, Path]] = []
    for base in (WIKI_BASE, CASE_BASE):
        for p in base.rglob("*.md"):
            try:
                fm, body = read_md(p)
            except Exception as exc:
                # Best effort: one unreadable file must not abort the scan,
                # but it should not disappear silently either.
                sys.stderr.write(f"warning: skipping unreadable file {p}: {exc}\n")
                continue
            if not isinstance(fm, dict):
                # Frontmatter that is not a mapping carries no usable fields.
                fm = {}
            score = score_file(q_tokens, body, fm)
            if score > 0:
                scored.append((score, p))

    scored.sort(key=lambda item: (-item[0], item[1].as_posix()))
    return [p for _, p in scored[:max_files]]


def crop_url_for(image_id: str) -> str:
    """Return the URL for the crop image named ``<image_id>.png``.

    The crop id (``IMG-DOCSHORT-pNNN-NN``, ``TBL-...``, ``SIG-...``) encodes
    the document only compactly, so the file is located by scanning
    ``CROPS_BASE`` recursively rather than by building its path.

    Args:
        image_id: Crop identifier. Only word characters, ``.`` and ``-`` are
            accepted.

    Returns:
        ``CROP_URL_PREFIX`` joined with the crop's POSIX-style path relative
        to ``CROPS_BASE``, or ``""`` when the id is invalid or no file
        matches. If several files match, the first in sorted path order wins.
    """
    # The id originates from model output and is interpolated into a glob
    # pattern: reject wildcards and path separators instead of expanding them.
    if not isinstance(image_id, str) or not re.fullmatch(r"[\w.\-]+", image_id):
        return ""

    matches = sorted(CROPS_BASE.rglob(f"{image_id}.png"))
    if not matches:
        return ""
    rel = matches[0].relative_to(CROPS_BASE)
    # as_posix() keeps forward slashes in the URL regardless of host OS.
    return f"{CROP_URL_PREFIX}/{rel.as_posix()}"


def page_url_for(page_id: str) -> str:
    """Map a page id to its PNG URL.

    ``page_id`` has the form ``<doc-id>/pNNN`` (exactly three digits); the
    resulting URL is ``PNG_URL_PREFIX/<doc-id>/p-NNN.png``. An id that does
    not have that form yields ``""``.
    """
    parsed = re.search(r"^(.+)/p(\d{3})$", page_id)
    if parsed is None:
        return ""
    document, page_number = parsed.groups()
    return f"{PNG_URL_PREFIX}/{document}/p-{page_number}.png"


def build_system_prompt() -> str:
    """Return the fixed system prompt that asks the model for a single JSON object.

    The prompt describes the expected shape (``answer_en``, ``answer_pt_br``
    and a ``citations`` list) and the citation/translation rules.
    """
    prompt = """You are a research assistant for the war.gov/ufo UAP/UFO document corpus.

The user asks a question. You receive a set of markdown files from a curated wiki (Karpathy-style LLM wiki) plus case-investigation artifacts. Each file's frontmatter carries strict provenance: doc_id, page_id, bbox coordinates, classifications, etc. Body text is bilingual (EN + PT-BR).

Your output MUST be a single JSON object with this exact shape (no markdown fence, no commentary, no preamble):

{
  "answer_en": "2-5 sentence English answer grounded in the provided files. Every factual claim must be traceable to a citation below.",
  "answer_pt_br": "Same answer translated to Brazilian Portuguese (pt-br). Use Brazilian vocabulary and spelling.",
  "citations": [
    {
      "kind": "page",
      "page_id": "doc-id/pNNN",
      "snippet_en": "short verbatim or near-verbatim excerpt supporting the claim (English)",
      "snippet_pt_br": "same in Brazilian Portuguese",
      "bbox": null
    },
    {
      "kind": "entity",
      "entity_link": "[[loc/aegean-sea-off-santorini-greece]] or similar wiki-link",
      "snippet_en": "...",
      "snippet_pt_br": "..."
    }
  ]
}

Rules:
- ONLY cite files that you were given. Do not invent page_ids or entity links.
- snippet_en and snippet_pt_br must be SHORT (1-2 sentences each).
- Brazilian Portuguese only for *_pt_br fields. Preserve UTF-8 accents.
- Verbatim quotes FROM the source documents stay in their original language (English) inside snippets — only the surrounding narrative is translated.
- If no file supports an answer, return: {"answer_en":"Insufficient evidence in corpus.","answer_pt_br":"Evidências insuficientes no corpus.","citations":[]}
- Output ONLY the JSON. No fence."""
    return prompt


def _parse_agent_reply(raw: str) -> dict:
    """Turn the model's reply text into the structured answer dict.

    A surrounding markdown code fence is stripped. If the text still is not
    valid JSON, the span from the first ``{`` to the last ``}`` is tried, so
    a stray preamble or trailing remark does not discard a usable answer.

    Raises:
        RuntimeError: If no JSON object can be parsed from the reply.
    """
    text = raw.strip()
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text)
        text = re.sub(r"\s*```$", "", text)

    try:
        payload = json.loads(text)
    except json.JSONDecodeError as first_err:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise RuntimeError(
                f"model reply is not valid JSON: {first_err}. reply[:500]={text[:500]!r}"
            ) from first_err
        try:
            payload = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            raise RuntimeError(
                f"model reply is not valid JSON: {first_err}. reply[:500]={text[:500]!r}"
            ) from first_err

    if not isinstance(payload, dict):
        raise RuntimeError(
            f"model reply is JSON but not an object (got {type(payload).__name__})"
        )
    return payload


def call_claude(user_prompt: str, system_prompt: str) -> dict:
    """Run the ``claude`` CLI once and return the parsed structured answer.

    The CLI is invoked in print mode with JSON output, the ``Read`` tool
    allowed and ``UFO_ROOT`` added as a readable directory.

    Args:
        user_prompt: The prompt text passed as the positional argument.
        system_prompt: Text passed via ``--append-system-prompt``.

    Returns:
        ``{"parsed": <dict parsed from the model reply>, "meta": {...}}``
        where ``meta`` holds ``duration_ms``, ``total_cost_usd`` and
        ``session_id`` as reported by the CLI (each may be ``None``).

    Raises:
        RuntimeError: If the CLI is missing, times out, exits non-zero,
            prints nothing or invalid JSON, reports an error, or the model
            reply cannot be parsed as a JSON object.
    """
    cmd = [
        "claude",
        "-p",
        "--model", MODEL,
        "--output-format", "json",
        "--max-turns", str(MAX_TURNS),
        "--allowedTools", "Read",
        "--add-dir", str(UFO_ROOT),
        "--append-system-prompt", system_prompt,
        "--",
        user_prompt,
    ]
    try:
        res = subprocess.run(cmd, capture_output=True, text=True, timeout=600, check=False)
    except FileNotFoundError as e:
        raise RuntimeError("claude CLI not found on PATH") from e
    except subprocess.TimeoutExpired as e:
        raise RuntimeError(f"claude CLI timed out after {e.timeout}s") from e

    if res.returncode != 0:
        raise RuntimeError(f"claude CLI failed (rc={res.returncode}): {res.stderr[-1000:]}")
    if not res.stdout.strip():
        raise RuntimeError(f"claude CLI returned empty stdout. stderr: {res.stderr[-1000:]}")
    try:
        cli = json.loads(res.stdout)
    except json.JSONDecodeError as e:
        raise RuntimeError(
            f"claude CLI returned invalid JSON: {e}. stdout[:500]={res.stdout[:500]!r}"
        ) from e
    if not isinstance(cli, dict):
        raise RuntimeError(f"claude CLI returned unexpected JSON: stdout[:500]={res.stdout[:500]!r}")

    # "result" can be absent or null (presumably when the run ends without a
    # final message — TODO confirm), so never call string methods on it blindly.
    result = cli.get("result")
    if cli.get("is_error"):
        raise RuntimeError(f"claude reported error: {str(result or '')[:500]}")
    if not isinstance(result, str) or not result.strip():
        raise RuntimeError(f"claude returned no result text (subtype={cli.get('subtype')!r})")

    return {
        "parsed": _parse_agent_reply(result),
        "meta": {
            "duration_ms": cli.get("duration_ms"),
            "total_cost_usd": cli.get("total_cost_usd"),
            "session_id": cli.get("session_id"),
        },
    }


def enrich_citations(parsed: dict) -> dict:
    """Add ``png_url`` / ``crop_url`` to citations where they can be derived.

    * ``kind == "page"``: ``png_url`` is always set from ``page_id`` (``""``
      when the id is missing or malformed).
    * ``kind == "crop"``: ``png_url`` is set when ``page_id`` resolves to a
      URL, and ``crop_url`` is set when a non-empty ``crop_id`` is present.
    * Any other kind is left untouched.

    The input comes from model output, so a missing or non-list
    ``citations`` value, non-dict entries and non-string ids are tolerated
    rather than raising.

    Args:
        parsed: The structured answer; modified in place.

    Returns:
        The same ``parsed`` object.
    """
    if not isinstance(parsed, dict):
        return parsed
    citations = parsed.get("citations")
    if not isinstance(citations, list):
        return parsed

    for cit in citations:
        if not isinstance(cit, dict):
            continue
        kind = cit.get("kind")
        pid = cit.get("page_id")
        if kind == "page":
            cit["png_url"] = page_url_for(pid) if isinstance(pid, str) else ""
        elif kind == "crop":
            # A crop citation also carries a page_id, so it gets the page PNG too.
            if isinstance(pid, str) and pid:
                url = page_url_for(pid)
                if url:
                    cit["png_url"] = url
            crop_id = cit.get("crop_id")
            if isinstance(crop_id, str) and crop_id:
                cit["crop_url"] = crop_url_for(crop_id)
    return parsed


def main() -> None:
    """Command-line entry point: one query in, one enriched JSON answer out.

    Steps: parse arguments, rank context files with ``gather_context``, ask
    the model via ``call_claude`` to read them and answer, enrich the
    citations with URLs, and pretty-print the result. When no context file
    matches, a fixed "no relevant files" answer is printed and the model is
    not called. Any failure in ``call_claude`` is reported on stderr and the
    process exits with status 1.
    """
    ap = argparse.ArgumentParser(description="Minimal chat-agent smoke test for the UFO wiki.")
    ap.add_argument("query", help="user question (in English or PT-BR)")
    ap.add_argument("--max-context", type=int, default=DEFAULT_MAX_CONTEXT_FILES,
                    help=f"max number of markdown files to surface as context (default {DEFAULT_MAX_CONTEXT_FILES})")
    args = ap.parse_args()

    print(f"Query: {args.query}\n", flush=True)
    print(f"Gathering context (max {args.max_context} files)...", flush=True)
    context_files = gather_context(args.query, args.max_context)
    for f in context_files:
        print(f"  - {f.relative_to(UFO_ROOT)}", flush=True)
    if not context_files:
        print("  (no relevant files found)", flush=True)
        result = {"answer_en": "No relevant files found in the wiki.", "answer_pt_br": "Nenhum arquivo relevante encontrado.", "citations": []}
        print("\n" + json.dumps(result, indent=2, ensure_ascii=False))
        return

    # Build user prompt: list of file paths for the agent to Read.
    # The root shown to the model is derived from UFO_ROOT so it cannot drift
    # from the directory that was actually scanned.
    file_list = "\n".join(f"- {p.relative_to(UFO_ROOT)}" for p in context_files)
    user_prompt = (
        f"User question:\n{args.query}\n\n"
        f"Read the following files from {UFO_ROOT}/ "
        f"(use the Read tool on each one as needed):\n{file_list}\n\n"
        f"Then output the structured JSON answer per the system prompt."
    )

    print("\nCalling Haiku...", flush=True)
    try:
        out = call_claude(user_prompt, build_system_prompt())
    except Exception as e:
        sys.stderr.write(f"FATAL: {e}\n")
        sys.exit(1)

    parsed = enrich_citations(out["parsed"])
    # The meta keys are always present but may hold None, so a .get() default
    # is never used; coerce None to 0 before formatting.
    meta = out["meta"]
    cost_usd = meta.get("total_cost_usd") or 0
    latency_s = (meta.get("duration_ms") or 0) / 1000
    print(f"\n=== Agent reply (cost ${cost_usd:.4f}, "
          f"latency {latency_s:.1f}s) ===\n", flush=True)
    print(json.dumps(parsed, indent=2, ensure_ascii=False))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()