- TD#8 hybrid.ts: rerank_strategy {always|when_top_k_gt|never} + threshold
(default skips rerank for top_k ≤ 15; chat tool uses threshold 10)
- O11 vision.ts + tools.ts: analyze_image_region tool — sharp-crops the
bbox, claude CLI reads the temp PNG via Read tool, Sonnet vision answers
- TD#12 /graph: SigmaGraph replaces ForceGraphCanvas; react-force-graph-2d
uninstalled (-37 transitive deps); force-graph-canvas.tsx deleted
- TD#27 messages/route.ts gatherContext slice sizes via CTX_* env vars
- TD#22 tests/rag/: golden.yaml (15 queries) + run.py (Recall@k + MRR +
negative-pass rate) + baseline.json + CI job in .forgejo/workflows/ci.yml
- docs/adrs/: ADR-001..005 published from systems-atelier deliverables
Verified live on disclosure.top: top_k=5 path skips rerank (6.7s embed-only,
was 12-15s with rerank); rerank=always still available on demand.
First RAG baseline: Recall@5 = 0.2083, MRR = 0.25, Negative pass = 1.0.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
128 lines
No EOL
2.4 KiB
JSON
{
  "k": 5,
  "n_queries": 15,
  "n_positive": 12,
  "n_negative": 3,
  "recall_at_k": 0.2083,
  "mrr": 0.25,
  "negative_pass_rate": 1.0,
  "per_query": [
    {
      "id": "q01-arnold-mt-rainier",
      "negative": false,
      "recall_at_k": 0.5,
      "mrr": 1.0,
      "n_expected": 2,
      "n_present": 1
    },
    {
      "id": "q02-maury-island-hoax",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q03-rhodes-phoenix-photo",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q04-chiles-whitted",
      "negative": false,
      "recall_at_k": 1.0,
      "mrr": 1.0,
      "n_expected": 1,
      "n_present": 1
    },
    {
      "id": "q05-gorman-dogfight",
      "negative": true,
      "ok": true,
      "n_hits": 0
    },
    {
      "id": "q06-mantell-crash",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q07-sandia-1948-1950",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q08-pajarito-astronomers",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q09-james-tuck-correspondence",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q10-cometa-report",
      "negative": false,
      "recall_at_k": 1.0,
      "mrr": 1.0,
      "n_expected": 1,
      "n_present": 1
    },
    {
      "id": "q11-apollo-17-flash",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q12-usper-narrative",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    },
    {
      "id": "q13-uss-nimitz-tic-tac",
      "negative": true,
      "ok": true,
      "n_hits": 0
    },
    {
      "id": "q14-mj-12",
      "negative": true,
      "ok": true,
      "n_hits": 0
    },
    {
      "id": "q15-roswell",
      "negative": false,
      "recall_at_k": 0.0,
      "mrr": 0.0,
      "n_expected": 1,
      "n_present": 0
    }
  ],
  "url": "https://disclosure.top",
  "top_k": 5,
  "rerank": "never"
}