- TD#8 hybrid.ts: rerank_strategy {always|when_top_k_gt|never} + threshold
(default skips rerank for top_k ≤ 15; chat tool uses threshold 10)
- O11 vision.ts + tools.ts: analyze_image_region tool — sharp-crops the
bbox, claude CLI reads the temp PNG via Read tool, Sonnet vision answers
- TD#12 /graph: SigmaGraph replaces ForceGraphCanvas; react-force-graph-2d
uninstalled (-37 transitive deps); force-graph-canvas.tsx deleted
- TD#27 messages/route.ts gatherContext slice sizes via CTX_* env vars
- TD#22 tests/rag/: golden.yaml (15 queries) + run.py (Recall@k + MRR +
negative-pass rate) + baseline.json + CI job in .forgejo/workflows/ci.yml
- docs/adrs/: ADR-001..005 published from systems-atelier deliverables
Verified live on disclosure.top: top_k=5 path skips rerank (6.7s embed-only,
was 12-15s with rerank); rerank=always still available on demand.
First RAG baseline: Recall@5 = 0.2083, MRR = 0.25, Negative pass = 1.0.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
83 lines
2.2 KiB
YAML
83 lines
2.2 KiB
YAML
# Workflow identity and triggers.
name: CI

# Runs on every push to main and on every pull request.
# (`on` is left unquoted, as the Actions workflow loader expects.)
on:
  push:
    branches:
      - main
  pull_request:
jobs:
|
|
web:
  # Gate for the project under web/: install dependencies, type-check,
  # lint (when a lint script exists) and produce a production build.
  # Every `run` step executes inside web/ via `defaults.run`.
  name: Web — typecheck + lint + build
  runs-on: ubuntu-latest
  container:
    image: node:20-bookworm
  defaults:
    run:
      working-directory: web
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # `npm ci` is tried first; on failure the step falls back to
    # `npm install`. NOTE(review): the fallback can hide a lockfile that is
    # out of sync with package.json — confirm it is still wanted.
    - name: Install (legacy-peer-deps — @react-sigma/core requires it)
      run: npm ci --legacy-peer-deps || npm install --legacy-peer-deps

    - name: Type-check
      run: npx tsc --noEmit

    # `--if-present` already exits 0 when package.json has no lint script,
    # so no `|| echo` fallback is used: a real lint failure must fail the job.
    - name: Lint
      run: npm run lint --if-present

    # The NEXT_PUBLIC_* values are supplied only to this step.
    # NOTE(review): the anon key is a placeholder — presumably the build only
    # needs the variable to be defined; confirm.
    - name: Production build
      run: npm run build
      env:
        NEXT_PUBLIC_SUPABASE_URL: https://api.disclosure.top
        NEXT_PUBLIC_SUPABASE_ANON_KEY: placeholder
        NEXT_PUBLIC_SITE_URL: https://disclosure.top
|
|
|
|
python:
  # Smoke checks for the repository's Python scripts and config files:
  # byte-compile everything under scripts/ and confirm the docker-compose
  # file parses as YAML.
  name: Scripts — Python smoke
  runs-on: ubuntu-latest
  container:
    image: python:3.11-bookworm
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    # The extras spec is quoted so the shell cannot interpret `[binary]`
    # as a glob character class.
    - name: Python tooling
      run: pip install --quiet pyyaml 'psycopg[binary]' requests

    # No `|| true`: a script that fails to compile must fail this step,
    # otherwise the syntax check cannot catch anything.
    - name: Compile scripts (syntax check)
      run: python -m compileall -q scripts/

    # The two Markdown files are reported but their absence is not fatal;
    # only an unparseable docker-compose.yml fails the step.
    - name: Validate canonical YAML configs
      run: |
        for f in CLAUDE.md CLAUDE-schema-full.md; do
          if [ -f "$f" ]; then
            echo " ✓ $f present"
          else
            echo " ! $f missing"
          fi
        done
        python -c "import yaml; yaml.safe_load(open('infra/disclosure-stack/docker-compose.yml'))"
        echo " ✓ docker-compose.yml is valid YAML"
|
|
|
|
audit:
  # Advisory dependency audit for web/: reports high-severity (or worse)
  # findings in non-dev dependencies. The `|| echo` fallback keeps the job
  # green on findings, so the result is informational only.
  name: Web — npm audit
  runs-on: ubuntu-latest
  container:
    image: node:20-bookworm
  defaults:
    run:
      working-directory: web
  steps:
    - uses: actions/checkout@v4
    # `--omit=dev` alone selects production dependencies; the deprecated
    # `--production` flag is not passed alongside it.
    - run: npm audit --omit=dev --audit-level=high || echo "audit findings — see job output"
|
|
|
|
rag-eval:
  # Runs tests/rag/run.py against the live site at https://disclosure.top
  # with top-k 5 and reranking disabled.
  name: Retrieval — golden set (Recall@5 + MRR)
  runs-on: ubuntu-latest
  container:
    image: python:3.11-bookworm
  steps:
    - uses: actions/checkout@v4
    - run: pip install --quiet pyyaml
    - name: Run RAG eval against production
      # NOTE(review): presumably run.py reads MAX_RECALL_DROP as the allowed
      # recall regression versus the stored baseline — confirm in run.py.
      env:
        MAX_RECALL_DROP: '0.05'
      # Folded scalar: the lines below join into one single-line command.
      run: >-
        python3 tests/rag/run.py
        --url https://disclosure.top
        --top-k 5
        --rerank never
|