- TD#8 hybrid.ts: rerank_strategy {always|when_top_k_gt|never} + threshold
(default skips rerank for top_k ≤ 15; chat tool uses threshold 10)
- O11 vision.ts + tools.ts: analyze_image_region tool — sharp-crops the
bbox, claude CLI reads the temp PNG via Read tool, Sonnet vision answers
- TD#12 /graph: SigmaGraph replaces ForceGraphCanvas; react-force-graph-2d
uninstalled (-37 transitive deps); force-graph-canvas.tsx deleted
- TD#27 messages/route.ts gatherContext slice sizes via CTX_* env vars
- TD#22 tests/rag/: golden.yaml (15 queries) + run.py (Recall@k + MRR +
negative-pass rate) + baseline.json + CI job in .forgejo/workflows/ci.yml
- docs/adrs/: ADR-001..005 published from systems-atelier deliverables
Verified live on disclosure.top: top_k=5 path skips rerank (6.7s embed-only,
was 12-15s with rerank); rerank=always still available on demand.
First RAG baseline: Recall@5 = 0.2083, MRR = 0.25, Negative pass = 1.0.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
200 lines
6.7 KiB
TypeScript
200 lines
6.7 KiB
TypeScript
/**
 * Hybrid retrieval: BM25 (tsvector) + dense (pgvector) → RRF fusion → reranker.
 *
 * Stage 1 (in Postgres via public.hybrid_search_chunks RPC):
 *   - tsvector keyword recall on en_unaccent or pt_unaccent
 *   - dense cosine on BGE-M3 embedding (1024 dim)
 *   - RRF score combines both rankings
 *   - filters: doc_id, type, classification, ufo_only
 *
 * Stage 2 (in Node via embed-service /rerank):
 *   - Cross-encoder rerank of top-N candidates
 *
 * Returns chunks sorted by final reranked score (or in the RPC's RRF order
 * when the reranker is skipped), with all metadata for citation rendering
 * (bbox, page, type, classification, image refs).
 */
|
||
import { embedQuery, rerank, toPgVectorLiteral } from "./embed";
|
||
import { pgQuery } from "./db";
|
||
|
||
/**
 * A single chunk row as returned by the retrieval helpers in this module.
 *
 * The same shape is used for ranked search results (`hybridSearch`) and for
 * direct lookups (`getChunk`, `getPageChunks`); the direct lookups fill the
 * ranking fields with neutral values (`score = 0`, ranks `null`).
 */
export interface ChunkHit {
  /** Numeric `chunk_pk` column of the chunk row. */
  chunk_pk: number;
  /** Identifier of the document the chunk belongs to. */
  doc_id: string;
  /** Chunk identifier; looked up together with `doc_id`. */
  chunk_id: string;
  /** Page the chunk sits on. */
  page: number;
  /** Chunk type label (one of the search filters). */
  type: string;
  /** Bounding box of the chunk, or `null` when the row has none. */
  bbox: {
    x: number;
    y: number;
    w: number;
    h: number;
  } | null;
  /** English text of the chunk, if present. */
  content_en: string | null;
  /** Portuguese text of the chunk, if present. */
  content_pt: string | null;
  /** Classification label (one of the search filters), if any. */
  classification: string | null;
  /** Ranking score produced by the search RPC; `0` for direct lookups. */
  score: number;
  /** Rank in the BM25 branch; `null` for direct lookups. */
  bm25_rank: number | null;
  /** Rank in the dense branch; `null` for direct lookups. */
  dense_rank: number | null;
  /** Cross-encoder score; only set when `hybridSearch` actually reranked. */
  rerank_score?: number;
}
|
||
|
||
/**
 * Options accepted by `hybridSearch`. Only `query` is required; every other
 * field has a default applied inside `hybridSearch`.
 */
export interface HybridSearchOptions {
  /** Free-text query. A blank / whitespace-only query yields no results. */
  query: string;
  /** Query language (default `"pt"`); also decides which content column is
   *  preferred when building reranker candidates. */
  lang?: "pt" | "en";
  /** Restrict results to one document (default `null` = no restriction). */
  doc_id?: string | null;
  /** Restrict results to one chunk type (default `null`). */
  type?: string | null;
  /** Restrict results to one classification (default `null`). */
  classification?: string | null;
  /** Forwarded to the search RPC as the `ufo_only` filter (default `false`). */
  ufo_only?: boolean;
  /** Postgres recall window (default 100) — top-k from RRF before rerank. */
  recall_k?: number;
  /** Size of the final list returned to the caller (default 20). */
  top_k?: number;
  /**
   * Skip the reranker (faster, lower precision). Back-compat shortcut for
   * `rerank_strategy: "never"`; when `true` it overrides `rerank_strategy`.
   */
  no_rerank?: boolean;
  /**
   * W2-TD#8: rerank policy.
   *
   * - `"always"`        — always run the cross-encoder (highest precision,
   *                       slowest, 5–8s on CPU).
   * - `"when_top_k_gt"` — rerank only when `top_k > rerank_threshold`.
   *                       RRF order from the RPC is usually good enough for
   *                       the tight head of results; the reranker pays off
   *                       when re-sorting a wider list. This is the default,
   *                       so small-`top_k` callers (autocomplete, chat
   *                       top-10) skip the rerank cost.
   * - `"never"`         — same as `no_rerank: true`.
   */
  rerank_strategy?: "always" | "when_top_k_gt" | "never";
  /** Threshold used by `"when_top_k_gt"`. Default 15 (per ADR-001). */
  rerank_threshold?: number;
}
|
||
|
||
/**
 * Run hybrid retrieval for `opts.query` and return at most `top_k` hits.
 *
 * Steps:
 *  1. Embed the query with `embedQuery`.
 *  2. Call the `public.hybrid_search_chunks` RPC with the query text, the
 *     embedding, the filters and `recall_k`.
 *  3. Optionally re-order the recalled rows with the cross-encoder
 *     (`rerank`), depending on the effective rerank strategy.
 *
 * Rerank strategy (W2-TD#8 / ADR-001); `no_rerank: true` always wins:
 *  - `"never"`         → return the RPC order, truncated to `top_k`
 *  - `"when_top_k_gt"` → same, whenever `top_k <= rerank_threshold`
 *  - `"always"`        → rerank unconditionally
 *
 * @param opts Search options; see `HybridSearchOptions` for defaults.
 * @returns Up to `top_k` hits. `rerank_score` is set on each hit only when
 *   the reranker actually ran. Returns `[]` for a blank query, for a
 *   non-positive `top_k`, or when the RPC recalls nothing.
 * @throws Whatever `embedQuery` or `pgQuery` throw. A failing `rerank` call
 *   does NOT throw: it is logged and the RPC order is returned instead.
 */
export async function hybridSearch(opts: HybridSearchOptions): Promise<ChunkHit[]> {
  const {
    query,
    lang = "pt",
    doc_id = null,
    type = null,
    classification = null,
    ufo_only = false,
    recall_k = 100,
    top_k = 20,
    no_rerank = false,
    rerank_strategy = "when_top_k_gt",
    rerank_threshold = 15,
  } = opts;

  // Effective strategy: explicit `no_rerank=true` always wins (back-compat).
  const strategy: "always" | "when_top_k_gt" | "never" =
    no_rerank ? "never" : rerank_strategy;

  if (!query.trim()) return [];

  // A non-positive (or NaN) top_k can never yield results. Bail out before the
  // embed / DB round-trips; this also keeps a negative value away from
  // `slice(0, top_k)`, where it would count from the end and return
  // "all but the last n" rows instead of none.
  if (!(top_k > 0)) return [];

  // 1. Embed the query
  const q_embedding = await embedQuery(query);

  // 2. Call hybrid_search_chunks RPC. The trailing literal 60 is passed as-is;
  //    presumably the RRF k constant — confirm against the RPC definition.
  const sql = `
    SELECT *
    FROM public.hybrid_search_chunks(
      $1, $2::vector, $3, $4, $5, $6, $7, $8, 60
    )
  `;
  const rows = await pgQuery<ChunkHit>(sql, [
    query,
    toPgVectorLiteral(q_embedding),
    lang,
    doc_id,
    type,
    classification,
    ufo_only,
    recall_k,
  ]);

  // Relevance gating is expected to happen server-side in the RPC (BM25 only
  // matches real terms, and the dense branch is bounded by max_dense_dist), so
  // an empty `rows` is a legitimate "no match" and needs no further filtering.
  if (rows.length === 0) return [];

  // 3. Optional cross-encoder rerank for finer ordering. It's CPU-slow
  //    (seconds per ~dozen candidates), so skip it when the strategy says so.
  const skipRerank =
    strategy === "never" ||
    (strategy === "when_top_k_gt" && top_k <= rerank_threshold);
  if (skipRerank) {
    return rows.slice(0, top_k);
  }

  // Prefer the text in the query language, falling back to the other language
  // and finally to an empty string so the candidate list stays index-aligned
  // with `rows`.
  const candidateTexts = rows.map((r) =>
    lang === "en" ? r.content_en || r.content_pt || "" : r.content_pt || r.content_en || "",
  );

  let scores: number[];
  try {
    scores = await rerank(query, candidateTexts);
  } catch (err) {
    // Best-effort: a reranker outage must not break search. Keep the RPC
    // order, but leave a trace so the degradation is visible in the logs.
    console.warn("hybridSearch: rerank failed, falling back to RRF order", err);
    return rows.slice(0, top_k);
  }

  // Rows without a matching score get 0. Array.prototype.sort is stable, so
  // ties keep their original RPC order.
  return rows
    .map((r, i) => ({ ...r, rerank_score: scores[i] ?? 0 }))
    .sort((a, b) => b.rerank_score - a.rerank_score)
    .slice(0, top_k);
}
|
||
|
||
/**
 * Look up one chunk by its `(doc_id, chunk_id)` pair straight from
 * `public.chunks` — no embedding involved. Intended for citation expansion.
 *
 * The ranking fields are filled with neutral values (`score = 0`,
 * `bm25_rank` / `dense_rank` = `NULL`) so the row still fits `ChunkHit`.
 *
 * @returns The first matching row, or `null` when nothing matches.
 */
export async function getChunk(doc_id: string, chunk_id: string): Promise<ChunkHit | null> {
  const lookupSql = `SELECT chunk_pk, doc_id, chunk_id, page, type, bbox, content_en, content_pt,
            classification, 0::DOUBLE PRECISION AS score,
            NULL::INT AS bm25_rank, NULL::INT AS dense_rank
     FROM public.chunks
     WHERE doc_id = $1 AND chunk_id = $2`;

  const [match] = await pgQuery<ChunkHit>(lookupSql, [doc_id, chunk_id]);
  return match ?? null;
}
|
||
|
||
/** One row returned by `listAnomalies`. */
type AnomalyRow = {
  chunk_id: string;
  doc_id: string;
  page: number;
  anomaly_type: string | null;
  rationale: string | null;
  content_en: string | null;
  content_pt: string | null;
};

/**
 * List anomaly-flagged chunks. Useful for "show me all UFO sightings" without
 * embedding.
 *
 * @param opts.kind   Which anomaly flag to filter on. `"ufo"` selects the
 *                    `ufo_*` columns; any other value selects `cryptid_*`.
 * @param opts.doc_id Optional document filter; a falsy value (`null`,
 *                    `undefined`, `""`) means "all documents".
 * @param opts.limit  Maximum number of rows (default 50). Normalised to an
 *                    integer in `[0, 200]`; `NaN` falls back to the default.
 * @returns Matching rows ordered by `doc_id, order_global`.
 */
export async function listAnomalies(opts: {
  kind: "ufo" | "cryptid";
  doc_id?: string | null;
  limit?: number;
}): Promise<AnomalyRow[]> {
  const DEFAULT_LIMIT = 50;
  const MAX_LIMIT = 200;

  // Column names are interpolated into the SQL text, so they must only ever
  // come from this closed set — never from caller-supplied strings.
  const columns =
    opts.kind === "ufo"
      ? { flag: "ufo_anomaly", type: "ufo_anomaly_type", rationale: "ufo_rationale" }
      : { flag: "cryptid_anomaly", type: "cryptid_anomaly_type", rationale: "cryptid_rationale" };

  // LIMIT must be a non-negative integer for Postgres; truncate and clamp
  // instead of letting a negative / fractional / NaN value fail the query.
  const requested = opts.limit ?? DEFAULT_LIMIT;
  const limit = Number.isNaN(requested)
    ? DEFAULT_LIMIT
    : Math.min(Math.max(Math.trunc(requested), 0), MAX_LIMIT);

  // Values (unlike column names) always travel as bind parameters; the
  // placeholder index is derived from the current length of `params`.
  const params: unknown[] = [];
  let where = `WHERE ${columns.flag} = TRUE`;
  if (opts.doc_id) {
    params.push(opts.doc_id);
    where += ` AND doc_id = $${params.length}`;
  }
  params.push(limit);

  // Typing the query with the row shape replaces the former `as never` cast,
  // so the compiler checks the result against the declared return type.
  return pgQuery<AnomalyRow>(
    `SELECT chunk_id, doc_id, page, ${columns.type} AS anomaly_type, ${columns.rationale} AS rationale,
            content_en, content_pt
     FROM public.chunks
     ${where}
     ORDER BY doc_id, order_global
     LIMIT $${params.length}`,
    params,
  );
}
|
||
|
||
/**
 * Fetch every chunk of one page of a document directly from `public.chunks`,
 * ordered by `order_in_page`.
 *
 * The ranking fields are filled with neutral values (`score = 0`,
 * `bm25_rank` / `dense_rank` = `NULL`) so the rows fit `ChunkHit`.
 *
 * @returns The page's chunks in reading order; empty when nothing matches.
 */
export async function getPageChunks(doc_id: string, page: number): Promise<ChunkHit[]> {
  const pageSql = `SELECT chunk_pk, doc_id, chunk_id, page, type, bbox, content_en, content_pt,
            classification, 0::DOUBLE PRECISION AS score,
            NULL::INT AS bm25_rank, NULL::INT AS dense_rank
     FROM public.chunks
     WHERE doc_id = $1 AND page = $2
     ORDER BY order_in_page`;
  const bindings = [doc_id, page];

  return pgQuery<ChunkHit>(pageSql, bindings);
}
|