disclosure-bureau/web/lib/retrieval/embed.ts

43 lines
1.5 KiB
TypeScript
Raw Normal View History

/**
* Client for the embed-service (BGE-M3 + BGE-Reranker-v2-M3).
*
* Self-hosted at EMBED_SERVICE_URL. CPU-only on the VPS.
*/
/**
 * Base URL of the embed-service, without a trailing slash.
 *
 * Read from the EMBED_SERVICE_URL environment variable; an unset or empty
 * value falls back to the local default. Trailing slashes are stripped
 * because request paths are appended as `/…`, so a configured
 * `http://host:8000/` would otherwise produce `http://host:8000//…`.
 */
const EMBED_URL = (
  process.env.EMBED_SERVICE_URL || "http://localhost:8000"
).replace(/\/+$/, "");
/**
 * Embeds a single query string via the embed-service `/embed` endpoint.
 *
 * @param text - The query text to embed.
 * @returns The first embedding in the response (only one text is sent).
 * @throws Error when the service answers with a non-OK status, or when the
 *   response body does not contain a numeric embedding for the query.
 *   Rejections from `fetch` itself (network failure, timeout abort) propagate
 *   unchanged.
 */
export async function embedQuery(text: string): Promise<number[]> {
  const res = await fetch(`${EMBED_URL}/embed`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ texts: [text], normalize: true }),
    // The first embed call after a cold start can take several seconds while
    // BGE-M3 loads into RAM. After that it stabilizes around 150-300ms.
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) throw new Error(`embed-service /embed ${res.status}`);

  // The body is external input: check its shape instead of asserting it, so
  // a malformed reply fails here rather than surfacing later as `undefined`.
  const data: unknown = await res.json();
  const embeddings = (data as { embeddings?: unknown } | null)?.embeddings;
  const first: unknown = Array.isArray(embeddings) ? embeddings[0] : undefined;
  if (!Array.isArray(first) || !first.every((v) => typeof v === "number")) {
    throw new Error("embed-service /embed returned no valid embedding");
  }
  return first as number[];
}
/**
 * Scores `docs` against `query` via the embed-service `/rerank` endpoint.
 *
 * @param query - The search query.
 * @param docs - Candidate documents to score. An empty list short-circuits
 *   to `[]` without calling the service.
 * @returns The `scores` array from the response.
 *   NOTE(review): presumably one score per entry of `docs`, in input order —
 *   confirm against the service; the length is not checked here.
 * @throws Error when the service answers with a non-OK status, or when the
 *   response body does not contain an array of numbers under `scores`.
 *   Rejections from `fetch` itself (network failure, timeout abort) propagate
 *   unchanged.
 */
export async function rerank(
  query: string,
  docs: string[],
): Promise<number[]> {
  // Nothing to score: skip the network round-trip entirely.
  if (docs.length === 0) return [];

  const res = await fetch(`${EMBED_URL}/rerank`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ query, docs, normalize: true }),
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) throw new Error(`embed-service /rerank ${res.status}`);

  // The body is external input: check its shape instead of asserting it.
  const data: unknown = await res.json();
  const scores = (data as { scores?: unknown } | null)?.scores;
  if (!Array.isArray(scores) || !scores.every((s) => typeof s === "number")) {
    throw new Error("embed-service /rerank returned no valid scores");
  }
  return scores as number[];
}
/**
 * Serialises a vector into the textual `[1.0,2.0,...]` form that pgvector
 * accepts.
 *
 * Each component is rendered with six fractional digits.
 *
 * @param vec - Vector components; an empty array yields `"[]"`.
 * @returns The bracketed, comma-separated literal.
 * @throws RangeError if any component is NaN or ±Infinity. `toFixed` would
 *   render those as `NaN` / `Infinity`, which is not a usable vector literal,
 *   so fail here with the offending index instead of at query time.
 */
export function toPgVectorLiteral(vec: readonly number[]): string {
  const parts = vec.map((component, index) => {
    if (!Number.isFinite(component)) {
      throw new RangeError(
        `toPgVectorLiteral: non-finite component ${component} at index ${index}`,
      );
    }
    return component.toFixed(6);
  });
  return `[${parts.join(",")}]`;
}