/**
 * Client for the embed-service (BGE-M3 + BGE-Reranker-v2-M3).
 *
 * Self-hosted at EMBED_SERVICE_URL. CPU-only on the VPS.
 */

// `||` (not `??`) so that an empty EMBED_SERVICE_URL also falls back to the default.
const EMBED_URL = process.env.EMBED_SERVICE_URL || "http://localhost:8000";

// The first embed call after a cold start can take several seconds while
// BGE-M3 loads into RAM. After that it stabilizes around 150-300ms.
// Both endpoints use this same upper bound.
const REQUEST_TIMEOUT_MS = 60_000;

/**
 * POSTs `payload` as JSON to `path` on the embed-service and returns the
 * parsed JSON response body.
 *
 * @throws Error `embed-service <path> <status>` when the response is not 2xx.
 *   Network failures and timeouts surface as whatever `fetch` rejects with.
 */
async function postJson(path: string, payload: unknown): Promise<unknown> {
  const res = await fetch(`${EMBED_URL}${path}`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(payload),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
  });
  if (!res.ok) throw new Error(`embed-service ${path} ${res.status}`);
  return (await res.json()) as unknown;
}

/** Runtime guard: true when `value` is an array containing only numbers. */
function isNumberArray(value: unknown): value is number[] {
  return Array.isArray(value) && value.every((v) => typeof v === "number");
}

/**
 * Embeds a single query string via `POST /embed` (with `normalize: true`).
 *
 * @param text The text to embed.
 * @returns The first vector of the response's `embeddings` array.
 * @throws Error on a non-2xx response, or when the response body does not
 *   contain a numeric vector at `embeddings[0]`.
 */
export async function embedQuery(text: string): Promise<number[]> {
  const data = (await postJson("/embed", {
    texts: [text],
    normalize: true,
  })) as { embeddings?: unknown } | null;

  const embeddings = data?.embeddings;
  const first: unknown = Array.isArray(embeddings) ? embeddings[0] : undefined;
  // Fail here with a clear message instead of handing `undefined` to callers.
  if (!isNumberArray(first)) {
    throw new Error("embed-service /embed returned no embedding");
  }
  return first;
}

/**
 * Scores `docs` against `query` via `POST /rerank` (with `normalize: true`).
 *
 * @param query The query text.
 * @param docs Candidate documents; an empty list short-circuits to `[]`
 *   without contacting the service.
 * @returns The `scores` array from the response.
 * @throws Error on a non-2xx response, or when the response body does not
 *   contain a numeric `scores` array.
 */
export async function rerank(
  query: string,
  docs: readonly string[],
): Promise<number[]> {
  if (docs.length === 0) return [];

  const data = (await postJson("/rerank", {
    query,
    docs,
    normalize: true,
  })) as { scores?: unknown } | null;

  const scores = data?.scores;
  if (!isNumberArray(scores)) {
    throw new Error("embed-service /rerank returned no scores");
  }
  return scores;
}

/**
 * pgvector accepts the textual `[1.0,2.0,...]` form.
 *
 * Each component is rendered with six fractional digits.
 *
 * @throws Error when a component is NaN or +/-Infinity; `toFixed` would render
 *   those as the words "NaN"/"Infinity" rather than a numeric literal.
 */
export function toPgVectorLiteral(vec: readonly number[]): string {
  const parts = vec.map((v, i) => {
    if (!Number.isFinite(v)) {
      throw new Error(`toPgVectorLiteral: non-finite value at index ${i}: ${v}`);
    }
    return v.toFixed(6);
  });
  return `[${parts.join(",")}]`;
}