42 lines
1.5 KiB
TypeScript
42 lines
1.5 KiB
TypeScript
/**
 * Client for the embed-service (BGE-M3 + BGE-Reranker-v2-M3).
 *
 * The service is self-hosted and reached at EMBED_SERVICE_URL; it runs
 * CPU-only on the VPS.
 */
|
|
|
|
/** Base URL used when EMBED_SERVICE_URL is unset, empty, or only slashes. */
const DEFAULT_EMBED_URL = "http://localhost:8000";

/**
 * Turns the raw EMBED_SERVICE_URL setting into a usable base URL.
 *
 * Trailing slashes are removed because endpoint paths are appended with a
 * leading slash (`${EMBED_URL}/embed`); a configured value such as
 * `http://host:8000/` would otherwise produce `http://host:8000//embed`.
 *
 * @param raw - The configured value, possibly `undefined` or empty.
 * @returns The base URL without trailing slashes, or the localhost default.
 */
function normalizeEmbedBaseUrl(raw: string | undefined): string {
  const trimmed = (raw ?? "").replace(/\/+$/, "");
  // `||` (not `??`) on purpose: an empty string must fall back to the default.
  return trimmed || DEFAULT_EMBED_URL;
}

const EMBED_URL = normalizeEmbedBaseUrl(process.env.EMBED_SERVICE_URL);
|
|
|
|
/**
 * Embeds a single query string via the embed-service `/embed` endpoint.
 *
 * Posts `{ texts: [text], normalize: true }` as JSON and returns the first
 * entry of the `embeddings` array in the response.
 *
 * @param text - The query text to embed.
 * @returns The embedding vector the service produced for `text`.
 * @throws Error when the service answers with a non-OK status, or when the
 *   response body has no numeric vector at `embeddings[0]`. Network failures
 *   and the 60-second timeout surface as the rejection raised by `fetch`.
 */
export async function embedQuery(text: string): Promise<number[]> {
  const res = await fetch(`${EMBED_URL}/embed`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ texts: [text], normalize: true }),
    // The first embed call after a cold start can take several seconds while
    // BGE-M3 loads into RAM. After that it stabilizes around 150-300ms.
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) throw new Error(`embed-service /embed ${res.status}`);

  // The body is external JSON: narrow it instead of asserting its shape, so a
  // malformed or empty reply fails here with a clear message rather than
  // leaking `undefined` to callers that expect a number[].
  const data: unknown = await res.json();
  const embeddings = (data as { embeddings?: unknown } | null)?.embeddings;
  const first: unknown = Array.isArray(embeddings) ? embeddings[0] : undefined;
  if (!Array.isArray(first) || !first.every((v) => typeof v === "number")) {
    throw new Error("embed-service /embed returned no embedding vector");
  }
  return first;
}
|
|
|
|
/**
 * Scores candidate documents against a query via the embed-service `/rerank`
 * endpoint.
 *
 * Posts `{ query, docs, normalize: true }` as JSON and returns the `scores`
 * array from the response.
 *
 * @param query - The search query.
 * @param docs - Candidate document texts. An empty list returns `[]`
 *   immediately without contacting the service.
 * @returns The scores reported by the service (presumably one per entry of
 *   `docs`, in the same order; confirm against the service contract).
 * @throws Error when the service answers with a non-OK status, or when the
 *   response body has no numeric `scores` array. Network failures and the
 *   60-second timeout surface as the rejection raised by `fetch`.
 */
export async function rerank(
  query: string,
  docs: string[],
): Promise<number[]> {
  if (docs.length === 0) return [];

  const res = await fetch(`${EMBED_URL}/rerank`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ query, docs, normalize: true }),
    signal: AbortSignal.timeout(60_000),
  });
  if (!res.ok) throw new Error(`embed-service /rerank ${res.status}`);

  // The body is external JSON: narrow it instead of asserting its shape, so a
  // malformed reply fails here rather than returning `undefined` as number[].
  const data: unknown = await res.json();
  const scores = (data as { scores?: unknown } | null)?.scores;
  if (!Array.isArray(scores) || !scores.every((s) => typeof s === "number")) {
    throw new Error("embed-service /rerank returned no scores array");
  }
  return scores;
}
|
|
|
|
/**
 * Serializes a numeric vector into the bracketed, comma-separated text form
 * (`[1.000000,2.000000,...]`) that pgvector accepts.
 *
 * Every component is rendered with exactly six digits after the decimal
 * point, so values are rounded to that precision.
 *
 * @param vec - The vector components.
 * @returns The textual vector literal; `[]` for an empty input.
 */
export function toPgVectorLiteral(vec: number[]): string {
  const formatComponent = (component: number): string => component.toFixed(6);
  const components = vec.map(formatComponent);
  return `[${components.join(",")}]`;
}
|