39 lines
1.3 KiB
JavaScript
39 lines
1.3 KiB
JavaScript
// utils/vectorSearch.js
|
|
import sqlite3 from "sqlite3";
|
|
import { open } from "sqlite";
|
|
|
|
/* ---------- small helper ---------- */
|
|
/**
 * Cosine similarity between two numeric vectors.
 *
 * @param {ArrayLike<number>} a first vector (plain array or typed array)
 * @param {ArrayLike<number>} b second vector (plain array or typed array)
 * @returns {number} dot(a, b) / (|a| * |b|); 0 when either vector has zero
 *   magnitude (including empty input) or when the lengths differ.
 */
function cosineSim(a, b) {
  // Vectors of different dimensionality are not comparable. Score them 0
  // instead of silently truncating to a.length or propagating NaN.
  if (a.length !== b.length) {
    return 0;
  }

  let dot = 0;
  let normSqA = 0;
  let normSqB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normSqA += a[i] * a[i];
    normSqB += b[i] * b[i];
  }

  // A zero (or NaN) squared norm would make the division meaningless.
  if (!normSqA || !normSqB) {
    return 0;
  }
  return dot / (Math.sqrt(normSqA) * Math.sqrt(normSqB));
}
|
|
|
|
/**
 * Decode an embedding BLOB into a Float32Array.
 *
 * The bytes are copied into a fresh ArrayBuffer because a Float32Array view
 * requires a byte offset that is a multiple of 4, which a Buffer's
 * `byteOffset` into its backing store does not guarantee.
 * NOTE(review): assumes the column holds packed 32-bit floats in platform
 * byte order — confirm against the code that writes faq_embeddings.
 *
 * @param {Uint8Array} blob raw bytes as returned by SQLite (a Node Buffer)
 * @returns {Float32Array} decoded vector; trailing bytes that do not form a
 *   whole float are ignored
 */
function decodeEmbedding(blob) {
  const bytesPerFloat = Float32Array.BYTES_PER_ELEMENT;
  const floatCount = Math.floor(blob.byteLength / bytesPerFloat);
  const start = blob.byteOffset;
  const end = start + floatCount * bytesPerFloat;
  return new Float32Array(blob.buffer.slice(start, end));
}

/**
 * JS-only vector search against faq_embeddings.
 *
 * Loads every row of the table, scores each stored embedding against the
 * query with cosine similarity, and returns the best matches.
 *
 * @param {string} dbPath absolute path to user_profile.db
 * @param {number[]} queryEmbedding the embedding array from OpenAI
 * @param {number} topK how many rows to return
 * @returns {Promise<object[]>} up to `topK` rows, highest similarity first;
 *   each row carries the selected columns (id, question, answer, embedding)
 *   plus a numeric `score`
 * @throws if the database cannot be opened or queried
 */
export async function vectorSearch(dbPath, queryEmbedding, topK = 3) {
  const db = await open({ filename: dbPath, driver: sqlite3.Database });

  let rows;
  try {
    rows = await db.all(`SELECT id, question, answer, embedding FROM faq_embeddings`);
  } finally {
    // Release the connection even when the query rejects.
    await db.close();
  }

  return rows
    .map((row) => ({
      ...row,
      score: cosineSim(queryEmbedding, decodeEmbedding(row.embedding)),
    }))
    .sort((a, b) => b.score - a.score) // highest similarity first
    .slice(0, topK);
}
|