-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathvectorSearchOpenAI.js
76 lines (64 loc) · 1.83 KB
/
vectorSearchOpenAI.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import { MongoClient } from "mongodb";
import dotenv from "dotenv";
import createEmbeddings from "./EmbeddingsOpenAI.js";
// Load variables from a local .env file into process.env before reading them.
dotenv.config();

// Connection string for the Atlas cluster, taken from the MONGO_URI env var.
const { MONGO_URI: uri } = process.env;

// Module-level client used by the search function below.
const client = new MongoClient(uri);
/**
 * Embeds `question` and runs an Atlas `$vectorSearch` over the
 * `ccny_embeddings` collection of the `cuny_guide` database.
 *
 * The query vector is the first embedding in the `createEmbeddings` response
 * (`data.data[0].embedding`). Candidates are restricted to documents whose
 * `school` field equals "CCNY".
 *
 * @param {string} question - Text to search with; passed unchanged to
 *   `createEmbeddings` (presumably a string — confirm against that helper).
 * @returns {Promise<object[]>} At most 5 documents, each projected to `_id`,
 *   `question`, `answer`, `context` and `score` (the vector search score).
 * @throws Any error raised while connecting, embedding or aggregating is
 *   logged and then rethrown unchanged.
 */
async function vectorSearchOpenAI(question) {
  // NOTE(review): `client` is shared module state and is closed after every
  // call (see `finally`), so overlapping calls could close the connection
  // under each other — confirm callers never run this concurrently.
  try {
    await client.connect();
    const col = client.db("cuny_guide").collection("ccny_embeddings");

    // Embed the question; only the first embedding of the response is used.
    const data = await createEmbeddings(question);
    const embedding = data.data[0].embedding;

    const pipeline = [
      {
        $vectorSearch: {
          index: "ccny_vector_index",
          filter: {
            school: {
              $eq: "CCNY",
            },
          },
          path: "plot_embedding",
          queryVector: embedding,
          limit: 5, // return top k results
          // increase if we have more than 100 docs
          numCandidates: 100,
        },
      },
      {
        $project: {
          // change this to project the fields you want to return
          _id: 1,
          question: 1,
          answer: 1,
          context: 1,
          score: {
            $meta: "vectorSearchScore",
          },
        },
      },
    ];

    // `toArray()` drains the aggregation cursor into a plain array of
    // documents, so the result can be returned as-is. No per-document copy is
    // needed; it is the un-drained cursor object (i.e. without `toArray()`)
    // that would show up as driver metadata instead of documents.
    const results = await col.aggregate(pipeline).toArray();
    return results;
  } catch (error) {
    console.error("Error fetching filtered schools:", error);
    throw error;
  } finally {
    // Runs on both success and failure, so the connection is always released.
    await client.close();
  }
}
export default vectorSearchOpenAI;