From 87906a20c2edaa3c390c2e3fe347ddf9debf53fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Furmaniak?=
 <michal.furmaniak@morizon-gratka.pl>
Date: Fri, 5 Apr 2024 06:27:05 +0200
Subject: [PATCH] Search task first part.

---
 createEmbeddings.js | 17 +++++++++
 embeddings/index.js | 86 +++++++++++++++++++++++++++++++++++++++++++++
 io/index.js         | 15 ++++++++
 prompt.js           | 85 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 203 insertions(+)
 create mode 100644 createEmbeddings.js
 create mode 100644 embeddings/index.js
 create mode 100644 io/index.js

diff --git a/createEmbeddings.js b/createEmbeddings.js
new file mode 100644
index 0000000..3277a52
--- /dev/null
+++ b/createEmbeddings.js
@@ -0,0 +1,17 @@
+import { saveEmbeddingsToFile, generateEmbeddings } from './embeddings/index.js';
+import { fetchJSONData } from './io/index.js';
+const jsonDataUrl = 'https://unknow.news/archiwum_aidevs.json';
+try {
+	// Step 1: Fetch JSON data from the provided URL
+	const jsonData = await fetchJSONData(jsonDataUrl);
+
+	// Step 2: Keep only the `info` text of every entry; that is what gets embedded
+	const contentData = jsonData.map(entry => entry.info);
+
+	// Step 3: Generate the embeddings, then await the save so its failure reaches the catch
+	const embeddings = await generateEmbeddings(contentData);
+	const embeddingsFilePath = 'embeddings.json';
+	await saveEmbeddingsToFile(embeddings, embeddingsFilePath);
+} catch (error) {
+	console.error('An error occurred:', error);
+}
\ No newline at end of file
diff --git a/embeddings/index.js b/embeddings/index.js
new file mode 100644
index 0000000..40c20fd
--- /dev/null
+++ b/embeddings/index.js
@@ -0,0 +1,86 @@
+import fs from 'fs';
+import tf from '@tensorflow/tfjs-node';
+import * as use from '@tensorflow-models/universal-sentence-encoder';
+
+// Persists the given embeddings to `filePath` as a single JSON document.
+export async function saveEmbeddingsToFile(embeddings, filePath) {
+    // Each embedding exposes an async array(); start all conversions, then wait
+    const pendingArrays = embeddings.map((tensor) => tensor.array());
+    const plainArrays = await Promise.all(pendingArrays);
+
+    // Synchronous write of the serialized nested arrays
+    const json = JSON.stringify(plainArrays);
+    fs.writeFileSync(filePath, json);
+}
+// Rebuilds tensors from a JSON file in the layout saveEmbeddingsToFile writes.
+export async function loadEmbeddingsFromFile(filePath) {
+    const toTensor = (values) => tf.tensor(values);
+
+    // The parsed document is expected to be an array holding one nested array
+    // per stored embedding
+    const rawJson = fs.readFileSync(filePath, 'utf8');
+    const storedArrays = JSON.parse(rawJson);
+
+    // Yields a plain JavaScript array of tensors, not a single stacked tensor
+    return storedArrays.map(toTensor);
+}
+// Embeds every item of contentData and returns the results in input order.
+export async function generateEmbeddings(contentData) {
+    const results = [];
+    // Sequential: each embedTextData call is awaited before the next one starts
+    for (const item of contentData) {
+        results.push(await embedTextData(item));
+    }
+    return results;
+}
+export async function embedTextData(text) {
+    // use.load() is costly, so its promise is cached on the function: the encoder
+    // is loaded once per process and every later call reuses the same model.
+    embedTextData.modelPromise = embedTextData.modelPromise || use.load();
+    return (await embedTextData.modelPromise).embed(text);
+}
+/**
+ * Ranks stored embeddings by cosine similarity to a query embedding.
+ *
+ * @param {tf.Tensor|tf.Tensor[]} embeddingsTensor - Either a stacked tensor with
+ *   one embedding per row or an array holding one tensor per embedding (the
+ *   form loadEmbeddingsFromFile returns).
+ * @param {tf.Tensor} queryEmbedding - Embedding of the query; it is flattened,
+ *   so any shape with as many values as a stored embedding works.
+ * @param {Array} contentData - Source items; only its length is used, entry i
+ *   corresponds to embedding i.
+ * @param {number} k - Maximum number of results.
+ * @returns {number[]} Indices of the k best matches, best first.
+ */
+export function findSimilar(embeddingsTensor, queryEmbedding, contentData, k) {
+    const cosineSimilarities = [];
+    for (let i = 0; i < contentData.length; i++) {
+        // tidy() disposes every intermediate tensor created for this comparison
+        const similarity = tf.tidy(() => {
+            // A plain array has no gather(); index it directly in that case
+            const contentEmbedding = Array.isArray(embeddingsTensor)
+                ? embeddingsTensor[i]
+                : embeddingsTensor.gather([i]);
+
+            // Flatten both sides so the dot product is one scalar whatever the
+            // leading dimensions are (a transposed matMul on row vectors would
+            // yield an outer product instead of the dot product)
+            const queryVector = tf.reshape(queryEmbedding, [-1]);
+            const contentVector = tf.reshape(contentEmbedding, [-1]);
+            const dotProduct = tf.sum(tf.mul(queryVector, contentVector));
+            const magnitudes = tf.norm(queryVector).mul(tf.norm(contentVector));
+            return dotProduct.div(magnitudes).dataSync()[0];
+        });
+
+        // Store the similarity score along with the index
+        cosineSimilarities.push({ index: i, similarity });
+    }
+
+    // Sort similarities in descending order
+    cosineSimilarities.sort((a, b) => b.similarity - a.similarity);
+
+    // Return top k most similar indices
+    const topIndices = cosineSimilarities.slice(0, k).map(item => item.index);
+    console.log('Top indices:', topIndices);
+    return topIndices;
+}
\ No newline at end of file
diff --git a/io/index.js b/io/index.js
new file mode 100644
index 0000000..c01d716
--- /dev/null
+++ b/io/index.js
@@ -0,0 +1,15 @@
+import axios from 'axios'
+
+export async function fetchJSONData(url) {
+    // Resolves to the response's `data`; failures are logged, then rethrown as-is
+    try {
+        return (await axios.get(url)).data;
+    } catch (fetchError) {
+        console.error('Error fetching JSON data:', fetchError);
+        throw fetchError;
+    }
+}
+
+export default { // default export exposing the same function as the named export
+	fetchJSONData
+}
\ No newline at end of file
diff --git a/prompt.js b/prompt.js
index 24a8d16..2aafe8e 100644
--- a/prompt.js
+++ b/prompt.js
@@ -3,10 +3,14 @@ import fetch from 'node-fetch';
 import { config } from 'dotenv';
 import { chatCompletion, embedding, transcript } from './openAPI/index.js';
 import { makeRequestWithDelay } from './utils/makeRequest.js';
+import { loadEmbeddingsFromFile, embedTextData, findSimilar } from './embeddings/index.js'
+import { fetchJSONData } from './io/index.js';
 import FormData from 'form-data';
 import axios from 'axios'
 import fs from 'fs';
 import path from 'path';
+import tf from '@tensorflow/tfjs-node';
+import * as use from '@tensorflow-models/universal-sentence-encoder';
 
 
 config();
@@ -24,6 +28,8 @@ let functionAnswer = [];
 let RODOAnswer = [];
 let scraperAnswer = [];
 let whoamiAnswer = [];
+let embeddingsTensor = {}; // reassigned by the search task with the result of loadEmbeddingsFromFile
+let searchAnswer = "" // set by the search task to the URL it submits as its answer
 
 fetch('https://tasks.aidevs.pl/token/helloapi', {
     method: 'POST',
@@ -485,4 +491,83 @@ fetch('https://tasks.aidevs.pl/token/whoami', {
             console.log('Answer from API', response4);
         });
     })
+    .catch(error => console.error('Error:', error));
+// Search task: embed the question, rank the archive entries, answer with the best URL
+fetch('https://tasks.aidevs.pl/token/search', {
+    method: 'POST',
+    headers: {
+        'Content-Type': 'application/json'
+    },
+    body: JSON.stringify({ apikey: APIKey })
+})
+    .then(async (response) => {
+        const data = await response.json();
+        const token = data.token;
+        const taskUrl = `https://tasks.aidevs.pl/task/${token}`;
+        const response2 = await makeRequestWithDelay(taskUrl, {
+            method: 'GET',
+            headers: {
+                'Content-Type': 'application/json'
+            }
+        }, 10);
+        console.log(response2);
+        const query = response2.question; // question text taken from the task response
+        const queryEmbedding = await embedTextData(query);
+        const embeddingsFilePath = 'embeddings.json';
+        embeddingsTensor = await loadEmbeddingsFromFile(embeddingsFilePath);
+        const jsonDataUrl = 'https://unknow.news/archiwum_aidevs.json';
+        const jsonData = await fetchJSONData(jsonDataUrl);
+
+        // Extract the info text of every archive entry (same order as jsonData)
+        const contentData = jsonData.map(entry => entry.info);
+
+        // findSimilar returns indices into contentData (and thus jsonData), best match first
+        const similarIndices = findSimilar(embeddingsTensor, queryEmbedding, contentData, 5);
+        const similarURLs = similarIndices.map(index => jsonData[index].url);
+        console.log('Top 5 similar URLs:', similarURLs);
+        if (similarURLs.length === 0) throw new Error('No similar entries found');
+        searchAnswer = similarURLs[0]; // answer with the best-ranked URL
+        console.log(searchAnswer);
+        const response4 = await makeRequestWithDelay(`https://tasks.aidevs.pl/answer/${token}`, {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify({answer: searchAnswer})
+        }, 10);
+        console.log('Answer from API', response4);
+
+        //     const response4 = await makeRequestWithDelay(`https://tasks.aidevs.pl/answer/${token}`, {
+        //         method: 'POST',
+        //         headers: {
+        //             'Content-Type': 'application/json'
+        //         },
+        //         body: JSON.stringify({answer: whoamiAnswer})
+        //     }, 10);
+        //     console.log('Answer from API', response4);
+
+        // await chatCompletion({
+        //     messages: [
+        //         { 
+        //             role: 'system', 
+        //             content: response2.msg
+        //         },
+        //         { 
+        //             role: 'user', 
+        //             content: response2.hint
+        //         }],
+        //     model: 'gpt-4-turbo-preview',
+        // }).then(async (response) => {
+        //     whoamiAnswer = response.choices[0].message.content;
+        //     console.log(whoamiAnswer)
+        //     const response4 = await makeRequestWithDelay(`https://tasks.aidevs.pl/answer/${token}`, {
+        //         method: 'POST',
+        //         headers: {
+        //             'Content-Type': 'application/json'
+        //         },
+        //         body: JSON.stringify({answer: whoamiAnswer})
+        //     }, 10);
+        //     console.log('Answer from API', response4);
+        // });
+    })
     .catch(error => console.error('Error:', error));
\ No newline at end of file