From 87906a20c2edaa3c390c2e3fe347ddf9debf53fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Furmaniak?=
Date: Fri, 5 Apr 2024 06:27:05 +0200
Subject: [PATCH] Search task first part.

Add a one-off script that embeds the unknow.news archive with the
Universal Sentence Encoder, helpers to store/load/rank those embeddings,
and a handler in prompt.js that answers the "search" task with the URL of
the archive entry most similar to the question.

---
Review notes (text in this section is ignored by `git am`):
- This patch was re-flowed from a whitespace-collapsed copy. Indentation of
  the prompt.js context lines is reconstructed; apply with
  `git apply --ignore-whitespace` if the context does not match.
- Blob ids on the `index` lines are carried over from the original patch and
  no longer describe the post-image of the changed files.
- createEmbeddings.js: await saveEmbeddingsToFile so write errors are caught.
- embeddings/index.js: load the encoder once instead of once per text;
  findSimilar now accepts the tensor array returned by
  loadEmbeddingsFromFile (the old code called .gather() on a JS array),
  computes a real cosine similarity and releases intermediates via tf.tidy.
- prompt.js: answer with the best match instead of always jsonData[0];
  drop the commented-out whoami code pasted into the new handler.

 createEmbeddings.js |  24 ++++++++++
 embeddings/index.js | 102 ++++++++++++++++++++++++++++++++++++++++++++
 io/index.js         |  21 +++++++++
 prompt.js           |  57 +++++++++++++++++++++++++
 4 files changed, 204 insertions(+)
 create mode 100644 createEmbeddings.js
 create mode 100644 embeddings/index.js
 create mode 100644 io/index.js

diff --git a/createEmbeddings.js b/createEmbeddings.js
new file mode 100644
index 0000000..3277a52
--- /dev/null
+++ b/createEmbeddings.js
@@ -0,0 +1,24 @@
+// One-off script: downloads the unknow.news archive, embeds every entry's
+// `info` text and stores the vectors in embeddings.json for later searches.
+import { saveEmbeddingsToFile, generateEmbeddings } from './embeddings/index.js';
+import { fetchJSONData } from './io/index.js';
+
+const jsonDataUrl = 'https://unknow.news/archiwum_aidevs.json';
+const embeddingsFilePath = 'embeddings.json';
+
+try {
+  // Step 1: Fetch JSON data from the provided URL
+  const jsonData = await fetchJSONData(jsonDataUrl);
+
+  // Step 2: Extract the text to embed from each archive entry
+  const contentData = jsonData.map((entry) => entry.info);
+
+  // Step 3: Generate embeddings for the content using a pre-trained model
+  const embeddings = await generateEmbeddings(contentData);
+
+  // Step 4: Persist the embeddings; awaited so a failed write lands in the
+  // catch block below instead of becoming an unhandled rejection.
+  await saveEmbeddingsToFile(embeddings, embeddingsFilePath);
+} catch (error) {
+  console.error('An error occurred:', error);
+}
diff --git a/embeddings/index.js b/embeddings/index.js
new file mode 100644
index 0000000..40c20fd
--- /dev/null
+++ b/embeddings/index.js
@@ -0,0 +1,102 @@
+import fs from 'fs';
+import tf from '@tensorflow/tfjs-node';
+import * as use from '@tensorflow-models/universal-sentence-encoder';
+
+// Loading the Universal Sentence Encoder is expensive, so the pending load is
+// cached here and shared by every embedTextData() call.
+let modelPromise;
+
+/**
+ * Writes embedding tensors to a file as a JSON array of nested arrays.
+ * @param {Array} embeddings - Tensors exposing an async `array()` method.
+ * @param {string} filePath - Destination path.
+ * @returns {Promise<void>}
+ */
+export async function saveEmbeddingsToFile(embeddings, filePath) {
+  // Convert TensorFlow tensors to plain arrays so they can be serialized
+  const embeddingsArrays = await Promise.all(embeddings.map((embedding) => embedding.array()));
+
+  await fs.promises.writeFile(filePath, JSON.stringify(embeddingsArrays));
+}
+
+/**
+ * Reads a file written by saveEmbeddingsToFile and rebuilds the tensors.
+ * @param {string} filePath - Path of the JSON file.
+ * @returns {Promise<Array>} One tensor per stored embedding.
+ */
+export async function loadEmbeddingsFromFile(filePath) {
+  const serializedEmbeddings = await fs.promises.readFile(filePath, 'utf8');
+  const embeddingsArrays = JSON.parse(serializedEmbeddings);
+
+  return embeddingsArrays.map((array) => tf.tensor(array));
+}
+
+/**
+ * Embeds every text in order, one model call per item.
+ * @param {string[]} contentData - Texts to embed.
+ * @returns {Promise<Array>} Embedding tensors, index-aligned with contentData.
+ */
+export async function generateEmbeddings(contentData) {
+  const embeddings = [];
+  for (const content of contentData) {
+    embeddings.push(await embedTextData(content));
+  }
+  return embeddings;
+}
+
+/**
+ * Embeds text with the Universal Sentence Encoder.
+ * @param {string|string[]} text - Input passed straight to `model.embed`.
+ * @returns {Promise<Object>} The tensor returned by `model.embed`.
+ */
+export async function embedTextData(text) {
+  if (!modelPromise) {
+    // Forget a failed load so the next call can retry
+    modelPromise = use.load().catch((error) => {
+      modelPromise = undefined;
+      throw error;
+    });
+  }
+  const model = await modelPromise;
+  return model.embed(text);
+}
+
+/**
+ * Ranks stored embeddings by cosine similarity to a query embedding.
+ * @param {Array|Object} embeddingsTensor - Array of per-item tensors (as returned
+ *   by loadEmbeddingsFromFile) or a single tensor with one row per item.
+ * @param {Object} queryEmbedding - Tensor holding one query vector.
+ * @param {Array} contentData - Items the embeddings belong to; bounds the search.
+ * @param {number} k - Maximum number of results.
+ * @returns {number[]} Indices of the k most similar items, best first.
+ */
+export function findSimilar(embeddingsTensor, queryEmbedding, contentData, k) {
+  const isTensorList = Array.isArray(embeddingsTensor);
+  const embeddingCount = isTensorList ? embeddingsTensor.length : embeddingsTensor.shape[0];
+  // Only rank entries that have both an embedding and a content item
+  const count = Math.min(embeddingCount, contentData.length);
+  if (count === 0) {
+    return [];
+  }
+
+  // tf.tidy disposes the intermediate tensors created inside the callback
+  const similarities = tf.tidy(() => {
+    const query = tf.reshape(queryEmbedding, [-1, 1]);
+    const dim = query.shape[0];
+    const matrix = isTensorList
+      ? tf.concat(embeddingsTensor.slice(0, count).map((item) => tf.reshape(item, [1, dim])), 0)
+      : tf.slice(tf.reshape(embeddingsTensor, [-1, dim]), [0, 0], [count, dim]);
+    const dotProducts = tf.reshape(tf.matMul(matrix, query), [count]);
+    const magnitudes = tf.mul(tf.norm(matrix, 'euclidean', 1), tf.norm(query));
+    // divNoNan yields 0 instead of NaN for an all-zero vector
+    return Array.from(tf.divNoNan(dotProducts, magnitudes).dataSync());
+  });
+
+  const topIndices = similarities
+    .map((similarity, index) => ({ index, similarity }))
+    .sort((a, b) => b.similarity - a.similarity)
+    .slice(0, k)
+    .map((item) => item.index);
+  console.log('Top indices:', topIndices);
+  return topIndices;
+}
diff --git a/io/index.js b/io/index.js
new file mode 100644
index 0000000..c01d716
--- /dev/null
+++ b/io/index.js
@@ -0,0 +1,21 @@
+import axios from 'axios';
+
+/**
+ * Fetches a URL and returns the response body parsed by axios.
+ * @param {string} url - Address to request with HTTP GET.
+ * @returns {Promise<any>} The `data` field of the axios response.
+ * @throws Rethrows the axios error after logging it.
+ */
+export async function fetchJSONData(url) {
+  try {
+    const response = await axios.get(url);
+    return response.data;
+  } catch (error) {
+    console.error('Error fetching JSON data:', error);
+    throw error;
+  }
+}
+
+export default {
+  fetchJSONData,
+};
diff --git a/prompt.js b/prompt.js
index 24a8d16..2aafe8e 100644
--- a/prompt.js
+++ b/prompt.js
@@ -3,10 +3,14 @@ import fetch from 'node-fetch';
 import { config } from 'dotenv';
 import { chatCompletion, embedding, transcript } from './openAPI/index.js';
 import { makeRequestWithDelay } from './utils/makeRequest.js';
+import { loadEmbeddingsFromFile, embedTextData, findSimilar } from './embeddings/index.js'
+import { fetchJSONData } from './io/index.js';
 import FormData from 'form-data';
 import axios from 'axios'
 import fs from 'fs';
 import path from 'path';
+import tf from '@tensorflow/tfjs-node';
+import * as use from '@tensorflow-models/universal-sentence-encoder';
 
 config();
 
@@ -24,6 +28,8 @@ let functionAnswer = [];
 let RODOAnswer = [];
 let scraperAnswer = [];
 let whoamiAnswer = [];
+let embeddingsTensor = {};
+let searchAnswer = ""
 
 fetch('https://tasks.aidevs.pl/token/helloapi', {
   method: 'POST',
@@ -485,4 +491,55 @@ fetch('https://tasks.aidevs.pl/token/whoami', {
       console.log('Answer from API', response4);
     });
   })
+  .catch(error => console.error('Error:', error));
+
+fetch('https://tasks.aidevs.pl/token/search', {
+  method: 'POST',
+  headers: {
+    'Content-Type': 'application/json'
+  },
+  body: JSON.stringify({ apikey: APIKey })
+})
+  .then(async (response) => {
+    const data = await response.json();
+    const token = data.token;
+    const taskUrl = `https://tasks.aidevs.pl/task/${token}`;
+    const response2 = await makeRequestWithDelay(taskUrl, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json'
+      }
+    }, 10);
+    console.log(response2);
+
+    // Embed the question and load the embeddings precomputed by createEmbeddings.js
+    const query = response2.question;
+    const queryEmbedding = await embedTextData(query);
+    const embeddingsFilePath = 'embeddings.json';
+    embeddingsTensor = await loadEmbeddingsFromFile(embeddingsFilePath);
+
+    // Re-fetch the archive so the winning index can be mapped back to its URL
+    const jsonDataUrl = 'https://unknow.news/archiwum_aidevs.json';
+    const jsonData = await fetchJSONData(jsonDataUrl);
+    const contentData = jsonData.map(entry => entry.info);
+
+    // Rank archive entries by cosine similarity to the question
+    const similarIndices = findSimilar(embeddingsTensor, queryEmbedding, contentData, 5);
+    console.log('Top 5 similar indices:', similarIndices);
+    if (similarIndices.length === 0) {
+      throw new Error('No embeddings available to answer the search task');
+    }
+
+    // Answer with the best match rather than always the first archive entry
+    searchAnswer = jsonData[similarIndices[0]].url;
+    console.log(searchAnswer);
+    const response4 = await makeRequestWithDelay(`https://tasks.aidevs.pl/answer/${token}`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({ answer: searchAnswer })
+    }, 10);
+    console.log('Answer from API', response4);
+  })
   .catch(error => console.error('Error:', error));
\ No newline at end of file