diff --git a/ai-assistant/src/commands/AskCodeCommand.ts b/ai-assistant/src/commands/AskCodeCommand.ts index d37e151..3815390 100644 --- a/ai-assistant/src/commands/AskCodeCommand.ts +++ b/ai-assistant/src/commands/AskCodeCommand.ts @@ -21,7 +21,7 @@ export class AskCodeCommand implements ISlashCommand { * @param {string} query - The user's query. * @returns {Promise} A promise that resolves to the response to be given to the user or `null` if no answer or no reference is found. */ - private async process(http: IHttp, query: string): Promise { + private async process(http: IHttp, query: string): Promise { const db = new Neo4j(http) const llm = new Llama3_70B(http) const embeddingModel = new MiniLML6(http) @@ -33,7 +33,7 @@ export class AskCodeCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const keywords = await Query.getDBKeywordsFromQuery(llm, query) - if (!keywords.length) return null + if (!keywords.length) return "I'm sorry, I couldn't understand your query. Please try again." /** * --------------------------------------------------------------------------------------------- @@ -42,7 +42,7 @@ export class AskCodeCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const results = await Query.getCodeNodesFromKeywords(db, embeddingModel, keywords) - if (!results.length) return null + if (!results.length) return "I'm sorry, I couldn't find any code related to your query." /** * --------------------------------------------------------------------------------------------- @@ -53,7 +53,7 @@ export class AskCodeCommand implements ISlashCommand { const answer = await llm.ask( PromptFactory.makeAskCodePrompt(results.map((x) => x.code).join("\n\n"), query) ) - if (!answer) return null + if (!answer) return "I'm sorry, I'm having trouble connecting to the server. Please try again later." return answer } @@ -85,10 +85,6 @@ export class AskCodeCommand implements ISlashCommand { ) const res = await this.process(http, query) - if (res) { - await sendEditedMessage(res) - } else { - await sendEditedMessage("❌ Unable to process your query") - } + await sendEditedMessage(res) } } diff --git a/ai-assistant/src/commands/AskDocsCommand.ts b/ai-assistant/src/commands/AskDocsCommand.ts index c1d9ce3..e738e25 100644 --- a/ai-assistant/src/commands/AskDocsCommand.ts +++ b/ai-assistant/src/commands/AskDocsCommand.ts @@ -21,7 +21,7 @@ export class AskDocsCommand implements ISlashCommand { * @param {string} query - The user's query. * @returns {Promise} A promise that resolves to the response to be given to the user or `null` if no answer or no reference is found. */ - private async process(http: IHttp, query: string): Promise { + private async process(http: IHttp, query: string): Promise { const db = new Neo4j(http) const llm = new Llama3_70B(http) const embeddingModel = new MiniLML6(http) @@ -33,7 +33,7 @@ export class AskDocsCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const results = await Query.getDocsNodesFromQuery(db, embeddingModel, query) - if (!results.length) return null + if (!results.length) return "I'm sorry, I couldn't find any documentation related to your query." /** * --------------------------------------------------------------------------------------------- @@ -45,7 +45,7 @@ export class AskDocsCommand implements ISlashCommand { const answer = await llm.ask( PromptFactory.makeAskDocsPrompt(results.map((x) => x.content).join("\n\n"), uniqueSources, query) ) - if (!answer) return null + if (!answer) return "I'm sorry, I'm having trouble connecting to the server. Please try again later." return answer } @@ -68,10 +68,6 @@ export class AskDocsCommand implements ISlashCommand { ) const res = await this.process(http, query) - if (res) { - await sendEditedMessage(res) - } else { - await sendEditedMessage("❌ Unable to process your query") - } + await sendEditedMessage(res) } } diff --git a/ai-assistant/src/commands/DiagramCommand.ts b/ai-assistant/src/commands/DiagramCommand.ts index c8c2fd2..7be3e1a 100644 --- a/ai-assistant/src/commands/DiagramCommand.ts +++ b/ai-assistant/src/commands/DiagramCommand.ts @@ -47,9 +47,9 @@ export class DiagramCommand implements ISlashCommand { const diagram = await llm.ask( PromptFactory.makeDiagramPrompt(results.map((x) => x.code).join("\n\n"), query) ) + console.log(diagram) if (!diagram) return null - // @ts-ignore const diagramContent = diagram .replace("```mermaid", "") .replace("```", "") diff --git a/ai-assistant/src/commands/DocumentCommand.ts b/ai-assistant/src/commands/DocumentCommand.ts index a0c3415..31b7262 100644 --- a/ai-assistant/src/commands/DocumentCommand.ts +++ b/ai-assistant/src/commands/DocumentCommand.ts @@ -23,7 +23,7 @@ export class DocumentCommand implements ISlashCommand { private async process( http: IHttp, query: string - ): Promise<{ jsDoc: string; explanation: string | null } | null> { + ): Promise<{ jsDoc: string; explanation: string | null } | string> { const db = new Neo4j(http) const llm = new Llama3_70B(http) const embeddingModel = new MiniLML6(http) @@ -35,7 +35,7 @@ export class DocumentCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const keywords = await Query.getDBKeywordsFromQuery(llm, query) - if (!keywords.length) return null + if (!keywords.length) return "I'm sorry, I couldn't understand your query. Please try again." /** * --------------------------------------------------------------------------------------------- @@ -44,7 +44,7 @@ export class DocumentCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const codeNodes = await Query.getCodeNodesFromKeywords(db, embeddingModel, keywords) - if (!codeNodes.length) return null + if (!codeNodes.length) return "I'm sorry, I couldn't find any code related to your query." /** * --------------------------------------------------------------------------------------------- @@ -53,7 +53,7 @@ export class DocumentCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const result = await llm.ask(PromptFactory.makeDocumentPrompt(JSON.stringify(codeNodes), query)) - if (!result) return null + if (!result) return "I'm sorry, I couldn't generate documentation for your query." //@ts-ignore const jsDoc = result.split("")[1].split("")[0].trim() @@ -93,10 +93,10 @@ export class DocumentCommand implements ISlashCommand { ) let res = await this.process(http, query) - if (res) { - await sendEditedMessage(`${res.jsDoc}\n\n${res.explanation}`) + if (typeof res === "string") { + await sendEditedMessage(res) } else { - await sendEditedMessage("❌ No references found!") + await sendEditedMessage(`${res.jsDoc}\n\n${res.explanation}`) } } } diff --git a/ai-assistant/src/commands/TranslateCommand.ts b/ai-assistant/src/commands/TranslateCommand.ts index d6d92f1..c6600d7 100644 --- a/ai-assistant/src/commands/TranslateCommand.ts +++ b/ai-assistant/src/commands/TranslateCommand.ts @@ -21,7 +21,7 @@ export class TranslateCommand implements ISlashCommand { * @param {string} targetEntity - The target entity for translation. * @returns {Promise} A promise that resolves to the translated result or null if no translation is found. */ - private async process(http: IHttp, targetLanguage: string, targetEntity: string): Promise { + private async process(http: IHttp, targetLanguage: string, targetEntity: string): Promise { const db = new Neo4j(http) const llm = new Llama3_70B(http) const embeddingModel = new MiniLML6(http) @@ -33,7 +33,7 @@ export class TranslateCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const codeNodes = await Query.getCodeNodesFromKeywords(db, embeddingModel, [targetEntity]) - if (!codeNodes.length) return null + if (!codeNodes.length) return "I'm sorry, I couldn't find any code related to your query." /** * --------------------------------------------------------------------------------------------- @@ -48,7 +48,7 @@ export class TranslateCommand implements ISlashCommand { targetLanguage ) ) - if (!res) return null + if (!res) return "I'm sorry, I'm having trouble connecting to the server. Please try again later." return res } @@ -85,10 +85,6 @@ export class TranslateCommand implements ISlashCommand { ) const res = await this.process(http, targetEntity, targetLanguage) - if (res) { - await sendEditedMessage(res) - } else { - await sendEditedMessage("❌ Translation failed") - } + await sendEditedMessage(res) } } diff --git a/ai-assistant/src/commands/WhyUsedCommand.ts b/ai-assistant/src/commands/WhyUsedCommand.ts index d803343..52871cb 100644 --- a/ai-assistant/src/commands/WhyUsedCommand.ts +++ b/ai-assistant/src/commands/WhyUsedCommand.ts @@ -7,6 +7,7 @@ import { Query } from "../core/query" import { MiniLML6 } from "../core/services/embeddings/minilml6" import { Llama3_70B } from "../core/services/llm/llama3_70B" import { handleCommandResponse } from "../utils/handleCommandResponse" +import { renderDiagramToBase64URI } from "../core/diagram" export class WhyUsedCommand implements ISlashCommand { public command = "rcc-whyused" @@ -24,10 +25,13 @@ export class WhyUsedCommand implements ISlashCommand { private async process( http: IHttp, query: string - ): Promise<{ - explanation: string - diagram: string - } | null> { + ): Promise< + | { + explanation: string + diagram: string + } + | string + > { const db = new Neo4j(http) const llm = new Llama3_70B(http) const embeddingModel = new MiniLML6(http) @@ -39,7 +43,7 @@ export class WhyUsedCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const keywords = await Query.getDBKeywordsFromQuery(llm, query) - if (!keywords.length) return null + if (!keywords.length) return "I'm sorry, I couldn't understand your query. Please try again." /** * --------------------------------------------------------------------------------------------- @@ -48,7 +52,7 @@ export class WhyUsedCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const codeNodes = await Query.getCodeNodesFromKeywords(db, embeddingModel, keywords) - if (!codeNodes.length) return null + if (!codeNodes.length) return "I'm sorry, I couldn't find any code related to your query." /** * --------------------------------------------------------------------------------------------- @@ -59,7 +63,7 @@ export class WhyUsedCommand implements ISlashCommand { const result = await llm.ask( PromptFactory.makeWhyUsedPrompt(codeNodes.map((x) => x.code).join("\n\n"), query) ) - if (!result) return null + if (!result) return "I'm sorry, I couldn't find any references for your query." const explanation = result.split("")[1].split("")[0].trim() const diagram = result.split("")[1].split("")[0].trim() @@ -71,17 +75,12 @@ export class WhyUsedCommand implements ISlashCommand { * --------------------------------------------------------------------------------------------- */ const data = { explanation, diagram: "" } - // TODO: - // if (diagram) { - // const parsedDiagram = diagram - // .replace("```mermaid", "") - // .replace("```", "") - // .trim(); - // writeFileSync("output.txt", parsedDiagram); - // try { - // // data.diagram = await renderDiagramToBase64URI(parsedDiagram); - // } catch {} - // } + if (diagram) { + const parsedDiagram = diagram.replace("```mermaid", "").replace("```", "").trim() + try { + data.diagram = await renderDiagramToBase64URI(http, parsedDiagram) + } catch {} + } return data } @@ -113,11 +112,10 @@ export class WhyUsedCommand implements ISlashCommand { ) const res = await this.process(http, query) - if (!res) { - await sendEditedMessage("❌ No references found!") + if (typeof res === "string") { + await sendEditedMessage(res) return } - await sendEditedMessage(res.explanation, [res.diagram!]) } } diff --git a/ai-assistant/src/core/query.ts b/ai-assistant/src/core/query.ts index 22a2c6e..b3dbc91 100644 --- a/ai-assistant/src/core/query.ts +++ b/ai-assistant/src/core/query.ts @@ -27,12 +27,12 @@ export namespace Query { const result = await db.run( ` CALL db.index.vector.queryNodes("${indexName}", 2, $vector) - YIELD node, score - WHERE score >= ${threshold} - WITH node, score - OPTIONAL MATCH (node)-[r]->(relatedNode) - RETURN node, COLLECT(relatedNode) AS relatedNodes, score - ORDER BY score DESC + YIELD node, score + WHERE score >= ${threshold} + WITH node, score + OPTIONAL MATCH (node)-[r]->(relatedNode) + RETURN node, COLLECT(relatedNode) AS relatedNodes, score + ORDER BY score DESC `, { vector } ) @@ -67,13 +67,16 @@ export namespace Query { keywords: string[] ): Promise { const results: DBNode[] = [] - for (const keyword of keywords) { - const queryVector = await embeddingModel.generate(keyword) - if (!queryVector) continue - const result = await getDBNodesFromVectorQuery(db, "nameEmbeddings", queryVector, 0.85) - results.push(...result) - } + try { + for (const keyword of keywords) { + const queryVector = await embeddingModel.generate(keyword) + if (!queryVector) continue + + const result = await getDBNodesFromVectorQuery(db, "nameEmbeddings", queryVector, 0.85) + results.push(...result) + } + } catch {} return results } diff --git a/ai-assistant/src/core/services/db/neo4j.ts b/ai-assistant/src/core/services/db/neo4j.ts index 22dbdb4..28c67e5 100644 --- a/ai-assistant/src/core/services/db/neo4j.ts +++ b/ai-assistant/src/core/services/db/neo4j.ts @@ -33,12 +33,14 @@ export class Neo4j implements IDB { // password: string ) { this.http = http + this.baseUrl = "http://neo4j:7474" this.username = "neo4j" this.password = "strongpasswordsafe123" - // this.baseUrl = "http://44.192.104.170:7474"; - // this.username = "neo4j"; - // this.password = "individuals-societies-wools"; + + // this.baseUrl = "http://44.192.104.170:7474" + // this.username = "neo4j" + // this.password = "individuals-societies-wools" } /** diff --git a/ai-assistant/src/core/services/embeddings/minilml6.ts b/ai-assistant/src/core/services/embeddings/minilml6.ts index fd14c72..f69e903 100644 --- a/ai-assistant/src/core/services/embeddings/minilml6.ts +++ b/ai-assistant/src/core/services/embeddings/minilml6.ts @@ -1,5 +1,4 @@ import { IHttp } from "@rocket.chat/apps-engine/definition/accessors" -import { HF_TOKEN } from "../../../credentials" import { IEmbeddingModel } from "./embeddings.types" export class MiniLML6 implements IEmbeddingModel { @@ -10,52 +9,31 @@ export class MiniLML6 implements IEmbeddingModel { this.http = http } - /** - * Extracts embeddings from Hugging Face API using the MiniLM-L6 model. - * @param text - The input text to extract embeddings from. - * @returns A promise that resolves to an array of numbers representing the embeddings, or null if the request fails. - */ - async fromHuggingFace(text: string): Promise { - const res = await this.http.post( - `https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2`, - { - headers: { - accept: "application/json", - "Content-Type": "application/json", - authorization: "Bearer " + HF_TOKEN, - }, - data: { - inputs: [text], - options: { - wait_for_model: true, - }, - }, - } - ) - if (!res || res.statusCode !== 200) return null - - const data = res.data[0] as number[] - return data - } - /** * Generates embeddings for the given text. * @param text - The input text for which embeddings need to be generated. * @returns A promise that resolves to an array of numbers representing the embeddings for the text, or null if the generation fails. */ async generate(text: string): Promise { - // return await this.fromHuggingFace(text); + let tries = 5 + while (tries--) { + try { + const res = await this.http.post(this.baseURL, { + headers: { + accept: "application/json", + "Content-Type": "application/json", + }, + data: [text], + }) + if (!res || res.statusCode !== 200) return null - const res = await this.http.post(this.baseURL, { - headers: { - accept: "application/json", - "Content-Type": "application/json", - }, - data: [text], - }) - if (!res || res.statusCode !== 200) return null + const data = res.data["embeddings"][0] as number[] + return data + } catch (e) { + console.log(e) + } + } - const data = res.data["embeddings"][0] as number[] - return data + return [] } } diff --git a/ai-assistant/src/core/services/llm/llama3_70B.ts b/ai-assistant/src/core/services/llm/llama3_70B.ts index a727693..7e5bf0f 100644 --- a/ai-assistant/src/core/services/llm/llama3_70B.ts +++ b/ai-assistant/src/core/services/llm/llama3_70B.ts @@ -1,6 +1,5 @@ import { IHttp } from "@rocket.chat/apps-engine/definition/accessors" -import { HF_TOKEN } from "../../../credentials" import { Prompt } from "../../prompt" import { ILLMModel } from "./llm.types" @@ -13,29 +12,6 @@ export class Llama3_70B implements ILLMModel { this.http = http } - async fromHuggingFace(prompt: Prompt): Promise { - const url = `https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.1/v1/chat/completions` - const res = await this.http.post(url, { - headers: { - "Content-Type": "application/json", - authorization: "Bearer " + HF_TOKEN, - }, - data: { - temperature: 0, - messages: prompt.messages, - model: "mistralai/Mistral-7B-Instruct-v0.1", - stream: false, - max_tokens: 10000, - }, - }) - if (!res.content) return null - - const message = JSON.parse(res.content).choices[0].message.content - console.log(message) - - return message - } - /** * Asynchronously asks a prompt and returns the response. * @@ -43,24 +19,31 @@ export class Llama3_70B implements ILLMModel { * @returns {Promise} A promise that resolves with the response string or null if no response is available. */ async ask(prompt: Prompt): Promise { - // return await this.fromHuggingFace(prompt); - - const url = `${this.baseURL}/chat/completions` - const res = await this.http.post(url, { - headers: { - "Content-Type": "application/json", - }, - data: { - temperature: 0, - messages: prompt.messages, - }, - }) - if (!res.content) return null - - // @ts-ignore - const message = JSON.parse(res.content).choices[0].message.content - console.log(message) - - return message + let tries = 5 + while (tries--) { + try { + const url = `${this.baseURL}/chat/completions` + const res = await this.http.post(url, { + headers: { + "Content-Type": "application/json", + }, + data: { + temperature: 0, + messages: prompt.messages, + }, + }) + if (!res.content) return null + + // @ts-ignore + const message = JSON.parse(res.content).choices[0].message.content + console.log(message) + + return message + } catch (e) { + console.log(e) + } + } + + return null } } diff --git a/ai-assistant/src/endpoints/ingest.ts b/ai-assistant/src/endpoints/ingest.ts index 7296784..a1af609 100644 --- a/ai-assistant/src/endpoints/ingest.ts +++ b/ai-assistant/src/endpoints/ingest.ts @@ -23,7 +23,7 @@ export class IngestEndpoint extends ApiEndpoint { makeBodies(content: any): [IngestEndpointRequestBody, IngestEndpointResponseBody] { const requestBody = content as IngestEndpointRequestBody const responseBody: IngestEndpointResponseBody = { - batchID: "hey", + batchID: requestBody.batchID, status: 200, } @@ -38,26 +38,30 @@ export class IngestEndpoint extends ApiEndpoint { http: IHttp, persis: IPersistence ): Promise { - let [{ nodes }, responseBody] = this.makeBodies(request.content) + try { + let [{ nodes }, responseBody] = this.makeBodies(request.content) - // ----------------------------------------------------------------------------------- - const db = new Neo4j(http) - await db.verifyConnectivity() - const embeddingModel = new MiniLML6(http) - // ----------------------------------------------------------------------------------- - nodes = nodes.map((node) => { - if ("element" in node) { - return new DevDocDBNode(node) - } else { - return new DBNode(node) - } - }) - await Promise.all(nodes.map((x) => x.fillEmbeddings(embeddingModel))) - // ----------------------------------------------------------------------------------- - const jobs = nodes.map((node) => db.run(node.getDBInsertQuery(), node)) - await Promise.all(jobs) - // ----------------------------------------------------------------------------------- + // ----------------------------------------------------------------------------------- + const db = new Neo4j(http) + await db.verifyConnectivity() + const embeddingModel = new MiniLML6(http) + // ----------------------------------------------------------------------------------- + nodes = nodes.map((node) => { + if ("element" in node) { + return new DevDocDBNode(node) + } else { + return new DBNode(node) + } + }) + await Promise.all(nodes.map((x) => x.fillEmbeddings(embeddingModel))) + // ----------------------------------------------------------------------------------- + const jobs = nodes.map((node) => db.run(node.getDBInsertQuery(), node)) + await Promise.all(jobs) + // ----------------------------------------------------------------------------------- - return this.success(JSON.stringify(responseBody)) + return this.success(JSON.stringify(responseBody)) + } catch (e) { + return this.success(JSON.stringify({ status: 500, error: e })) + } } } diff --git a/ai-assistant/src/endpoints/purgeDB.ts b/ai-assistant/src/endpoints/purgeDB.ts index 51dbd1f..d0e90d2 100644 --- a/ai-assistant/src/endpoints/purgeDB.ts +++ b/ai-assistant/src/endpoints/purgeDB.ts @@ -42,8 +42,8 @@ export class PurgeDBEndpoint extends ApiEndpoint { // Create indices for name embeddings [ - "CREATE VECTOR INDEX `nameEmbeddings` IF NOT EXISTS", - "FOR (n: Node) ON (n.nameEmbeddings)", + "CREATE VECTOR INDEX `codeEmbeddings` IF NOT EXISTS", + "FOR (n: Node) ON (n.codeEmbeddings)", "OPTIONS {indexConfig: {", " `vector.dimensions`: 384,", " `vector.similarity_function`: 'COSINE'", diff --git a/ingestion/src/constants.ts b/ingestion/src/constants.ts index fc27a15..a2cfaba 100644 --- a/ingestion/src/constants.ts +++ b/ingestion/src/constants.ts @@ -3,4 +3,5 @@ configDotenv() export const DOCUMENTATION_URL = "https://developer.rocket.chat/docs" export const REPO_URI = "https://github.com/RocketChat/Rocket.Chat" +// export const REPO_URI = "https://github.com/preeesha/florence-backend" export const RC_APP_URI = process.env["RC_APP_URI"] ?? "" diff --git a/ingestion/src/main.ts b/ingestion/src/main.ts index 84b6dfb..4650f98 100644 --- a/ingestion/src/main.ts +++ b/ingestion/src/main.ts @@ -10,7 +10,7 @@ import { FileProcessor } from "./process/prepare/processor/file" namespace Algorithms { export async function execCommand(command: string) { await new Promise((resolve, reject) => { - console.log(`🕒 Cloning repository: "${command}"`) + console.log(`🕒 ${command}`) exec(command, (error, stdout, stderr) => { if (error) { @@ -24,6 +24,9 @@ namespace Algorithms { } async function main() { + await new Promise((resolve) => setTimeout(resolve, 1000)) + console.clear() + let tries = 5 while (tries--) { try { @@ -40,6 +43,8 @@ async function main() { await insertDataIntoDB(codebase.dataDirPath) } await Algorithms.execCommand(`rm -rf ${sessionID}`) + + break } catch { console.error("Retrying", tries) } diff --git a/ingestion/src/process/ingest/ingest.ts b/ingestion/src/process/ingest/ingest.ts index 026ae31..b5c5d37 100644 --- a/ingestion/src/process/ingest/ingest.ts +++ b/ingestion/src/process/ingest/ingest.ts @@ -1,5 +1,4 @@ -import { readdirSync } from "fs" -import { readFile } from "fs/promises" +import { readFileSync, readdirSync } from "fs" import path from "path" import { v4 as uuid } from "uuid" @@ -26,21 +25,30 @@ namespace Algorithms { } export async function insertBatch(batchID: string, nodes: DBNode[]): Promise { - try { - const res = await fetch(`${RC_APP_URI}/ingest`, { - method: "POST", - headers: { - accept: "application/json", - "Content-Type": "application/json", - }, - body: JSON.stringify({ nodes, batchID }), - }) + let tries = 5 + while (tries--) { + try { + const res = await fetch(`${RC_APP_URI}/ingest`, { + method: "POST", + headers: { + accept: "application/json", + "Content-Type": "application/json", + }, + body: JSON.stringify({ nodes, batchID }), + }) + + if (res.status !== 200) { + console.log(res) + return false + } - return res.status === 200 - } catch (e) { - console.log(e) - return false + return true + // return res.status === 200 + } catch (e) { + console.log(e) + } } + return false } export async function establishRelations(relations: DBNodeRelation[]): Promise { @@ -66,13 +74,15 @@ export async function insertDataIntoDB(batchesDirPath: string) { const files = readdirSync(batchesDirPath).map((file) => path.resolve(batchesDirPath, file)) - /* Step 1: Empty DB */ + // /* Step 1: Empty DB */ { const success = await Algorithms.purgeDB() if (!success) { console.log("❌ Error emptying db") return } + + console.log("🕒 Purged DB") } /* Step 2: Insert batch */ @@ -82,14 +92,21 @@ export async function insertDataIntoDB(batchesDirPath: string) { const relations: (DBNodeRelation | DevDocDBNodeRelation)[] = [] // Insert each batch - for (let i = 0; i < files.length; i += 100) { - const group = files.slice(i, i + 100) - const jobs = group.map(async (file) => { - const batchID = uuid() - const data = await readFile(file, "utf-8") - const nodes = Object.values(JSON.parse(data)) as DBNode[] - - for (const node of nodes) + let batches: string[][] = [] + for (let i = 0; i < files.length; i += 10) { + batches.push(files.slice(i, i + 10)) + } + + console.log("🕒 Waiting for batches") + for (const group of batches) { + const batchID = uuid() + const nodes: DBNode[] = [] + + for (const file of group) { + const data = readFileSync(file, "utf-8") + nodes.push(...(Object.values(JSON.parse(data)) as DBNode[])) + + for (const node of nodes) { relations.push( ...node.relations.map((relation) => ({ source: node.id, @@ -97,16 +114,22 @@ export async function insertDataIntoDB(batchesDirPath: string) { relation: relation.relation, })) ) - - const success = await Algorithms.insertBatch(batchID, nodes) - if (success) { - console.log(`📦 ${batchID} inserted`) - } else { - errorBatches.add(batchID) } - }) - await Promise.all(jobs) + } + + const success = await Algorithms.insertBatch(batchID, nodes) + if (success) { + console.log(`📦 ${batchID} inserted`) + } else { + console.log(`❌ Error inserting ${batchID}`) + errorBatches.add(batchID) + } + + await new Promise((resolve) => setTimeout(resolve, 3_000)) } + + console.log("📦 All batches inserted") + if (errorBatches.size > 0) console.log("❌ Error batches", errorBatches) // Establish relations diff --git a/ingestion/src/process/prepare/codebase.ts b/ingestion/src/process/prepare/codebase.ts index 7ab3ab5..16a942d 100644 --- a/ingestion/src/process/prepare/codebase.ts +++ b/ingestion/src/process/prepare/codebase.ts @@ -39,13 +39,13 @@ export class Codebase { return this._dataDirPath } - private initializeDataDirectory(removeExisting = true): void { - this._dataDirName = uuid() + private initializeDataDirectory(removeExisting = false): void { + this._dataDirName = "data" || uuid() this._dataDirPath = path.resolve(this._path, this._dataDirName) /* Handle data directory */ if (removeExisting && existsSync(this._dataDirPath)) rmSync(this._dataDirPath, { recursive: true }) - mkdirSync(this._dataDirPath) + // mkdirSync(this._dataDirPath) } private prepareFilesMetadata() {