diff --git a/src/adapters/supabase/helpers/comment.ts b/src/adapters/supabase/helpers/comment.ts index 3fa08b2..295f8ae 100644 --- a/src/adapters/supabase/helpers/comment.ts +++ b/src/adapters/supabase/helpers/comment.ts @@ -37,7 +37,7 @@ export class Comment extends SuperSupabase { } else { //Create the embedding for this comment const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { markdown = null as string | null; payload = null as Record | null; @@ -54,21 +54,34 @@ export class Comment extends SuperSupabase { this.context.logger.info("Comment created successfully"); } - async updateComment(markdown: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) { + async updateComment( + markdown: string | null, + commentNodeId: string, + authorId: number, + payload: Record | null, + isPrivate: boolean, + issueId: string + ) { //Create the embedding for this comment const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { markdown = null as string | null; payload = null as Record | null; plaintext = null as string | null; } - const { error } = await this.supabase - .from("issue_comments") - .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) - .eq("id", commentNodeId); - if (error) { - this.context.logger.error("Error updating comment", error); + const comments = await this.getComment(commentNodeId); + if (comments && comments.length == 0) { + this.context.logger.info("Comment does not exist, creating a new one"); + await this.createComment(markdown, commentNodeId, authorId, payload, isPrivate, issueId); + } else { + const { error } = await this.supabase + .from("issue_comments") + .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) + .eq("id", commentNodeId); + if (error) { + this.context.logger.error("Error updating comment", error); + } } } diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 6bfef09..059ee75 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -37,7 +37,7 @@ export class Issues extends SuperSupabase { return; } else { const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { payload = null; markdown = null; @@ -52,21 +52,24 @@ export class Issues extends SuperSupabase { this.context.logger.info("Issue created successfully"); } - async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean) { - //Create the embedding for this comment + async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean, authorId: number) { const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { - markdown = null as string | null; - payload = null as Record | null; - plaintext = null as string | null; + markdown = null; + payload = null; + plaintext = null; } - const { error } = await this.supabase - .from("issues") - .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) - .eq("id", issueNodeId); - if (error) { - this.context.logger.error("Error updating comment", error); + const issues = await this.getIssue(issueNodeId); + if (issues && issues.length == 0) { + this.context.logger.info("Issue does not exist, creating a new one"); + await this.createIssue(issueNodeId, payload, isPrivate, markdown, authorId); + } else { + const { error } = await this.supabase.from("issues").update({ markdown, plaintext, embedding, payload, modified_at: new Date() }).eq("id", issueNodeId); + + if (error) { + this.context.logger.error("Error updating comment", error); + } } } @@ -96,6 +99,7 @@ export class Issues extends SuperSupabase { current_id: currentId, query_embedding: embedding, threshold: threshold, + top_k: 5, }); if (error) { this.context.logger.error("Error finding similar issues", error); diff --git a/src/handlers/add-issue.ts b/src/handlers/add-issue.ts index 969a5c2..c828bbf 100644 --- a/src/handlers/add-issue.ts +++ b/src/handlers/add-issue.ts @@ -1,5 +1,6 @@ import { Context } from "../types"; import { IssuePayload } from "../types/payload"; +import { removeFootnotes } from "./issue-deduplication"; export async function addIssue(context: Context) { const { @@ -16,7 +17,8 @@ export async function addIssue(context: Context) { if (!markdown) { throw new Error("Issue body is empty"); } - await supabase.issue.createIssue(nodeId, payload, isPrivate, markdown, authorId); + const cleanedIssue = removeFootnotes(markdown); + await supabase.issue.createIssue(nodeId, payload, isPrivate, cleanedIssue, authorId); } catch (error) { if (error instanceof Error) { logger.error(`Error creating issue:`, { error: error, stack: error.stack }); diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 599b378..0cca940 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -5,7 +5,9 @@ import { IssuePayload } from "../types/payload"; export interface IssueGraphqlResponse { node: { title: string; + number: number; url: string; + body: string; repository: { name: string; owner: { @@ -14,13 +16,15 @@ export interface IssueGraphqlResponse { }; }; similarity: string; + mostSimilarSentence: { sentence: string; similarity: number; index: number }; } /** - * Check if an issue is similar to any existing issues in the database - * @param context - * @returns true if the issue is similar to an existing issue, false otherwise - */ + * Checks if the current issue is a duplicate of an existing issue. + * If a similar issue is found, a footnote is added to the current issue. + * @param context The context object + * @returns True if a similar issue is found, false otherwise + **/ export async function issueChecker(context: Context): Promise { const { logger, @@ -29,19 +33,15 @@ export async function issueChecker(context: Context): Promise { } = context; const { payload } = context as { payload: IssuePayload }; const issue = payload.issue; - - //Find and remove the footnotes from the issue content - const existingBody = context.payload.issue.body; - const footnoteIndex = existingBody?.indexOf("\n###### Similar"); - const issueBody = footnoteIndex !== -1 ? existingBody?.substring(0, footnoteIndex) : existingBody; - const issueContent = issueBody + issue.title; - - // Fetch all similar issues based on settings.warningThreshold - const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.warningThreshold, issue.node_id); + let issueBody = issue.body; + if (!issueBody) { + logger.info("Issue body is empty"); + return false; + } + issueBody = removeFootnotes(issueBody); + const similarIssues = await supabase.issue.findSimilarIssues(issue.title + removeFootnotes(issueBody), context.config.warningThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { const matchIssues = similarIssues.filter((issue) => issue.similarity >= context.config.matchThreshold); - - // Handle issues that match the MATCH_THRESHOLD (Very Similar) if (matchIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.matchThreshold} already exists`); await octokit.issues.update({ @@ -53,42 +53,84 @@ export async function issueChecker(context: Context): Promise { }); } - // Handle issues that match the settings.warningThreshold but not the MATCH_THRESHOLD if (similarIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.warningThreshold} already exists`); - await handleSimilarIssuesComment(context, payload, issue.number, similarIssues, issueBody || ""); + await handleSimilarIssuesComment(context, payload, issueBody, issue.number, similarIssues); return true; } } + context.logger.info("No similar issues found"); + //Use the IssueBody (Without footnotes) to update the issue + if (issueBody !== issue.body) { + await octokit.issues.update({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: issue.number, + body: issueBody, + }); + } return false; } -/** - * Compare the repository and issue name to the similar issue repository and issue name - * @param repoOrg - * @param similarIssueRepoOrg - * @param repoName - * @param similarIssueRepoName - * @returns - */ function matchRepoOrgToSimilarIssueRepoOrg(repoOrg: string, similarIssueRepoOrg: string, repoName: string, similarIssueRepoName: string): boolean { return repoOrg === similarIssueRepoOrg && repoName === similarIssueRepoName; } /** - * Handle commenting on an issue with similar issues information - * @param context - * @param payload - * @param issueNumber - * @param similarIssues + * Finds the most similar sentence in a similar issue to a sentence in the current issue. + * @param issueContent The content of the current issue + * @param similarIssueContent The content of the similar issue + * @returns The most similar sentence and its similarity score */ +function findMostSimilarSentence(issueContent: string, similarIssueContent: string): { sentence: string; similarity: number; index: number } { + // Regex to match sentences while preserving URLs + const sentenceRegex = /([^.!?\s][^.!?]*(?:[.!?](?!['"]?\s|$)[^.!?]*)*[.!?]?['"]?(?=\s|$))/g; + + // Function to split text into sentences while preserving URLs + const splitIntoSentences = (text: string): string[] => { + const sentences: string[] = []; + let match; + while ((match = sentenceRegex.exec(text)) !== null) { + sentences.push(match[0].trim()); + } + return sentences; + }; + + const issueSentences = splitIntoSentences(issueContent); + const similarIssueSentences = splitIntoSentences(similarIssueContent); + + let maxSimilarity = 0; + let mostSimilarSentence = ""; + let mostSimilarIndex = -1; + + issueSentences.forEach((sentence, index) => { + const similarities = similarIssueSentences.map((similarSentence) => { + const editDistance = findEditDistance(sentence, similarSentence); + const maxLength = Math.max(sentence.length, similarSentence.length); + // Normalized similarity (edit distance) + return 1 - editDistance / maxLength; + }); + const maxSentenceSimilarity = Math.max(...similarities); + if (maxSentenceSimilarity > maxSimilarity) { + maxSimilarity = maxSentenceSimilarity; + mostSimilarSentence = sentence; + mostSimilarIndex = index; + } + }); + + if (!mostSimilarSentence) { + throw new Error("No similar sentence found"); + } + return { sentence: mostSimilarSentence, similarity: maxSimilarity, index: mostSimilarIndex }; +} + async function handleSimilarIssuesComment( context: Context, payload: IssuePayload, + issueBody: string, issueNumber: number, - similarIssues: IssueSimilaritySearchResult[], - modifiedBody: string + similarIssues: IssueSimilaritySearchResult[] ) { const issueList: IssueGraphqlResponse[] = await Promise.all( similarIssues.map(async (issue: IssueSimilaritySearchResult) => { @@ -98,6 +140,8 @@ async function handleSimilarIssuesComment( ... on Issue { title url + number + body repository { name owner { @@ -110,32 +154,111 @@ async function handleSimilarIssuesComment( { issueNodeId: issue.issue_id } ); issueUrl.similarity = Math.round(issue.similarity * 100).toString(); + issueUrl.mostSimilarSentence = findMostSimilarSentence(issueBody, issueUrl.node.body); return issueUrl; }) ); - let finalIndex = 0; - const commentBody = issueList - .filter((issue) => - matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) - ) - .map((issue, index) => { - const modifiedUrl = issue.node.url.replace("https://github.com", "https://www.github.com"); - return `[^0${index + 1}^]: [${issue.node.title}](${modifiedUrl}) ${issue.similarity}%`; - }) - .join("\n"); + const relevantIssues = issueList.filter((issue) => + matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) + ); - if (commentBody.length === 0) { + if (relevantIssues.length === 0) { + context.logger.info("No relevant issues found with the same repository and organization"); + } + + if (!issueBody) { return; } - const footnoteLinks = [...Array(++finalIndex).keys()].map((i) => `[^0${i + 1}^]`).join(""); - const body = "\n###### Similar " + footnoteLinks + "\n\n" + commentBody; + // Find existing footnotes in the body + const footnoteRegex = /\[\^(\d+)\^\]/g; + const existingFootnotes = issueBody.match(footnoteRegex) || []; + const highestFootnoteIndex = existingFootnotes.length > 0 ? Math.max(...existingFootnotes.map((fn) => parseInt(fn.match(/\d+/)?.[0] ?? "0"))) : 0; + let updatedBody = issueBody; + let footnotes: string[] | undefined; + // Sort relevant issues by similarity in ascending order + relevantIssues.sort((a, b) => parseFloat(a.similarity) - parseFloat(b.similarity)); + + relevantIssues.forEach((issue, index) => { + const footnoteIndex = highestFootnoteIndex + index + 1; // Continue numbering from the highest existing footnote number + const footnoteRef = `[^0${footnoteIndex}^]`; + const modifiedUrl = issue.node.url.replace("https://github.com", "https://www.github.com"); + const { sentence } = issue.mostSimilarSentence; + + // Insert footnote reference in the body + const sentencePattern = new RegExp(`${sentence.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`, "g"); + updatedBody = updatedBody.replace(sentencePattern, `${sentence}${footnoteRef}`); - //Append the new foot note + // Initialize footnotes array if not already done + if (!footnotes) { + footnotes = []; + } + + // Add new footnote to the array + footnotes.push(`${footnoteRef}: ⚠ ${issue.similarity}% possible duplicate - [${issue.node.title}](${modifiedUrl}#${issue.node.number})\n\n`); + }); + + // Append new footnotes to the body, keeping the previous ones + if (footnotes) { + updatedBody += "\n\n" + footnotes.join(""); + } + + // Update the issue with the modified body await context.octokit.issues.update({ owner: payload.repository.owner.login, repo: payload.repository.name, issue_number: issueNumber, - body: modifiedBody + body, + body: updatedBody, }); } + +/** + * Finds the edit distance between two strings using dynamic programming. + * The edit distance is a way of quantifying how dissimilar two strings are to one another by + * counting the minimum number of operations required to transform one string into the other. + * For more information, see: https://en.wikipedia.org/wiki/Edit_distance + * @param sentenceA The first string + * @param sentenceB The second string + * @returns The edit distance between the two strings + */ +function findEditDistance(sentenceA: string, sentenceB: string): number { + const lengthA = sentenceA.length; + const lengthB = sentenceB.length; + const distanceMatrix: number[][] = Array.from({ length: lengthA + 1 }, () => Array.from({ length: lengthB + 1 }, () => 0)); + + for (let indexA = 0; indexA <= lengthA; indexA++) { + for (let indexB = 0; indexB <= lengthB; indexB++) { + if (indexA === 0) { + distanceMatrix[indexA][indexB] = indexB; + } else if (indexB === 0) { + distanceMatrix[indexA][indexB] = indexA; + } else if (sentenceA[indexA - 1] === sentenceB[indexB - 1]) { + distanceMatrix[indexA][indexB] = distanceMatrix[indexA - 1][indexB - 1]; + } else { + distanceMatrix[indexA][indexB] = + 1 + Math.min(distanceMatrix[indexA - 1][indexB], distanceMatrix[indexA][indexB - 1], distanceMatrix[indexA - 1][indexB - 1]); + } + } + } + + return distanceMatrix[lengthA][lengthB]; +} + +/** + * Removes all footnotes from the issue content. + * This includes both the footnote references in the body and the footnote definitions at the bottom. + * @param content The content of the issue + * @returns The content without footnotes + */ +export function removeFootnotes(content: string): string { + const footnoteDefRegex = /\[\^(\d+)\^\]: ⚠ \d+% possible duplicate - [^\n]+(\n|$)/g; + const footnotes = content.match(footnoteDefRegex); + let contentWithoutFootnotes = content.replace(footnoteDefRegex, ""); + if (footnotes) { + footnotes.forEach((footnote) => { + const footnoteNumber = footnote.match(/\d+/)?.[0]; + contentWithoutFootnotes = contentWithoutFootnotes.replace(new RegExp(`\\[\\^${footnoteNumber}\\^\\]`, "g"), ""); + }); + } + return contentWithoutFootnotes.replace(/\n{2,}/g, "\n").trim(); +} diff --git a/src/handlers/issue-matching.ts b/src/handlers/issue-matching.ts index cc1d060..f2e866a 100644 --- a/src/handlers/issue-matching.ts +++ b/src/handlers/issue-matching.ts @@ -24,17 +24,12 @@ export interface IssueGraphqlResponse { similarity: number; } -const commentBuilder = (matchResultArray: Map>): string => { - const commentLines: string[] = [">[!NOTE]", ">The following contributors may be suitable for this task:"]; - matchResultArray.forEach((issues, assignee) => { - commentLines.push(`>### [${assignee}](https://www.github.com/${assignee})`); - issues.forEach((issue) => { - commentLines.push(issue); - }); - }); - return commentLines.join("\n"); -}; - +/** + * Checks if the current issue is a duplicate of an existing issue. + * If a similar issue is found, a comment is added to the current issue. + * @param context The context object + * @returns True if a similar issue is found, false otherwise + **/ export async function issueMatching(context: Context) { const { logger, @@ -45,15 +40,10 @@ export async function issueMatching(context: Context) { const issue = payload.issue; const issueContent = issue.body + issue.title; const commentStart = ">The following contributors may be suitable for this task:"; - - // On Adding the labels to the issue, the bot should - // create a new comment with users who completed task most similar to the issue - // if the comment already exists, it should update the comment with the new users const matchResultArray: Map> = new Map(); const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.jobMatchingThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { - // Find the most similar issue and the users who completed the task - similarIssues.sort((a, b) => b.similarity - a.similarity); + similarIssues.sort((a, b) => b.similarity - a.similarity); // Sort by similarity const fetchPromises = similarIssues.map(async (issue) => { const issueObject: IssueGraphqlResponse = await context.octokit.graphql( `query ($issueNodeId: ID!) { @@ -84,7 +74,6 @@ export async function issueMatching(context: Context) { issueObject.similarity = issue.similarity; return issueObject; }); - const issueList = await Promise.all(fetchPromises); issueList.forEach((issue) => { if (issue.node.closed && issue.node.stateReason === "COMPLETED" && issue.node.assignees.nodes.length > 0) { @@ -148,3 +137,19 @@ export async function issueMatching(context: Context) { logger.ok(`Successfully created issue comment!`); logger.debug(`Exiting issueMatching handler`); } + +/** + * Builds the comment to be added to the issue + * @param matchResultArray The array of issues to be matched + * @returns The comment to be added to the issue + */ +function commentBuilder(matchResultArray: Map>): string { + const commentLines: string[] = [">[!NOTE]", ">The following contributors may be suitable for this task:"]; + matchResultArray.forEach((issues, assignee) => { + commentLines.push(`>### [${assignee}](https://www.github.com/${assignee})`); + issues.forEach((issue) => { + commentLines.push(issue); + }); + }); + return commentLines.join("\n"); +} diff --git a/src/handlers/update-comments.ts b/src/handlers/update-comments.ts index b1b9d18..6cc9545 100644 --- a/src/handlers/update-comments.ts +++ b/src/handlers/update-comments.ts @@ -7,15 +7,18 @@ export async function updateComment(context: Context) { adapters: { supabase }, } = context; const { payload } = context as { payload: CommentPayload }; + const markdown = payload.comment.body; + const authorId = payload.comment.user?.id || -1; const nodeId = payload.comment.node_id; const isPrivate = payload.repository.private; - const markdown = payload.comment.body || null; + const issueId = payload.issue.node_id; + // Fetch the previous comment and update it in the db try { if (!markdown) { throw new Error("Comment body is empty"); } - await supabase.comment.updateComment(markdown, nodeId, payload, isPrivate); + await supabase.comment.updateComment(markdown, nodeId, authorId, payload, isPrivate, issueId); } catch (error) { if (error instanceof Error) { logger.error(`Error updating comment:`, { error: error, stack: error.stack }); diff --git a/src/handlers/update-issue.ts b/src/handlers/update-issue.ts index 763b2ba..cec5de2 100644 --- a/src/handlers/update-issue.ts +++ b/src/handlers/update-issue.ts @@ -1,5 +1,6 @@ import { Context } from "../types"; import { IssuePayload } from "../types/payload"; +import { removeFootnotes } from "./issue-deduplication"; export async function updateIssue(context: Context) { const { @@ -11,12 +12,15 @@ export async function updateIssue(context: Context) { const nodeId = payload.issue.node_id; const isPrivate = payload.repository.private; const markdown = payload.issue.body + " " + payload.issue.title || null; + const authorId = payload.issue.user?.id || -1; // Fetch the previous issue and update it in the db try { if (!markdown) { throw new Error("Issue body is empty"); } - await supabase.issue.updateIssue(markdown, nodeId, payloadObject, isPrivate); + //clean issue by removing footnotes + const cleanedIssue = removeFootnotes(markdown); + await supabase.issue.updateIssue(cleanedIssue, nodeId, payloadObject, isPrivate, authorId); } catch (error) { if (error instanceof Error) { logger.error(`Error updating issue:`, { error: error, stack: error.stack }); diff --git a/src/main.ts b/src/main.ts index 91490bc..0b9b9a4 100644 --- a/src/main.ts +++ b/src/main.ts @@ -36,7 +36,7 @@ async function returnDataToKernel(repoToken: string, stateId: string, output: ob await octokit.repos.createDispatchEvent({ owner: github.context.repo.owner, repo: github.context.repo.repo, - event_type: "return_data_to_ubiquibot_kernel", + event_type: "return-data-to-ubiquity-os-kernel", client_payload: { state_id: stateId, output: JSON.stringify(output), diff --git a/src/plugin.ts b/src/plugin.ts index 341e07b..f382409 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -32,8 +32,8 @@ export async function runPlugin(context: Context) { } else if (isIssueEvent(context)) { switch (eventName) { case "issues.opened": - await issueChecker(context); await addIssue(context); + await issueChecker(context); return await issueMatching(context); case "issues.edited": await issueChecker(context); diff --git a/supabase/migrations/20241002004403_issue_comments.sql b/supabase/migrations/20241002004403_issue_comments.sql index 6323882..9ebb751 100644 --- a/supabase/migrations/20241002004403_issue_comments.sql +++ b/supabase/migrations/20241002004403_issue_comments.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8) +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ DECLARE current_quantized vector(1024); @@ -9,10 +9,10 @@ BEGIN SELECT id AS issue_id, plaintext AS issue_plaintext, 1 - (l2_distance(current_quantized, embedding)) AS similarity - FROM issues WHERE id <> current_id AND 1 - (l2_distance(current_quantized, embedding)) > threshold - ORDER BY similarity; + ORDER BY similarity + LIMIT top_k; -- Limit the number of results to top_k END; -$$ LANGUAGE plpgsql; \ No newline at end of file +$$ LANGUAGE plpgsql; diff --git a/supabase/migrations/20241008165113_function_issue.sql b/supabase/migrations/20241008165113_function_issue.sql new file mode 100644 index 0000000..3223d1d --- /dev/null +++ b/supabase/migrations/20241008165113_function_issue.sql @@ -0,0 +1,38 @@ +DROP FUNCTION IF EXISTS find_similar_issues; + +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); + current_repo TEXT; + current_org TEXT; +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + + -- Extract the current issue's repo and org from the payload + SELECT + payload->'repository'->>'name'::text, + payload->'repository'->'owner'->>'login'::text + INTO current_repo, current_org + FROM issues + WHERE id = current_id; + + -- Check if the current issue has valid repo and org + IF current_repo IS NULL OR current_org IS NULL THEN + RETURN; -- Exit if current issue's repo or org is null + END IF; + + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + (l2_distance(current_quantized, embedding)) AS similarity + FROM issues + WHERE id <> current_id + AND current_repo = payload->'repository'->>'name'::text + AND current_org = payload->'repository'->'owner'->>'login'::text + AND l2_distance(current_quantized, embedding) > threshold -- Ensure similarity exceeds threshold + ORDER BY similarity DESC + LIMIT top_k; +END; +$$ LANGUAGE plpgsql; diff --git a/supabase/migrations/20241008175109_function_issue.sql b/supabase/migrations/20241008175109_function_issue.sql new file mode 100644 index 0000000..6c77ce3 --- /dev/null +++ b/supabase/migrations/20241008175109_function_issue.sql @@ -0,0 +1,38 @@ +DROP FUNCTION IF EXISTS find_similar_issues; + +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); + current_repo TEXT; + current_org TEXT; +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + + -- Extract the current issue's repo and org from the payload + SELECT + payload->'repository'->>'name'::text, + payload->'repository'->'owner'->>'login'::text + INTO current_repo, current_org + FROM issues + WHERE id = current_id; + + -- Check if the current issue has valid repo and org + IF current_repo IS NULL OR current_org IS NULL THEN + RETURN; -- Exit if current issue's repo or org is null + END IF; + + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + ((0.5 * inner_product(current_quantized, embedding)) + 0.5 * (1 / (1 + l2_distance(current_quantized, embedding)))) as similarity + FROM issues + WHERE id <> current_id + AND current_repo = payload->'repository'->>'name'::text + AND current_org = payload->'repository'->'owner'->>'login'::text + AND ((0.5 * inner_product(current_quantized, embedding)) + 0.5 * (1 / (1 + l2_distance(current_quantized, embedding)))) > threshold + ORDER BY similarity DESC + LIMIT top_k; +END; +$$ LANGUAGE plpgsql; diff --git a/tests/__mocks__/adapter.ts b/tests/__mocks__/adapter.ts index d1f634c..abb271f 100644 --- a/tests/__mocks__/adapter.ts +++ b/tests/__mocks__/adapter.ts @@ -36,21 +36,29 @@ export function createMockAdapters(context: Context) { commentMap.set(commentNodeId, { id: commentNodeId, plaintext, author_id: authorId, embedding, issue_id: issueId }); } ), - updateComment: jest.fn(async (plaintext: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) => { - if (!commentMap.has(commentNodeId)) { - throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); - } - const originalComment = commentMap.get(commentNodeId); - if (!originalComment) { - throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); - } - const { id, author_id } = originalComment; - const embedding = await context.adapters.voyage.embedding.createEmbedding(plaintext); - if (isPrivate) { - plaintext = null; + updateComment: jest.fn( + async ( + plaintext: string | null, + commentNodeId: string, + authorId: number, + payload: Record | null, + isPrivate: boolean, + issueId: string + ) => { + if (!commentMap.has(commentNodeId)) { + throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); + } + const originalComment = commentMap.get(commentNodeId); + if (!originalComment) { + throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); + } + const embedding = await context.adapters.voyage.embedding.createEmbedding(plaintext); + if (isPrivate) { + plaintext = null; + } + commentMap.set(commentNodeId, { id: issueId, plaintext, author_id: authorId, embedding, payload }); } - commentMap.set(commentNodeId, { id, plaintext, author_id, embedding }); - }), + ), deleteComment: jest.fn(async (commentNodeId: string) => { if (!commentMap.has(commentNodeId)) { throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); diff --git a/tests/main.test.ts b/tests/main.test.ts index 442b82e..27caeeb 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -5,7 +5,6 @@ import { drop } from "@mswjs/data"; import { Octokit } from "@octokit/rest"; import { Logs } from "@ubiquity-os/ubiquity-os-logger"; import dotenv from "dotenv"; -import manifest from "../manifest.json"; import { runPlugin } from "../src/plugin"; import { Env } from "../src/types"; import { Context, SupportedEvents } from "../src/types/context"; @@ -35,17 +34,6 @@ describe("Plugin tests", () => { await setupTests(); }); - it("Should serve the manifest file", async () => { - const worker = (await import("../src/worker")).default; - const response = await worker.fetch(new Request("http://localhost/manifest"), { - SUPABASE_KEY: "test", - SUPABASE_URL: "test", - VOYAGEAI_API_KEY: "test", - }); - const content = await response.json(); - expect(content).toEqual(manifest); - }); - it("When a comment is created it should add it to the database", async () => { const { context } = createContext(STRINGS.HELLO_WORLD, 1, 1, 1, 1, "sasasCreate"); await runPlugin(context);