Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Issue Deduplication #11

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
"voyageai",
"vectordump",
"payloadobject",
"markdownit"
"markdownit",
"plpgsql"
],
"dictionaries": ["typescript", "node", "software-terms"],
"import": ["@cspell/dict-typescript/cspell-ext.json", "@cspell/dict-node/cspell-ext.json", "@cspell/dict-software-terms"],
Expand Down
26 changes: 10 additions & 16 deletions src/adapters/supabase/helpers/issues.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,10 @@ import { SuperSupabase } from "./supabase";
import { Context } from "../../../types/context";
import { markdownToPlainText } from "../../utils/markdown-to-plaintext";

export interface IssueType {
id: string;
markdown?: string;
author_id: number;
created_at: string;
modified_at: string;
payloadObject: Record<string, unknown> | null;
embedding: number[];
export interface IssueSimilaritySearchResult {
issue_id: string;
issue_plaintext: string;
similarity: number;
}

export class Issues extends SuperSupabase {
Expand Down Expand Up @@ -70,15 +66,13 @@ export class Issues extends SuperSupabase {
}
}

async findSimilarIssues(markdown: string, threshold: number): Promise<IssueType[] | null> {
async findSimilarIssues(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[] | null> {
const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown);
const { data, error } = await this.supabase
.from("issues")
.select("*")
.eq("type", "issue")
.textSearch("embedding", embedding.join(","))
.order("embedding", { foreignTable: "issues", ascending: false })
.lte("embedding", threshold);
const { data, error } = await this.supabase.rpc("find_similar_issues", {
current_id: currentId,
query_embedding: embedding,
threshold: threshold,
});
if (error) {
this.context.logger.error("Error finding similar issues", error);
return [];
Expand Down
138 changes: 138 additions & 0 deletions src/handlers/issue-deduplication.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
import { Context } from "../types";
// Threshold passed to findSimilarIssues for the first check in issueChecker: when any stored issue is
// returned at this threshold, the incoming issue is closed and labelled "unplanned".
// NOTE(review): presumably a similarity score in [0, 1]; the actual comparison happens inside the
// `find_similar_issues` database function, which is not visible here — confirm.
const MATCH_THRESHOLD = 0.95;
// Lower threshold used for the second check in issueChecker: when any stored issue is returned at this
// threshold, the incoming issue is kept open and a comment listing the similar issues is posted or updated.
const WARNING_THRESHOLD = 0.5;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you do 50%?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A cosine similarity of 0.75 seemed too tight a cutoff for identifying similar issues. I tested it with a few examples and noticed some potential errors with the samples: for similar issues, the similarity was typically either above 75% (and then close to the 95% category) or around 60%. I therefore experimented with a 50% threshold, which seemed to work well.


/**
 * Result shape of the GraphQL `node(id: ...)` lookup that resolves a stored issue's
 * global node id to the fields needed to render a markdown link to it.
 */
export interface IssueGraphqlResponse {
  /** The resolved issue: its `title` is used as the link text and its `url` as the link target. */
  node: { title: string; url: string };
}

/**
 * Check whether the issue in the event payload is similar to issues already stored in the database
 * and react on GitHub accordingly.
 *
 * Two similarity searches are run against the same text (issue body followed by issue title):
 *  1. At MATCH_THRESHOLD: if anything is returned, the issue is closed and labelled "unplanned".
 *  2. At WARNING_THRESHOLD: if anything is returned, the issue is (re)opened, the "unplanned" label is
 *     removed when present in the payload, and a comment listing the similar issues is created or updated.
 *
 * @param context - plugin context providing the logger, event payload, octokit client and supabase adapter
 * @returns true if a similar issue was found at either threshold, false otherwise
 */
export async function issueChecker(context: Context): Promise<boolean> {
  const {
    logger,
    payload,
    octokit,
    adapters: { supabase },
  } = context;

  const issue = payload.issue;
  const repoRef = {
    owner: payload.repository.owner.login,
    repo: payload.repository.name,
  };
  const issueRef = { ...repoRef, issue_number: issue.number };

  // An issue can have no body; without the fallback the literal text "null" would be embedded.
  const issueText = (issue.body ?? "") + issue.title;

  // First check: does an issue with more than MATCH_THRESHOLD similarity exist (very similar)?
  const matchingIssues = await supabase.issue.findSimilarIssues(issueText, MATCH_THRESHOLD, issue.node_id);
  if (matchingIssues && matchingIssues.length > 0) {
    logger.info(`Similar issue which matches more than ${MATCH_THRESHOLD} already exists`);
    // Close the issue as "unplanned".
    // NOTE(review): `labels` on issues.update replaces the issue's whole label set — confirm that
    // dropping any labels already on the issue is intended.
    await octokit.issues.update({
      ...issueRef,
      state: "closed",
      labels: ["unplanned"],
    });
    return true;
  }

  // Second check: does an issue with more than WARNING_THRESHOLD similarity exist (warning)?
  const warningIssues = await supabase.issue.findSimilarIssues(issueText, WARNING_THRESHOLD, issue.node_id);
  if (warningIssues && warningIssues.length > 0) {
    logger.info(`Similar issue which matches more than ${WARNING_THRESHOLD} already exists`);

    // Resolve every similar issue's global node id to its title and url (lookups run in parallel).
    const issueList: IssueGraphqlResponse[] = await Promise.all(
      warningIssues.map(async (similar: IssueSimilaritySearchResult) => {
        const resolved: IssueGraphqlResponse = await octokit.graphql(
          `query($issueNodeId: ID!) {
            node(id: $issueNodeId) {
              ... on Issue {
                title
                url
              }
            }
          }`,
          {
            issueNodeId: similar.issue_id,
          }
        );
        return resolved;
      })
    );

    // Reopen the issue: an earlier run may have closed it as a very close match.
    await octokit.issues.update({
      ...issueRef,
      state: "open",
    });

    // Remove the "unplanned" label if the payload shows it on the issue.
    if (issue.labels?.some((label) => label.name === "unplanned")) {
      await octokit.issues.removeLabel({
        ...issueRef,
        name: "unplanned",
      });
    }

    // Look for a previously posted similarity comment so it is updated instead of duplicated.
    // per_page is raised to the API maximum; only the first page is inspected.
    const commentMarker = "This issue seems to be similar to the following issue(s)";
    const { data: comments } = await octokit.issues.listComments({
      ...issueRef,
      per_page: 100,
    });
    const commentToUpdate = comments.find((comment) => comment.body?.includes(commentMarker));

    if (commentToUpdate) {
      // Refresh the existing comment with the latest list of similar issues.
      const body = issueList.map((entry) => `- [${entry.node.title}](${entry.node.url})`).join("\n");
      const updatedBody = `This issue seems to be similar to the following issue(s):\n\n${body}`;
      await octokit.issues.updateComment({
        ...repoRef,
        comment_id: commentToUpdate.id,
        body: updatedBody,
      });
    } else {
      // No similarity comment yet (or no comments at all): add a new one.
      await createNewComment(context, issueList);
    }
    return true;
  }

  logger.info("No similar issue found");
  return false;
}

/**
 * Post a new comment on the issue from the event payload, listing the given similar issues
 * as markdown links (one bullet per entry) under a fixed heading line.
 * @param context - supplies the octokit client and the payload identifying the repository and issue
 * @param resolvedIssueList - title/url pairs of the similar issues to list
 */
async function createNewComment(context: Context, resolvedIssueList: IssueGraphqlResponse[]) {
  const { octokit, payload } = context;
  const heading = "This issue seems to be similar to the following issue(s):\n\n";
  // Every bullet keeps its own trailing newline, so the pieces are joined without a separator.
  const bulletLines = resolvedIssueList.map(({ node }) => `- [${node.title}](${node.url})\n`);
  const commentBody = [heading, ...bulletLines].join("");
  await octokit.issues.createComment({
    owner: payload.repository.owner.login,
    repo: payload.repository.name,
    issue_number: payload.issue.number,
    body: commentBody,
  });
}
7 changes: 5 additions & 2 deletions src/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { VoyageAIClient } from "voyageai";
import { deleteIssues } from "./handlers/delete-issue";
import { addIssue } from "./handlers/add-issue";
import { updateIssue } from "./handlers/update-issue";
import { issueChecker } from "./handlers/issue-deduplication";

/**
* The main plugin function. Split for easier testing.
Expand All @@ -31,11 +32,13 @@ export async function runPlugin(context: Context) {
} else if (isIssueEvent(context)) {
switch (eventName) {
case "issues.opened":
await issueChecker(context);
return await addIssue(context);
case "issues.deleted":
return await deleteIssues(context);
case "issues.edited":
await issueChecker(context);
return await updateIssue(context);
case "issues.deleted":
return await deleteIssues(context);
}
} else {
logger.error(`Unsupported event: ${eventName}`);
Expand Down
Loading