Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Issue Deduplication #11

Merged
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
"voyageai",
"vectordump",
"payloadobject",
"markdownit"
"markdownit",
"plpgsql"
],
"dictionaries": ["typescript", "node", "software-terms"],
"import": ["@cspell/dict-typescript/cspell-ext.json", "@cspell/dict-node/cspell-ext.json", "@cspell/dict-software-terms"],
Expand Down
26 changes: 10 additions & 16 deletions src/adapters/supabase/helpers/issues.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,10 @@ import { SuperSupabase } from "./supabase";
import { Context } from "../../../types/context";
import { markdownToPlainText } from "../../utils/markdown-to-plaintext";

export interface IssueType {
id: string;
markdown?: string;
author_id: number;
created_at: string;
modified_at: string;
payloadObject: Record<string, unknown> | null;
embedding: number[];
/**
 * Element type of the array that `Issues.findSimilarIssues` resolves to.
 */
export interface IssueSimilaritySearchResult {
// Identifier of the matched issue; the deduplication handler passes it to GitHub GraphQL as a node ID.
issue_id: string;
// NOTE(review): presumably the plain-text form of the matched issue's content — confirm against the RPC definition.
issue_plaintext: string;
// Similarity score; compared directly against the 0.75 / 0.95 thresholds and multiplied by 100 for display.
similarity: number;
}

export class Issues extends SuperSupabase {
Expand Down Expand Up @@ -70,15 +66,13 @@ export class Issues extends SuperSupabase {
}
}

async findSimilarIssues(markdown: string, threshold: number): Promise<IssueType[] | null> {
async findSimilarIssues(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[] | null> {
const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown);
const { data, error } = await this.supabase
.from("issues")
.select("*")
.eq("type", "issue")
.textSearch("embedding", embedding.join(","))
.order("embedding", { foreignTable: "issues", ascending: false })
.lte("embedding", threshold);
const { data, error } = await this.supabase.rpc("find_similar_issues", {
current_id: currentId,
query_embedding: embedding,
threshold: threshold,
});
if (error) {
this.context.logger.error("Error finding similar issues", error);
return [];
Expand Down
114 changes: 114 additions & 0 deletions src/handlers/issue-deduplication.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import { IssueSimilaritySearchResult } from "../adapters/supabase/helpers/issues";
import { Context } from "../types";
import { IssuePayload } from "../types/payload";

// Similarity at or above which issueChecker closes the incoming issue (state_reason "not_planned").
const MATCH_THRESHOLD = 0.95;
// Threshold handed to findSimilarIssues; every result it returns is listed in a comment on the issue.
const WARNING_THRESHOLD = 0.75;
sshivaditya2019 marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Result of the GraphQL `node` lookup for one similar issue, extended with a
 * display-ready similarity value.
 */
export interface IssueGraphqlResponse {
// Fields selected by the `... on Issue` fragment of the lookup query.
node: {
title: string;
url: string;
};
// Similarity as a percentage with two decimals (e.g. "87.50"); assigned after the query, not returned by GraphQL.
similarity: string;
}

/**
 * Check whether the issue in the current payload is similar to issues already
 * stored in the database.
 *
 * Every result returned for WARNING_THRESHOLD is listed in a comment on the
 * issue. When at least one of them also reaches MATCH_THRESHOLD the issue is
 * additionally closed with state_reason "not_planned".
 *
 * @param context - Plugin context providing the logger, Supabase adapter, Octokit client and event payload
 * @returns true if the issue is similar to an existing issue, false otherwise
 */
export async function issueChecker(context: Context): Promise<boolean> {
  const {
    logger,
    adapters: { supabase },
    octokit,
  } = context;
  const { payload } = context as { payload: IssuePayload };
  const issue = payload.issue;
  // An issue without a description has a null body; without the fallback it
  // would be concatenated as the literal text "null" and skew the embedding.
  const issueContent = (issue.body ?? "") + issue.title;

  // Fetch all similar issues based on WARNING_THRESHOLD
  const similarIssues = await supabase.issue.findSimilarIssues(issueContent, WARNING_THRESHOLD, issue.node_id);
  if (!similarIssues || similarIssues.length === 0) {
    return false;
  }

  // Handle issues that match the MATCH_THRESHOLD (Very Similar)
  const hasMatch = similarIssues.some((candidate) => candidate.similarity >= MATCH_THRESHOLD);
  if (hasMatch) {
    logger.info(`Similar issue which matches more than ${MATCH_THRESHOLD} already exists`);
    await octokit.issues.update({
      owner: payload.repository.owner.login,
      repo: payload.repository.name,
      issue_number: issue.number,
      state: "closed",
      state_reason: "not_planned",
    });
  }

  // Every result is at or above WARNING_THRESHOLD, so the comment always lists
  // the full set, including the very similar ones that triggered the close.
  logger.info(`Similar issue which matches more than ${WARNING_THRESHOLD} already exists`);
  await handleSimilarIssuesComment(context, payload, issue.number, similarIssues);
  return true;
}

/**
 * Post (or refresh) a comment on an issue listing the issues it is similar to.
 *
 * Each similar issue is resolved to its title and URL through a GitHub GraphQL
 * `node` lookup. Lookups that fail or do not resolve to an issue are skipped so
 * that one stale entry does not prevent the comment from being posted. If a
 * comment with the same heading already exists it is updated in place;
 * otherwise a new comment is created.
 *
 * @param context - Plugin context providing the Octokit client and logger
 * @param payload - Event payload; supplies the repository owner and name
 * @param issueNumber - Number of the issue to comment on
 * @param similarIssues - Similarity search results to list in the comment
 */
async function handleSimilarIssuesComment(context: Context, payload: IssuePayload, issueNumber: number, similarIssues: IssueSimilaritySearchResult[]) {
  // Heading text doubles as the marker used to find a previously posted comment.
  const commentHeader = "This issue seems to be similar to the following issue(s)";
  const owner = payload.repository.owner.login;
  const repo = payload.repository.name;

  const lookups = await Promise.all(
    similarIssues.map(async (similarIssue: IssueSimilaritySearchResult): Promise<IssueGraphqlResponse | null> => {
      try {
        const response: { node: Partial<IssueGraphqlResponse["node"]> | null } | null = await context.octokit.graphql(
          `query($issueNodeId: ID!) {
            node(id: $issueNodeId) {
              ... on Issue {
                title
                url
              }
            }
          }`,
          { issueNodeId: similarIssue.issue_id }
        );
        const node = response?.node;
        // A node that is missing or is not an Issue carries no title/url.
        if (!node || !node.title || !node.url) {
          return null;
        }
        // Build a fresh object instead of mutating the GraphQL response.
        return {
          node: { title: node.title, url: node.url },
          similarity: (similarIssue.similarity * 100).toFixed(2),
        };
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        context.logger.info(`Skipping similar issue ${similarIssue.issue_id}: ${reason}`);
        return null;
      }
    })
  );

  const issueList = lookups.filter((entry): entry is IssueGraphqlResponse => entry !== null);
  if (issueList.length === 0) {
    // Nothing could be resolved, so there is nothing meaningful to post.
    return;
  }

  const commentBody = issueList.map((entry) => `- [${entry.node.title}](${entry.node.url}) Similarity: ${entry.similarity}`).join("\n");
  const body = `${commentHeader}:\n\n${commentBody}`;

  // Request the largest page size so an earlier comment is less likely to be
  // missed on busy issues (the default page holds only 30 comments).
  const existingComments = await context.octokit.issues.listComments({
    owner,
    repo,
    issue_number: issueNumber,
    per_page: 100,
  });

  const existingComment = existingComments.data.find((comment) => comment.body && comment.body.includes(commentHeader));

  if (existingComment) {
    await context.octokit.issues.updateComment({
      owner,
      repo,
      comment_id: existingComment.id,
      body: body,
    });
  } else {
    await context.octokit.issues.createComment({
      owner,
      repo,
      issue_number: issueNumber,
      body: body,
    });
  }
}
7 changes: 5 additions & 2 deletions src/plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { VoyageAIClient } from "voyageai";
import { deleteIssues } from "./handlers/delete-issue";
import { addIssue } from "./handlers/add-issue";
import { updateIssue } from "./handlers/update-issue";
import { issueChecker } from "./handlers/issue-deduplication";

/**
* The main plugin function. Split for easier testing.
Expand All @@ -31,11 +32,13 @@ export async function runPlugin(context: Context) {
} else if (isIssueEvent(context)) {
switch (eventName) {
case "issues.opened":
await issueChecker(context);
return await addIssue(context);
case "issues.deleted":
return await deleteIssues(context);
case "issues.edited":
await issueChecker(context);
return await updateIssue(context);
case "issues.deleted":
return await deleteIssues(context);
}
} else {
logger.error(`Unsupported event: ${eventName}`);
Expand Down
Loading