Skip to content

Commit

Permalink
fix: removed edit distance; now using l2 distance
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Oct 2, 2024
1 parent ac2eae6 commit 9d3bdbf
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 64 deletions.
52 changes: 0 additions & 52 deletions src/adapters/supabase/helpers/issues.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,56 +110,4 @@ export class Issues extends SuperSupabase {
this.context.logger.error("Error updating issue payload", error);
}
}

/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `query` into `similarIssues`.
 *
 * Characters are compared by string index, i.e. per UTF-16 code unit, so a
 * character outside the Basic Multilingual Plane counts as two positions.
 *
 * Only two rows of the dynamic-programming table are kept at a time, so memory
 * use is proportional to `similarIssues.length` rather than to the product of
 * both lengths.
 *
 * @param query - The source string.
 * @param similarIssues - The target string to compare against.
 * @returns The edit distance (0 when the strings are identical).
 */
calculateEditDistance(query: string, similarIssues: string): number {
  const rows = query.length;
  const cols = similarIssues.length;

  // Converting to or from an empty string costs the other string's length.
  if (rows === 0) {
    return cols;
  }
  if (cols === 0) {
    return rows;
  }

  // previousRow[j] holds the distance between the first (i - 1) characters of
  // `query` and the first j characters of `similarIssues`.
  let previousRow: number[] = Array.from({ length: cols + 1 }, (_, j) => j);
  let currentRow: number[] = new Array<number>(cols + 1).fill(0);

  for (let i = 1; i <= rows; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= cols; j++) {
      const cost = query[i - 1] === similarIssues[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j] + 1, // deletion
        currentRow[j - 1] + 1, // insertion
        previousRow[j - 1] + cost // substitution
      );
    }
    // Reuse the two buffers instead of allocating a new row per iteration.
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  // After the final swap the last computed row lives in previousRow.
  return previousRow[cols];
}

/**
 * Finds issues similar to `markdown` via the `find_similar_issues` RPC and then
 * keeps only those whose plaintext is within an edit-distance budget of it.
 *
 * The budget is `markdown.length * (1 - threshold)`, so a higher `threshold`
 * allows fewer edits.
 *
 * @param markdown - Text of the issue being checked; embedded and used as the edit-distance reference.
 * @param threshold - Similarity threshold passed to the RPC and used to derive the edit-distance budget.
 * @param currentId - Identifier passed to the RPC as `current_id`.
 * @returns The RPC rows that pass the edit-distance filter; an empty array when
 *   the RPC reports an error or returns no rows.
 */
async fetchSimilarIssueEditDist(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[] | null> {
  const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown);
  const { data, error } = await this.supabase.rpc("find_similar_issues", {
    current_id: currentId,
    query_embedding: embedding,
    threshold: threshold,
  });

  if (error) {
    this.context.logger.error("Error finding similar issues", error);
    return [];
  }

  // Guard against a missing payload so the filter below never runs on null.
  const candidates: IssueSimilaritySearchResult[] = Array.isArray(data) ? data : [];

  // Convert the similarity threshold into a maximum number of allowed edits,
  // scaled by the length of the input markdown.
  const editDistanceThreshold = markdown.length * (1 - threshold);

  // Filter on each candidate's own distance. The previous implementation used
  // the filter callback's first argument (the element) as an array index, which
  // made every lookup undefined and rejected every candidate.
  return candidates.filter((issue) => this.calculateEditDistance(markdown, issue.issue_plaintext) <= editDistanceThreshold);
}
}
45 changes: 36 additions & 9 deletions src/handlers/issue-deduplication.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ export interface IssueGraphqlResponse {
node: {
title: string;
url: string;
repository: {
name: string;
owner: {
login: string;
};
};
};
similarity: string;
}
Expand All @@ -24,7 +30,6 @@ export async function issueChecker(context: Context): Promise<boolean> {
const { payload } = context as { payload: IssuePayload };
const issue = payload.issue;
const issueContent = issue.body + issue.title;

// Fetch all similar issues based on settings.warningThreshold
const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.warningThreshold, issue.node_id);
if (similarIssues && similarIssues.length > 0) {
Expand Down Expand Up @@ -53,6 +58,18 @@ export async function issueChecker(context: Context): Promise<boolean> {
return false;
}

/**
 * Tells whether a similar issue lives in the same repository as the current one.
 * Both the owner and the repository name must match exactly (case-sensitive).
 * @param repoOrg Owner login of the current repository
 * @param similarIssueRepoOrg Owner login of the similar issue's repository
 * @param repoName Name of the current repository
 * @param similarIssueRepoName Name of the similar issue's repository
 * @returns true when owner and repository name are both identical, false otherwise
 */
function matchRepoOrgToSimilarIssueRepoOrg(repoOrg: string, similarIssueRepoOrg: string, repoName: string, similarIssueRepoName: string): boolean {
  // A different owner rules the match out before the name is looked at.
  if (repoOrg !== similarIssueRepoOrg) {
    return false;
  }
  const isSameRepoName = repoName === similarIssueRepoName;
  return isSameRepoName;
}

/**
* Handle commenting on an issue with similar issues information
* @param context
Expand All @@ -69,29 +86,39 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa
... on Issue {
title
url
repository {
name
owner {
login
}
}
}
}
}`,
{ issueNodeId: issue.issue_id }
);
issueUrl.similarity = (issue.similarity * 100).toFixed(2);
issueUrl.similarity = Math.round(issue.similarity * 100).toString();
return issueUrl;
})
);

const commentBody = issueList.map((issue) => `- [${issue.node.title}](${issue.node.url}) Similarity: ${issue.similarity}`).join("\n");
const body = `This issue seems to be similar to the following issue(s):\n\n${commentBody}`;
const commentBody = issueList
.filter((issue) =>
matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name)
)
.map((issue) => {
const modifiedUrl = issue.node.url.replace("github.com", "www.github.com");
return `* \`${issue.similarity}%\` [${issue.node.title}](${modifiedUrl})`;
})
.join("\n");
const body = `>[!NOTE]\n>#### Similar Issues:\n>\n>${commentBody}`;

const existingComments = await context.octokit.issues.listComments({
owner: payload.repository.owner.login,
repo: payload.repository.name,
issue_number: issueNumber,
});

const existingComment = existingComments.data.find(
(comment) => comment.body && comment.body.includes("This issue seems to be similar to the following issue(s)")
);

const existingComment = existingComments.data.find((comment) => comment.body && comment.body.includes(">[!NOTE]\n>#### Similar Issues:\n>"));
if (existingComment) {
await context.octokit.issues.updateComment({
owner: payload.repository.owner.login,
Expand Down
18 changes: 18 additions & 0 deletions supabase/migrations/20241002004403_issue_comments.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- find_similar_issues: returns rows from the issues table whose embedding is
-- close to the supplied query embedding.
--
-- Parameters:
--   current_id       id to exclude from the results (rows with id = current_id are skipped)
--   query_embedding  1024-dimensional vector to compare against issues.embedding
--   threshold        only rows with similarity strictly greater than this are returned
--
-- Returns one row per match: (issue_id, issue_plaintext, similarity), where
-- similarity is computed as 1 - l2_distance(query_embedding, embedding).
-- NOTE(review): l2_distance is presumably the pgvector function; if so the
-- distance is unbounded above, so similarity can be negative. Confirm that
-- callers' thresholds account for this.
CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8)
RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$
DECLARE
-- Local copy of the query embedding, typed as vector(1024).
current_quantized vector(1024);
BEGIN
-- Assign the argument to the vector(1024)-typed local used in the query below
current_quantized := query_embedding;
RETURN QUERY
SELECT id AS issue_id,
plaintext AS issue_plaintext,
1 - (l2_distance(current_quantized, embedding)) AS similarity

FROM issues
-- Exclude the row identified by current_id, then apply the similarity cut-off.
WHERE id <> current_id
AND 1 - (l2_distance(current_quantized, embedding)) > threshold
-- No direction is given, so rows are sorted ascending (lowest similarity first).
-- NOTE(review): confirm ascending order is intended rather than DESC.
ORDER BY similarity;
END;
$$ LANGUAGE plpgsql;
6 changes: 3 additions & 3 deletions tests/main.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,9 @@ function createContextInner(
organization: { login: STRINGS.USER_1 } as Context["payload"]["organization"],
} as Context["payload"],
config: {
warningThreshold: 0.75,
matchThreshold: 0.95,
jobMatchingThreshold: 0.95,
warningThreshold: 0.1,
matchThreshold: 0.4,
jobMatchingThreshold: 0.4,
},
adapters: {} as Context["adapters"],
logger: new Logs("debug"),
Expand Down

0 comments on commit 9d3bdbf

Please sign in to comment.