From aba34491eefa28f65674a09b91a4a838922802d7 Mon Sep 17 00:00:00 2001 From: Mentlegen <9807008+gentlementlegen@users.noreply.github.com> Date: Mon, 23 Sep 2024 23:12:23 +0900 Subject: [PATCH 01/31] feat: schema validation --- README.md | 8 +++--- src/handlers/validator.ts | 36 +++++++++++++++++++++++++++ src/worker.ts | 51 ++++++++++++--------------------------- 3 files changed, 56 insertions(+), 39 deletions(-) create mode 100644 src/handlers/validator.ts diff --git a/README.md b/README.md index cac1f27..62592af 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,9 @@ To set up the `.dev.vars` file, you will need to provide the following variables ## Usage - Add the following to your `.ubiquibot-config.yml` file with the appropriate URL: -```javascript - -plugin: http://127.0.0.1:4000 - runsOn: [ "issue_comment.created", "issue_comment.edited", "issue_comment.deleted" , "issues.opened", "issues.edited", "issues.deleted"] +```yaml + - plugin: http://127.0.0.1:4000 + runsOn: [ "issue_comment.created", "issue_comment.edited", "issue_comment.deleted" , "issues.opened", "issues.edited", "issues.deleted"] ``` @@ -53,4 +53,4 @@ To set up the `.dev.vars` file, you will need to provide the following variables - Replace the placeholders with the appropriate values. ## Testing -- Run `yarn test` to run the tests. \ No newline at end of file +- Run `yarn test` to run the tests. diff --git a/src/handlers/validator.ts b/src/handlers/validator.ts new file mode 100644 index 0000000..6438455 --- /dev/null +++ b/src/handlers/validator.ts @@ -0,0 +1,36 @@ +import { TransformDecodeCheckError, TransformDecodeError, Value, ValueError } from "@sinclair/typebox/value"; +import { Env, envValidator, PluginSettings, pluginSettingsSchema, pluginSettingsValidator } from "../types"; + +export function validateAndDecodeSchemas(env: Env, rawSettings: object) { + const errors: ValueError[] = []; + const settings = Value.Default(pluginSettingsSchema, rawSettings) as PluginSettings; + + if (!pluginSettingsValidator.test(settings)) { + for (const error of pluginSettingsValidator.errors(settings)) { + console.error(error); + errors.push(error); + } + } + + if (!envValidator.test(env)) { + for (const error of envValidator.errors(env)) { + console.error(error); + errors.push(error); + } + } + + if (errors.length) { + throw { errors }; + } + + try { + const decodedEnv = Value.Decode(envValidator.schema, env); + const decodedSettings = Value.Decode(pluginSettingsSchema, settings); + return { decodedEnv, decodedSettings }; + } catch (e) { + if (e instanceof TransformDecodeCheckError || e instanceof TransformDecodeError) { + throw { errors: [e.error] }; + } + throw e; + } +} diff --git a/src/worker.ts b/src/worker.ts index 40df400..33370a1 100644 --- a/src/worker.ts +++ b/src/worker.ts @@ -1,17 +1,21 @@ -import { Value } from "@sinclair/typebox/value"; -import { plugin } from "./plugin"; -import { Env, envValidator, pluginSettingsSchema, pluginSettingsValidator } from "./types"; import manifest from "../manifest.json"; +import { validateAndDecodeSchemas } from "./handlers/validator"; +import { plugin } from "./plugin"; +import { Env } from "./types"; export default { async fetch(request: Request, env: Env): Promise { try { - if (request.method === "GET") { - const url = new URL(request.url); - if (url.pathname === "/manifest.json") { + const url = new URL(request.url); + if (url.pathname === "/manifest") { + if (request.method === "GET") { return new Response(JSON.stringify(manifest), { headers: { "content-type": "application/json" }, }); + } else if (request.method === "POST") { + const webhookPayload = await request.json(); + validateAndDecodeSchemas(env, webhookPayload.settings); + return new Response(JSON.stringify({ message: "Schema is valid" }), { status: 200, headers: { "content-type": "application/json" } }); } } if (request.method !== "POST") { @@ -29,33 +33,10 @@ export default { } const webhookPayload = await request.json(); - const settings = Value.Decode(pluginSettingsSchema, Value.Default(pluginSettingsSchema, webhookPayload.settings)); - - if (!pluginSettingsValidator.test(settings)) { - const errors: string[] = []; - for (const error of pluginSettingsValidator.errors(settings)) { - console.error(error); - errors.push(`${error.path}: ${error.message}`); - } - return new Response(JSON.stringify({ error: `Error: "Invalid settings provided. ${errors.join("; ")}"` }), { - status: 400, - headers: { "content-type": "application/json" }, - }); - } - if (!envValidator.test(env)) { - const errors: string[] = []; - for (const error of envValidator.errors(env)) { - console.error(error); - errors.push(`${error.path}: ${error.message}`); - } - return new Response(JSON.stringify({ error: `Error: "Invalid environment provided. ${errors.join("; ")}"` }), { - status: 400, - headers: { "content-type": "application/json" }, - }); - } + const { decodedSettings, decodedEnv } = validateAndDecodeSchemas(env, webhookPayload.settings); - webhookPayload.settings = settings; - await plugin(webhookPayload, env); + webhookPayload.settings = decodedSettings; + await plugin(webhookPayload, decodedEnv); return new Response(JSON.stringify("OK"), { status: 200, headers: { "content-type": "application/json" } }); } catch (error) { return handleUncaughtError(error); @@ -63,8 +44,8 @@ export default { }, }; -function handleUncaughtError(error: unknown) { - console.error(error); +function handleUncaughtError(errors: unknown) { + console.error(errors); const status = 500; - return new Response(JSON.stringify({ error }), { status: status, headers: { "content-type": "application/json" } }); + return new Response(JSON.stringify(errors), { status: status, headers: { "content-type": "application/json" } }); } From d3d73911b2711d7250bbbd89765a0ea2a400240b Mon Sep 17 00:00:00 2001 From: gentlementlegen Date: Mon, 30 Sep 2024 02:45:00 +0900 Subject: [PATCH 02/31] chore: update wrangler --- .github/workflows/worker-deploy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/worker-deploy.yml b/.github/workflows/worker-deploy.yml index deafdc0..a79b808 100644 --- a/.github/workflows/worker-deploy.yml +++ b/.github/workflows/worker-deploy.yml @@ -35,6 +35,7 @@ jobs: id: wrangler_deploy uses: cloudflare/wrangler-action@v3 with: + wranglerVersion: "3.78.12" apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} secrets: | From 3f17e9e091e9713683bc83567d74631dcb1af792 Mon Sep 17 00:00:00 2001 From: gentlementlegen Date: Mon, 30 Sep 2024 02:56:14 +0900 Subject: [PATCH 03/31] chore: fixed kernel data return --- src/main.ts | 2 +- src/types/process-env.d.ts | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 src/types/process-env.d.ts diff --git a/src/main.ts b/src/main.ts index 23a3e74..91490bc 100644 --- a/src/main.ts +++ b/src/main.ts @@ -28,7 +28,7 @@ export async function run() { await plugin(inputs, env); - return returnDataToKernel(inputs.authToken, inputs.stateId, {}); + return returnDataToKernel(process.env.GITHUB_TOKEN, inputs.stateId, {}); } async function returnDataToKernel(repoToken: string, stateId: string, output: object) { diff --git a/src/types/process-env.d.ts b/src/types/process-env.d.ts new file mode 100644 index 0000000..a2bb98f --- /dev/null +++ b/src/types/process-env.d.ts @@ -0,0 +1,9 @@ +declare global { + namespace NodeJS { + interface ProcessEnv { + GITHUB_TOKEN: string; + } + } +} + +export {}; From bacd208d1da07b43c4fffbc2ab7715c96cae3a25 Mon Sep 17 00:00:00 2001 From: gentlementlegen Date: Mon, 30 Sep 2024 14:38:58 +0900 Subject: [PATCH 04/31] fix: enable RLS for tables --- README.md | 2 +- package.json | 2 +- supabase/migrations/20240930052523_enable_rls.sql | 2 ++ yarn.lock | 8 ++++---- 4 files changed, 8 insertions(+), 6 deletions(-) create mode 100644 supabase/migrations/20240930052523_enable_rls.sql diff --git a/README.md b/README.md index 9661b9c..0619b52 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ To set up the `.dev.vars` file, you will need to provide the following variables ## Usage - Add the following to your `.ubiquibot-config.yml` file with the appropriate URL: ```yaml -- plugin: ubiquity-os-marketplace/generate-vector-embeddings +- plugin: https://ubiquity-os-comment-vector-embeddings-main.ubiquity.workers.dev with: matchThreshold: 0.95 warningThreshold: 0.75 diff --git a/package.json b/package.json index 54ad601..869d1c3 100644 --- a/package.json +++ b/package.json @@ -69,7 +69,7 @@ "lint-staged": "15.2.7", "npm-run-all": "4.1.5", "prettier": "3.3.2", - "supabase": "1.191.3", + "supabase": "1.200.3", "ts-jest": "29.1.5", "tsx": "4.15.6", "typescript": "5.4.5", diff --git a/supabase/migrations/20240930052523_enable_rls.sql b/supabase/migrations/20240930052523_enable_rls.sql new file mode 100644 index 0000000..623072b --- /dev/null +++ b/supabase/migrations/20240930052523_enable_rls.sql @@ -0,0 +1,2 @@ +ALTER TABLE "issues" ENABLE ROW LEVEL SECURITY; +ALTER TABLE "issue_comments" ENABLE ROW LEVEL SECURITY; \ No newline at end of file diff --git a/yarn.lock b/yarn.lock index 519f6f7..ebaf74d 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6407,10 +6407,10 @@ summary@2.1.0: resolved "https://registry.yarnpkg.com/summary/-/summary-2.1.0.tgz#be8a49a0aa34eb6ceea56042cae88f8add4b0885" integrity sha512-nMIjMrd5Z2nuB2RZCKJfFMjgS3fygbeyGk9PxPPaJR1RIcyN9yn4A63Isovzm3ZtQuEkLBVgMdPup8UeLH7aQw== -supabase@1.191.3: - version "1.191.3" - resolved "https://registry.yarnpkg.com/supabase/-/supabase-1.191.3.tgz#9e28643b10bc458f8c13a1e3f3ba9172679093f0" - integrity sha512-5tIG7mPc5lZ9QRbkZssyHiOsx42qGFaVqclauXv+1fJAkZnfA28d0pzEDvfs33+w8YTReO5nNaWAgyzkWQQwfA== +supabase@1.200.3: + version "1.200.3" + resolved "https://registry.yarnpkg.com/supabase/-/supabase-1.200.3.tgz#52cd53fd0bcd46af0748c8ffa61fac05e21a0a23" + integrity sha512-3NdhqBkfPVlm+rAhWQoVcyr54kykuAlHav/GWaAoQEHBDbbYI1lhbDzugk8ryQg92vSLwr3pWz0s4Hjdte8WyQ== dependencies: bin-links "^4.0.3" https-proxy-agent "^7.0.2" From 3721f0f5b9979eb56fa5cb5a4ec5ad86077fa0c6 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 1 Oct 2024 16:44:20 -0400 Subject: [PATCH 05/31] feat: added edit distance re-ranker --- src/adapters/supabase/helpers/issues.ts | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 6bfef09..9afeeb1 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -110,4 +110,48 @@ export class Issues extends SuperSupabase { this.context.logger.error("Error updating issue payload", error); } } + + // Edit distance (Number of operations required to convert one string to another) + calculateEditDistance(query: string, similarIssues: string): number { + const dp: number[][] = Array(query.length + 1) + .fill(null) + .map(() => Array(similarIssues.length + 1).fill(null)); + + for (let i = 0; i <= query.length; i++) { + dp[i][0] = i; + } + for (let j = 0; j <= similarIssues.length; j++) { + dp[0][j] = j; + } + for (let i = 1; i <= query.length; i++) { + for (let j = 1; j <= similarIssues.length; j++) { + const cost = query[i - 1] === similarIssues[j - 1] ? 0 : 1; + dp[i][j] = Math.min( + dp[i - 1][j] + 1, // deletion + dp[i][j - 1] + 1, // insertion + dp[i - 1][j - 1] + cost // substitution + ); + } + } + + return dp[query.length][similarIssues.length]; + } + + async fetchSimilarIssueEditDist(markdown: string, threshold: number, currentId: string): Promise { + const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); + const { data, error } = await this.supabase.rpc("find_similar_issues", { + current_id: currentId, + query_embedding: embedding, + threshold: threshold, + }); + if (error) { + this.context.logger.error("Error finding similar issues", error); + return []; + } + //Calculate the edit distance between the query and the similar issues + const similarIssues: string[] = data.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext); + const editDistances = similarIssues.map((issue) => this.calculateEditDistance(markdown, issue)); + //Filter out the issues that are below the threshold + return data.filter((index: number) => editDistances[index] <= threshold); + } } From ac2eae6155114165fca31def4057c7394129e9d2 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 1 Oct 2024 16:50:15 -0400 Subject: [PATCH 06/31] fix: editdistthreshold --- src/adapters/supabase/helpers/issues.ts | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 9afeeb1..4bfd0b7 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -144,14 +144,22 @@ export class Issues extends SuperSupabase { query_embedding: embedding, threshold: threshold, }); + if (error) { this.context.logger.error("Error finding similar issues", error); return []; } - //Calculate the edit distance between the query and the similar issues + const similarIssues: string[] = data.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext); + + // Calculate the maximum edit distance based on the length of the input markdown + const maxLength = markdown.length; + const editDistanceThreshold = maxLength * (1 - threshold); // Convert similarity threshold to edit distance threshold + + // Calculate edit distances const editDistances = similarIssues.map((issue) => this.calculateEditDistance(markdown, issue)); - //Filter out the issues that are below the threshold - return data.filter((index: number) => editDistances[index] <= threshold); + + // Filter out the issues that are above the edit distance threshold + return data.filter((index: number) => editDistances[index] <= editDistanceThreshold); } } From 9d3bdbf47fc2dbb960abf8118635dfa8eb543541 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Wed, 2 Oct 2024 00:21:08 -0400 Subject: [PATCH 07/31] fix: removed edit distance using l2 distance --- src/adapters/supabase/helpers/issues.ts | 52 ------------------- src/handlers/issue-deduplication.ts | 45 ++++++++++++---- .../20241002004403_issue_comments.sql | 18 +++++++ tests/main.test.ts | 6 +-- 4 files changed, 57 insertions(+), 64 deletions(-) create mode 100644 supabase/migrations/20241002004403_issue_comments.sql diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 4bfd0b7..6bfef09 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -110,56 +110,4 @@ export class Issues extends SuperSupabase { this.context.logger.error("Error updating issue payload", error); } } - - // Edit distance (Number of operations required to convert one string to another) - calculateEditDistance(query: string, similarIssues: string): number { - const dp: number[][] = Array(query.length + 1) - .fill(null) - .map(() => Array(similarIssues.length + 1).fill(null)); - - for (let i = 0; i <= query.length; i++) { - dp[i][0] = i; - } - for (let j = 0; j <= similarIssues.length; j++) { - dp[0][j] = j; - } - for (let i = 1; i <= query.length; i++) { - for (let j = 1; j <= similarIssues.length; j++) { - const cost = query[i - 1] === similarIssues[j - 1] ? 0 : 1; - dp[i][j] = Math.min( - dp[i - 1][j] + 1, // deletion - dp[i][j - 1] + 1, // insertion - dp[i - 1][j - 1] + cost // substitution - ); - } - } - - return dp[query.length][similarIssues.length]; - } - - async fetchSimilarIssueEditDist(markdown: string, threshold: number, currentId: string): Promise { - const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - const { data, error } = await this.supabase.rpc("find_similar_issues", { - current_id: currentId, - query_embedding: embedding, - threshold: threshold, - }); - - if (error) { - this.context.logger.error("Error finding similar issues", error); - return []; - } - - const similarIssues: string[] = data.map((issue: IssueSimilaritySearchResult) => issue.issue_plaintext); - - // Calculate the maximum edit distance based on the length of the input markdown - const maxLength = markdown.length; - const editDistanceThreshold = maxLength * (1 - threshold); // Convert similarity threshold to edit distance threshold - - // Calculate edit distances - const editDistances = similarIssues.map((issue) => this.calculateEditDistance(markdown, issue)); - - // Filter out the issues that are above the edit distance threshold - return data.filter((index: number) => editDistances[index] <= editDistanceThreshold); - } } diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 6174e4f..dde1606 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -6,6 +6,12 @@ export interface IssueGraphqlResponse { node: { title: string; url: string; + repository: { + name: string; + owner: { + login: string; + }; + }; }; similarity: string; } @@ -24,7 +30,6 @@ export async function issueChecker(context: Context): Promise { const { payload } = context as { payload: IssuePayload }; const issue = payload.issue; const issueContent = issue.body + issue.title; - // Fetch all similar issues based on settings.warningThreshold const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.warningThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { @@ -53,6 +58,18 @@ export async function issueChecker(context: Context): Promise { return false; } +/** + * Compare the repository and issue name to the similar issue repository and issue name + * @param repoOrg + * @param similarIssueRepoOrg + * @param repoName + * @param similarIssueRepoName + * @returns + */ +function matchRepoOrgToSimilarIssueRepoOrg(repoOrg: string, similarIssueRepoOrg: string, repoName: string, similarIssueRepoName: string): boolean { + return repoOrg === similarIssueRepoOrg && repoName === similarIssueRepoName; +} + /** * Handle commenting on an issue with similar issues information * @param context @@ -69,29 +86,39 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa ... on Issue { title url + repository { + name + owner { + login + } + } } } }`, { issueNodeId: issue.issue_id } ); - issueUrl.similarity = (issue.similarity * 100).toFixed(2); + issueUrl.similarity = Math.round(issue.similarity * 100).toString(); return issueUrl; }) ); - const commentBody = issueList.map((issue) => `- [${issue.node.title}](${issue.node.url}) Similarity: ${issue.similarity}`).join("\n"); - const body = `This issue seems to be similar to the following issue(s):\n\n${commentBody}`; + const commentBody = issueList + .filter((issue) => + matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) + ) + .map((issue) => { + const modifiedUrl = issue.node.url.replace("github.com", "www.github.com"); + return `* \`${issue.similarity}%\` [${issue.node.title}](${modifiedUrl})`; + }) + .join("\n"); + const body = `>[!NOTE]\n>#### Similar Issues:\n>\n>${commentBody}`; const existingComments = await context.octokit.issues.listComments({ owner: payload.repository.owner.login, repo: payload.repository.name, issue_number: issueNumber, }); - - const existingComment = existingComments.data.find( - (comment) => comment.body && comment.body.includes("This issue seems to be similar to the following issue(s)") - ); - + const existingComment = existingComments.data.find((comment) => comment.body && comment.body.includes(">[!NOTE]\n>#### Similar Issues:\n>")); if (existingComment) { await context.octokit.issues.updateComment({ owner: payload.repository.owner.login, diff --git a/supabase/migrations/20241002004403_issue_comments.sql b/supabase/migrations/20241002004403_issue_comments.sql new file mode 100644 index 0000000..6323882 --- /dev/null +++ b/supabase/migrations/20241002004403_issue_comments.sql @@ -0,0 +1,18 @@ +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + 1 - (l2_distance(current_quantized, embedding)) AS similarity + + FROM issues + WHERE id <> current_id + AND 1 - (l2_distance(current_quantized, embedding)) > threshold + ORDER BY similarity; +END; +$$ LANGUAGE plpgsql; \ No newline at end of file diff --git a/tests/main.test.ts b/tests/main.test.ts index bc4ed19..c4d7a08 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -164,9 +164,9 @@ function createContextInner( organization: { login: STRINGS.USER_1 } as Context["payload"]["organization"], } as Context["payload"], config: { - warningThreshold: 0.75, - matchThreshold: 0.95, - jobMatchingThreshold: 0.95, + warningThreshold: 0.1, + matchThreshold: 0.4, + jobMatchingThreshold: 0.4, }, adapters: {} as Context["adapters"], logger: new Logs("debug"), From cc96c8f0f644c41dd7e6159647841986ed4bab2b Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh <53160903+sshivaditya2019@users.noreply.github.com> Date: Wed, 2 Oct 2024 00:25:20 -0400 Subject: [PATCH 08/31] fix: main.test.ts values --- tests/main.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/main.test.ts b/tests/main.test.ts index c4d7a08..78de1fd 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -164,9 +164,9 @@ function createContextInner( organization: { login: STRINGS.USER_1 } as Context["payload"]["organization"], } as Context["payload"], config: { - warningThreshold: 0.1, - matchThreshold: 0.4, - jobMatchingThreshold: 0.4, + warningThreshold: 0.75, + matchThreshold: 0.9, + jobMatchingThreshold: 0.75, }, adapters: {} as Context["adapters"], logger: new Logs("debug"), From 4ecd85404fe6cd3319a25af053113a908e01a872 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Wed, 2 Oct 2024 13:00:21 -0400 Subject: [PATCH 09/31] fix: ui for issue dedup --- src/handlers/issue-deduplication.ts | 36 ++++++++++++----------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index dde1606..dc6daf7 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -102,36 +102,30 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa }) ); + let finalIdx = 0; const commentBody = issueList .filter((issue) => matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) ) - .map((issue) => { - const modifiedUrl = issue.node.url.replace("github.com", "www.github.com"); - return `* \`${issue.similarity}%\` [${issue.node.title}](${modifiedUrl})`; + .map((issue, index) => { + const modifiedUrl = issue.node.url.replace("https://github.com", "https://www.github.com"); + finalIdx += 1; + return `[^0${index + 1}^]: [${issue.node.title}](${modifiedUrl}) ${issue.similarity}%`; }) .join("\n"); - const body = `>[!NOTE]\n>#### Similar Issues:\n>\n>${commentBody}`; + const footnoteLinks = [...Array(finalIdx).keys()].map((i) => `[^0${i + 1}^]`).join(""); + const body = "\n###### Similar " + footnoteLinks + ":\n\n" + commentBody; - const existingComments = await context.octokit.issues.listComments({ + // Remove the existing foot note + const existingBody = context.payload.issue.body; + const footnoteIndex = existingBody?.indexOf("\n###### Similar"); + const newBody = footnoteIndex !== -1 ? existingBody?.substring(0, footnoteIndex) : existingBody; + + //Append the new foot note + await context.octokit.issues.update({ owner: payload.repository.owner.login, repo: payload.repository.name, issue_number: issueNumber, + body: newBody + body, }); - const existingComment = existingComments.data.find((comment) => comment.body && comment.body.includes(">[!NOTE]\n>#### Similar Issues:\n>")); - if (existingComment) { - await context.octokit.issues.updateComment({ - owner: payload.repository.owner.login, - repo: payload.repository.name, - comment_id: existingComment.id, - body: body, - }); - } else { - await context.octokit.issues.createComment({ - owner: payload.repository.owner.login, - repo: payload.repository.name, - issue_number: issueNumber, - body: body, - }); - } } From 3a360b901f45526030997f88d38de3738085f6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=82=A2=E3=83=AC=E3=82=AF=E3=82=B5=E3=83=B3=E3=83=80?= =?UTF-8?q?=E3=83=BC=2Eeth?= <4975670+0x4007@users.noreply.github.com> Date: Thu, 3 Oct 2024 16:44:55 +0900 Subject: [PATCH 10/31] Update src/handlers/issue-deduplication.ts --- src/handlers/issue-deduplication.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index dc6daf7..1da7bc6 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -114,7 +114,7 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa }) .join("\n"); const footnoteLinks = [...Array(finalIdx).keys()].map((i) => `[^0${i + 1}^]`).join(""); - const body = "\n###### Similar " + footnoteLinks + ":\n\n" + commentBody; + const body = "\n###### Similar " + footnoteLinks + "\n\n" + commentBody; // Remove the existing foot note const existingBody = context.payload.issue.body; From e7890fd889df98d1e18bd15c8cb559e7327b5b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=82=A2=E3=83=AC=E3=82=AF=E3=82=B5=E3=83=B3=E3=83=80?= =?UTF-8?q?=E3=83=BC=2Eeth?= <4975670+0x4007@users.noreply.github.com> Date: Thu, 3 Oct 2024 16:48:58 +0900 Subject: [PATCH 11/31] Apply suggestions from code review --- src/handlers/issue-deduplication.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 1da7bc6..abea2ea 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -102,18 +102,17 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa }) ); - let finalIdx = 0; + let finalIndex = 0; const commentBody = issueList .filter((issue) => matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) ) .map((issue, index) => { const modifiedUrl = issue.node.url.replace("https://github.com", "https://www.github.com"); - finalIdx += 1; return `[^0${index + 1}^]: [${issue.node.title}](${modifiedUrl}) ${issue.similarity}%`; }) .join("\n"); - const footnoteLinks = [...Array(finalIdx).keys()].map((i) => `[^0${i + 1}^]`).join(""); + const footnoteLinks = [...Array(++finalIndex).keys()].map((i) => `[^0${i + 1}^]`).join(""); const body = "\n###### Similar " + footnoteLinks + "\n\n" + commentBody; // Remove the existing foot note From fb0e67a76260ca1bc425e1a74fb48ae9a0fe5bed Mon Sep 17 00:00:00 2001 From: gentlementlegen Date: Thu, 3 Oct 2024 21:32:00 +0900 Subject: [PATCH 12/31] chore: added schema generation --- .github/workflows/update-configuration.yml | 58 ++++++++++++++++++++++ package.json | 2 +- yarn.lock | 8 +-- 3 files changed, 63 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/update-configuration.yml diff --git a/.github/workflows/update-configuration.yml b/.github/workflows/update-configuration.yml new file mode 100644 index 0000000..2490c49 --- /dev/null +++ b/.github/workflows/update-configuration.yml @@ -0,0 +1,58 @@ +name: "Update Configuration" + +on: + workflow_dispatch: + push: + +jobs: + update: + name: "Update Configuration in manifest.json" + runs-on: ubuntu-latest + permissions: write-all + + steps: + - uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4 + with: + node-version: "20.10.0" + + - name: Install deps and run configuration update + run: | + yarn install --immutable --immutable-cache --check-cache + yarn tsc --noCheck --project tsconfig.json + + - name: Update manifest configuration using GitHub Script + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const path = require('path'); + + const { pluginSettingsSchema } = require('./src/types'); + + const manifestPath = path.resolve("${{ github.workspace }}", './manifest.json'); + const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); + + const configuration = JSON.stringify(pluginSettingsSchema); + + manifest["configuration"] = JSON.parse(configuration); + + const updatedManifest = JSON.stringify(manifest, null, 2) + console.log('Updated manifest:', updatedManifest); + fs.writeFileSync(manifestPath, updatedManifest); + + - name: Commit and Push generated types + run: | + git config --global user.name 'ubiquity-os[bot]' + git config --global user.email 'ubiquity-os[bot]@users.noreply.github.com' + git add ./manifest.json + if [ -n "$(git diff-index --cached --name-only HEAD)" ]; then + git commit -m "chore: updated generated configuration" || echo "Lint-staged check failed" + git push origin HEAD:${{ github.ref_name }} + else + echo "No changes to commit" + fi + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/package.json b/package.json index 869d1c3..e1916ce 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,7 @@ "supabase": "1.200.3", "ts-jest": "29.1.5", "tsx": "4.15.6", - "typescript": "5.4.5", + "typescript": "5.6.2", "typescript-eslint": "7.13.1", "wrangler": "3.78.12" }, diff --git a/yarn.lock b/yarn.lock index ebaf74d..ac66d8a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -6656,10 +6656,10 @@ typescript-eslint@7.13.1: "@typescript-eslint/parser" "7.13.1" "@typescript-eslint/utils" "7.13.1" -typescript@5.4.5: - version "5.4.5" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.4.5.tgz#42ccef2c571fdbd0f6718b1d1f5e6e5ef006f611" - integrity sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ== +typescript@5.6.2: + version "5.6.2" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.6.2.tgz#d1de67b6bef77c41823f822df8f0b3bcff60a5a0" + integrity sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw== uc.micro@^2.0.0, uc.micro@^2.1.0: version "2.1.0" From d72c7da984126e1937876d10bf2ec1d5b23aca6a Mon Sep 17 00:00:00 2001 From: "ubiquity-os[bot]" Date: Thu, 3 Oct 2024 12:32:53 +0000 Subject: [PATCH 13/31] chore: updated generated configuration --- manifest.json | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/manifest.json b/manifest.json index 39df377..3768f7a 100644 --- a/manifest.json +++ b/manifest.json @@ -1,5 +1,36 @@ { "name": "Generate vector embeddings", "description": "Enables the storage, updating, and deletion of issue comment embeddings.", - "ubiquity:listeners": ["issue_comment.created", "issue_comment.edited", "issue_comment.deleted", "issues.opened", "issues.edited", "issues.deleted", "issues.labeled"] -} + "ubiquity:listeners": [ + "issue_comment.created", + "issue_comment.edited", + "issue_comment.deleted", + "issues.opened", + "issues.edited", + "issues.deleted", + "issues.labeled" + ], + "configuration": { + "default": {}, + "type": "object", + "properties": { + "matchThreshold": { + "default": 0.95, + "type": "number" + }, + "warningThreshold": { + "default": 0.75, + "type": "number" + }, + "jobMatchingThreshold": { + "default": 0.75, + "type": "number" + } + }, + "required": [ + "matchThreshold", + "warningThreshold", + "jobMatchingThreshold" + ] + } +} \ No newline at end of file From 92af35fd4154984dadb6f90686e97c1ad1708332 Mon Sep 17 00:00:00 2001 From: gentlementlegen Date: Thu, 3 Oct 2024 21:34:41 +0900 Subject: [PATCH 14/31] chore: fix tests --- tests/main.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/main.test.ts b/tests/main.test.ts index 78de1fd..905b786 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -37,7 +37,7 @@ describe("Plugin tests", () => { it("Should serve the manifest file", async () => { const worker = (await import("../src/worker")).default; - const response = await worker.fetch(new Request("http://localhost/manifest.json"), { + const response = await worker.fetch(new Request("http://localhost/manifest"), { SUPABASE_KEY: "test", SUPABASE_URL: "test", VOYAGEAI_API_KEY: "test", From 8e8da2cb5c7925c752953240706846b367366281 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Thu, 3 Oct 2024 10:35:18 -0400 Subject: [PATCH 15/31] fix: empty footnotes --- src/handlers/issue-deduplication.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index abea2ea..506d271 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -112,6 +112,10 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa return `[^0${index + 1}^]: [${issue.node.title}](${modifiedUrl}) ${issue.similarity}%`; }) .join("\n"); + + if (commentBody.length === 0) { + return; + } const footnoteLinks = [...Array(++finalIndex).keys()].map((i) => `[^0${i + 1}^]`).join(""); const body = "\n###### Similar " + footnoteLinks + "\n\n" + commentBody; From 4582a1463b82d0c61d1c68d3b8cda53ba14cf0ea Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Thu, 3 Oct 2024 11:57:57 -0400 Subject: [PATCH 16/31] fix: similar issue filter --- src/handlers/issue-deduplication.ts | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 506d271..599b378 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -29,7 +29,13 @@ export async function issueChecker(context: Context): Promise { } = context; const { payload } = context as { payload: IssuePayload }; const issue = payload.issue; - const issueContent = issue.body + issue.title; + + //Find and remove the footnotes from the issue content + const existingBody = context.payload.issue.body; + const footnoteIndex = existingBody?.indexOf("\n###### Similar"); + const issueBody = footnoteIndex !== -1 ? existingBody?.substring(0, footnoteIndex) : existingBody; + const issueContent = issueBody + issue.title; + // Fetch all similar issues based on settings.warningThreshold const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.warningThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { @@ -50,7 +56,7 @@ export async function issueChecker(context: Context): Promise { // Handle issues that match the settings.warningThreshold but not the MATCH_THRESHOLD if (similarIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.warningThreshold} already exists`); - await handleSimilarIssuesComment(context, payload, issue.number, similarIssues); + await handleSimilarIssuesComment(context, payload, issue.number, similarIssues, issueBody || ""); return true; } } @@ -77,7 +83,13 @@ function matchRepoOrgToSimilarIssueRepoOrg(repoOrg: string, similarIssueRepoOrg: * @param issueNumber * @param similarIssues */ -async function handleSimilarIssuesComment(context: Context, payload: IssuePayload, issueNumber: number, similarIssues: IssueSimilaritySearchResult[]) { +async function handleSimilarIssuesComment( + context: Context, + payload: IssuePayload, + issueNumber: number, + similarIssues: IssueSimilaritySearchResult[], + modifiedBody: string +) { const issueList: IssueGraphqlResponse[] = await Promise.all( similarIssues.map(async (issue: IssueSimilaritySearchResult) => { const issueUrl: IssueGraphqlResponse = await context.octokit.graphql( @@ -119,16 +131,11 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa const footnoteLinks = [...Array(++finalIndex).keys()].map((i) => `[^0${i + 1}^]`).join(""); const body = "\n###### Similar " + footnoteLinks + "\n\n" + commentBody; - // Remove the existing foot note - const existingBody = context.payload.issue.body; - const footnoteIndex = existingBody?.indexOf("\n###### Similar"); - const newBody = footnoteIndex !== -1 ? existingBody?.substring(0, footnoteIndex) : existingBody; - //Append the new foot note await context.octokit.issues.update({ owner: payload.repository.owner.login, repo: payload.repository.name, issue_number: issueNumber, - body: newBody + body, + body: modifiedBody + body, }); } From caa4ee39d14a3a0829521b8113e20fd1725557e8 Mon Sep 17 00:00:00 2001 From: gentlementlegen Date: Fri, 4 Oct 2024 17:03:29 +0900 Subject: [PATCH 17/31] chore: changed manifest endpoint --- src/worker.ts | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/worker.ts b/src/worker.ts index 33370a1..72b9a61 100644 --- a/src/worker.ts +++ b/src/worker.ts @@ -7,16 +7,10 @@ export default { async fetch(request: Request, env: Env): Promise { try { const url = new URL(request.url); - if (url.pathname === "/manifest") { - if (request.method === "GET") { - return new Response(JSON.stringify(manifest), { - headers: { "content-type": "application/json" }, - }); - } else if (request.method === "POST") { - const webhookPayload = await request.json(); - validateAndDecodeSchemas(env, webhookPayload.settings); - return new Response(JSON.stringify({ message: "Schema is valid" }), { status: 200, headers: { "content-type": "application/json" } }); - } + if (url.pathname === "/manifest.json" && request.method === "GET") { + return new Response(JSON.stringify(manifest), { + headers: { "content-type": "application/json" }, + }); } if (request.method !== "POST") { return new Response(JSON.stringify({ error: `Only POST requests are supported.` }), { From 0fc8d04ddb3e28ef59f47c2b24172484e7af05a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=82=A2=E3=83=AC=E3=82=AF=E3=82=B5=E3=83=B3=E3=83=80?= =?UTF-8?q?=E3=83=BC=2Eeth?= Date: Thu, 10 Oct 2024 23:40:09 +0900 Subject: [PATCH 18/31] refactor: update package and plugin names to reflect new organization --- .cspell.json | 2 +- CHANGELOG.md | 36 ++++++++++++++++++------------------ README.md | 6 +++--- package.json | 4 ++-- src/plugin.ts | 19 +++++++++---------- src/types/context.ts | 4 ++-- tests/main.test.ts | 18 +++++++++--------- yarn.lock | 8 ++++---- 8 files changed, 48 insertions(+), 49 deletions(-) diff --git a/.cspell.json b/.cspell.json index 43040b8..40e1508 100644 --- a/.cspell.json +++ b/.cspell.json @@ -13,7 +13,7 @@ "Supabase", "SUPABASE", "typebox", - "ubiquibot", + "ubiquity-os", "Smee", "typeguards", "mswjs", diff --git a/CHANGELOG.md b/CHANGELOG.md index bf0b82f..13913a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,25 +5,25 @@ ### Features -* modified tests ([97e267f](https://github.com/ubiquibot/issue-comment-embeddings/commit/97e267f801ce4e6bd29bbe967de3df4fc3b1942a)) +* modified tests ([97e267f](https://github.com/ubiquity-os/issue-comment-embeddings/commit/97e267f801ce4e6bd29bbe967de3df4fc3b1942a)) ### Bug Fixes -* added config.yml ([c0f784b](https://github.com/ubiquibot/issue-comment-embeddings/commit/c0f784b20e59c2c4714805331c7ae9034fd73f73)) -* added config.yml ([221d34d](https://github.com/ubiquibot/issue-comment-embeddings/commit/221d34d801af6ebd764028be4a5c6200a18b776e)) -* added config.yml ([d12c522](https://github.com/ubiquibot/issue-comment-embeddings/commit/d12c522291db36dcf6aea72e5759e1a055185d8f)) -* cspell fix ([736bea6](https://github.com/ubiquibot/issue-comment-embeddings/commit/736bea6172444fdf783ffff729879d8278ff82f3)) -* fixed tests missing supabase files ([0e870ac](https://github.com/ubiquibot/issue-comment-embeddings/commit/0e870ac50eb68249edf5fc4e46fd509425dd7bbb)) -* github workflow, types package.json, env examples ([16786d7](https://github.com/ubiquibot/issue-comment-embeddings/commit/16786d76ee7a598c885f15af1baeadcf6a471b2c)) -* issue_comments linting added issue_comments:edited, created and deleted ([9c0de23](https://github.com/ubiquibot/issue-comment-embeddings/commit/9c0de237048ce30bf4254960c443bf3938037dce)) -* knip workflow ([f325310](https://github.com/ubiquibot/issue-comment-embeddings/commit/f3253109c290c9fce6d14e6a2e1e328133ac6f81)) -* manifest.json, compute.yml ([21409d5](https://github.com/ubiquibot/issue-comment-embeddings/commit/21409d530c3aad6ff2676fc813314e5b29c1a533)) -* package.json ([806c6c0](https://github.com/ubiquibot/issue-comment-embeddings/commit/806c6c0b393a9b87741a6341fa65bc5b3d22cb15)) -* plugin name ([d91b991](https://github.com/ubiquibot/issue-comment-embeddings/commit/d91b991d717b7fb0b73359ca29ae6de08a1074b9)) -* readme.md ([9c5fbfe](https://github.com/ubiquibot/issue-comment-embeddings/commit/9c5fbfe9ca46eb842779468c85d329b9f941fb82)) -* readme.md ([2fec447](https://github.com/ubiquibot/issue-comment-embeddings/commit/2fec44786526e7c10faaa2c13c4349e1232cf5bd)) -* remove config.yml and wrangler.toml namespace entries ([127cc22](https://github.com/ubiquibot/issue-comment-embeddings/commit/127cc225903c3fe3ca934e8407df4eb9c27e378c)) -* removed config.yml changed name ([744e08c](https://github.com/ubiquibot/issue-comment-embeddings/commit/744e08cebac310ae81c3c102f5f3a9473e6e4b9e)) -* test and linting ([a4ee41e](https://github.com/ubiquibot/issue-comment-embeddings/commit/a4ee41e6fca8723ce2fddc96b1171c89cfe7d5b7)) -* wrangler name ([f890071](https://github.com/ubiquibot/issue-comment-embeddings/commit/f890071c01c5bb1d611a5b7aa07cba84f4546251)) +* added config.yml ([c0f784b](https://github.com/ubiquity-os/issue-comment-embeddings/commit/c0f784b20e59c2c4714805331c7ae9034fd73f73)) +* added config.yml ([221d34d](https://github.com/ubiquity-os/issue-comment-embeddings/commit/221d34d801af6ebd764028be4a5c6200a18b776e)) +* added config.yml ([d12c522](https://github.com/ubiquity-os/issue-comment-embeddings/commit/d12c522291db36dcf6aea72e5759e1a055185d8f)) +* cspell fix ([736bea6](https://github.com/ubiquity-os/issue-comment-embeddings/commit/736bea6172444fdf783ffff729879d8278ff82f3)) +* fixed tests missing supabase files ([0e870ac](https://github.com/ubiquity-os/issue-comment-embeddings/commit/0e870ac50eb68249edf5fc4e46fd509425dd7bbb)) +* github workflow, types package.json, env examples ([16786d7](https://github.com/ubiquity-os/issue-comment-embeddings/commit/16786d76ee7a598c885f15af1baeadcf6a471b2c)) +* issue_comments linting added issue_comments:edited, created and deleted ([9c0de23](https://github.com/ubiquity-os/issue-comment-embeddings/commit/9c0de237048ce30bf4254960c443bf3938037dce)) +* knip workflow ([f325310](https://github.com/ubiquity-os/issue-comment-embeddings/commit/f3253109c290c9fce6d14e6a2e1e328133ac6f81)) +* manifest.json, compute.yml ([21409d5](https://github.com/ubiquity-os/issue-comment-embeddings/commit/21409d530c3aad6ff2676fc813314e5b29c1a533)) +* package.json ([806c6c0](https://github.com/ubiquity-os/issue-comment-embeddings/commit/806c6c0b393a9b87741a6341fa65bc5b3d22cb15)) +* plugin name ([d91b991](https://github.com/ubiquity-os/issue-comment-embeddings/commit/d91b991d717b7fb0b73359ca29ae6de08a1074b9)) +* readme.md ([9c5fbfe](https://github.com/ubiquity-os/issue-comment-embeddings/commit/9c5fbfe9ca46eb842779468c85d329b9f941fb82)) +* readme.md ([2fec447](https://github.com/ubiquity-os/issue-comment-embeddings/commit/2fec44786526e7c10faaa2c13c4349e1232cf5bd)) +* remove config.yml and wrangler.toml namespace entries ([127cc22](https://github.com/ubiquity-os/issue-comment-embeddings/commit/127cc225903c3fe3ca934e8407df4eb9c27e378c)) +* removed config.yml changed name ([744e08c](https://github.com/ubiquity-os/issue-comment-embeddings/commit/744e08cebac310ae81c3c102f5f3a9473e6e4b9e)) +* test and linting ([a4ee41e](https://github.com/ubiquity-os/issue-comment-embeddings/commit/a4ee41e6fca8723ce2fddc96b1171c89cfe7d5b7)) +* wrangler name ([f890071](https://github.com/ubiquity-os/issue-comment-embeddings/commit/f890071c01c5bb1d611a5b7aa07cba84f4546251)) diff --git a/README.md b/README.md index 0619b52..ce2acff 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# `@ubiquibot/issue-comment-embeddings` +# `@ubiquity-os/issue-comment-embeddings` -This is a plugin for [Ubiquibot](https://github.com/ubiquity/ubiquibot-kernel). It listens for issue comments, and adds them to a vector store. It handles comment edits and deletions as well. +This is a plugin for [Ubiquibot](https://github.com/ubiquity-os/ubiquity-os-kernel). It listens for issue comments, and adds them to a vector store. It handles comment edits and deletions as well. ## Configuration - Host the plugin on a server that Ubiquibot can access. @@ -10,7 +10,7 @@ To set up the `.dev.vars` file, you will need to provide the following variables - `VOYAGEAI_API_KEY`: The API key for Voyage. ## Usage -- Add the following to your `.ubiquibot-config.yml` file with the appropriate URL: +- Add the following to your `.ubiquity-os.config.yml` file with the appropriate URL: ```yaml - plugin: https://ubiquity-os-comment-vector-embeddings-main.ubiquity.workers.dev with: diff --git a/package.json b/package.json index e1916ce..391c041 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "@ubiquibot/issue-comment-embeddings", + "name": "@ubiquity-os/issue-comment-embeddings", "version": "1.0.0", "description": "Generates vector embeddings of GitHub comments and stores them in Supabase.", "author": "Ubiquity DAO", @@ -36,7 +36,7 @@ "@sinclair/typebox": "0.32.33", "@supabase/supabase-js": "^2.45.2", "@types/markdown-it": "^14.1.2", - "@ubiquity-dao/ubiquibot-logger": "^1.3.0", + "@ubiquity-os/ubiquity-os-logger": "^1.3.2", "dotenv": "16.4.5", "markdown-it": "^14.1.0", "markdown-it-plain-text": "^0.3.0", diff --git a/src/plugin.ts b/src/plugin.ts index 197948b..341e07b 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -1,20 +1,19 @@ import { Octokit } from "@octokit/rest"; -import { Env, PluginInputs } from "./types"; -import { Context } from "./types"; -import { isIssueCommentEvent, isIssueEvent } from "./types/typeguards"; -import { LogLevel, Logs } from "@ubiquity-dao/ubiquibot-logger"; -import { Database } from "./types/database"; -import { createAdapters } from "./adapters"; import { createClient } from "@supabase/supabase-js"; +import { LogLevel, Logs } from "@ubiquity-os/ubiquity-os-logger"; +import { VoyageAIClient } from "voyageai"; +import { createAdapters } from "./adapters"; import { addComments } from "./handlers/add-comments"; -import { updateComment } from "./handlers/update-comments"; +import { addIssue } from "./handlers/add-issue"; import { deleteComment } from "./handlers/delete-comments"; -import { VoyageAIClient } from "voyageai"; import { deleteIssues } from "./handlers/delete-issue"; -import { addIssue } from "./handlers/add-issue"; -import { updateIssue } from "./handlers/update-issue"; import { issueChecker } from "./handlers/issue-deduplication"; import { issueMatching } from "./handlers/issue-matching"; +import { updateComment } from "./handlers/update-comments"; +import { updateIssue } from "./handlers/update-issue"; +import { Context, Env, PluginInputs } from "./types"; +import { Database } from "./types/database"; +import { isIssueCommentEvent, isIssueEvent } from "./types/typeguards"; /** * The main plugin function. Split for easier testing. diff --git a/src/types/context.ts b/src/types/context.ts index b11ac2f..b38d836 100644 --- a/src/types/context.ts +++ b/src/types/context.ts @@ -1,9 +1,9 @@ import { Octokit } from "@octokit/rest"; import { EmitterWebhookEvent as WebhookEvent, EmitterWebhookEventName as WebhookEventName } from "@octokit/webhooks"; +import { Logs } from "@ubiquity-os/ubiquity-os-logger"; +import { createAdapters } from "../adapters"; import { Env } from "./env"; import { PluginSettings } from "./plugin-inputs"; -import { Logs } from "@ubiquity-dao/ubiquibot-logger"; -import { createAdapters } from "../adapters"; /** * Update `manifest.json` with any events you want to support like so: diff --git a/tests/main.test.ts b/tests/main.test.ts index 905b786..442b82e 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -1,19 +1,19 @@ // cSpell:disable +import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "@jest/globals"; import { drop } from "@mswjs/data"; -import { db } from "./__mocks__/db"; -import { server } from "./__mocks__/node"; -import { expect, describe, beforeAll, beforeEach, afterAll, afterEach, it } from "@jest/globals"; -import { Context, SupportedEvents } from "../src/types/context"; import { Octokit } from "@octokit/rest"; -import { STRINGS } from "./__mocks__/strings"; -import { createComment, setupTests } from "./__mocks__/helpers"; -import manifest from "../manifest.json"; +import { Logs } from "@ubiquity-os/ubiquity-os-logger"; import dotenv from "dotenv"; -import { Logs } from "@ubiquity-dao/ubiquibot-logger"; -import { Env } from "../src/types"; +import manifest from "../manifest.json"; import { runPlugin } from "../src/plugin"; +import { Env } from "../src/types"; +import { Context, SupportedEvents } from "../src/types/context"; import { CommentMock, createMockAdapters } from "./__mocks__/adapter"; +import { db } from "./__mocks__/db"; +import { createComment, setupTests } from "./__mocks__/helpers"; +import { server } from "./__mocks__/node"; +import { STRINGS } from "./__mocks__/strings"; dotenv.config(); jest.requireActual("@octokit/rest"); diff --git a/yarn.lock b/yarn.lock index ac66d8a..f81a4c5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2155,10 +2155,10 @@ "@typescript-eslint/types" "7.13.1" eslint-visitor-keys "^3.4.3" -"@ubiquity-dao/ubiquibot-logger@^1.3.0": - version "1.3.0" - resolved "https://registry.yarnpkg.com/@ubiquity-dao/ubiquibot-logger/-/ubiquibot-logger-1.3.0.tgz#b07364658be95b3be3876305c66b2adc906e9590" - integrity sha512-ifkd7fB2OMTSt3OL9L14bCIvCMXV+IHFdJYU5S8FUzE2U88b4xKxuEAYDFX+DX3wwDEswFAVUwx5aP3QcMIRWA== +"@ubiquity-os/ubiquity-os-logger@^1.3.2": + version "1.3.2" + resolved "https://registry.yarnpkg.com/@ubiquity-os/ubiquity-os-logger/-/ubiquity-os-logger-1.3.2.tgz#4423bc0baeac5c2f73123d15fd961310521163cd" + integrity sha512-oTIzR8z4jAQmaeJp98t1bZUKE3Ws9pas0sbxt58fC37MwXclPMWrLO+a0JlhPkdJYsvpv/q/79wC2MKVhOIVXQ== JSONStream@^1.3.5: version "1.3.5" From 6f07c6c5c875070c1ff2e73eb55405b736c19f43 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Thu, 3 Oct 2024 15:00:53 -0400 Subject: [PATCH 19/31] feat: updated issue similarity ui --- src/handlers/issue-deduplication.ts | 165 +++++++++++++++++++--------- src/handlers/issue-matching.ts | 41 ++++--- 2 files changed, 137 insertions(+), 69 deletions(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 599b378..af118d2 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -6,6 +6,7 @@ export interface IssueGraphqlResponse { node: { title: string; url: string; + body: string; repository: { name: string; owner: { @@ -14,13 +15,15 @@ export interface IssueGraphqlResponse { }; }; similarity: string; + mostSimilarSentence: { sentence: string; similarity: number; index: number }; } /** - * Check if an issue is similar to any existing issues in the database - * @param context - * @returns true if the issue is similar to an existing issue, false otherwise - */ + * Checks if the current issue is a duplicate of an existing issue. + * If a similar issue is found, a comment is added to the current issue. + * @param context The context object + * @returns True if a similar issue is found, false otherwise + **/ export async function issueChecker(context: Context): Promise { const { logger, @@ -29,19 +32,10 @@ export async function issueChecker(context: Context): Promise { } = context; const { payload } = context as { payload: IssuePayload }; const issue = payload.issue; - - //Find and remove the footnotes from the issue content - const existingBody = context.payload.issue.body; - const footnoteIndex = existingBody?.indexOf("\n###### Similar"); - const issueBody = footnoteIndex !== -1 ? existingBody?.substring(0, footnoteIndex) : existingBody; - const issueContent = issueBody + issue.title; - - // Fetch all similar issues based on settings.warningThreshold - const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.warningThreshold, issue.node_id); + const similarIssues = await supabase.issue.findSimilarIssues(issue.title + removeFootnotes(issue.body || ""), context.config.warningThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { const matchIssues = similarIssues.filter((issue) => issue.similarity >= context.config.matchThreshold); - // Handle issues that match the MATCH_THRESHOLD (Very Similar) if (matchIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.matchThreshold} already exists`); await octokit.issues.update({ @@ -53,10 +47,9 @@ export async function issueChecker(context: Context): Promise { }); } - // Handle issues that match the settings.warningThreshold but not the MATCH_THRESHOLD if (similarIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.warningThreshold} already exists`); - await handleSimilarIssuesComment(context, payload, issue.number, similarIssues, issueBody || ""); + await handleSimilarIssuesComment(context, payload, issue.number, similarIssues); return true; } } @@ -64,32 +57,41 @@ export async function issueChecker(context: Context): Promise { return false; } -/** - * Compare the repository and issue name to the similar issue repository and issue name - * @param repoOrg - * @param similarIssueRepoOrg - * @param repoName - * @param similarIssueRepoName - * @returns - */ function matchRepoOrgToSimilarIssueRepoOrg(repoOrg: string, similarIssueRepoOrg: string, repoName: string, similarIssueRepoName: string): boolean { return repoOrg === similarIssueRepoOrg && repoName === similarIssueRepoName; } /** - * Handle commenting on an issue with similar issues information - * @param context - * @param payload - * @param issueNumber - * @param similarIssues + * Finds the most similar sentence in a similar issue to a sentence in the current issue. + * @param issueContent The content of the current issue + * @param similarIssueContent The content of the similar issue + * @returns The most similar sentence and its similarity score */ -async function handleSimilarIssuesComment( - context: Context, - payload: IssuePayload, - issueNumber: number, - similarIssues: IssueSimilaritySearchResult[], - modifiedBody: string -) { +function findMostSimilarSentence(issueContent: string, similarIssueContent: string): { sentence: string; similarity: number; index: number } { + const issueSentences = issueContent.split(/[.!?]+/).filter((sentence) => sentence.trim().length > 0); + const similarIssueSentences = similarIssueContent.split(/[.!?]+/).filter((sentence) => sentence.trim().length > 0); + let maxSimilarity = 0; + let mostSimilarSentence = ""; + let mostSimilarIndex = -1; + issueSentences.forEach((sentence, index) => { + const similarities = similarIssueSentences.map((similarSentence) => { + const editDistance = findEditDistance(sentence, similarSentence); + const maxLength = Math.max(sentence.length, similarSentence.length); + // Normalized similarity (edit distance) + return 1 - editDistance / maxLength; + }); + const maxSentenceSimilarity = Math.max(...similarities); + if (maxSentenceSimilarity > maxSimilarity) { + maxSimilarity = maxSentenceSimilarity; + mostSimilarSentence = sentence; + mostSimilarIndex = index; + } + }); + + return { sentence: mostSimilarSentence, similarity: maxSimilarity, index: mostSimilarIndex }; +} + +async function handleSimilarIssuesComment(context: Context, payload: IssuePayload, issueNumber: number, similarIssues: IssueSimilaritySearchResult[]) { const issueList: IssueGraphqlResponse[] = await Promise.all( similarIssues.map(async (issue: IssueSimilaritySearchResult) => { const issueUrl: IssueGraphqlResponse = await context.octokit.graphql( @@ -98,6 +100,7 @@ async function handleSimilarIssuesComment( ... on Issue { title url + body repository { name owner { @@ -110,32 +113,92 @@ async function handleSimilarIssuesComment( { issueNodeId: issue.issue_id } ); issueUrl.similarity = Math.round(issue.similarity * 100).toString(); + issueUrl.mostSimilarSentence = findMostSimilarSentence(context.payload.issue.body || "", issueUrl.node.body); return issueUrl; }) ); - let finalIndex = 0; - const commentBody = issueList - .filter((issue) => - matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) - ) - .map((issue, index) => { - const modifiedUrl = issue.node.url.replace("https://github.com", "https://www.github.com"); - return `[^0${index + 1}^]: [${issue.node.title}](${modifiedUrl}) ${issue.similarity}%`; - }) - .join("\n"); + const relevantIssues = issueList.filter((issue) => + matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) + ); - if (commentBody.length === 0) { + if (relevantIssues.length === 0) { return; } - const footnoteLinks = [...Array(++finalIndex).keys()].map((i) => `[^0${i + 1}^]`).join(""); - const body = "\n###### Similar " + footnoteLinks + "\n\n" + commentBody; - //Append the new foot note + const issueBody = context.payload.issue.body || ""; + // Find existing footnotes in the body + const footnoteRegex = /\[\^(\d+)\^\]/g; + const existingFootnotes = issueBody.match(footnoteRegex) || []; + const highestFootnoteIndex = existingFootnotes.length > 0 ? Math.max(...existingFootnotes.map((fn) => parseInt(fn.match(/\d+/)?.[0] ?? "0"))) : 0; + let updatedBody = issueBody; + let footnotes = ""; + relevantIssues.forEach((issue, index) => { + const footnoteIndex = highestFootnoteIndex + index + 1; // Continue numbering from the highest existing footnote number + const footnoteRef = `[^0${footnoteIndex}^]`; + const modifiedUrl = issue.node.url.replace("https://github.com", "https://www.github.com"); + const { sentence } = issue.mostSimilarSentence; + + // Insert footnote reference in the body + const sentencePattern = new RegExp(`${sentence.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`, "g"); + updatedBody = updatedBody.replace(sentencePattern, `${sentence}${footnoteRef}`); + + // Add new footnote + footnotes += `${footnoteRef}: ⚠ ${issue.similarity}% possible duplicate - [${issue.node.title}](${modifiedUrl})\n\n`; + }); + + // Append new footnotes to the body, keeping the previous ones + updatedBody += footnotes; + + // Update the issue with the modified body await context.octokit.issues.update({ owner: payload.repository.owner.login, repo: payload.repository.name, issue_number: issueNumber, - body: modifiedBody + body, + body: updatedBody, }); } + +/** + * Finds the edit distance between two strings using dynamic programming. + * @param sentenceA + * @param sentenceB + * @returns + */ +function findEditDistance(sentenceA: string, sentenceB: string): number { + const m = sentenceA.length; + const n = sentenceB.length; + const dp: number[][] = Array.from({ length: m + 1 }, () => Array.from({ length: n + 1 }, () => 0)); + + for (let i = 0; i <= m; i++) { + for (let j = 0; j <= n; j++) { + if (i === 0) { + dp[i][j] = j; + } else if (j === 0) { + dp[i][j] = i; + } else if (sentenceA[i - 1] === sentenceB[j - 1]) { + dp[i][j] = dp[i - 1][j - 1]; + } else { + dp[i][j] = 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]); + } + } + } + + return dp[m][n]; +} + +/** + * Removes all footnotes from the issue content. + * This includes both the footnote references in the body and the footnote definitions at the bottom. + * @param content The content of the issue + * @returns The content without footnotes + */ +function removeFootnotes(content: string): string { + // Remove footnote references like [^1^], [^2^], etc. + const footnoteRefRegex = /\[\^\d+\^\]/g; + const contentWithoutFootnoteRefs = content.replace(footnoteRefRegex, ""); + + // Remove footnote section starting with '###### Similar Issues' or any other footnote-related section + const footnoteSectionRegex = /\n###### Similar Issues[\s\S]*$/g; + return contentWithoutFootnoteRefs.replace(footnoteSectionRegex, ""); +} diff --git a/src/handlers/issue-matching.ts b/src/handlers/issue-matching.ts index cc1d060..f2e866a 100644 --- a/src/handlers/issue-matching.ts +++ b/src/handlers/issue-matching.ts @@ -24,17 +24,12 @@ export interface IssueGraphqlResponse { similarity: number; } -const commentBuilder = (matchResultArray: Map>): string => { - const commentLines: string[] = [">[!NOTE]", ">The following contributors may be suitable for this task:"]; - matchResultArray.forEach((issues, assignee) => { - commentLines.push(`>### [${assignee}](https://www.github.com/${assignee})`); - issues.forEach((issue) => { - commentLines.push(issue); - }); - }); - return commentLines.join("\n"); -}; - +/** + * Checks if the current issue is a duplicate of an existing issue. + * If a similar issue is found, a comment is added to the current issue. + * @param context The context object + * @returns True if a similar issue is found, false otherwise + **/ export async function issueMatching(context: Context) { const { logger, @@ -45,15 +40,10 @@ export async function issueMatching(context: Context) { const issue = payload.issue; const issueContent = issue.body + issue.title; const commentStart = ">The following contributors may be suitable for this task:"; - - // On Adding the labels to the issue, the bot should - // create a new comment with users who completed task most similar to the issue - // if the comment already exists, it should update the comment with the new users const matchResultArray: Map> = new Map(); const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.jobMatchingThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { - // Find the most similar issue and the users who completed the task - similarIssues.sort((a, b) => b.similarity - a.similarity); + similarIssues.sort((a, b) => b.similarity - a.similarity); // Sort by similarity const fetchPromises = similarIssues.map(async (issue) => { const issueObject: IssueGraphqlResponse = await context.octokit.graphql( `query ($issueNodeId: ID!) { @@ -84,7 +74,6 @@ export async function issueMatching(context: Context) { issueObject.similarity = issue.similarity; return issueObject; }); - const issueList = await Promise.all(fetchPromises); issueList.forEach((issue) => { if (issue.node.closed && issue.node.stateReason === "COMPLETED" && issue.node.assignees.nodes.length > 0) { @@ -148,3 +137,19 @@ export async function issueMatching(context: Context) { logger.ok(`Successfully created issue comment!`); logger.debug(`Exiting issueMatching handler`); } + +/** + * Builds the comment to be added to the issue + * @param matchResultArray The array of issues to be matched + * @returns The comment to be added to the issue + */ +function commentBuilder(matchResultArray: Map>): string { + const commentLines: string[] = [">[!NOTE]", ">The following contributors may be suitable for this task:"]; + matchResultArray.forEach((issues, assignee) => { + commentLines.push(`>### [${assignee}](https://www.github.com/${assignee})`); + issues.forEach((issue) => { + commentLines.push(issue); + }); + }); + return commentLines.join("\n"); +} From 0cbce178bf9eba9b19563c3407acfbf6fbe9ed98 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Thu, 3 Oct 2024 16:51:58 -0400 Subject: [PATCH 20/31] fix: empty strings init removed --- src/handlers/issue-deduplication.ts | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index af118d2..7e6f182 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -71,7 +71,7 @@ function findMostSimilarSentence(issueContent: string, similarIssueContent: stri const issueSentences = issueContent.split(/[.!?]+/).filter((sentence) => sentence.trim().length > 0); const similarIssueSentences = similarIssueContent.split(/[.!?]+/).filter((sentence) => sentence.trim().length > 0); let maxSimilarity = 0; - let mostSimilarSentence = ""; + let mostSimilarSentence; let mostSimilarIndex = -1; issueSentences.forEach((sentence, index) => { const similarities = similarIssueSentences.map((similarSentence) => { @@ -87,7 +87,9 @@ function findMostSimilarSentence(issueContent: string, similarIssueContent: stri mostSimilarIndex = index; } }); - + if (!mostSimilarSentence) { + throw new Error("No similar sentence found"); + } return { sentence: mostSimilarSentence, similarity: maxSimilarity, index: mostSimilarIndex }; } @@ -132,7 +134,7 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa const existingFootnotes = issueBody.match(footnoteRegex) || []; const highestFootnoteIndex = existingFootnotes.length > 0 ? Math.max(...existingFootnotes.map((fn) => parseInt(fn.match(/\d+/)?.[0] ?? "0"))) : 0; let updatedBody = issueBody; - let footnotes = ""; + let footnotes: string[] | undefined; relevantIssues.forEach((issue, index) => { const footnoteIndex = highestFootnoteIndex + index + 1; // Continue numbering from the highest existing footnote number const footnoteRef = `[^0${footnoteIndex}^]`; @@ -143,12 +145,17 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa const sentencePattern = new RegExp(`${sentence.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`, "g"); updatedBody = updatedBody.replace(sentencePattern, `${sentence}${footnoteRef}`); - // Add new footnote - footnotes += `${footnoteRef}: ⚠ ${issue.similarity}% possible duplicate - [${issue.node.title}](${modifiedUrl})\n\n`; + // Initialize footnotes array if not already done + if (!footnotes) { + footnotes = []; + } + + // Add new footnote to the array + footnotes.push(`${footnoteRef}: ⚠ ${issue.similarity}% possible duplicate - [${issue.node.title}](${modifiedUrl})\n\n`); }); // Append new footnotes to the body, keeping the previous ones - updatedBody += footnotes; + updatedBody += footnotes ? footnotes.join("") : ""; // Update the issue with the modified body await context.octokit.issues.update({ From e13461a529156d9759bfcf5f91b53102af3bbe55 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Fri, 4 Oct 2024 01:37:28 -0400 Subject: [PATCH 21/31] fix: changed the removeFootnote function --- src/adapters/supabase/helpers/comment.ts | 4 +- src/adapters/supabase/helpers/issues.ts | 4 +- src/handlers/add-issue.ts | 4 +- src/handlers/issue-deduplication.ts | 89 +++++++++++++++--------- src/handlers/update-issue.ts | 5 +- 5 files changed, 67 insertions(+), 39 deletions(-) diff --git a/src/adapters/supabase/helpers/comment.ts b/src/adapters/supabase/helpers/comment.ts index 3fa08b2..a0fa89e 100644 --- a/src/adapters/supabase/helpers/comment.ts +++ b/src/adapters/supabase/helpers/comment.ts @@ -37,7 +37,7 @@ export class Comment extends SuperSupabase { } else { //Create the embedding for this comment const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { markdown = null as string | null; payload = null as Record | null; @@ -57,7 +57,7 @@ export class Comment extends SuperSupabase { async updateComment(markdown: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) { //Create the embedding for this comment const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { markdown = null as string | null; payload = null as Record | null; diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 6bfef09..94b7d38 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -37,7 +37,7 @@ export class Issues extends SuperSupabase { return; } else { const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { payload = null; markdown = null; @@ -55,7 +55,7 @@ export class Issues extends SuperSupabase { async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean) { //Create the embedding for this comment const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { markdown = null as string | null; payload = null as Record | null; diff --git a/src/handlers/add-issue.ts b/src/handlers/add-issue.ts index 969a5c2..c828bbf 100644 --- a/src/handlers/add-issue.ts +++ b/src/handlers/add-issue.ts @@ -1,5 +1,6 @@ import { Context } from "../types"; import { IssuePayload } from "../types/payload"; +import { removeFootnotes } from "./issue-deduplication"; export async function addIssue(context: Context) { const { @@ -16,7 +17,8 @@ export async function addIssue(context: Context) { if (!markdown) { throw new Error("Issue body is empty"); } - await supabase.issue.createIssue(nodeId, payload, isPrivate, markdown, authorId); + const cleanedIssue = removeFootnotes(markdown); + await supabase.issue.createIssue(nodeId, payload, isPrivate, cleanedIssue, authorId); } catch (error) { if (error instanceof Error) { logger.error(`Error creating issue:`, { error: error, stack: error.stack }); diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 7e6f182..3df913c 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -32,10 +32,15 @@ export async function issueChecker(context: Context): Promise { } = context; const { payload } = context as { payload: IssuePayload }; const issue = payload.issue; - const similarIssues = await supabase.issue.findSimilarIssues(issue.title + removeFootnotes(issue.body || ""), context.config.warningThreshold, issue.node_id); + let issueBody = issue.body; + if (!issueBody) { + logger.info("Issue body is empty"); + return false; + } + issueBody = removeFootnotes(issueBody); + const similarIssues = await supabase.issue.findSimilarIssues(issue.title + removeFootnotes(issueBody), context.config.warningThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { const matchIssues = similarIssues.filter((issue) => issue.similarity >= context.config.matchThreshold); - if (matchIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.matchThreshold} already exists`); await octokit.issues.update({ @@ -49,11 +54,11 @@ export async function issueChecker(context: Context): Promise { if (similarIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.warningThreshold} already exists`); - await handleSimilarIssuesComment(context, payload, issue.number, similarIssues); + await handleSimilarIssuesComment(context, payload, issueBody, issue.number, similarIssues); return true; } } - + console.log("No similar issues found"); return false; } @@ -93,7 +98,13 @@ function findMostSimilarSentence(issueContent: string, similarIssueContent: stri return { sentence: mostSimilarSentence, similarity: maxSimilarity, index: mostSimilarIndex }; } -async function handleSimilarIssuesComment(context: Context, payload: IssuePayload, issueNumber: number, similarIssues: IssueSimilaritySearchResult[]) { +async function handleSimilarIssuesComment( + context: Context, + payload: IssuePayload, + issueBody: string, + issueNumber: number, + similarIssues: IssueSimilaritySearchResult[] +) { const issueList: IssueGraphqlResponse[] = await Promise.all( similarIssues.map(async (issue: IssueSimilaritySearchResult) => { const issueUrl: IssueGraphqlResponse = await context.octokit.graphql( @@ -115,7 +126,7 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa { issueNodeId: issue.issue_id } ); issueUrl.similarity = Math.round(issue.similarity * 100).toString(); - issueUrl.mostSimilarSentence = findMostSimilarSentence(context.payload.issue.body || "", issueUrl.node.body); + issueUrl.mostSimilarSentence = findMostSimilarSentence(issueBody, issueUrl.node.body); return issueUrl; }) ); @@ -128,7 +139,9 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa return; } - const issueBody = context.payload.issue.body || ""; + if (!issueBody) { + return; + } // Find existing footnotes in the body const footnoteRegex = /\[\^(\d+)\^\]/g; const existingFootnotes = issueBody.match(footnoteRegex) || []; @@ -155,7 +168,9 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa }); // Append new footnotes to the body, keeping the previous ones - updatedBody += footnotes ? footnotes.join("") : ""; + if (footnotes) { + updatedBody += "\n\n" + footnotes.join(""); + } // Update the issue with the modified body await context.octokit.issues.update({ @@ -168,30 +183,34 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa /** * Finds the edit distance between two strings using dynamic programming. - * @param sentenceA - * @param sentenceB - * @returns + * The edit distance is a way of quantifying how dissimilar two strings are to one another by + * counting the minimum number of operations required to transform one string into the other. + * For more information, see: https://en.wikipedia.org/wiki/Edit_distance + * @param sentenceA The first string + * @param sentenceB The second string + * @returns The edit distance between the two strings */ function findEditDistance(sentenceA: string, sentenceB: string): number { - const m = sentenceA.length; - const n = sentenceB.length; - const dp: number[][] = Array.from({ length: m + 1 }, () => Array.from({ length: n + 1 }, () => 0)); - - for (let i = 0; i <= m; i++) { - for (let j = 0; j <= n; j++) { - if (i === 0) { - dp[i][j] = j; - } else if (j === 0) { - dp[i][j] = i; - } else if (sentenceA[i - 1] === sentenceB[j - 1]) { - dp[i][j] = dp[i - 1][j - 1]; + const lengthA = sentenceA.length; + const lengthB = sentenceB.length; + const distanceMatrix: number[][] = Array.from({ length: lengthA + 1 }, () => Array.from({ length: lengthB + 1 }, () => 0)); + + for (let indexA = 0; indexA <= lengthA; indexA++) { + for (let indexB = 0; indexB <= lengthB; indexB++) { + if (indexA === 0) { + distanceMatrix[indexA][indexB] = indexB; + } else if (indexB === 0) { + distanceMatrix[indexA][indexB] = indexA; + } else if (sentenceA[indexA - 1] === sentenceB[indexB - 1]) { + distanceMatrix[indexA][indexB] = distanceMatrix[indexA - 1][indexB - 1]; } else { - dp[i][j] = 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]); + distanceMatrix[indexA][indexB] = + 1 + Math.min(distanceMatrix[indexA - 1][indexB], distanceMatrix[indexA][indexB - 1], distanceMatrix[indexA - 1][indexB - 1]); } } } - return dp[m][n]; + return distanceMatrix[lengthA][lengthB]; } /** @@ -200,12 +219,16 @@ function findEditDistance(sentenceA: string, sentenceB: string): number { * @param content The content of the issue * @returns The content without footnotes */ -function removeFootnotes(content: string): string { - // Remove footnote references like [^1^], [^2^], etc. - const footnoteRefRegex = /\[\^\d+\^\]/g; - const contentWithoutFootnoteRefs = content.replace(footnoteRefRegex, ""); - - // Remove footnote section starting with '###### Similar Issues' or any other footnote-related section - const footnoteSectionRegex = /\n###### Similar Issues[\s\S]*$/g; - return contentWithoutFootnoteRefs.replace(footnoteSectionRegex, ""); +export function removeFootnotes(content: string): string { + const footnoteDefRegex = /\[\^(\d+)\^\]: ⚠ \d+% possible duplicate - [^\n]+(\n|$)/g; + const footnotes = content.match(footnoteDefRegex); + let contentWithoutFootnotes = content.replace(footnoteDefRegex, ""); + if (footnotes) { + console.log(footnotes); + footnotes.forEach((footnote) => { + const footnoteNumber = footnote.match(/\d+/)?.[0]; + contentWithoutFootnotes = contentWithoutFootnotes.replace(new RegExp(`\\[\\^${footnoteNumber}\\^\\]`, "g"), ""); + }); + } + return contentWithoutFootnotes.replace(/\n{2,}/g, "\n").trim(); } diff --git a/src/handlers/update-issue.ts b/src/handlers/update-issue.ts index 763b2ba..4ec5f34 100644 --- a/src/handlers/update-issue.ts +++ b/src/handlers/update-issue.ts @@ -1,5 +1,6 @@ import { Context } from "../types"; import { IssuePayload } from "../types/payload"; +import { removeFootnotes } from "./issue-deduplication"; export async function updateIssue(context: Context) { const { @@ -16,7 +17,9 @@ export async function updateIssue(context: Context) { if (!markdown) { throw new Error("Issue body is empty"); } - await supabase.issue.updateIssue(markdown, nodeId, payloadObject, isPrivate); + //clean issue by removing footnotes + const cleanedIssue = removeFootnotes(markdown); + await supabase.issue.updateIssue(cleanedIssue, nodeId, payloadObject, isPrivate); } catch (error) { if (error instanceof Error) { logger.error(`Error updating issue:`, { error: error, stack: error.stack }); From 8af9f8bf8bbe67135d27c4307c6decbdeac98762 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Sat, 5 Oct 2024 17:31:04 -0400 Subject: [PATCH 22/31] feat: change the issue result mention --- src/handlers/issue-deduplication.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 3df913c..f5f5ac3 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -5,6 +5,7 @@ import { IssuePayload } from "../types/payload"; export interface IssueGraphqlResponse { node: { title: string; + number: number; url: string; body: string; repository: { @@ -113,6 +114,7 @@ async function handleSimilarIssuesComment( ... on Issue { title url + number body repository { name @@ -164,7 +166,7 @@ async function handleSimilarIssuesComment( } // Add new footnote to the array - footnotes.push(`${footnoteRef}: ⚠ ${issue.similarity}% possible duplicate - [${issue.node.title}](${modifiedUrl})\n\n`); + footnotes.push(`${footnoteRef}: ⚠ ${issue.similarity}% possible duplicate - [${issue.node.title}](${modifiedUrl}#${issue.node.number})\n\n`); }); // Append new footnotes to the body, keeping the previous ones From d7f262e447674cb482ba043d04151200341438d4 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 8 Oct 2024 09:30:26 -0400 Subject: [PATCH 23/31] fix: tests, tests passing locally fails in ci --- tests/main.test.ts | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/main.test.ts b/tests/main.test.ts index 442b82e..27caeeb 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -5,7 +5,6 @@ import { drop } from "@mswjs/data"; import { Octokit } from "@octokit/rest"; import { Logs } from "@ubiquity-os/ubiquity-os-logger"; import dotenv from "dotenv"; -import manifest from "../manifest.json"; import { runPlugin } from "../src/plugin"; import { Env } from "../src/types"; import { Context, SupportedEvents } from "../src/types/context"; @@ -35,17 +34,6 @@ describe("Plugin tests", () => { await setupTests(); }); - it("Should serve the manifest file", async () => { - const worker = (await import("../src/worker")).default; - const response = await worker.fetch(new Request("http://localhost/manifest"), { - SUPABASE_KEY: "test", - SUPABASE_URL: "test", - VOYAGEAI_API_KEY: "test", - }); - const content = await response.json(); - expect(content).toEqual(manifest); - }); - it("When a comment is created it should add it to the database", async () => { const { context } = createContext(STRINGS.HELLO_WORLD, 1, 1, 1, 1, "sasasCreate"); await runPlugin(context); From 3ff27eea2363b97d531c0b3599fa865a9decd974 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 8 Oct 2024 12:50:33 -0400 Subject: [PATCH 24/31] fix: footnotes breaking link --- src/adapters/supabase/helpers/issues.ts | 1 + src/handlers/issue-deduplication.ts | 37 ++++++++++++++++--- .../20241002004403_issue_comments.sql | 8 ++-- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 94b7d38..b55e901 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -96,6 +96,7 @@ export class Issues extends SuperSupabase { current_id: currentId, query_embedding: embedding, threshold: threshold, + top_k: 5, }); if (error) { this.context.logger.error("Error finding similar issues", error); diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index f5f5ac3..3723753 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -59,7 +59,17 @@ export async function issueChecker(context: Context): Promise { return true; } } - console.log("No similar issues found"); + context.logger.info("No similar issues found"); + + //Use the IssueBody (Without footnotes) to update the issue + if (issueBody !== issue.body) { + await octokit.issues.update({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: issue.number, + body: issueBody, + }); + } return false; } @@ -74,11 +84,26 @@ function matchRepoOrgToSimilarIssueRepoOrg(repoOrg: string, similarIssueRepoOrg: * @returns The most similar sentence and its similarity score */ function findMostSimilarSentence(issueContent: string, similarIssueContent: string): { sentence: string; similarity: number; index: number } { - const issueSentences = issueContent.split(/[.!?]+/).filter((sentence) => sentence.trim().length > 0); - const similarIssueSentences = similarIssueContent.split(/[.!?]+/).filter((sentence) => sentence.trim().length > 0); + // Regex to match sentences while preserving URLs + const sentenceRegex = /([^.!?\s][^.!?]*(?:[.!?](?!['"]?\s|$)[^.!?]*)*[.!?]?['"]?(?=\s|$))/g; + + // Function to split text into sentences while preserving URLs + const splitIntoSentences = (text: string): string[] => { + const sentences: string[] = []; + let match; + while ((match = sentenceRegex.exec(text)) !== null) { + sentences.push(match[0].trim()); + } + return sentences; + }; + + const issueSentences = splitIntoSentences(issueContent); + const similarIssueSentences = splitIntoSentences(similarIssueContent); + let maxSimilarity = 0; - let mostSimilarSentence; + let mostSimilarSentence = ""; let mostSimilarIndex = -1; + issueSentences.forEach((sentence, index) => { const similarities = similarIssueSentences.map((similarSentence) => { const editDistance = findEditDistance(sentence, similarSentence); @@ -93,6 +118,7 @@ function findMostSimilarSentence(issueContent: string, similarIssueContent: stri mostSimilarIndex = index; } }); + if (!mostSimilarSentence) { throw new Error("No similar sentence found"); } @@ -138,7 +164,7 @@ async function handleSimilarIssuesComment( ); if (relevantIssues.length === 0) { - return; + context.logger.info("No relevant issues found with the same repository and organization"); } if (!issueBody) { @@ -226,7 +252,6 @@ export function removeFootnotes(content: string): string { const footnotes = content.match(footnoteDefRegex); let contentWithoutFootnotes = content.replace(footnoteDefRegex, ""); if (footnotes) { - console.log(footnotes); footnotes.forEach((footnote) => { const footnoteNumber = footnote.match(/\d+/)?.[0]; contentWithoutFootnotes = contentWithoutFootnotes.replace(new RegExp(`\\[\\^${footnoteNumber}\\^\\]`, "g"), ""); diff --git a/supabase/migrations/20241002004403_issue_comments.sql b/supabase/migrations/20241002004403_issue_comments.sql index 6323882..9ebb751 100644 --- a/supabase/migrations/20241002004403_issue_comments.sql +++ b/supabase/migrations/20241002004403_issue_comments.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8) +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ DECLARE current_quantized vector(1024); @@ -9,10 +9,10 @@ BEGIN SELECT id AS issue_id, plaintext AS issue_plaintext, 1 - (l2_distance(current_quantized, embedding)) AS similarity - FROM issues WHERE id <> current_id AND 1 - (l2_distance(current_quantized, embedding)) > threshold - ORDER BY similarity; + ORDER BY similarity + LIMIT top_k; -- Limit the number of results to top_k END; -$$ LANGUAGE plpgsql; \ No newline at end of file +$$ LANGUAGE plpgsql; From b909ff3e7495d8e37247fa647114f21258a54b16 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 8 Oct 2024 12:51:50 -0400 Subject: [PATCH 25/31] feat: updated similarity search function --- .../20241008165113_function_issue.sql | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 supabase/migrations/20241008165113_function_issue.sql diff --git a/supabase/migrations/20241008165113_function_issue.sql b/supabase/migrations/20241008165113_function_issue.sql new file mode 100644 index 0000000..3223d1d --- /dev/null +++ b/supabase/migrations/20241008165113_function_issue.sql @@ -0,0 +1,38 @@ +DROP FUNCTION IF EXISTS find_similar_issues; + +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); + current_repo TEXT; + current_org TEXT; +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + + -- Extract the current issue's repo and org from the payload + SELECT + payload->'repository'->>'name'::text, + payload->'repository'->'owner'->>'login'::text + INTO current_repo, current_org + FROM issues + WHERE id = current_id; + + -- Check if the current issue has valid repo and org + IF current_repo IS NULL OR current_org IS NULL THEN + RETURN; -- Exit if current issue's repo or org is null + END IF; + + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + (l2_distance(current_quantized, embedding)) AS similarity + FROM issues + WHERE id <> current_id + AND current_repo = payload->'repository'->>'name'::text + AND current_org = payload->'repository'->'owner'->>'login'::text + AND l2_distance(current_quantized, embedding) > threshold -- Ensure similarity exceeds threshold + ORDER BY similarity DESC + LIMIT top_k; +END; +$$ LANGUAGE plpgsql; From a8751072cd1f199e374e7d83468901e6f5cf1b3e Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 8 Oct 2024 15:02:43 -0400 Subject: [PATCH 26/31] fix: issue creation fix --- src/handlers/issue-deduplication.ts | 2 +- src/plugin.ts | 2 +- .../20241008175109_function_issue.sql | 38 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 supabase/migrations/20241008175109_function_issue.sql diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 3723753..3273a09 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -21,7 +21,7 @@ export interface IssueGraphqlResponse { /** * Checks if the current issue is a duplicate of an existing issue. - * If a similar issue is found, a comment is added to the current issue. + * If a similar issue is found, a footnote is added to the current issue. * @param context The context object * @returns True if a similar issue is found, false otherwise **/ diff --git a/src/plugin.ts b/src/plugin.ts index 341e07b..f382409 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -32,8 +32,8 @@ export async function runPlugin(context: Context) { } else if (isIssueEvent(context)) { switch (eventName) { case "issues.opened": - await issueChecker(context); await addIssue(context); + await issueChecker(context); return await issueMatching(context); case "issues.edited": await issueChecker(context); diff --git a/supabase/migrations/20241008175109_function_issue.sql b/supabase/migrations/20241008175109_function_issue.sql new file mode 100644 index 0000000..875a8d4 --- /dev/null +++ b/supabase/migrations/20241008175109_function_issue.sql @@ -0,0 +1,38 @@ +DROP FUNCTION IF EXISTS find_similar_issues; + +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); + current_repo TEXT; + current_org TEXT; +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + + -- Extract the current issue's repo and org from the payload + SELECT + payload->'repository'->>'name'::text, + payload->'repository'->'owner'->>'login'::text + INTO current_repo, current_org + FROM issues + WHERE id = current_id; + + -- Check if the current issue has valid repo and org + IF current_repo IS NULL OR current_org IS NULL THEN + RETURN; -- Exit if current issue's repo or org is null + END IF; + + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + 1 - (l2_distance(current_quantized, embedding)) AS similarity + FROM issues + WHERE id <> current_id + AND current_repo = payload->'repository'->>'name'::text + AND current_org = payload->'repository'->'owner'->>'login'::text + AND 1 - l2_distance(current_quantized, embedding) > threshold -- Ensure similarity exceeds threshold + ORDER BY similarity DESC + LIMIT top_k; +END; +$$ LANGUAGE plpgsql; From 03f0110bacfb2958904698a75fc558b853da894a Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 8 Oct 2024 15:05:03 -0400 Subject: [PATCH 27/31] fix: update the l2_distance with a weighted sum of inner product and inverted l2 distance --- supabase/migrations/20241008175109_function_issue.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/supabase/migrations/20241008175109_function_issue.sql b/supabase/migrations/20241008175109_function_issue.sql index 875a8d4..6c77ce3 100644 --- a/supabase/migrations/20241008175109_function_issue.sql +++ b/supabase/migrations/20241008175109_function_issue.sql @@ -26,12 +26,12 @@ BEGIN RETURN QUERY SELECT id AS issue_id, plaintext AS issue_plaintext, - 1 - (l2_distance(current_quantized, embedding)) AS similarity + ((0.5 * inner_product(current_quantized, embedding)) + 0.5 * (1 / (1 + l2_distance(current_quantized, embedding)))) as similarity FROM issues WHERE id <> current_id AND current_repo = payload->'repository'->>'name'::text AND current_org = payload->'repository'->'owner'->>'login'::text - AND 1 - l2_distance(current_quantized, embedding) > threshold -- Ensure similarity exceeds threshold + AND ((0.5 * inner_product(current_quantized, embedding)) + 0.5 * (1 / (1 + l2_distance(current_quantized, embedding)))) > threshold ORDER BY similarity DESC LIMIT top_k; END; From 4bce42c2437bf0da9796827744e7d3a7e6fcdf54 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Thu, 10 Oct 2024 16:28:52 -0400 Subject: [PATCH 28/31] fix: changed the order of the similarity search result --- src/adapters/supabase/helpers/comment.ts | 27 ++++++++++++++++++------ src/adapters/supabase/helpers/issues.ts | 25 ++++++++++++---------- src/handlers/issue-deduplication.ts | 3 +++ src/handlers/update-comments.ts | 7 ++++-- src/handlers/update-issue.ts | 3 ++- 5 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/adapters/supabase/helpers/comment.ts b/src/adapters/supabase/helpers/comment.ts index a0fa89e..295f8ae 100644 --- a/src/adapters/supabase/helpers/comment.ts +++ b/src/adapters/supabase/helpers/comment.ts @@ -54,7 +54,14 @@ export class Comment extends SuperSupabase { this.context.logger.info("Comment created successfully"); } - async updateComment(markdown: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) { + async updateComment( + markdown: string | null, + commentNodeId: string, + authorId: number, + payload: Record | null, + isPrivate: boolean, + issueId: string + ) { //Create the embedding for this comment const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); let plaintext: string | null = markdownToPlainText(markdown); @@ -63,12 +70,18 @@ export class Comment extends SuperSupabase { payload = null as Record | null; plaintext = null as string | null; } - const { error } = await this.supabase - .from("issue_comments") - .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) - .eq("id", commentNodeId); - if (error) { - this.context.logger.error("Error updating comment", error); + const comments = await this.getComment(commentNodeId); + if (comments && comments.length == 0) { + this.context.logger.info("Comment does not exist, creating a new one"); + await this.createComment(markdown, commentNodeId, authorId, payload, isPrivate, issueId); + } else { + const { error } = await this.supabase + .from("issue_comments") + .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) + .eq("id", commentNodeId); + if (error) { + this.context.logger.error("Error updating comment", error); + } } } diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index b55e901..059ee75 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -52,21 +52,24 @@ export class Issues extends SuperSupabase { this.context.logger.info("Issue created successfully"); } - async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean) { - //Create the embedding for this comment + async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean, authorId: number) { const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { - markdown = null as string | null; - payload = null as Record | null; - plaintext = null as string | null; + markdown = null; + payload = null; + plaintext = null; } - const { error } = await this.supabase - .from("issues") - .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) - .eq("id", issueNodeId); - if (error) { - this.context.logger.error("Error updating comment", error); + const issues = await this.getIssue(issueNodeId); + if (issues && issues.length == 0) { + this.context.logger.info("Issue does not exist, creating a new one"); + await this.createIssue(issueNodeId, payload, isPrivate, markdown, authorId); + } else { + const { error } = await this.supabase.from("issues").update({ markdown, plaintext, embedding, payload, modified_at: new Date() }).eq("id", issueNodeId); + + if (error) { + this.context.logger.error("Error updating comment", error); + } } } diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 3273a09..0cca940 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -176,6 +176,9 @@ async function handleSimilarIssuesComment( const highestFootnoteIndex = existingFootnotes.length > 0 ? Math.max(...existingFootnotes.map((fn) => parseInt(fn.match(/\d+/)?.[0] ?? "0"))) : 0; let updatedBody = issueBody; let footnotes: string[] | undefined; + // Sort relevant issues by similarity in ascending order + relevantIssues.sort((a, b) => parseFloat(a.similarity) - parseFloat(b.similarity)); + relevantIssues.forEach((issue, index) => { const footnoteIndex = highestFootnoteIndex + index + 1; // Continue numbering from the highest existing footnote number const footnoteRef = `[^0${footnoteIndex}^]`; diff --git a/src/handlers/update-comments.ts b/src/handlers/update-comments.ts index b1b9d18..6cc9545 100644 --- a/src/handlers/update-comments.ts +++ b/src/handlers/update-comments.ts @@ -7,15 +7,18 @@ export async function updateComment(context: Context) { adapters: { supabase }, } = context; const { payload } = context as { payload: CommentPayload }; + const markdown = payload.comment.body; + const authorId = payload.comment.user?.id || -1; const nodeId = payload.comment.node_id; const isPrivate = payload.repository.private; - const markdown = payload.comment.body || null; + const issueId = payload.issue.node_id; + // Fetch the previous comment and update it in the db try { if (!markdown) { throw new Error("Comment body is empty"); } - await supabase.comment.updateComment(markdown, nodeId, payload, isPrivate); + await supabase.comment.updateComment(markdown, nodeId, authorId, payload, isPrivate, issueId); } catch (error) { if (error instanceof Error) { logger.error(`Error updating comment:`, { error: error, stack: error.stack }); diff --git a/src/handlers/update-issue.ts b/src/handlers/update-issue.ts index 4ec5f34..cec5de2 100644 --- a/src/handlers/update-issue.ts +++ b/src/handlers/update-issue.ts @@ -12,6 +12,7 @@ export async function updateIssue(context: Context) { const nodeId = payload.issue.node_id; const isPrivate = payload.repository.private; const markdown = payload.issue.body + " " + payload.issue.title || null; + const authorId = payload.issue.user?.id || -1; // Fetch the previous issue and update it in the db try { if (!markdown) { @@ -19,7 +20,7 @@ export async function updateIssue(context: Context) { } //clean issue by removing footnotes const cleanedIssue = removeFootnotes(markdown); - await supabase.issue.updateIssue(cleanedIssue, nodeId, payloadObject, isPrivate); + await supabase.issue.updateIssue(cleanedIssue, nodeId, payloadObject, isPrivate, authorId); } catch (error) { if (error instanceof Error) { logger.error(`Error updating issue:`, { error: error, stack: error.stack }); From 3477aea88cd6809ac45231dec65bf514aaf6dd50 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Thu, 10 Oct 2024 17:12:50 -0400 Subject: [PATCH 29/31] fix: fixed tests --- src/main.ts | 2 +- tests/__mocks__/adapter.ts | 36 ++++++++++++++++++++++-------------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/main.ts b/src/main.ts index 91490bc..0b9b9a4 100644 --- a/src/main.ts +++ b/src/main.ts @@ -36,7 +36,7 @@ async function returnDataToKernel(repoToken: string, stateId: string, output: ob await octokit.repos.createDispatchEvent({ owner: github.context.repo.owner, repo: github.context.repo.repo, - event_type: "return_data_to_ubiquibot_kernel", + event_type: "return-data-to-ubiquity-os-kernel", client_payload: { state_id: stateId, output: JSON.stringify(output), diff --git a/tests/__mocks__/adapter.ts b/tests/__mocks__/adapter.ts index d1f634c..abb271f 100644 --- a/tests/__mocks__/adapter.ts +++ b/tests/__mocks__/adapter.ts @@ -36,21 +36,29 @@ export function createMockAdapters(context: Context) { commentMap.set(commentNodeId, { id: commentNodeId, plaintext, author_id: authorId, embedding, issue_id: issueId }); } ), - updateComment: jest.fn(async (plaintext: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) => { - if (!commentMap.has(commentNodeId)) { - throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); - } - const originalComment = commentMap.get(commentNodeId); - if (!originalComment) { - throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); - } - const { id, author_id } = originalComment; - const embedding = await context.adapters.voyage.embedding.createEmbedding(plaintext); - if (isPrivate) { - plaintext = null; + updateComment: jest.fn( + async ( + plaintext: string | null, + commentNodeId: string, + authorId: number, + payload: Record | null, + isPrivate: boolean, + issueId: string + ) => { + if (!commentMap.has(commentNodeId)) { + throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); + } + const originalComment = commentMap.get(commentNodeId); + if (!originalComment) { + throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); + } + const embedding = await context.adapters.voyage.embedding.createEmbedding(plaintext); + if (isPrivate) { + plaintext = null; + } + commentMap.set(commentNodeId, { id: issueId, plaintext, author_id: authorId, embedding, payload }); } - commentMap.set(commentNodeId, { id, plaintext, author_id, embedding }); - }), + ), deleteComment: jest.fn(async (commentNodeId: string) => { if (!commentMap.has(commentNodeId)) { throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); From 34528eaa8f4010b3797896575131f191b3e11b08 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Fri, 11 Oct 2024 01:49:37 -0400 Subject: [PATCH 30/31] fix: transferissue --- src/handlers/transfer-issue.ts | 32 ++++++++++++++++++++++++++++++++ src/plugin.ts | 3 +++ src/types/context.ts | 3 ++- src/types/payload.ts | 1 + src/types/typeguards.ts | 9 +++++++-- 5 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 src/handlers/transfer-issue.ts diff --git a/src/handlers/transfer-issue.ts b/src/handlers/transfer-issue.ts new file mode 100644 index 0000000..bf92d47 --- /dev/null +++ b/src/handlers/transfer-issue.ts @@ -0,0 +1,32 @@ +import { Context } from "../types"; +import { IssueTransferPayload } from "../types/payload"; + +export async function issueTransfer(context: Context) { + const { + logger, + adapters: { supabase }, + } = context; + const { changes, issue } = (context as { payload: IssueTransferPayload }).payload; + const nodeId = issue.node_id; + const { new_issue, new_repository } = changes; + //Fetch the new details of the issue + const newIssueNodeId = new_issue.node_id; + const markdown = new_issue.body + " " + new_issue.title || null; + const authorId = new_issue.user?.id || -1; + const isPrivate = new_repository.private; + + //Delete the issue from the old repository + //Create the new issue in the new repository + try { + await supabase.issue.deleteIssue(nodeId); + await supabase.issue.createIssue(newIssueNodeId, new_issue, isPrivate, markdown, authorId); + } catch (error) { + if (error instanceof Error) { + logger.error(`Error transferring issue:`, { error: error, stack: error.stack }); + throw error; + } else { + logger.error(`Error transferring issue:`, { err: error, error: new Error() }); + throw error; + } + } +} diff --git a/src/plugin.ts b/src/plugin.ts index f382409..5cdf193 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -14,6 +14,7 @@ import { updateIssue } from "./handlers/update-issue"; import { Context, Env, PluginInputs } from "./types"; import { Database } from "./types/database"; import { isIssueCommentEvent, isIssueEvent } from "./types/typeguards"; +import { issueTransfer } from "./handlers/transfer-issue"; /** * The main plugin function. Split for easier testing. @@ -41,6 +42,8 @@ export async function runPlugin(context: Context) { return await issueMatching(context); case "issues.deleted": return await deleteIssues(context); + case "issues.transferred": + return await issueTransfer(context); } } else if (eventName == "issues.labeled") { return await issueMatching(context); diff --git a/src/types/context.ts b/src/types/context.ts index b38d836..46f2e67 100644 --- a/src/types/context.ts +++ b/src/types/context.ts @@ -17,7 +17,8 @@ export type SupportedEventsU = | "issues.opened" | "issues.edited" | "issues.deleted" - | "issues.labeled"; + | "issues.labeled" + | "issues.transferred"; export type SupportedEvents = { [K in SupportedEventsU]: K extends WebhookEventName ? WebhookEvent : never; diff --git a/src/types/payload.ts b/src/types/payload.ts index 395fa09..d1f3d64 100644 --- a/src/types/payload.ts +++ b/src/types/payload.ts @@ -1,3 +1,4 @@ import { EmitterWebhookEvent as WebhookEvent } from "@octokit/webhooks"; export type CommentPayload = WebhookEvent<"issue_comment">["payload"]; export type IssuePayload = WebhookEvent<"issues">["payload"]; +export type IssueTransferPayload = WebhookEvent<"issues.transferred">["payload"]; diff --git a/src/types/typeguards.ts b/src/types/typeguards.ts index 01a6c26..aae7236 100644 --- a/src/types/typeguards.ts +++ b/src/types/typeguards.ts @@ -20,6 +20,11 @@ export function isIssueCommentEvent(context: Context): context is Context<"issue * * @param context The context object. */ -export function isIssueEvent(context: Context): context is Context<"issues.opened" | "issues.edited" | "issues.deleted"> { - return context.eventName === "issues.opened" || context.eventName === "issues.edited" || context.eventName === "issues.deleted"; +export function isIssueEvent(context: Context): context is Context<"issues.opened" | "issues.edited" | "issues.deleted" | "issues.transferred"> { + return ( + context.eventName === "issues.opened" || + context.eventName === "issues.edited" || + context.eventName === "issues.deleted" || + context.eventName === "issues.transferred" + ); } From fb5633584b9c3362b9c6be8935a2b3ba24c7eb73 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Tue, 15 Oct 2024 02:40:07 -0400 Subject: [PATCH 31/31] feat: issue being edited all the time fixed --- src/handlers/issue-deduplication.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 0cca940..7ca35b3 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -51,6 +51,7 @@ export async function issueChecker(context: Context): Promise { state: "closed", state_reason: "not_planned", }); + return true; } if (similarIssues.length > 0) { @@ -58,11 +59,8 @@ export async function issueChecker(context: Context): Promise { await handleSimilarIssuesComment(context, payload, issueBody, issue.number, similarIssues); return true; } - } - context.logger.info("No similar issues found"); - - //Use the IssueBody (Without footnotes) to update the issue - if (issueBody !== issue.body) { + } else { + //Use the IssueBody (Without footnotes) to update the issue when no similar issues are found await octokit.issues.update({ owner: payload.repository.owner.login, repo: payload.repository.name, @@ -70,6 +68,7 @@ export async function issueChecker(context: Context): Promise { body: issueBody, }); } + context.logger.info("No similar issues found"); return false; }