diff --git a/.cspell.json b/.cspell.json index 43040b8..40e1508 100644 --- a/.cspell.json +++ b/.cspell.json @@ -13,7 +13,7 @@ "Supabase", "SUPABASE", "typebox", - "ubiquibot", + "ubiquity-os", "Smee", "typeguards", "mswjs", diff --git a/.github/workflows/update-configuration.yml b/.github/workflows/update-configuration.yml new file mode 100644 index 0000000..2490c49 --- /dev/null +++ b/.github/workflows/update-configuration.yml @@ -0,0 +1,58 @@ +name: "Update Configuration" + +on: + workflow_dispatch: + push: + +jobs: + update: + name: "Update Configuration in manifest.json" + runs-on: ubuntu-latest + permissions: write-all + + steps: + - uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4 + with: + node-version: "20.10.0" + + - name: Install deps and run configuration update + run: | + yarn install --immutable --immutable-cache --check-cache + yarn tsc --noCheck --project tsconfig.json + + - name: Update manifest configuration using GitHub Script + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + const path = require('path'); + + const { pluginSettingsSchema } = require('./src/types'); + + const manifestPath = path.resolve("${{ github.workspace }}", './manifest.json'); + const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); + + const configuration = JSON.stringify(pluginSettingsSchema); + + manifest["configuration"] = JSON.parse(configuration); + + const updatedManifest = JSON.stringify(manifest, null, 2) + console.log('Updated manifest:', updatedManifest); + fs.writeFileSync(manifestPath, updatedManifest); + + - name: Commit and Push generated types + run: | + git config --global user.name 'ubiquity-os[bot]' + git config --global user.email 'ubiquity-os[bot]@users.noreply.github.com' + git add ./manifest.json + if [ -n "$(git diff-index --cached --name-only HEAD)" ]; then + git commit -m "chore: updated generated configuration" || echo "Lint-staged check failed" + git push origin HEAD:${{ github.ref_name }} + else + echo "No changes to commit" + fi + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/worker-deploy.yml b/.github/workflows/worker-deploy.yml index deafdc0..a79b808 100644 --- a/.github/workflows/worker-deploy.yml +++ b/.github/workflows/worker-deploy.yml @@ -35,6 +35,7 @@ jobs: id: wrangler_deploy uses: cloudflare/wrangler-action@v3 with: + wranglerVersion: "3.78.12" apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} secrets: | diff --git a/CHANGELOG.md b/CHANGELOG.md index bf0b82f..13913a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,25 +5,25 @@ ### Features -* modified tests ([97e267f](https://github.com/ubiquibot/issue-comment-embeddings/commit/97e267f801ce4e6bd29bbe967de3df4fc3b1942a)) +* modified tests ([97e267f](https://github.com/ubiquity-os/issue-comment-embeddings/commit/97e267f801ce4e6bd29bbe967de3df4fc3b1942a)) ### Bug Fixes -* added config.yml ([c0f784b](https://github.com/ubiquibot/issue-comment-embeddings/commit/c0f784b20e59c2c4714805331c7ae9034fd73f73)) -* added config.yml ([221d34d](https://github.com/ubiquibot/issue-comment-embeddings/commit/221d34d801af6ebd764028be4a5c6200a18b776e)) -* added config.yml ([d12c522](https://github.com/ubiquibot/issue-comment-embeddings/commit/d12c522291db36dcf6aea72e5759e1a055185d8f)) -* cspell fix ([736bea6](https://github.com/ubiquibot/issue-comment-embeddings/commit/736bea6172444fdf783ffff729879d8278ff82f3)) -* fixed tests missing supabase files ([0e870ac](https://github.com/ubiquibot/issue-comment-embeddings/commit/0e870ac50eb68249edf5fc4e46fd509425dd7bbb)) -* github workflow, types package.json, env examples ([16786d7](https://github.com/ubiquibot/issue-comment-embeddings/commit/16786d76ee7a598c885f15af1baeadcf6a471b2c)) -* issue_comments linting added issue_comments:edited, created and deleted ([9c0de23](https://github.com/ubiquibot/issue-comment-embeddings/commit/9c0de237048ce30bf4254960c443bf3938037dce)) -* knip workflow ([f325310](https://github.com/ubiquibot/issue-comment-embeddings/commit/f3253109c290c9fce6d14e6a2e1e328133ac6f81)) -* manifest.json, compute.yml ([21409d5](https://github.com/ubiquibot/issue-comment-embeddings/commit/21409d530c3aad6ff2676fc813314e5b29c1a533)) -* package.json ([806c6c0](https://github.com/ubiquibot/issue-comment-embeddings/commit/806c6c0b393a9b87741a6341fa65bc5b3d22cb15)) -* plugin name ([d91b991](https://github.com/ubiquibot/issue-comment-embeddings/commit/d91b991d717b7fb0b73359ca29ae6de08a1074b9)) -* readme.md ([9c5fbfe](https://github.com/ubiquibot/issue-comment-embeddings/commit/9c5fbfe9ca46eb842779468c85d329b9f941fb82)) -* readme.md ([2fec447](https://github.com/ubiquibot/issue-comment-embeddings/commit/2fec44786526e7c10faaa2c13c4349e1232cf5bd)) -* remove config.yml and wrangler.toml namespace entries ([127cc22](https://github.com/ubiquibot/issue-comment-embeddings/commit/127cc225903c3fe3ca934e8407df4eb9c27e378c)) -* removed config.yml changed name ([744e08c](https://github.com/ubiquibot/issue-comment-embeddings/commit/744e08cebac310ae81c3c102f5f3a9473e6e4b9e)) -* test and linting ([a4ee41e](https://github.com/ubiquibot/issue-comment-embeddings/commit/a4ee41e6fca8723ce2fddc96b1171c89cfe7d5b7)) -* wrangler name ([f890071](https://github.com/ubiquibot/issue-comment-embeddings/commit/f890071c01c5bb1d611a5b7aa07cba84f4546251)) +* added config.yml ([c0f784b](https://github.com/ubiquity-os/issue-comment-embeddings/commit/c0f784b20e59c2c4714805331c7ae9034fd73f73)) +* added config.yml ([221d34d](https://github.com/ubiquity-os/issue-comment-embeddings/commit/221d34d801af6ebd764028be4a5c6200a18b776e)) +* added config.yml ([d12c522](https://github.com/ubiquity-os/issue-comment-embeddings/commit/d12c522291db36dcf6aea72e5759e1a055185d8f)) +* cspell fix ([736bea6](https://github.com/ubiquity-os/issue-comment-embeddings/commit/736bea6172444fdf783ffff729879d8278ff82f3)) +* fixed tests missing supabase files ([0e870ac](https://github.com/ubiquity-os/issue-comment-embeddings/commit/0e870ac50eb68249edf5fc4e46fd509425dd7bbb)) +* github workflow, types package.json, env examples ([16786d7](https://github.com/ubiquity-os/issue-comment-embeddings/commit/16786d76ee7a598c885f15af1baeadcf6a471b2c)) +* issue_comments linting added issue_comments:edited, created and deleted ([9c0de23](https://github.com/ubiquity-os/issue-comment-embeddings/commit/9c0de237048ce30bf4254960c443bf3938037dce)) +* knip workflow ([f325310](https://github.com/ubiquity-os/issue-comment-embeddings/commit/f3253109c290c9fce6d14e6a2e1e328133ac6f81)) +* manifest.json, compute.yml ([21409d5](https://github.com/ubiquity-os/issue-comment-embeddings/commit/21409d530c3aad6ff2676fc813314e5b29c1a533)) +* package.json ([806c6c0](https://github.com/ubiquity-os/issue-comment-embeddings/commit/806c6c0b393a9b87741a6341fa65bc5b3d22cb15)) +* plugin name ([d91b991](https://github.com/ubiquity-os/issue-comment-embeddings/commit/d91b991d717b7fb0b73359ca29ae6de08a1074b9)) +* readme.md ([9c5fbfe](https://github.com/ubiquity-os/issue-comment-embeddings/commit/9c5fbfe9ca46eb842779468c85d329b9f941fb82)) +* readme.md ([2fec447](https://github.com/ubiquity-os/issue-comment-embeddings/commit/2fec44786526e7c10faaa2c13c4349e1232cf5bd)) +* remove config.yml and wrangler.toml namespace entries ([127cc22](https://github.com/ubiquity-os/issue-comment-embeddings/commit/127cc225903c3fe3ca934e8407df4eb9c27e378c)) +* removed config.yml changed name ([744e08c](https://github.com/ubiquity-os/issue-comment-embeddings/commit/744e08cebac310ae81c3c102f5f3a9473e6e4b9e)) +* test and linting ([a4ee41e](https://github.com/ubiquity-os/issue-comment-embeddings/commit/a4ee41e6fca8723ce2fddc96b1171c89cfe7d5b7)) +* wrangler name ([f890071](https://github.com/ubiquity-os/issue-comment-embeddings/commit/f890071c01c5bb1d611a5b7aa07cba84f4546251)) diff --git a/README.md b/README.md index 9661b9c..ce2acff 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -# `@ubiquibot/issue-comment-embeddings` +# `@ubiquity-os/issue-comment-embeddings` -This is a plugin for [Ubiquibot](https://github.com/ubiquity/ubiquibot-kernel). It listens for issue comments, and adds them to a vector store. It handles comment edits and deletions as well. +This is a plugin for [Ubiquibot](https://github.com/ubiquity-os/ubiquity-os-kernel). It listens for issue comments, and adds them to a vector store. It handles comment edits and deletions as well. ## Configuration - Host the plugin on a server that Ubiquibot can access. @@ -10,9 +10,9 @@ To set up the `.dev.vars` file, you will need to provide the following variables - `VOYAGEAI_API_KEY`: The API key for Voyage. ## Usage -- Add the following to your `.ubiquibot-config.yml` file with the appropriate URL: +- Add the following to your `.ubiquity-os.config.yml` file with the appropriate URL: ```yaml -- plugin: ubiquity-os-marketplace/generate-vector-embeddings +- plugin: https://ubiquity-os-comment-vector-embeddings-main.ubiquity.workers.dev with: matchThreshold: 0.95 warningThreshold: 0.75 diff --git a/manifest.json b/manifest.json index 39df377..3768f7a 100644 --- a/manifest.json +++ b/manifest.json @@ -1,5 +1,36 @@ { "name": "Generate vector embeddings", "description": "Enables the storage, updating, and deletion of issue comment embeddings.", - "ubiquity:listeners": ["issue_comment.created", "issue_comment.edited", "issue_comment.deleted", "issues.opened", "issues.edited", "issues.deleted", "issues.labeled"] -} + "ubiquity:listeners": [ + "issue_comment.created", + "issue_comment.edited", + "issue_comment.deleted", + "issues.opened", + "issues.edited", + "issues.deleted", + "issues.labeled" + ], + "configuration": { + "default": {}, + "type": "object", + "properties": { + "matchThreshold": { + "default": 0.95, + "type": "number" + }, + "warningThreshold": { + "default": 0.75, + "type": "number" + }, + "jobMatchingThreshold": { + "default": 0.75, + "type": "number" + } + }, + "required": [ + "matchThreshold", + "warningThreshold", + "jobMatchingThreshold" + ] + } +} \ No newline at end of file diff --git a/package.json b/package.json index 54ad601..391c041 100644 --- a/package.json +++ b/package.json @@ -1,5 +1,5 @@ { - "name": "@ubiquibot/issue-comment-embeddings", + "name": "@ubiquity-os/issue-comment-embeddings", "version": "1.0.0", "description": "Generates vector embeddings of GitHub comments and stores them in Supabase.", "author": "Ubiquity DAO", @@ -36,7 +36,7 @@ "@sinclair/typebox": "0.32.33", "@supabase/supabase-js": "^2.45.2", "@types/markdown-it": "^14.1.2", - "@ubiquity-dao/ubiquibot-logger": "^1.3.0", + "@ubiquity-os/ubiquity-os-logger": "^1.3.2", "dotenv": "16.4.5", "markdown-it": "^14.1.0", "markdown-it-plain-text": "^0.3.0", @@ -69,10 +69,10 @@ "lint-staged": "15.2.7", "npm-run-all": "4.1.5", "prettier": "3.3.2", - "supabase": "1.191.3", + "supabase": "1.200.3", "ts-jest": "29.1.5", "tsx": "4.15.6", - "typescript": "5.4.5", + "typescript": "5.6.2", "typescript-eslint": "7.13.1", "wrangler": "3.78.12" }, diff --git a/src/adapters/supabase/helpers/comment.ts b/src/adapters/supabase/helpers/comment.ts index 3fa08b2..295f8ae 100644 --- a/src/adapters/supabase/helpers/comment.ts +++ b/src/adapters/supabase/helpers/comment.ts @@ -37,7 +37,7 @@ export class Comment extends SuperSupabase { } else { //Create the embedding for this comment const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { markdown = null as string | null; payload = null as Record | null; @@ -54,21 +54,34 @@ export class Comment extends SuperSupabase { this.context.logger.info("Comment created successfully"); } - async updateComment(markdown: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) { + async updateComment( + markdown: string | null, + commentNodeId: string, + authorId: number, + payload: Record | null, + isPrivate: boolean, + issueId: string + ) { //Create the embedding for this comment const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { markdown = null as string | null; payload = null as Record | null; plaintext = null as string | null; } - const { error } = await this.supabase - .from("issue_comments") - .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) - .eq("id", commentNodeId); - if (error) { - this.context.logger.error("Error updating comment", error); + const comments = await this.getComment(commentNodeId); + if (comments && comments.length == 0) { + this.context.logger.info("Comment does not exist, creating a new one"); + await this.createComment(markdown, commentNodeId, authorId, payload, isPrivate, issueId); + } else { + const { error } = await this.supabase + .from("issue_comments") + .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) + .eq("id", commentNodeId); + if (error) { + this.context.logger.error("Error updating comment", error); + } } } diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 6bfef09..059ee75 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -37,7 +37,7 @@ export class Issues extends SuperSupabase { return; } else { const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { payload = null; markdown = null; @@ -52,21 +52,24 @@ export class Issues extends SuperSupabase { this.context.logger.info("Issue created successfully"); } - async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean) { - //Create the embedding for this comment + async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean, authorId: number) { const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); - let plaintext: string | null = markdownToPlainText(markdown || ""); + let plaintext: string | null = markdownToPlainText(markdown); if (isPrivate) { - markdown = null as string | null; - payload = null as Record | null; - plaintext = null as string | null; + markdown = null; + payload = null; + plaintext = null; } - const { error } = await this.supabase - .from("issues") - .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) - .eq("id", issueNodeId); - if (error) { - this.context.logger.error("Error updating comment", error); + const issues = await this.getIssue(issueNodeId); + if (issues && issues.length == 0) { + this.context.logger.info("Issue does not exist, creating a new one"); + await this.createIssue(issueNodeId, payload, isPrivate, markdown, authorId); + } else { + const { error } = await this.supabase.from("issues").update({ markdown, plaintext, embedding, payload, modified_at: new Date() }).eq("id", issueNodeId); + + if (error) { + this.context.logger.error("Error updating comment", error); + } } } @@ -96,6 +99,7 @@ export class Issues extends SuperSupabase { current_id: currentId, query_embedding: embedding, threshold: threshold, + top_k: 5, }); if (error) { this.context.logger.error("Error finding similar issues", error); diff --git a/src/handlers/add-issue.ts b/src/handlers/add-issue.ts index 969a5c2..c828bbf 100644 --- a/src/handlers/add-issue.ts +++ b/src/handlers/add-issue.ts @@ -1,5 +1,6 @@ import { Context } from "../types"; import { IssuePayload } from "../types/payload"; +import { removeFootnotes } from "./issue-deduplication"; export async function addIssue(context: Context) { const { @@ -16,7 +17,8 @@ export async function addIssue(context: Context) { if (!markdown) { throw new Error("Issue body is empty"); } - await supabase.issue.createIssue(nodeId, payload, isPrivate, markdown, authorId); + const cleanedIssue = removeFootnotes(markdown); + await supabase.issue.createIssue(nodeId, payload, isPrivate, cleanedIssue, authorId); } catch (error) { if (error instanceof Error) { logger.error(`Error creating issue:`, { error: error, stack: error.stack }); diff --git a/src/handlers/issue-deduplication.ts b/src/handlers/issue-deduplication.ts index 6174e4f..7ca35b3 100644 --- a/src/handlers/issue-deduplication.ts +++ b/src/handlers/issue-deduplication.ts @@ -5,16 +5,26 @@ import { IssuePayload } from "../types/payload"; export interface IssueGraphqlResponse { node: { title: string; + number: number; url: string; + body: string; + repository: { + name: string; + owner: { + login: string; + }; + }; }; similarity: string; + mostSimilarSentence: { sentence: string; similarity: number; index: number }; } /** - * Check if an issue is similar to any existing issues in the database - * @param context - * @returns true if the issue is similar to an existing issue, false otherwise - */ + * Checks if the current issue is a duplicate of an existing issue. + * If a similar issue is found, a footnote is added to the current issue. + * @param context The context object + * @returns True if a similar issue is found, false otherwise + **/ export async function issueChecker(context: Context): Promise { const { logger, @@ -23,14 +33,15 @@ export async function issueChecker(context: Context): Promise { } = context; const { payload } = context as { payload: IssuePayload }; const issue = payload.issue; - const issueContent = issue.body + issue.title; - - // Fetch all similar issues based on settings.warningThreshold - const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.warningThreshold, issue.node_id); + let issueBody = issue.body; + if (!issueBody) { + logger.info("Issue body is empty"); + return false; + } + issueBody = removeFootnotes(issueBody); + const similarIssues = await supabase.issue.findSimilarIssues(issue.title + removeFootnotes(issueBody), context.config.warningThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { const matchIssues = similarIssues.filter((issue) => issue.similarity >= context.config.matchThreshold); - - // Handle issues that match the MATCH_THRESHOLD (Very Similar) if (matchIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.matchThreshold} already exists`); await octokit.issues.update({ @@ -40,27 +51,86 @@ export async function issueChecker(context: Context): Promise { state: "closed", state_reason: "not_planned", }); + return true; } - // Handle issues that match the settings.warningThreshold but not the MATCH_THRESHOLD if (similarIssues.length > 0) { logger.info(`Similar issue which matches more than ${context.config.warningThreshold} already exists`); - await handleSimilarIssuesComment(context, payload, issue.number, similarIssues); + await handleSimilarIssuesComment(context, payload, issueBody, issue.number, similarIssues); return true; } + } else { + //Use the IssueBody (Without footnotes) to update the issue when no similar issues are found + await octokit.issues.update({ + owner: payload.repository.owner.login, + repo: payload.repository.name, + issue_number: issue.number, + body: issueBody, + }); } - + context.logger.info("No similar issues found"); return false; } +function matchRepoOrgToSimilarIssueRepoOrg(repoOrg: string, similarIssueRepoOrg: string, repoName: string, similarIssueRepoName: string): boolean { + return repoOrg === similarIssueRepoOrg && repoName === similarIssueRepoName; +} + /** - * Handle commenting on an issue with similar issues information - * @param context - * @param payload - * @param issueNumber - * @param similarIssues + * Finds the most similar sentence in a similar issue to a sentence in the current issue. + * @param issueContent The content of the current issue + * @param similarIssueContent The content of the similar issue + * @returns The most similar sentence and its similarity score */ -async function handleSimilarIssuesComment(context: Context, payload: IssuePayload, issueNumber: number, similarIssues: IssueSimilaritySearchResult[]) { +function findMostSimilarSentence(issueContent: string, similarIssueContent: string): { sentence: string; similarity: number; index: number } { + // Regex to match sentences while preserving URLs + const sentenceRegex = /([^.!?\s][^.!?]*(?:[.!?](?!['"]?\s|$)[^.!?]*)*[.!?]?['"]?(?=\s|$))/g; + + // Function to split text into sentences while preserving URLs + const splitIntoSentences = (text: string): string[] => { + const sentences: string[] = []; + let match; + while ((match = sentenceRegex.exec(text)) !== null) { + sentences.push(match[0].trim()); + } + return sentences; + }; + + const issueSentences = splitIntoSentences(issueContent); + const similarIssueSentences = splitIntoSentences(similarIssueContent); + + let maxSimilarity = 0; + let mostSimilarSentence = ""; + let mostSimilarIndex = -1; + + issueSentences.forEach((sentence, index) => { + const similarities = similarIssueSentences.map((similarSentence) => { + const editDistance = findEditDistance(sentence, similarSentence); + const maxLength = Math.max(sentence.length, similarSentence.length); + // Normalized similarity (edit distance) + return 1 - editDistance / maxLength; + }); + const maxSentenceSimilarity = Math.max(...similarities); + if (maxSentenceSimilarity > maxSimilarity) { + maxSimilarity = maxSentenceSimilarity; + mostSimilarSentence = sentence; + mostSimilarIndex = index; + } + }); + + if (!mostSimilarSentence) { + throw new Error("No similar sentence found"); + } + return { sentence: mostSimilarSentence, similarity: maxSimilarity, index: mostSimilarIndex }; +} + +async function handleSimilarIssuesComment( + context: Context, + payload: IssuePayload, + issueBody: string, + issueNumber: number, + similarIssues: IssueSimilaritySearchResult[] +) { const issueList: IssueGraphqlResponse[] = await Promise.all( similarIssues.map(async (issue: IssueSimilaritySearchResult) => { const issueUrl: IssueGraphqlResponse = await context.octokit.graphql( @@ -69,42 +139,125 @@ async function handleSimilarIssuesComment(context: Context, payload: IssuePayloa ... on Issue { title url + number + body + repository { + name + owner { + login + } + } } } }`, { issueNodeId: issue.issue_id } ); - issueUrl.similarity = (issue.similarity * 100).toFixed(2); + issueUrl.similarity = Math.round(issue.similarity * 100).toString(); + issueUrl.mostSimilarSentence = findMostSimilarSentence(issueBody, issueUrl.node.body); return issueUrl; }) ); - const commentBody = issueList.map((issue) => `- [${issue.node.title}](${issue.node.url}) Similarity: ${issue.similarity}`).join("\n"); - const body = `This issue seems to be similar to the following issue(s):\n\n${commentBody}`; + const relevantIssues = issueList.filter((issue) => + matchRepoOrgToSimilarIssueRepoOrg(payload.repository.owner.login, issue.node.repository.owner.login, payload.repository.name, issue.node.repository.name) + ); + + if (relevantIssues.length === 0) { + context.logger.info("No relevant issues found with the same repository and organization"); + } - const existingComments = await context.octokit.issues.listComments({ + if (!issueBody) { + return; + } + // Find existing footnotes in the body + const footnoteRegex = /\[\^(\d+)\^\]/g; + const existingFootnotes = issueBody.match(footnoteRegex) || []; + const highestFootnoteIndex = existingFootnotes.length > 0 ? Math.max(...existingFootnotes.map((fn) => parseInt(fn.match(/\d+/)?.[0] ?? "0"))) : 0; + let updatedBody = issueBody; + let footnotes: string[] | undefined; + // Sort relevant issues by similarity in ascending order + relevantIssues.sort((a, b) => parseFloat(a.similarity) - parseFloat(b.similarity)); + + relevantIssues.forEach((issue, index) => { + const footnoteIndex = highestFootnoteIndex + index + 1; // Continue numbering from the highest existing footnote number + const footnoteRef = `[^0${footnoteIndex}^]`; + const modifiedUrl = issue.node.url.replace("https://github.com", "https://www.github.com"); + const { sentence } = issue.mostSimilarSentence; + + // Insert footnote reference in the body + const sentencePattern = new RegExp(`${sentence.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}`, "g"); + updatedBody = updatedBody.replace(sentencePattern, `${sentence}${footnoteRef}`); + + // Initialize footnotes array if not already done + if (!footnotes) { + footnotes = []; + } + + // Add new footnote to the array + footnotes.push(`${footnoteRef}: ⚠ ${issue.similarity}% possible duplicate - [${issue.node.title}](${modifiedUrl}#${issue.node.number})\n\n`); + }); + + // Append new footnotes to the body, keeping the previous ones + if (footnotes) { + updatedBody += "\n\n" + footnotes.join(""); + } + + // Update the issue with the modified body + await context.octokit.issues.update({ owner: payload.repository.owner.login, repo: payload.repository.name, issue_number: issueNumber, + body: updatedBody, }); +} - const existingComment = existingComments.data.find( - (comment) => comment.body && comment.body.includes("This issue seems to be similar to the following issue(s)") - ); +/** + * Finds the edit distance between two strings using dynamic programming. + * The edit distance is a way of quantifying how dissimilar two strings are to one another by + * counting the minimum number of operations required to transform one string into the other. + * For more information, see: https://en.wikipedia.org/wiki/Edit_distance + * @param sentenceA The first string + * @param sentenceB The second string + * @returns The edit distance between the two strings + */ +function findEditDistance(sentenceA: string, sentenceB: string): number { + const lengthA = sentenceA.length; + const lengthB = sentenceB.length; + const distanceMatrix: number[][] = Array.from({ length: lengthA + 1 }, () => Array.from({ length: lengthB + 1 }, () => 0)); - if (existingComment) { - await context.octokit.issues.updateComment({ - owner: payload.repository.owner.login, - repo: payload.repository.name, - comment_id: existingComment.id, - body: body, - }); - } else { - await context.octokit.issues.createComment({ - owner: payload.repository.owner.login, - repo: payload.repository.name, - issue_number: issueNumber, - body: body, + for (let indexA = 0; indexA <= lengthA; indexA++) { + for (let indexB = 0; indexB <= lengthB; indexB++) { + if (indexA === 0) { + distanceMatrix[indexA][indexB] = indexB; + } else if (indexB === 0) { + distanceMatrix[indexA][indexB] = indexA; + } else if (sentenceA[indexA - 1] === sentenceB[indexB - 1]) { + distanceMatrix[indexA][indexB] = distanceMatrix[indexA - 1][indexB - 1]; + } else { + distanceMatrix[indexA][indexB] = + 1 + Math.min(distanceMatrix[indexA - 1][indexB], distanceMatrix[indexA][indexB - 1], distanceMatrix[indexA - 1][indexB - 1]); + } + } + } + + return distanceMatrix[lengthA][lengthB]; +} + +/** + * Removes all footnotes from the issue content. + * This includes both the footnote references in the body and the footnote definitions at the bottom. + * @param content The content of the issue + * @returns The content without footnotes + */ +export function removeFootnotes(content: string): string { + const footnoteDefRegex = /\[\^(\d+)\^\]: ⚠ \d+% possible duplicate - [^\n]+(\n|$)/g; + const footnotes = content.match(footnoteDefRegex); + let contentWithoutFootnotes = content.replace(footnoteDefRegex, ""); + if (footnotes) { + footnotes.forEach((footnote) => { + const footnoteNumber = footnote.match(/\d+/)?.[0]; + contentWithoutFootnotes = contentWithoutFootnotes.replace(new RegExp(`\\[\\^${footnoteNumber}\\^\\]`, "g"), ""); }); } + return contentWithoutFootnotes.replace(/\n{2,}/g, "\n").trim(); } diff --git a/src/handlers/issue-matching.ts b/src/handlers/issue-matching.ts index cc1d060..f2e866a 100644 --- a/src/handlers/issue-matching.ts +++ b/src/handlers/issue-matching.ts @@ -24,17 +24,12 @@ export interface IssueGraphqlResponse { similarity: number; } -const commentBuilder = (matchResultArray: Map>): string => { - const commentLines: string[] = [">[!NOTE]", ">The following contributors may be suitable for this task:"]; - matchResultArray.forEach((issues, assignee) => { - commentLines.push(`>### [${assignee}](https://www.github.com/${assignee})`); - issues.forEach((issue) => { - commentLines.push(issue); - }); - }); - return commentLines.join("\n"); -}; - +/** + * Checks if the current issue is a duplicate of an existing issue. + * If a similar issue is found, a comment is added to the current issue. + * @param context The context object + * @returns True if a similar issue is found, false otherwise + **/ export async function issueMatching(context: Context) { const { logger, @@ -45,15 +40,10 @@ export async function issueMatching(context: Context) { const issue = payload.issue; const issueContent = issue.body + issue.title; const commentStart = ">The following contributors may be suitable for this task:"; - - // On Adding the labels to the issue, the bot should - // create a new comment with users who completed task most similar to the issue - // if the comment already exists, it should update the comment with the new users const matchResultArray: Map> = new Map(); const similarIssues = await supabase.issue.findSimilarIssues(issueContent, context.config.jobMatchingThreshold, issue.node_id); if (similarIssues && similarIssues.length > 0) { - // Find the most similar issue and the users who completed the task - similarIssues.sort((a, b) => b.similarity - a.similarity); + similarIssues.sort((a, b) => b.similarity - a.similarity); // Sort by similarity const fetchPromises = similarIssues.map(async (issue) => { const issueObject: IssueGraphqlResponse = await context.octokit.graphql( `query ($issueNodeId: ID!) { @@ -84,7 +74,6 @@ export async function issueMatching(context: Context) { issueObject.similarity = issue.similarity; return issueObject; }); - const issueList = await Promise.all(fetchPromises); issueList.forEach((issue) => { if (issue.node.closed && issue.node.stateReason === "COMPLETED" && issue.node.assignees.nodes.length > 0) { @@ -148,3 +137,19 @@ export async function issueMatching(context: Context) { logger.ok(`Successfully created issue comment!`); logger.debug(`Exiting issueMatching handler`); } + +/** + * Builds the comment to be added to the issue + * @param matchResultArray The array of issues to be matched + * @returns The comment to be added to the issue + */ +function commentBuilder(matchResultArray: Map>): string { + const commentLines: string[] = [">[!NOTE]", ">The following contributors may be suitable for this task:"]; + matchResultArray.forEach((issues, assignee) => { + commentLines.push(`>### [${assignee}](https://www.github.com/${assignee})`); + issues.forEach((issue) => { + commentLines.push(issue); + }); + }); + return commentLines.join("\n"); +} diff --git a/src/handlers/transfer-issue.ts b/src/handlers/transfer-issue.ts new file mode 100644 index 0000000..bf92d47 --- /dev/null +++ b/src/handlers/transfer-issue.ts @@ -0,0 +1,32 @@ +import { Context } from "../types"; +import { IssueTransferPayload } from "../types/payload"; + +export async function issueTransfer(context: Context) { + const { + logger, + adapters: { supabase }, + } = context; + const { changes, issue } = (context as { payload: IssueTransferPayload }).payload; + const nodeId = issue.node_id; + const { new_issue, new_repository } = changes; + //Fetch the new details of the issue + const newIssueNodeId = new_issue.node_id; + const markdown = new_issue.body + " " + new_issue.title || null; + const authorId = new_issue.user?.id || -1; + const isPrivate = new_repository.private; + + //Delete the issue from the old repository + //Create the new issue in the new repository + try { + await supabase.issue.deleteIssue(nodeId); + await supabase.issue.createIssue(newIssueNodeId, new_issue, isPrivate, markdown, authorId); + } catch (error) { + if (error instanceof Error) { + logger.error(`Error transferring issue:`, { error: error, stack: error.stack }); + throw error; + } else { + logger.error(`Error transferring issue:`, { err: error, error: new Error() }); + throw error; + } + } +} diff --git a/src/handlers/update-comments.ts b/src/handlers/update-comments.ts index b1b9d18..6cc9545 100644 --- a/src/handlers/update-comments.ts +++ b/src/handlers/update-comments.ts @@ -7,15 +7,18 @@ export async function updateComment(context: Context) { adapters: { supabase }, } = context; const { payload } = context as { payload: CommentPayload }; + const markdown = payload.comment.body; + const authorId = payload.comment.user?.id || -1; const nodeId = payload.comment.node_id; const isPrivate = payload.repository.private; - const markdown = payload.comment.body || null; + const issueId = payload.issue.node_id; + // Fetch the previous comment and update it in the db try { if (!markdown) { throw new Error("Comment body is empty"); } - await supabase.comment.updateComment(markdown, nodeId, payload, isPrivate); + await supabase.comment.updateComment(markdown, nodeId, authorId, payload, isPrivate, issueId); } catch (error) { if (error instanceof Error) { logger.error(`Error updating comment:`, { error: error, stack: error.stack }); diff --git a/src/handlers/update-issue.ts b/src/handlers/update-issue.ts index 763b2ba..cec5de2 100644 --- a/src/handlers/update-issue.ts +++ b/src/handlers/update-issue.ts @@ -1,5 +1,6 @@ import { Context } from "../types"; import { IssuePayload } from "../types/payload"; +import { removeFootnotes } from "./issue-deduplication"; export async function updateIssue(context: Context) { const { @@ -11,12 +12,15 @@ export async function updateIssue(context: Context) { const nodeId = payload.issue.node_id; const isPrivate = payload.repository.private; const markdown = payload.issue.body + " " + payload.issue.title || null; + const authorId = payload.issue.user?.id || -1; // Fetch the previous issue and update it in the db try { if (!markdown) { throw new Error("Issue body is empty"); } - await supabase.issue.updateIssue(markdown, nodeId, payloadObject, isPrivate); + //clean issue by removing footnotes + const cleanedIssue = removeFootnotes(markdown); + await supabase.issue.updateIssue(cleanedIssue, nodeId, payloadObject, isPrivate, authorId); } catch (error) { if (error instanceof Error) { logger.error(`Error updating issue:`, { error: error, stack: error.stack }); diff --git a/src/handlers/validator.ts b/src/handlers/validator.ts new file mode 100644 index 0000000..6438455 --- /dev/null +++ b/src/handlers/validator.ts @@ -0,0 +1,36 @@ +import { TransformDecodeCheckError, TransformDecodeError, Value, ValueError } from "@sinclair/typebox/value"; +import { Env, envValidator, PluginSettings, pluginSettingsSchema, pluginSettingsValidator } from "../types"; + +export function validateAndDecodeSchemas(env: Env, rawSettings: object) { + const errors: ValueError[] = []; + const settings = Value.Default(pluginSettingsSchema, rawSettings) as PluginSettings; + + if (!pluginSettingsValidator.test(settings)) { + for (const error of pluginSettingsValidator.errors(settings)) { + console.error(error); + errors.push(error); + } + } + + if (!envValidator.test(env)) { + for (const error of envValidator.errors(env)) { + console.error(error); + errors.push(error); + } + } + + if (errors.length) { + throw { errors }; + } + + try { + const decodedEnv = Value.Decode(envValidator.schema, env); + const decodedSettings = Value.Decode(pluginSettingsSchema, settings); + return { decodedEnv, decodedSettings }; + } catch (e) { + if (e instanceof TransformDecodeCheckError || e instanceof TransformDecodeError) { + throw { errors: [e.error] }; + } + throw e; + } +} diff --git a/src/main.ts b/src/main.ts index 23a3e74..0b9b9a4 100644 --- a/src/main.ts +++ b/src/main.ts @@ -28,7 +28,7 @@ export async function run() { await plugin(inputs, env); - return returnDataToKernel(inputs.authToken, inputs.stateId, {}); + return returnDataToKernel(process.env.GITHUB_TOKEN, inputs.stateId, {}); } async function returnDataToKernel(repoToken: string, stateId: string, output: object) { @@ -36,7 +36,7 @@ async function returnDataToKernel(repoToken: string, stateId: string, output: ob await octokit.repos.createDispatchEvent({ owner: github.context.repo.owner, repo: github.context.repo.repo, - event_type: "return_data_to_ubiquibot_kernel", + event_type: "return-data-to-ubiquity-os-kernel", client_payload: { state_id: stateId, output: JSON.stringify(output), diff --git a/src/plugin.ts b/src/plugin.ts index 197948b..5cdf193 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -1,20 +1,20 @@ import { Octokit } from "@octokit/rest"; -import { Env, PluginInputs } from "./types"; -import { Context } from "./types"; -import { isIssueCommentEvent, isIssueEvent } from "./types/typeguards"; -import { LogLevel, Logs } from "@ubiquity-dao/ubiquibot-logger"; -import { Database } from "./types/database"; -import { createAdapters } from "./adapters"; import { createClient } from "@supabase/supabase-js"; +import { LogLevel, Logs } from "@ubiquity-os/ubiquity-os-logger"; +import { VoyageAIClient } from "voyageai"; +import { createAdapters } from "./adapters"; import { addComments } from "./handlers/add-comments"; -import { updateComment } from "./handlers/update-comments"; +import { addIssue } from "./handlers/add-issue"; import { deleteComment } from "./handlers/delete-comments"; -import { VoyageAIClient } from "voyageai"; import { deleteIssues } from "./handlers/delete-issue"; -import { addIssue } from "./handlers/add-issue"; -import { updateIssue } from "./handlers/update-issue"; import { issueChecker } from "./handlers/issue-deduplication"; import { issueMatching } from "./handlers/issue-matching"; +import { updateComment } from "./handlers/update-comments"; +import { updateIssue } from "./handlers/update-issue"; +import { Context, Env, PluginInputs } from "./types"; +import { Database } from "./types/database"; +import { isIssueCommentEvent, isIssueEvent } from "./types/typeguards"; +import { issueTransfer } from "./handlers/transfer-issue"; /** * The main plugin function. Split for easier testing. @@ -33,8 +33,8 @@ export async function runPlugin(context: Context) { } else if (isIssueEvent(context)) { switch (eventName) { case "issues.opened": - await issueChecker(context); await addIssue(context); + await issueChecker(context); return await issueMatching(context); case "issues.edited": await issueChecker(context); @@ -42,6 +42,8 @@ export async function runPlugin(context: Context) { return await issueMatching(context); case "issues.deleted": return await deleteIssues(context); + case "issues.transferred": + return await issueTransfer(context); } } else if (eventName == "issues.labeled") { return await issueMatching(context); diff --git a/src/types/context.ts b/src/types/context.ts index b11ac2f..46f2e67 100644 --- a/src/types/context.ts +++ b/src/types/context.ts @@ -1,9 +1,9 @@ import { Octokit } from "@octokit/rest"; import { EmitterWebhookEvent as WebhookEvent, EmitterWebhookEventName as WebhookEventName } from "@octokit/webhooks"; +import { Logs } from "@ubiquity-os/ubiquity-os-logger"; +import { createAdapters } from "../adapters"; import { Env } from "./env"; import { PluginSettings } from "./plugin-inputs"; -import { Logs } from "@ubiquity-dao/ubiquibot-logger"; -import { createAdapters } from "../adapters"; /** * Update `manifest.json` with any events you want to support like so: @@ -17,7 +17,8 @@ export type SupportedEventsU = | "issues.opened" | "issues.edited" | "issues.deleted" - | "issues.labeled"; + | "issues.labeled" + | "issues.transferred"; export type SupportedEvents = { [K in SupportedEventsU]: K extends WebhookEventName ? WebhookEvent : never; diff --git a/src/types/payload.ts b/src/types/payload.ts index 395fa09..d1f3d64 100644 --- a/src/types/payload.ts +++ b/src/types/payload.ts @@ -1,3 +1,4 @@ import { EmitterWebhookEvent as WebhookEvent } from "@octokit/webhooks"; export type CommentPayload = WebhookEvent<"issue_comment">["payload"]; export type IssuePayload = WebhookEvent<"issues">["payload"]; +export type IssueTransferPayload = WebhookEvent<"issues.transferred">["payload"]; diff --git a/src/types/process-env.d.ts b/src/types/process-env.d.ts new file mode 100644 index 0000000..a2bb98f --- /dev/null +++ b/src/types/process-env.d.ts @@ -0,0 +1,9 @@ +declare global { + namespace NodeJS { + interface ProcessEnv { + GITHUB_TOKEN: string; + } + } +} + +export {}; diff --git a/src/types/typeguards.ts b/src/types/typeguards.ts index 01a6c26..aae7236 100644 --- a/src/types/typeguards.ts +++ b/src/types/typeguards.ts @@ -20,6 +20,11 @@ export function isIssueCommentEvent(context: Context): context is Context<"issue * * @param context The context object. */ -export function isIssueEvent(context: Context): context is Context<"issues.opened" | "issues.edited" | "issues.deleted"> { - return context.eventName === "issues.opened" || context.eventName === "issues.edited" || context.eventName === "issues.deleted"; +export function isIssueEvent(context: Context): context is Context<"issues.opened" | "issues.edited" | "issues.deleted" | "issues.transferred"> { + return ( + context.eventName === "issues.opened" || + context.eventName === "issues.edited" || + context.eventName === "issues.deleted" || + context.eventName === "issues.transferred" + ); } diff --git a/src/worker.ts b/src/worker.ts index 40df400..72b9a61 100644 --- a/src/worker.ts +++ b/src/worker.ts @@ -1,18 +1,16 @@ -import { Value } from "@sinclair/typebox/value"; -import { plugin } from "./plugin"; -import { Env, envValidator, pluginSettingsSchema, pluginSettingsValidator } from "./types"; import manifest from "../manifest.json"; +import { validateAndDecodeSchemas } from "./handlers/validator"; +import { plugin } from "./plugin"; +import { Env } from "./types"; export default { async fetch(request: Request, env: Env): Promise { try { - if (request.method === "GET") { - const url = new URL(request.url); - if (url.pathname === "/manifest.json") { - return new Response(JSON.stringify(manifest), { - headers: { "content-type": "application/json" }, - }); - } + const url = new URL(request.url); + if (url.pathname === "/manifest.json" && request.method === "GET") { + return new Response(JSON.stringify(manifest), { + headers: { "content-type": "application/json" }, + }); } if (request.method !== "POST") { return new Response(JSON.stringify({ error: `Only POST requests are supported.` }), { @@ -29,33 +27,10 @@ export default { } const webhookPayload = await request.json(); - const settings = Value.Decode(pluginSettingsSchema, Value.Default(pluginSettingsSchema, webhookPayload.settings)); - - if (!pluginSettingsValidator.test(settings)) { - const errors: string[] = []; - for (const error of pluginSettingsValidator.errors(settings)) { - console.error(error); - errors.push(`${error.path}: ${error.message}`); - } - return new Response(JSON.stringify({ error: `Error: "Invalid settings provided. ${errors.join("; ")}"` }), { - status: 400, - headers: { "content-type": "application/json" }, - }); - } - if (!envValidator.test(env)) { - const errors: string[] = []; - for (const error of envValidator.errors(env)) { - console.error(error); - errors.push(`${error.path}: ${error.message}`); - } - return new Response(JSON.stringify({ error: `Error: "Invalid environment provided. ${errors.join("; ")}"` }), { - status: 400, - headers: { "content-type": "application/json" }, - }); - } + const { decodedSettings, decodedEnv } = validateAndDecodeSchemas(env, webhookPayload.settings); - webhookPayload.settings = settings; - await plugin(webhookPayload, env); + webhookPayload.settings = decodedSettings; + await plugin(webhookPayload, decodedEnv); return new Response(JSON.stringify("OK"), { status: 200, headers: { "content-type": "application/json" } }); } catch (error) { return handleUncaughtError(error); @@ -63,8 +38,8 @@ export default { }, }; -function handleUncaughtError(error: unknown) { - console.error(error); +function handleUncaughtError(errors: unknown) { + console.error(errors); const status = 500; - return new Response(JSON.stringify({ error }), { status: status, headers: { "content-type": "application/json" } }); + return new Response(JSON.stringify(errors), { status: status, headers: { "content-type": "application/json" } }); } diff --git a/supabase/migrations/20240930052523_enable_rls.sql b/supabase/migrations/20240930052523_enable_rls.sql new file mode 100644 index 0000000..623072b --- /dev/null +++ b/supabase/migrations/20240930052523_enable_rls.sql @@ -0,0 +1,2 @@ +ALTER TABLE "issues" ENABLE ROW LEVEL SECURITY; +ALTER TABLE "issue_comments" ENABLE ROW LEVEL SECURITY; \ No newline at end of file diff --git a/supabase/migrations/20241002004403_issue_comments.sql b/supabase/migrations/20241002004403_issue_comments.sql new file mode 100644 index 0000000..9ebb751 --- /dev/null +++ b/supabase/migrations/20241002004403_issue_comments.sql @@ -0,0 +1,18 @@ +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + 1 - (l2_distance(current_quantized, embedding)) AS similarity + FROM issues + WHERE id <> current_id + AND 1 - (l2_distance(current_quantized, embedding)) > threshold + ORDER BY similarity + LIMIT top_k; -- Limit the number of results to top_k +END; +$$ LANGUAGE plpgsql; diff --git a/supabase/migrations/20241008165113_function_issue.sql b/supabase/migrations/20241008165113_function_issue.sql new file mode 100644 index 0000000..3223d1d --- /dev/null +++ b/supabase/migrations/20241008165113_function_issue.sql @@ -0,0 +1,38 @@ +DROP FUNCTION IF EXISTS find_similar_issues; + +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); + current_repo TEXT; + current_org TEXT; +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + + -- Extract the current issue's repo and org from the payload + SELECT + payload->'repository'->>'name'::text, + payload->'repository'->'owner'->>'login'::text + INTO current_repo, current_org + FROM issues + WHERE id = current_id; + + -- Check if the current issue has valid repo and org + IF current_repo IS NULL OR current_org IS NULL THEN + RETURN; -- Exit if current issue's repo or org is null + END IF; + + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + (l2_distance(current_quantized, embedding)) AS similarity + FROM issues + WHERE id <> current_id + AND current_repo = payload->'repository'->>'name'::text + AND current_org = payload->'repository'->'owner'->>'login'::text + AND l2_distance(current_quantized, embedding) > threshold -- Ensure similarity exceeds threshold + ORDER BY similarity DESC + LIMIT top_k; +END; +$$ LANGUAGE plpgsql; diff --git a/supabase/migrations/20241008175109_function_issue.sql b/supabase/migrations/20241008175109_function_issue.sql new file mode 100644 index 0000000..6c77ce3 --- /dev/null +++ b/supabase/migrations/20241008175109_function_issue.sql @@ -0,0 +1,38 @@ +DROP FUNCTION IF EXISTS find_similar_issues; + +CREATE OR REPLACE FUNCTION find_similar_issues(current_id VARCHAR, query_embedding vector(1024), threshold float8, top_k INT) +RETURNS TABLE(issue_id VARCHAR, issue_plaintext TEXT, similarity float8) AS $$ +DECLARE + current_quantized vector(1024); + current_repo TEXT; + current_org TEXT; +BEGIN + -- Ensure the query_embedding is in the correct format + current_quantized := query_embedding; + + -- Extract the current issue's repo and org from the payload + SELECT + payload->'repository'->>'name'::text, + payload->'repository'->'owner'->>'login'::text + INTO current_repo, current_org + FROM issues + WHERE id = current_id; + + -- Check if the current issue has valid repo and org + IF current_repo IS NULL OR current_org IS NULL THEN + RETURN; -- Exit if current issue's repo or org is null + END IF; + + RETURN QUERY + SELECT id AS issue_id, + plaintext AS issue_plaintext, + ((0.5 * inner_product(current_quantized, embedding)) + 0.5 * (1 / (1 + l2_distance(current_quantized, embedding)))) as similarity + FROM issues + WHERE id <> current_id + AND current_repo = payload->'repository'->>'name'::text + AND current_org = payload->'repository'->'owner'->>'login'::text + AND ((0.5 * inner_product(current_quantized, embedding)) + 0.5 * (1 / (1 + l2_distance(current_quantized, embedding)))) > threshold + ORDER BY similarity DESC + LIMIT top_k; +END; +$$ LANGUAGE plpgsql; diff --git a/tests/__mocks__/adapter.ts b/tests/__mocks__/adapter.ts index d1f634c..abb271f 100644 --- a/tests/__mocks__/adapter.ts +++ b/tests/__mocks__/adapter.ts @@ -36,21 +36,29 @@ export function createMockAdapters(context: Context) { commentMap.set(commentNodeId, { id: commentNodeId, plaintext, author_id: authorId, embedding, issue_id: issueId }); } ), - updateComment: jest.fn(async (plaintext: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) => { - if (!commentMap.has(commentNodeId)) { - throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); - } - const originalComment = commentMap.get(commentNodeId); - if (!originalComment) { - throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); - } - const { id, author_id } = originalComment; - const embedding = await context.adapters.voyage.embedding.createEmbedding(plaintext); - if (isPrivate) { - plaintext = null; + updateComment: jest.fn( + async ( + plaintext: string | null, + commentNodeId: string, + authorId: number, + payload: Record | null, + isPrivate: boolean, + issueId: string + ) => { + if (!commentMap.has(commentNodeId)) { + throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); + } + const originalComment = commentMap.get(commentNodeId); + if (!originalComment) { + throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); + } + const embedding = await context.adapters.voyage.embedding.createEmbedding(plaintext); + if (isPrivate) { + plaintext = null; + } + commentMap.set(commentNodeId, { id: issueId, plaintext, author_id: authorId, embedding, payload }); } - commentMap.set(commentNodeId, { id, plaintext, author_id, embedding }); - }), + ), deleteComment: jest.fn(async (commentNodeId: string) => { if (!commentMap.has(commentNodeId)) { throw new Error(STRINGS.COMMENT_DOES_NOT_EXIST); diff --git a/tests/main.test.ts b/tests/main.test.ts index bc4ed19..27caeeb 100644 --- a/tests/main.test.ts +++ b/tests/main.test.ts @@ -1,19 +1,18 @@ // cSpell:disable +import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it } from "@jest/globals"; import { drop } from "@mswjs/data"; -import { db } from "./__mocks__/db"; -import { server } from "./__mocks__/node"; -import { expect, describe, beforeAll, beforeEach, afterAll, afterEach, it } from "@jest/globals"; -import { Context, SupportedEvents } from "../src/types/context"; import { Octokit } from "@octokit/rest"; -import { STRINGS } from "./__mocks__/strings"; -import { createComment, setupTests } from "./__mocks__/helpers"; -import manifest from "../manifest.json"; +import { Logs } from "@ubiquity-os/ubiquity-os-logger"; import dotenv from "dotenv"; -import { Logs } from "@ubiquity-dao/ubiquibot-logger"; -import { Env } from "../src/types"; import { runPlugin } from "../src/plugin"; +import { Env } from "../src/types"; +import { Context, SupportedEvents } from "../src/types/context"; import { CommentMock, createMockAdapters } from "./__mocks__/adapter"; +import { db } from "./__mocks__/db"; +import { createComment, setupTests } from "./__mocks__/helpers"; +import { server } from "./__mocks__/node"; +import { STRINGS } from "./__mocks__/strings"; dotenv.config(); jest.requireActual("@octokit/rest"); @@ -35,17 +34,6 @@ describe("Plugin tests", () => { await setupTests(); }); - it("Should serve the manifest file", async () => { - const worker = (await import("../src/worker")).default; - const response = await worker.fetch(new Request("http://localhost/manifest.json"), { - SUPABASE_KEY: "test", - SUPABASE_URL: "test", - VOYAGEAI_API_KEY: "test", - }); - const content = await response.json(); - expect(content).toEqual(manifest); - }); - it("When a comment is created it should add it to the database", async () => { const { context } = createContext(STRINGS.HELLO_WORLD, 1, 1, 1, 1, "sasasCreate"); await runPlugin(context); @@ -165,8 +153,8 @@ function createContextInner( } as Context["payload"], config: { warningThreshold: 0.75, - matchThreshold: 0.95, - jobMatchingThreshold: 0.95, + matchThreshold: 0.9, + jobMatchingThreshold: 0.75, }, adapters: {} as Context["adapters"], logger: new Logs("debug"), diff --git a/yarn.lock b/yarn.lock index 519f6f7..f81a4c5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2155,10 +2155,10 @@ "@typescript-eslint/types" "7.13.1" eslint-visitor-keys "^3.4.3" -"@ubiquity-dao/ubiquibot-logger@^1.3.0": - version "1.3.0" - resolved "https://registry.yarnpkg.com/@ubiquity-dao/ubiquibot-logger/-/ubiquibot-logger-1.3.0.tgz#b07364658be95b3be3876305c66b2adc906e9590" - integrity sha512-ifkd7fB2OMTSt3OL9L14bCIvCMXV+IHFdJYU5S8FUzE2U88b4xKxuEAYDFX+DX3wwDEswFAVUwx5aP3QcMIRWA== +"@ubiquity-os/ubiquity-os-logger@^1.3.2": + version "1.3.2" + resolved "https://registry.yarnpkg.com/@ubiquity-os/ubiquity-os-logger/-/ubiquity-os-logger-1.3.2.tgz#4423bc0baeac5c2f73123d15fd961310521163cd" + integrity sha512-oTIzR8z4jAQmaeJp98t1bZUKE3Ws9pas0sbxt58fC37MwXclPMWrLO+a0JlhPkdJYsvpv/q/79wC2MKVhOIVXQ== JSONStream@^1.3.5: version "1.3.5" @@ -6407,10 +6407,10 @@ summary@2.1.0: resolved "https://registry.yarnpkg.com/summary/-/summary-2.1.0.tgz#be8a49a0aa34eb6ceea56042cae88f8add4b0885" integrity sha512-nMIjMrd5Z2nuB2RZCKJfFMjgS3fygbeyGk9PxPPaJR1RIcyN9yn4A63Isovzm3ZtQuEkLBVgMdPup8UeLH7aQw== -supabase@1.191.3: - version "1.191.3" - resolved "https://registry.yarnpkg.com/supabase/-/supabase-1.191.3.tgz#9e28643b10bc458f8c13a1e3f3ba9172679093f0" - integrity sha512-5tIG7mPc5lZ9QRbkZssyHiOsx42qGFaVqclauXv+1fJAkZnfA28d0pzEDvfs33+w8YTReO5nNaWAgyzkWQQwfA== +supabase@1.200.3: + version "1.200.3" + resolved "https://registry.yarnpkg.com/supabase/-/supabase-1.200.3.tgz#52cd53fd0bcd46af0748c8ffa61fac05e21a0a23" + integrity sha512-3NdhqBkfPVlm+rAhWQoVcyr54kykuAlHav/GWaAoQEHBDbbYI1lhbDzugk8ryQg92vSLwr3pWz0s4Hjdte8WyQ== dependencies: bin-links "^4.0.3" https-proxy-agent "^7.0.2" @@ -6656,10 +6656,10 @@ typescript-eslint@7.13.1: "@typescript-eslint/parser" "7.13.1" "@typescript-eslint/utils" "7.13.1" -typescript@5.4.5: - version "5.4.5" - resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.4.5.tgz#42ccef2c571fdbd0f6718b1d1f5e6e5ef006f611" - integrity sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ== +typescript@5.6.2: + version "5.6.2" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.6.2.tgz#d1de67b6bef77c41823f822df8f0b3bcff60a5a0" + integrity sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw== uc.micro@^2.0.0, uc.micro@^2.1.0: version "2.1.0"