Skip to content

Commit

Permalink
chore: remove const fn, update db fn, update findSimilarContent
Browse files Browse the repository at this point in the history
  • Loading branch information
Keyrxng committed Sep 30, 2024
1 parent 2915e70 commit 88489d8
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 24 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ This is a plugin for [Ubiquibot](https://github.com/ubiquity/ubiquibot-kernel).
## Usage

- Add the following to your `.ubiquibot-config.yml` file with the appropriate URL:

```yaml
- plugin: https://ubiquity-os-comment-vector-embeddings-main.ubiquity.workers.dev
with:
Expand Down
10 changes: 9 additions & 1 deletion manifest.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{
"name": "Generate vector embeddings",
"description": "Enables the storage, updating, and deletion of issue comment embeddings.",
"ubiquity:listeners": ["issue_comment.created", "issue_comment.edited", "issue_comment.deleted", "issues.opened", "issues.edited", "issues.deleted", "issues.labeled"]
"ubiquity:listeners": [
"issue_comment.created",
"issue_comment.edited",
"issue_comment.deleted",
"issues.opened",
"issues.edited",
"issues.deleted",
"issues.labeled"
]
}
18 changes: 9 additions & 9 deletions src/adapters/supabase/helpers/embeddings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ export class Embeddings extends Super {
throw new Error(this.context.logger.error("Markdown not found", { sourceId })?.logMessage.raw);
}

const embeddingData = await this.getEmbedding(sourceId);

if (!embeddingData) {
return await this.createEmbedding(sourceId, type, body, metadata);
}

const embedding = await this._embedWithVoyage(body);

const toStore: Omit<CommentType, "created_at"> = {
Expand All @@ -123,12 +129,6 @@ export class Embeddings extends Super {
modified_at: new Date().toISOString(),
};

const embeddingData = await this.getEmbedding(sourceId);

if (!embeddingData) {
return await this.createEmbedding(sourceId, type, body, metadata);
}

const { error } = await this.supabase.from("content").update(toStore).eq("source_id", sourceId);

if (error) {
Expand Down Expand Up @@ -157,10 +157,10 @@ export class Embeddings extends Super {

// Working with embeddings

async findSimilarIssues(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[]> {
async findSimilarContent(markdown: string, threshold: number, currentId: string): Promise<IssueSimilaritySearchResult[]> {
const embedding = await this._embedWithVoyage(markdown);
const { data, error } = await this.supabase.rpc("find_similar_issues", {
current_id: currentId,
const { data, error } = await this.supabase.rpc("find_similar_content", {
curr_source_id: currentId,
query_embedding: embedding,
threshold: threshold,
});
Expand Down
4 changes: 2 additions & 2 deletions src/handlers/issue-matching.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ export interface IssueGraphqlResponse {
similarity: number;
}

const commentBuilder = (matchResultArray: Map<string, Array<string>>): string => {
function commentBuilder(matchResultArray: Map<string, Array<string>>) {
const commentLines: string[] = [">[!NOTE]", ">The following contributors may be suitable for this task:"];
matchResultArray.forEach((issues, assignee) => {
commentLines.push(`>### [${assignee}](https://www.github.com/${assignee})`);
Expand All @@ -50,7 +50,7 @@ export async function issueMatching(context: Context<"issues.opened" | "issues.e
// create a new comment with users who completed task most similar to the issue
// if the comment already exists, it should update the comment with the new users
const matchResultArray: Map<string, Array<string>> = new Map();
const similarIssues = await supabase.embeddings.findSimilarIssues(issueContent, context.config.jobMatchingThreshold, issue.node_id);
const similarIssues = await supabase.embeddings.findSimilarContent(issueContent, context.config.jobMatchingThreshold, issue.node_id);
if (similarIssues && similarIssues.length > 0) {
// Find the most similar issue and the users who completed the task
similarIssues.sort((a, b) => b.similarity - a.similarity);
Expand Down
8 changes: 4 additions & 4 deletions src/handlers/task-deduplication.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export interface IssueGraphqlResponse {
* @param context
* @returns a CallbackResult (for example `{ statusCode: 200 }` once similar issues have been found and handled)
*/
export async function taskSimilaritySearch(context: Context<"issues.opened">): Promise<CallbackResult> {
export async function taskSimilaritySearch(context: Context<"issues.opened" | "issues.edited">): Promise<CallbackResult> {
const {
logger,
adapters: { supabase },
Expand All @@ -26,9 +26,9 @@ export async function taskSimilaritySearch(context: Context<"issues.opened">): P
} = context;
const similarIssues: IssueSimilaritySearchResult[] = [];

similarIssues.push(...(await supabase.embeddings.findSimilarIssues(issue.title, context.config.warningThreshold, issue.node_id)));
similarIssues.push(...(await supabase.embeddings.findSimilarContent(issue.title, context.config.warningThreshold, issue.node_id)));
if (issue.body) {
similarIssues.push(...(await supabase.embeddings.findSimilarIssues(issue.body, context.config.warningThreshold, issue.node_id)));
similarIssues.push(...(await supabase.embeddings.findSimilarContent(issue.body, context.config.warningThreshold, issue.node_id)));
}

logger.info(`Found ${similarIssues.length} similar issues`);
Expand All @@ -53,7 +53,7 @@ export async function taskSimilaritySearch(context: Context<"issues.opened">): P
if (similarIssues.length > 0) {
logger.info(`Similar issue which matches more than ${context.config.warningThreshold} already exists`);
await handleSimilarIssuesComment(context, issue.number, similarIssues);
return { statusCode: 200 }
return { statusCode: 200 };
}
}

Expand Down
1 change: 0 additions & 1 deletion src/proxy-callbacks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ const callbacks = {
"issues.deleted": [deleteTaskEmbedding],
} as ProxyCallbacks;


/**
*
Expand Down
2 changes: 1 addition & 1 deletion supabase/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ enabled = true
port = 54321
# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API
# endpoints. public and storage are always included.
schemas = ["public", "storage", "graphql_public"]
schemas = ["public", "graphql_public"]
# Extra schemas to add to the search_path of every request. public is always included.
extra_search_path = ["public", "extensions"]
# The maximum number of rows returned from a view, table, or stored procedure. Limits payload size
Expand Down
13 changes: 7 additions & 6 deletions supabase/migrations/20240930150911_content.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
create extension if not exists vector;
DROP TABLE IF EXISTS content;

CREATE TABLE IF NOT EXISTS content (
Expand All @@ -13,16 +14,16 @@ CREATE TABLE IF NOT EXISTS content (

ALTER TABLE "content" ENABLE ROW LEVEL SECURITY;

CREATE OR REPLACE FUNCTION find_similar_content(current_source_id VARCHAR, query_embedding vector(1024), threshold float8)
CREATE OR REPLACE FUNCTION find_similar_content(curr_source_id VARCHAR, query_embedding vector(1024), threshold float8)
RETURNS TABLE(source_id VARCHAR, content_plaintext TEXT, similarity float8) AS $$
BEGIN
RETURN QUERY
SELECT source_id,
plaintext AS content_plaintext,
1 - (embedding <=> query_embedding) AS similarity
SELECT content.source_id AS source_id,
content.plaintext AS content_plaintext,
1 - (content.embedding <=> query_embedding) AS similarity
FROM content
WHERE source_id <> current_source_id
AND 1 - (embedding <=> query_embedding) >= threshold
WHERE content.source_id <> curr_source_id
AND 1 - (content.embedding <=> query_embedding) >= threshold
ORDER BY similarity DESC;
END;
$$ LANGUAGE plpgsql;

0 comments on commit 88489d8

Please sign in to comment.