From 1a1599717374d76918fe5e4970bd8b6d7f8815be Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Fri, 13 Sep 2024 03:20:27 -0400 Subject: [PATCH] feat: added cols markdown and plaintext, adds code for conversion from markdown to plaintext --- eslint.config.mjs | 2 +- src/adapters/supabase/helpers/comment.ts | 23 +++++++++------ src/adapters/supabase/helpers/issues.ts | 28 ++++++++++++------- src/adapters/utils/markdown-to-plaintext.ts | 20 +++++++++++++ src/handlers/add-issue.ts | 4 +-- src/handlers/update-comments.ts | 4 +-- src/handlers/update-issue.ts | 4 +-- .../20240912225853_issue_comments.sql | 4 ++- .../20240913070225_issue_comments.sql | 5 ++++ 9 files changed, 67 insertions(+), 27 deletions(-) create mode 100644 src/adapters/utils/markdown-to-plaintext.ts create mode 100644 supabase/migrations/20240913070225_issue_comments.sql diff --git a/eslint.config.mjs b/eslint.config.mjs index f43237a..f594657 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -9,7 +9,7 @@ export default tsEslint.config({ "@typescript-eslint": tsEslint.plugin, "check-file": checkFile, }, - ignores: [".github/knip.ts", "src/types/database.ts"], + ignores: [".github/knip.ts", "src/types/database.ts", "src/adapters/utils/markdown-to-plaintext.ts"], extends: [eslint.configs.recommended, ...tsEslint.configs.recommended, sonarjs.configs.recommended], languageOptions: { parser: tsEslint.parser, diff --git a/src/adapters/supabase/helpers/comment.ts b/src/adapters/supabase/helpers/comment.ts index f2ff62a..3fa08b2 100644 --- a/src/adapters/supabase/helpers/comment.ts +++ b/src/adapters/supabase/helpers/comment.ts @@ -1,10 +1,11 @@ import { SupabaseClient } from "@supabase/supabase-js"; import { SuperSupabase } from "./supabase"; import { Context } from "../../../types/context"; +import { markdownToPlainText } from "../../utils/markdown-to-plaintext"; export interface CommentType { id: string; - plaintext?: string; + markdown?: string; author_id: number; created_at: string; modified_at: string; @@ -17,7 +18,7 @@ export class Comment extends SuperSupabase { } async createComment( - plaintext: string | null, + markdown: string | null, commentNodeId: string, authorId: number, payload: Record | null, @@ -35,14 +36,16 @@ export class Comment extends SuperSupabase { return; } else { //Create the embedding for this comment - const embedding = await this.context.adapters.voyage.embedding.createEmbedding(plaintext); + const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); + let plaintext: string | null = markdownToPlainText(markdown || ""); if (isPrivate) { - plaintext = null as string | null; + markdown = null as string | null; payload = null as Record | null; + plaintext = null as string | null; } const { error } = await this.supabase .from("issue_comments") - .insert([{ id: commentNodeId, plaintext, author_id: authorId, type: "comment", payload, embedding: embedding, issue_id: issueId }]); + .insert([{ id: commentNodeId, markdown, plaintext, author_id: authorId, payload, embedding: embedding, issue_id: issueId }]); if (error) { this.context.logger.error("Error creating comment", error); return; @@ -51,16 +54,18 @@ export class Comment extends SuperSupabase { this.context.logger.info("Comment created successfully"); } - async updateComment(plaintext: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) { + async updateComment(markdown: string | null, commentNodeId: string, payload: Record | null, isPrivate: boolean) { //Create the embedding for this comment - const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(plaintext)); + const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); + let plaintext: string | null = markdownToPlainText(markdown || ""); if (isPrivate) { - plaintext = null as string | null; + markdown = null as string | null; payload = null as Record | null; + plaintext = null as string | null; } const { error } = await this.supabase .from("issue_comments") - .update({ plaintext, embedding: embedding, payload, modified_at: new Date() }) + .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) .eq("id", commentNodeId); if (error) { this.context.logger.error("Error updating comment", error); diff --git a/src/adapters/supabase/helpers/issues.ts b/src/adapters/supabase/helpers/issues.ts index 7624ff8..063d8a9 100644 --- a/src/adapters/supabase/helpers/issues.ts +++ b/src/adapters/supabase/helpers/issues.ts @@ -1,10 +1,11 @@ import { SupabaseClient } from "@supabase/supabase-js"; import { SuperSupabase } from "./supabase"; import { Context } from "../../../types/context"; +import { markdownToPlainText } from "../../utils/markdown-to-plaintext"; export interface IssueType { id: string; - plaintext?: string; + markdown?: string; author_id: number; created_at: string; modified_at: string; @@ -17,7 +18,7 @@ export class Issues extends SuperSupabase { super(supabase, context); } - async createIssue(issueNodeId: string, payload: Record | null, isPrivate: boolean, plaintext: string | null, authorId: number) { + async createIssue(issueNodeId: string, payload: Record | null, isPrivate: boolean, markdown: string | null, authorId: number) { //First Check if the issue already exists const { data, error } = await this.supabase.from("issues").select("*").eq("id", issueNodeId); if (error) { @@ -28,12 +29,14 @@ export class Issues extends SuperSupabase { this.context.logger.info("Issue already exists"); return; } else { - const embedding = await this.context.adapters.voyage.embedding.createEmbedding(plaintext); + const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); + let plaintext: string | null = markdownToPlainText(markdown || ""); if (isPrivate) { payload = null; + markdown = null; plaintext = null; } - const { error } = await this.supabase.from("issues").insert([{ id: issueNodeId, payload, type: "issue", plaintext, author_id: authorId, embedding }]); + const { error } = await this.supabase.from("issues").insert([{ id: issueNodeId, payload, markdown, plaintext, author_id: authorId, embedding }]); if (error) { this.context.logger.error("Error creating issue", error); return; @@ -42,14 +45,19 @@ export class Issues extends SuperSupabase { this.context.logger.info("Issue created successfully"); } - async updateIssue(plaintext: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean) { + async updateIssue(markdown: string | null, issueNodeId: string, payload: Record | null, isPrivate: boolean) { //Create the embedding for this comment - const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(plaintext)); + const embedding = Array.from(await this.context.adapters.voyage.embedding.createEmbedding(markdown)); + let plaintext: string | null = markdownToPlainText(markdown || ""); if (isPrivate) { - plaintext = null as string | null; + markdown = null as string | null; payload = null as Record | null; + plaintext = null as string | null; } - const { error } = await this.supabase.from("issues").update({ plaintext, embedding: embedding, payload, modified_at: new Date() }).eq("id", issueNodeId); + const { error } = await this.supabase + .from("issues") + .update({ markdown, plaintext, embedding: embedding, payload, modified_at: new Date() }) + .eq("id", issueNodeId); if (error) { this.context.logger.error("Error updating comment", error); } @@ -62,8 +70,8 @@ export class Issues extends SuperSupabase { } } - async findSimilarIssues(plaintext: string, threshold: number): Promise { - const embedding = await this.context.adapters.voyage.embedding.createEmbedding(plaintext); + async findSimilarIssues(markdown: string, threshold: number): Promise { + const embedding = await this.context.adapters.voyage.embedding.createEmbedding(markdown); const { data, error } = await this.supabase .from("issues") .select("*") diff --git a/src/adapters/utils/markdown-to-plaintext.ts b/src/adapters/utils/markdown-to-plaintext.ts new file mode 100644 index 0000000..5edf926 --- /dev/null +++ b/src/adapters/utils/markdown-to-plaintext.ts @@ -0,0 +1,20 @@ +/** + * Converts a Markdown string to plain text. + * @param markdown + * @returns + */ +export function markdownToPlainText(markdown: string): string { + let text = markdown.replace(/^#{1,6}\s+/gm, ""); // Remove headers + text = text.replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1"); // Inline links + text = text.replace(/!\[([^\]]*)\]\([^\)]+\)/g, "$1"); // Inline images + text = text.replace(/```[\s\S]*?```/g, (match) => match.replace(/```/g, "").trim()); // Code blocks + text = text.replace(/`([^`]+)`/g, "$1"); // Inline code + text = text.replace(/(\*\*|__)(.*?)\1/g, "$2"); // Bold + text = text.replace(/(\*|_)(.*?)\1/g, "$2"); // Italic + text = text.replace(/~~(.*?)~~/g, "$1"); // Strikethrough + text = text.replace(/^>\s+/gm, ""); // Block quotes + text = text.replace(/^\s*[-*]{3,}\s*$/gm, ""); // Horizontal rules + text = text.replace(/\n{3,}/g, "\n\n"); // Remove extra newlines + text = text.replace(/\s+/g, " ").trim(); // Remove extra spaces + return text; +} diff --git a/src/handlers/add-issue.ts b/src/handlers/add-issue.ts index 4264a44..2afec8c 100644 --- a/src/handlers/add-issue.ts +++ b/src/handlers/add-issue.ts @@ -7,13 +7,13 @@ export async function addIssue(context: Context) { adapters: { supabase }, } = context; const { payload } = context as { payload: IssuePayload }; - const plaintext = payload.issue.body + " " + payload.issue.title || ""; + const markdown = payload.issue.body + " " + payload.issue.title || ""; const authorId = payload.issue.user?.id || -1; const nodeId = payload.issue.node_id; const isPrivate = payload.repository.private; try { - await supabase.issue.createIssue(nodeId, payload, isPrivate, plaintext, authorId); + await supabase.issue.createIssue(nodeId, payload, isPrivate, markdown, authorId); } catch (error) { if (error instanceof Error) { logger.error(`Error creating issue:`, { error: error, stack: error.stack }); diff --git a/src/handlers/update-comments.ts b/src/handlers/update-comments.ts index 6dd2dae..94e9715 100644 --- a/src/handlers/update-comments.ts +++ b/src/handlers/update-comments.ts @@ -9,10 +9,10 @@ export async function updateComment(context: Context) { const { payload } = context as { payload: CommentPayload }; const nodeId = payload.comment.node_id; const isPrivate = payload.repository.private; - const plaintext = payload.comment.body; + const markdown = payload.comment.body; // Fetch the previous comment and update it in the db try { - await supabase.comment.updateComment(plaintext, nodeId, payload, isPrivate); + await supabase.comment.updateComment(markdown, nodeId, payload, isPrivate); } catch (error) { if (error instanceof Error) { logger.error(`Error updating comment:`, { error: error, stack: error.stack }); diff --git a/src/handlers/update-issue.ts b/src/handlers/update-issue.ts index 64f3548..488aec7 100644 --- a/src/handlers/update-issue.ts +++ b/src/handlers/update-issue.ts @@ -10,10 +10,10 @@ export async function updateIssue(context: Context) { const payloadObject = payload; const nodeId = payload.issue.node_id; const isPrivate = payload.repository.private; - const plaintext = payload.issue.body + " " + payload.issue.title || ""; + const markdown = payload.issue.body + " " + payload.issue.title || ""; // Fetch the previous issue and update it in the db try { - await supabase.issue.updateIssue(plaintext, nodeId, payloadObject, isPrivate); + await supabase.issue.updateIssue(markdown, nodeId, payloadObject, isPrivate); } catch (error) { if (error instanceof Error) { logger.error(`Error updating issue:`, { error: error, stack: error.stack }); diff --git a/supabase/migrations/20240912225853_issue_comments.sql b/supabase/migrations/20240912225853_issue_comments.sql index a6e24b4..0d71cab 100644 --- a/supabase/migrations/20240912225853_issue_comments.sql +++ b/supabase/migrations/20240912225853_issue_comments.sql @@ -7,7 +7,6 @@ CREATE TABLE IF NOT EXISTS issues ( embedding Vector(1024) not null, payload jsonb, author_id VARCHAR not null, - type text not null default 'issue', created_at timestamptz not null default now(), modified_at timestamptz not null default now() ); @@ -17,5 +16,8 @@ ADD COLUMN issue_id VARCHAR REFERENCES issues(id) ON DELETE CASCADE; +ALTER TABLE issue_comments +DROP COLUMN type; + ALTER TABLE issue_comments RENAME COLUMN payloadobject TO payload; \ No newline at end of file diff --git a/supabase/migrations/20240913070225_issue_comments.sql b/supabase/migrations/20240913070225_issue_comments.sql new file mode 100644 index 0000000..09daf65 --- /dev/null +++ b/supabase/migrations/20240913070225_issue_comments.sql @@ -0,0 +1,5 @@ +ALTER TABLE issue_comments +ADD COLUMN markdown TEXT; + +ALTER TABLE issues +ADD COLUMN markdown TEXT; \ No newline at end of file