From 0d5d9758ef7c992e0beb58d2df5f007e635320e6 Mon Sep 17 00:00:00 2001 From: Shivaditya Shivganesh Date: Fri, 13 Sep 2024 03:55:49 -0400 Subject: [PATCH] fix: removed custom code for conversion of markdown to plaintext --- package.json | 3 + src/adapters/utils/markdown-to-plaintext.ts | 21 +++---- yarn.lock | 62 +++++++++++++++++++++ 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/package.json b/package.json index 7718367..6ef92a4 100644 --- a/package.json +++ b/package.json @@ -35,8 +35,11 @@ "@octokit/webhooks": "13.2.7", "@sinclair/typebox": "0.32.33", "@supabase/supabase-js": "^2.45.2", + "@types/markdown-it": "^14.1.2", "@ubiquity-dao/ubiquibot-logger": "^1.3.0", "dotenv": "16.4.5", + "markdown-it": "^14.1.0", + "markdown-it-plain-text": "^0.3.0", "typebox-validators": "0.3.5", "voyageai": "^0.0.1-5" }, diff --git a/src/adapters/utils/markdown-to-plaintext.ts b/src/adapters/utils/markdown-to-plaintext.ts index 448a043..93e5787 100644 --- a/src/adapters/utils/markdown-to-plaintext.ts +++ b/src/adapters/utils/markdown-to-plaintext.ts @@ -1,3 +1,6 @@ +import markdownit from "markdown-it"; +import plainTextPlugin from "markdown-it-plain-text"; + /** * Converts a Markdown string to plain text. * @param markdown @@ -7,17 +10,9 @@ export function markdownToPlainText(markdown: string | null): string | null { if (!markdown) { return markdown; } - let text = markdown.replace(/^#{1,6}\s+/gm, ""); // Remove headers - text = text.replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1"); // Inline links - text = text.replace(/!\[([^\]]*)\]\([^\)]+\)/g, "$1"); // Inline images - text = text.replace(/```[\s\S]*?```/g, (match) => match.replace(/```/g, "").trim()); // Code blocks - text = text.replace(/`([^`]+)`/g, "$1"); // Inline code - text = text.replace(/(\*\*|__)(.*?)\1/g, "$2"); // Bold - text = text.replace(/(\*|_)(.*?)\1/g, "$2"); // Italic - text = text.replace(/~~(.*?)~~/g, "$1"); // Strikethrough - text = text.replace(/^>\s+/gm, ""); // Block quotes - text = text.replace(/^\s*[-*]{3,}\s*$/gm, ""); // Horizontal rules - text = text.replace(/\n{3,}/g, "\n\n"); // Remove extra newlines - text = text.replace(/\s+/g, " ").trim(); // Remove extra spaces - return text; + const md = markdownit(); + md.use(plainTextPlugin); + md.render(markdown); + //Package markdown-it-plain-text does not have types + return (md as any).plainText; } diff --git a/yarn.lock b/yarn.lock index e653515..8439328 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1961,16 +1961,34 @@ expect "^29.0.0" pretty-format "^29.0.0" +"@types/linkify-it@^5": + version "5.0.0" + resolved "https://registry.yarnpkg.com/@types/linkify-it/-/linkify-it-5.0.0.tgz#21413001973106cda1c3a9b91eedd4ccd5469d76" + integrity sha512-sVDA58zAw4eWAffKOaQH5/5j3XeayukzDk+ewSsnv3p4yJEZHCCzMDiZM8e0OUrRvmpGZ85jf4yDHkHsgBNr9Q== + "@types/lodash@^4.14.172": version "4.17.4" resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.17.4.tgz#0303b64958ee070059e3a7184048a55159fe20b7" integrity sha512-wYCP26ZLxaT3R39kiN2+HcJ4kTd3U1waI/cY7ivWYqFP6pW3ZNpvi6Wd6PHZx7T/t8z0vlkXMg3QYLa7DZ/IJQ== +"@types/markdown-it@^14.1.2": + version "14.1.2" + resolved "https://registry.yarnpkg.com/@types/markdown-it/-/markdown-it-14.1.2.tgz#57f2532a0800067d9b934f3521429a2e8bfb4c61" + integrity sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog== + dependencies: + "@types/linkify-it" "^5" + "@types/mdurl" "^2" + "@types/md5@^2.3.0": version "2.3.5" resolved "https://registry.yarnpkg.com/@types/md5/-/md5-2.3.5.tgz#481cef0a896e3a5dcbfc5a8a8b02c05958af48a5" integrity sha512-/i42wjYNgE6wf0j2bcTX6kuowmdL/6PE4IVitMpm2eYKBUuYCprdcWVK+xEF0gcV6ufMCRhtxmReGfc6hIK7Jw== +"@types/mdurl@^2": + version "2.0.0" + resolved "https://registry.yarnpkg.com/@types/mdurl/-/mdurl-2.0.0.tgz#d43878b5b20222682163ae6f897b20447233bdfd" + integrity sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg== + "@types/mute-stream@^0.0.4": version "0.0.4" resolved "https://registry.yarnpkg.com/@types/mute-stream/-/mute-stream-0.0.4.tgz#77208e56a08767af6c5e1237be8888e2f255c478" @@ -3201,6 +3219,11 @@ emoji-regex@^9.2.2: resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72" integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg== +entities@^4.4.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48" + integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw== + env-paths@^2.2.1: version "2.2.1" resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-2.2.1.tgz#420399d416ce1fbe9bc0a07c62fa68d67fd0f8f2" @@ -4937,6 +4960,13 @@ lines-and-columns@^1.1.6: resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632" integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg== +linkify-it@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/linkify-it/-/linkify-it-5.0.0.tgz#9ef238bfa6dc70bd8e7f9572b52d369af569b421" + integrity sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ== + dependencies: + uc.micro "^2.0.0" + lint-staged@15.2.7: version "15.2.7" resolved "https://registry.yarnpkg.com/lint-staged/-/lint-staged-15.2.7.tgz#97867e29ed632820c0fb90be06cd9ed384025649" @@ -5117,6 +5147,23 @@ map-obj@^2.0.0: resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-2.0.0.tgz#a65cd29087a92598b8791257a523e021222ac1f9" integrity sha512-TzQSV2DiMYgoF5RycneKVUzIa9bQsj/B3tTgsE3dOGqlzHnGIDaC7XBE7grnA+8kZPnfqSGFe95VHc2oc0VFUQ== +markdown-it-plain-text@^0.3.0: + version "0.3.0" + resolved "https://registry.yarnpkg.com/markdown-it-plain-text/-/markdown-it-plain-text-0.3.0.tgz#374abfcc1c95ae3d7a5e09365558d43184e1e7db" + integrity sha512-JT5x7oXZaTY6lwxsnxaIAT4hm5Q2Mi8dZoCaCNg3s7JXmDxY84D90OtV0US436UvN4zOUHoac4ExceTogydOLw== + +markdown-it@^14.1.0: + version "14.1.0" + resolved "https://registry.yarnpkg.com/markdown-it/-/markdown-it-14.1.0.tgz#3c3c5992883c633db4714ccb4d7b5935d98b7d45" + integrity sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg== + dependencies: + argparse "^2.0.1" + entities "^4.4.0" + linkify-it "^5.0.0" + mdurl "^2.0.0" + punycode.js "^2.3.1" + uc.micro "^2.1.0" + md5@^2.3.0: version "2.3.0" resolved "https://registry.yarnpkg.com/md5/-/md5-2.3.0.tgz#c3da9a6aae3a30b46b7b0c349b87b110dc3bda4f" @@ -5126,6 +5173,11 @@ md5@^2.3.0: crypt "0.0.2" is-buffer "~1.1.6" +mdurl@^2.0.0: + version "2.0.0" + resolved "https://registry.yarnpkg.com/mdurl/-/mdurl-2.0.0.tgz#80676ec0433025dd3e17ee983d0fe8de5a2237e0" + integrity sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w== + memorystream@^0.3.1: version "0.3.1" resolved "https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2" @@ -5738,6 +5790,11 @@ prompts@^2.0.1: kleur "^3.0.3" sisteransi "^1.0.5" +punycode.js@^2.3.1: + version "2.3.1" + resolved "https://registry.yarnpkg.com/punycode.js/-/punycode.js-2.3.1.tgz#6b53e56ad75588234e79f4affa90972c7dd8cdb7" + integrity sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA== + punycode@^2.1.0: version "2.3.1" resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5" @@ -6601,6 +6658,11 @@ typescript@5.4.5: resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.4.5.tgz#42ccef2c571fdbd0f6718b1d1f5e6e5ef006f611" integrity sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ== +uc.micro@^2.0.0, uc.micro@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-2.1.0.tgz#f8d3f7d0ec4c3dea35a7e3c8efa4cb8b45c9e7ee" + integrity sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A== + ufo@^1.5.3: version "1.5.3" resolved "https://registry.yarnpkg.com/ufo/-/ufo-1.5.3.tgz#3325bd3c977b6c6cd3160bf4ff52989adc9d3344"