Skip to content

Commit

Permalink
fix: removed custom code for conversion of markdown to plaintext
Browse files Browse the repository at this point in the history
  • Loading branch information
sshivaditya committed Sep 13, 2024
1 parent 6beefba commit 0d5d975
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 13 deletions.
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@
"@octokit/webhooks": "13.2.7",
"@sinclair/typebox": "0.32.33",
"@supabase/supabase-js": "^2.45.2",
"@types/markdown-it": "^14.1.2",
"@ubiquity-dao/ubiquibot-logger": "^1.3.0",
"dotenv": "16.4.5",
"markdown-it": "^14.1.0",
"markdown-it-plain-text": "^0.3.0",
"typebox-validators": "0.3.5",
"voyageai": "^0.0.1-5"
},
Expand Down
21 changes: 8 additions & 13 deletions src/adapters/utils/markdown-to-plaintext.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import markdownit from "markdown-it";
import plainTextPlugin from "markdown-it-plain-text";

/**
* Converts a Markdown string to plain text.
* @param markdown
Expand All @@ -7,17 +10,9 @@ export function markdownToPlainText(markdown: string | null): string | null {
if (!markdown) {
return markdown;
}
let text = markdown.replace(/^#{1,6}\s+/gm, ""); // Remove headers
text = text.replace(/\[([^\]]+)\]\([^\)]+\)/g, "$1"); // Inline links
text = text.replace(/!\[([^\]]*)\]\([^\)]+\)/g, "$1"); // Inline images
text = text.replace(/```[\s\S]*?```/g, (match) => match.replace(/```/g, "").trim()); // Code blocks
text = text.replace(/`([^`]+)`/g, "$1"); // Inline code
text = text.replace(/(\*\*|__)(.*?)\1/g, "$2"); // Bold
text = text.replace(/(\*|_)(.*?)\1/g, "$2"); // Italic
text = text.replace(/~~(.*?)~~/g, "$1"); // Strikethrough
text = text.replace(/^>\s+/gm, ""); // Block quotes
text = text.replace(/^\s*[-*]{3,}\s*$/gm, ""); // Horizontal rules
text = text.replace(/\n{3,}/g, "\n\n"); // Remove extra newlines
text = text.replace(/\s+/g, " ").trim(); // Remove extra spaces
return text;
const md = markdownit();
md.use(plainTextPlugin);
md.render(markdown);
//Package markdown-it-plain-text does not have types
return (md as any).plainText;
}
62 changes: 62 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1961,16 +1961,34 @@
expect "^29.0.0"
pretty-format "^29.0.0"

"@types/linkify-it@^5":
version "5.0.0"
resolved "https://registry.yarnpkg.com/@types/linkify-it/-/linkify-it-5.0.0.tgz#21413001973106cda1c3a9b91eedd4ccd5469d76"
integrity sha512-sVDA58zAw4eWAffKOaQH5/5j3XeayukzDk+ewSsnv3p4yJEZHCCzMDiZM8e0OUrRvmpGZ85jf4yDHkHsgBNr9Q==

"@types/lodash@^4.14.172":
version "4.17.4"
resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.17.4.tgz#0303b64958ee070059e3a7184048a55159fe20b7"
integrity sha512-wYCP26ZLxaT3R39kiN2+HcJ4kTd3U1waI/cY7ivWYqFP6pW3ZNpvi6Wd6PHZx7T/t8z0vlkXMg3QYLa7DZ/IJQ==

"@types/markdown-it@^14.1.2":
version "14.1.2"
resolved "https://registry.yarnpkg.com/@types/markdown-it/-/markdown-it-14.1.2.tgz#57f2532a0800067d9b934f3521429a2e8bfb4c61"
integrity sha512-promo4eFwuiW+TfGxhi+0x3czqTYJkG8qB17ZUJiVF10Xm7NLVRSLUsfRTU/6h1e24VvRnXCx+hG7li58lkzog==
dependencies:
"@types/linkify-it" "^5"
"@types/mdurl" "^2"

"@types/md5@^2.3.0":
version "2.3.5"
resolved "https://registry.yarnpkg.com/@types/md5/-/md5-2.3.5.tgz#481cef0a896e3a5dcbfc5a8a8b02c05958af48a5"
integrity sha512-/i42wjYNgE6wf0j2bcTX6kuowmdL/6PE4IVitMpm2eYKBUuYCprdcWVK+xEF0gcV6ufMCRhtxmReGfc6hIK7Jw==

"@types/mdurl@^2":
version "2.0.0"
resolved "https://registry.yarnpkg.com/@types/mdurl/-/mdurl-2.0.0.tgz#d43878b5b20222682163ae6f897b20447233bdfd"
integrity sha512-RGdgjQUZba5p6QEFAVx2OGb8rQDL/cPRG7GiedRzMcJ1tYnUANBncjbSB1NRGwbvjcPeikRABz2nshyPk1bhWg==

"@types/mute-stream@^0.0.4":
version "0.0.4"
resolved "https://registry.yarnpkg.com/@types/mute-stream/-/mute-stream-0.0.4.tgz#77208e56a08767af6c5e1237be8888e2f255c478"
Expand Down Expand Up @@ -3201,6 +3219,11 @@ emoji-regex@^9.2.2:
resolved "https://registry.yarnpkg.com/emoji-regex/-/emoji-regex-9.2.2.tgz#840c8803b0d8047f4ff0cf963176b32d4ef3ed72"
integrity sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==

entities@^4.4.0:
version "4.5.0"
resolved "https://registry.yarnpkg.com/entities/-/entities-4.5.0.tgz#5d268ea5e7113ec74c4d033b79ea5a35a488fb48"
integrity sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==

env-paths@^2.2.1:
version "2.2.1"
resolved "https://registry.yarnpkg.com/env-paths/-/env-paths-2.2.1.tgz#420399d416ce1fbe9bc0a07c62fa68d67fd0f8f2"
Expand Down Expand Up @@ -4937,6 +4960,13 @@ lines-and-columns@^1.1.6:
resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.2.4.tgz#eca284f75d2965079309dc0ad9255abb2ebc1632"
integrity sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==

linkify-it@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/linkify-it/-/linkify-it-5.0.0.tgz#9ef238bfa6dc70bd8e7f9572b52d369af569b421"
integrity sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==
dependencies:
uc.micro "^2.0.0"

[email protected]:
version "15.2.7"
resolved "https://registry.yarnpkg.com/lint-staged/-/lint-staged-15.2.7.tgz#97867e29ed632820c0fb90be06cd9ed384025649"
Expand Down Expand Up @@ -5117,6 +5147,23 @@ map-obj@^2.0.0:
resolved "https://registry.yarnpkg.com/map-obj/-/map-obj-2.0.0.tgz#a65cd29087a92598b8791257a523e021222ac1f9"
integrity sha512-TzQSV2DiMYgoF5RycneKVUzIa9bQsj/B3tTgsE3dOGqlzHnGIDaC7XBE7grnA+8kZPnfqSGFe95VHc2oc0VFUQ==

markdown-it-plain-text@^0.3.0:
version "0.3.0"
resolved "https://registry.yarnpkg.com/markdown-it-plain-text/-/markdown-it-plain-text-0.3.0.tgz#374abfcc1c95ae3d7a5e09365558d43184e1e7db"
integrity sha512-JT5x7oXZaTY6lwxsnxaIAT4hm5Q2Mi8dZoCaCNg3s7JXmDxY84D90OtV0US436UvN4zOUHoac4ExceTogydOLw==

markdown-it@^14.1.0:
version "14.1.0"
resolved "https://registry.yarnpkg.com/markdown-it/-/markdown-it-14.1.0.tgz#3c3c5992883c633db4714ccb4d7b5935d98b7d45"
integrity sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==
dependencies:
argparse "^2.0.1"
entities "^4.4.0"
linkify-it "^5.0.0"
mdurl "^2.0.0"
punycode.js "^2.3.1"
uc.micro "^2.1.0"

md5@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/md5/-/md5-2.3.0.tgz#c3da9a6aae3a30b46b7b0c349b87b110dc3bda4f"
Expand All @@ -5126,6 +5173,11 @@ md5@^2.3.0:
crypt "0.0.2"
is-buffer "~1.1.6"

mdurl@^2.0.0:
version "2.0.0"
resolved "https://registry.yarnpkg.com/mdurl/-/mdurl-2.0.0.tgz#80676ec0433025dd3e17ee983d0fe8de5a2237e0"
integrity sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==

memorystream@^0.3.1:
version "0.3.1"
resolved "https://registry.yarnpkg.com/memorystream/-/memorystream-0.3.1.tgz#86d7090b30ce455d63fbae12dda51a47ddcaf9b2"
Expand Down Expand Up @@ -5738,6 +5790,11 @@ prompts@^2.0.1:
kleur "^3.0.3"
sisteransi "^1.0.5"

punycode.js@^2.3.1:
version "2.3.1"
resolved "https://registry.yarnpkg.com/punycode.js/-/punycode.js-2.3.1.tgz#6b53e56ad75588234e79f4affa90972c7dd8cdb7"
integrity sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==

punycode@^2.1.0:
version "2.3.1"
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5"
Expand Down Expand Up @@ -6601,6 +6658,11 @@ [email protected]:
resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.4.5.tgz#42ccef2c571fdbd0f6718b1d1f5e6e5ef006f611"
integrity sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==

uc.micro@^2.0.0, uc.micro@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-2.1.0.tgz#f8d3f7d0ec4c3dea35a7e3c8efa4cb8b45c9e7ee"
integrity sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==

ufo@^1.5.3:
version "1.5.3"
resolved "https://registry.yarnpkg.com/ufo/-/ufo-1.5.3.tgz#3325bd3c977b6c6cd3160bf4ff52989adc9d3344"
Expand Down

0 comments on commit 0d5d975

Please sign in to comment.