Skip to content

Commit

Permalink
html: support highlighting
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Jan 15, 2025
1 parent b0dbb6c commit 8c2f866
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 14 deletions.
5 changes: 4 additions & 1 deletion src/markdown_lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ def get_inline_tags(text: str, start_characters: list[str]) -> list[str]:
"markdown_strict"
# https://pandoc.org/chunkedhtml-demo/8.5-verbatim-code-blocks.html#extension-backtick_code_blocks
"+backtick_code_blocks"
# https://pandoc.org/chunkedhtml-demo/8.21-non-default-extensions.html#extension-mark
"+mark"
# https://pandoc.org/chunkedhtml-demo/8.9-tables.html#extension-pipe_tables
"+pipe_tables"
# https://pandoc.org/chunkedhtml-demo/8.12-inline-formatting.html#extension-strikeout
Expand All @@ -237,10 +239,11 @@ def markup_to_markdown(text: str, format_: str = "html") -> str:
# some needed preprocessing
soup = BeautifulSoup(text, "html.parser")
markdown_lib.html_preprocessing.div_checklists(soup)
markdown_lib.html_preprocessing.handle_newlines_in_math(soup)
markdown_lib.html_preprocessing.highlighting(soup)
markdown_lib.html_preprocessing.iframes_to_links(soup)
markdown_lib.html_preprocessing.streamline_tables(soup)
markdown_lib.html_preprocessing.synology_note_station_fix_img_src(soup)
markdown_lib.html_preprocessing.whitespace_in_math(soup)
text = str(soup)
text_md = pypandoc.convert_text(
text,
Expand Down
30 changes: 18 additions & 12 deletions src/markdown_lib/html_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,10 @@ def div_checklists(soup: BeautifulSoup):
child.name = "li"


def handle_newlines_in_math(soup: BeautifulSoup):
    """Normalize whitespace inside tex math annotations, in place.

    For every ``<annotation encoding="application/x-tex">`` element:

    - Strip trailing whitespace and trailing backslashes.
    - Replace each blank line (two consecutive newlines) with an explicit
      TeX line break (a double backslash on its own line).

    Annotations with another encoding are logged and left untouched.
    Annotations that do not expose a single text value are skipped as well.
    """
    # Loop-invariant set of characters removed from the end of the formula.
    trailing_chars = "\\" + string.whitespace

    for annotation in soup.find_all("annotation"):
        if (encoding := annotation.attrs.get("encoding")) != "application/x-tex":
            LOGGER.debug(f'Unsupported annotation encoding "{encoding}"')
            continue

        tex = annotation.string
        if tex is None:
            # ".string" is None when the tag is empty or has more than one
            # child. Calling ".rstrip()" on it would raise AttributeError and
            # abort the whole conversion, so leave such annotations as-is.
            LOGGER.debug("Skipping tex annotation without a single text child")
            continue

        annotation.string = tex.rstrip(trailing_chars).replace("\n\n", "\n\\\\\n")
def highlighting(soup: BeautifulSoup):
    """Strip every attribute from all ``<mark>`` tags of the soup, in place.

    Only the bare tags are kept. Turning them into highlighted markdown
    additionally needs the pandoc "mark" extension to be enabled in the
    output format.
    """
    marked_tags = soup.find_all("mark")
    for tag in marked_tags:
        # Rebind to a fresh dict, so "class", "style", ... are all dropped.
        tag.attrs = {}


def iframes_to_links(soup: BeautifulSoup):
Expand Down Expand Up @@ -186,3 +178,17 @@ def streamline_tables(soup: BeautifulSoup):
body.unwrap()

table.attrs = {}


def whitespace_in_math(soup: BeautifulSoup):
    """Normalize whitespace inside tex math annotations, in place.

    For every ``<annotation encoding="application/x-tex">`` element:

    - Strip trailing whitespace and trailing backslashes.
    - Replace each blank line (two consecutive newlines) with an explicit
      TeX line break (a double backslash on its own line).

    Annotations with another encoding are logged and left untouched.
    Annotations that do not expose a single text value are skipped as well.
    """
    # Loop-invariant set of characters removed from the end of the formula.
    trailing_chars = "\\" + string.whitespace

    for annotation in soup.find_all("annotation"):
        if (encoding := annotation.attrs.get("encoding")) != "application/x-tex":
            LOGGER.debug(f'Unsupported annotation encoding "{encoding}"')
            continue

        tex = annotation.string
        if tex is None:
            # ".string" is None when the tag is empty or has more than one
            # child. Calling ".rstrip()" on it would raise AttributeError and
            # abort the whole conversion, so leave such annotations as-is.
            LOGGER.debug("Skipping tex annotation without a single text child")
            continue

        annotation.string = tex.rstrip(trailing_chars).replace("\n\n", "\n\\\\\n")

0 comments on commit 8c2f866

Please sign in to comment.