Skip to content

Commit

Permalink
html: improve inline math with whitespaces
Browse files (browse the repository at this point in the history)
  • Loading branch information
marph91 committed Jan 15, 2025
1 parent 4aa85aa commit b0dbb6c
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/markdown_lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ def markup_to_markdown(text: str, format_: str = "html") -> str:
# some needed preprocessing
soup = BeautifulSoup(text, "html.parser")
markdown_lib.html_preprocessing.div_checklists(soup)
markdown_lib.html_preprocessing.handle_newlines_in_math(soup)
markdown_lib.html_preprocessing.iframes_to_links(soup)
markdown_lib.html_preprocessing.streamline_tables(soup)
markdown_lib.html_preprocessing.synology_note_station_fix_img_src(soup)
Expand Down
19 changes: 19 additions & 0 deletions src/markdown_lib/html_preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
"""HTML preprocessing functions to prepare for Pandoc conversion."""

import logging
import string

from bs4 import BeautifulSoup

# Module-level logger, obtained under the fixed name "jimmy" (not __name__).
LOGGER = logging.getLogger("jimmy")


def div_checklists(soup: BeautifulSoup):
"""Convert div checklists to plain HTML checklists."""
Expand All @@ -18,6 +23,20 @@ def div_checklists(soup: BeautifulSoup):
child.name = "li"


def handle_newlines_in_math(soup: BeautifulSoup):
    """
    Clean up the TeX source stored in ``<annotation>`` elements, in place.

    Only annotations whose ``encoding`` attribute is ``application/x-tex``
    are touched; any other encoding is logged at debug level and skipped.

    For each supported annotation:

    - Strip trailing whitespace and trailing backslashes (i.e. trailing
      escaped whitespace).
    - Replace every blank line (two consecutive newlines) with an explicit
      TeX line break: a double backslash on its own line.

    Annotations without a single text child (empty, or containing nested
    markup) are left unchanged.
    """
    # Characters removed from the end of the TeX source.
    trailing_chars = "\\" + string.whitespace

    for annotation in soup.find_all("annotation"):
        if (encoding := annotation.attrs.get("encoding")) != "application/x-tex":
            LOGGER.debug('Unsupported annotation encoding "%s"', encoding)
            continue

        tex = annotation.string
        if tex is None:
            # `.string` is None when the tag is empty or has more than one
            # child; there is no plain TeX text to normalize in that case.
            continue

        annotation.string = tex.rstrip(trailing_chars).replace(
            "\n\n", "\n\\\\\n"
        )


def iframes_to_links(soup: BeautifulSoup):
"""Convert iframes to simple links."""
for iframe in soup.find_all("iframe"):
Expand Down
2 changes: 1 addition & 1 deletion test/data

0 comments on commit b0dbb6c

Please sign in to comment.