From 6d223d6967d6f922ac4161a35ded9173901f03f3 Mon Sep 17 00:00:00 2001 From: Taneli Hukkinen <3275109+hukkin@users.noreply.github.com> Date: Sat, 26 Oct 2024 20:35:52 +0300 Subject: [PATCH] improve: Escape less than signs less --- docs/users/changelog.md | 1 + src/mdformat/renderer/_context.py | 3 ++- src/mdformat/renderer/_util.py | 13 +++++++++++++ tests/data/default_style.md | 13 +++++++++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/docs/users/changelog.md b/docs/users/changelog.md index 0773555..f4c249c 100644 --- a/docs/users/changelog.md +++ b/docs/users/changelog.md @@ -13,6 +13,7 @@ Note that there is currently no guarantee for a stable Markdown formatting style With this plugins can now read CLI arguments merged with values from `.mdformat.toml`. - Changed - Style: No longer escape square bracket enclosures. + - Style: No longer escape less than sign followed by space character. - Improved - Plugin interface: A trailing newline is added to fenced code blocks if a plugin fails to add it. diff --git a/src/mdformat/renderer/_context.py b/src/mdformat/renderer/_context.py index fc40147..936ab87 100644 --- a/src/mdformat/renderer/_context.py +++ b/src/mdformat/renderer/_context.py @@ -18,6 +18,7 @@ decimalify_leading, decimalify_trailing, escape_asterisk_emphasis, + escape_less_than_sign, escape_square_brackets, escape_underscore_emphasis, get_list_marker_type, @@ -118,7 +119,7 @@ def text(node: RenderTreeNode, context: RenderContext) -> str: text = escape_underscore_emphasis(text) # Escape emphasis/strong marker. # Escape link label and link ref enclosures text = escape_square_brackets(text, context.env["used_refs"]) - text = text.replace("<", "\\<") # Escape URI enclosure + text = escape_less_than_sign(text) # Escape URI enclosure and HTML. 
text = text.replace("`", "\\`") # Escape code span marker # Escape "&" if it starts a sequence that can be interpreted as diff --git a/src/mdformat/renderer/_util.py b/src/mdformat/renderer/_util.py index ff1bcdb..c9313e9 100644 --- a/src/mdformat/renderer/_util.py +++ b/src/mdformat/renderer/_util.py @@ -254,3 +254,16 @@ def escape_square_brackets(text: str, used_refs: Iterable[str]) -> str: RE_SQUARE_BRACKET = re.compile(r"[\[\]]") + + +def escape_less_than_sign(text: str) -> str: + """Escape less than sign ('<') to prevent unexpected HTML or autolink. + + Current heuristic to use: Always escape, except when + - followed by a space: This should be safe. Neither HTML nor autolink + allow space after the '<' sign + """ + return RE_LESS_THAN_SIGN__NO_FOLLOWING_SPACE.sub(r"\\\g<0>", text) + + +RE_LESS_THAN_SIGN__NO_FOLLOWING_SPACE = re.compile(r"<(?=[^ ]|$)") diff --git a/tests/data/default_style.md b/tests/data/default_style.md index 9c0be8e..653f4d3 100644 --- a/tests/data/default_style.md +++ b/tests/data/default_style.md @@ -495,3 +495,16 @@ Square bracket escapes [link-label]: /url . + +Less than sign escapes +. +< no escape < no escape, now escape < +< + +< +. +< no escape < no escape, now escape \< +\< + +\< +.