Skip to content

Commit

Permalink
Handle attached leading and trailing characters of links (fixes #55) (#…
Browse files: browse the repository at this point in the history
  • Loading branch information
jmholla authored Apr 30, 2022
1 parent ece1ac9 commit 14e24f2
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 16 deletions.
6 changes: 3 additions & 3 deletions markflow/_utils/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from markflow.typing import Number

INLINE_CODE_MARKER_REGEX = re.compile(r"(((?!<\\)`)+)")
FOOTNOTE_REGEX = re.compile(r"\[[^\[]+\]\[[^\]]+\]")
FOOTNOTE_REGEX = re.compile(r"[^\s\]\)]*\[[^\[]+\]\[[^\]]+\][^\s\[\(]*")
HTML_NEWLINE_REGEX = re.compile(r"<br ?/?>")
URL_REGEX = re.compile(r"\[[^\[]+\]\([^\)]+\)")
URL_REGEX = re.compile(r"[^\s\]\)]*\[[^\[]+\]\([^\)]+\)[^\s\[\(]*")


def join(split_text: List[str], leading_spaces: List[bool], width: Number) -> str:
Expand Down Expand Up @@ -134,7 +134,7 @@ def link_split(
leading_spaces: List[bool] = []
evaluates: List[bool] = []
last_end = 0
# Each iteration of this for loop operates operates on non-link text followed by
# Each iteration of this for loop operates on non-link text followed by
# link text.
for match in matches:
non_link_text = text[last_end : match.start()]
Expand Down
9 changes: 9 additions & 0 deletions tests/files/0018_in_urls_with_trailing_characters.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Ever since I sold the first MacBook, it felt like I was fighting Windows audio drivers
constantly. Between latency issues, BSODs due to Focusrite's unstable audio interface
drivers, more BSODs trying to get around [multiple audio interface setups in software](
https://help.ableton.com/hc/en-us/articles/209071609-Using-Aggregate-Devices-and-multiple-audio-interfaces),
I started steering away from in-the-box production and started looking into hardware.

[This is a really long URL](http://www.example.com/ssssssssssssssssssssssssssssssssssss),

[This is a really long footnote][fffffffffffffffffffffffffffffffffffffffffffffffffffffffff],
11 changes: 11 additions & 0 deletions tests/files/0018_out_urls_with_trailing_characters.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Ever since I sold the first MacBook, it felt like I was fighting Windows audio drivers
constantly. Between latency issues, BSODs due to Focusrite's unstable audio interface
drivers, more BSODs trying to get around [multiple audio interface setups in software](
https://help.ableton.com/hc/en-us/articles/209071609-Using-Aggregate-Devices-and-multiple-audio-interfaces),
I started steering away from in-the-box production and started looking into hardware.

[This is a really long URL](
http://www.example.com/ssssssssssssssssssssssssssssssssssss),

[This is a really long footnote][
fffffffffffffffffffffffffffffffffffffffffffffffffffffffff],
22 changes: 9 additions & 13 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_all_splits(self) -> None:
abc abc abc abc abc abc abc abc abc
``abc ``` abc[0][0] ``abc abc abc abc <br />
<br />
abc abc [url](http://example.com) abc[url][
abc abc [url](http://example.com)abc[url][
http://example.com]abc[url][URL][url][URL] <br/>
abc<br/>"""
)
Expand Down Expand Up @@ -96,16 +96,14 @@ def test_code_split_solo_tilda(self) -> None:
def test_link_split(self) -> None:
input_ = "a[URL][url] b [URL](http://example.com)c"
expected_split_text = [
"a",
"[URL][",
"a[URL][",
"url]",
"b",
"[URL](",
"http://example.com)",
"c",
"http://example.com)c",
]
expected_leading_spaces = [False, False, False, True, True, False, False]
expected_evaluates = [True, True, False, True, True, False, True]
expected_leading_spaces = [False, False, True, True, False]
expected_evaluates = [True, False, True, True, False]
split_text, leading_spaces, evaluates = link_split(input_, False)
assert len(split_text) == len(leading_spaces) == len(evaluates)
assert split_text == expected_split_text
Expand All @@ -115,16 +113,14 @@ def test_link_split(self) -> None:
def test_link_split_sentence(self) -> None:
input_ = "a[URL][url]. b [URL](http://example.com).c"
expected_split_text = [
"a",
"[URL][",
"a[URL][",
"url].",
"b",
"[URL](",
"http://example.com).",
"c",
"http://example.com).c",
]
expected_leading_spaces = [False, False, False, True, True, False, False]
expected_evaluates = [True, True, False, True, True, False, True]
expected_leading_spaces = [False, False, True, True, False]
expected_evaluates = [True, False, True, True, False]
split_text, leading_spaces, evaluates = link_split(input_, False)
assert len(split_text) == len(leading_spaces) == len(evaluates)
assert split_text == expected_split_text
Expand Down

0 comments on commit 14e24f2

Please sign in to comment.