Skip to content

Commit

Permalink
Fix parenthesis in broken link checks (#380)
Browse files Browse the repository at this point in the history
Signed-off-by: Glenn Jocher <[email protected]>
  • Loading branch information
glenn-jocher authored Jan 24, 2025
1 parent abdf80d commit 8ef38fc
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
2 changes: 1 addition & 1 deletion actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
# ├── test_summarize_pr.py
# └── ...

__version__ = "0.0.45"
__version__ = "0.0.46"
10 changes: 7 additions & 3 deletions actions/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,14 @@
)

# Compiled regex with two alternatives: a Markdown link `[text](url)` or a plaintext URL.
# NOTE(review): this span is a rendered diff hunk without +/- markers, so two fragments
# (the Markdown-link pattern and the optional-path pattern) each appear twice in a row.
# Presumably the first of each pair is the removed line and the second the added one
# (the hunk header reports "7 additions & 3 deletions") — confirm against the repository
# before treating the concatenation below as the live pattern.
URL_PATTERN = re.compile(
# Markdown-link form whose URL group `[^)]+` stops at the first ")" it meets, so a URL
# that itself contains ")" is cut short.
r"\[([^]]+)]\(([^)]+)\)" # Matches Markdown links [text](url)
# Markdown-link form whose URL group is lazy (`.*?`) and ends only at a ")" that is
# followed by whitespace or end of string, so ")" inside the URL is kept.
# NOTE(review): a link followed directly by punctuation (e.g. "[a](u).") has no ")" that
# satisfies the lookahead at that position, and for "[a](u1), [b](u2) " the lazy group
# runs on to the second link's ")" — confirm this is acceptable for the callers.
r"\[([^]]+)]\((.*?)(?=\)(?:\s|$))\)" # Markdown links with lookahead for space/end
r"|"
r"(" # Start capturing group for plaintext URLs
r"(?:https?://)?" # Optional http:// or https://
r"(?:www\.)?" # Optional www.
r"(?:[\w.-]+)?" # Optional domain name and subdomains
r"\.[a-zA-Z]{2,}" # TLD
# Path variant that excludes ")" (along with whitespace, quotes and "]") from the path.
r"(?:/[^\s\"')\]]*)?" # Optional path
# Path variant that allows ")" in the path.
# NOTE(review): with ")" allowed, a plaintext URL written inside parentheses, e.g.
# "(see https://x.com/a)", will include the trailing ")" in the match — verify whether
# the consumer of this pattern trims it.
r"(?:/[^\s\"'\]]*)?" # Optional path
r")"
)

Expand Down Expand Up @@ -128,4 +128,8 @@ def check_links_in_string(text, verbose=True, return_bad=False):


if __name__ == "__main__":
print(is_url("https://ultralytics.com/images/bus.jpg"))
url = "https://ultralytics.com/images/bus.jpg"
string = f"This is a string with a [Markdown link]({url}) inside it."

print(f"is_url(): {is_url(url)}")
print(f"check_links_in_string(): {check_links_in_string(string)}")
16 changes: 16 additions & 0 deletions tests/test_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"https://www.reddit.com/r/Ultralytics/comments/1fw3605/release_megathread/",
"https://www.kaggle.com/models/ultralytics/yolo11",
"https://apps.apple.com/xk/app/ultralytics/id1583935240",
"https://en.wikipedia.org/wiki/Active_learning_(machine_learning)", # parentheses in link
]


Expand All @@ -38,6 +39,21 @@ def test_is_url():
assert is_url(url), f"URL check failed: {url}"


def test_links_in_string_func():
    """Check that plain URLs embedded in one string all pass check_links_in_string()."""
    # Interleave filler text between URLs so each one must be found inside running prose.
    joined_urls = " abc ".join(URLS)
    assert check_links_in_string(joined_urls)


def test_markdown_links_in_string_func():
    """Check that URLs wrapped as Markdown links all pass check_links_in_string()."""
    # Wrap every URL as [text](url), then separate the links with filler text.
    markdown_links = [f"[text]({url})" for url in URLS]
    joined_links = " abc ".join(markdown_links)
    assert check_links_in_string(joined_links)


def test_html_links_in_string_func():
    """Check that URLs wrapped in HTML anchor tags all pass check_links_in_string()."""
    # Wrap every URL as <a href="url">text</a>, then separate the anchors with filler text.
    html_anchors = [f'<a href="{url}">text</a>' for url in URLS]
    joined_anchors = " abc ".join(html_anchors)
    assert check_links_in_string(joined_anchors)


def test_html_links(verbose):
"""Tests the validity of URLs within HTML anchor tags and returns any invalid URLs found."""
text = "Visit <a href='https://err.com'>our site</a> or <a href=\"http://test.org\">test site</a>"
Expand Down

0 comments on commit 8ef38fc

Please sign in to comment.