Skip to content

Commit

Permalink
Add 403 and 410 http error codes (#382)
Browse files Browse the repository at this point in the history
Co-authored-by: UltralyticsAssistant <[email protected]>
  • Loading branch information
glenn-jocher and UltralyticsAssistant authored Jan 25, 2025
1 parent cd5bde7 commit e0408a0
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
2 changes: 1 addition & 1 deletion actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
# ├── test_summarize_pr.py
# └── ...

__version__ = "0.0.47"
__version__ = "0.0.48"
17 changes: 13 additions & 4 deletions actions/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,18 @@
"Referer": "https://www.google.com/",
"Origin": "https://www.google.com/",
}

# HTTP status codes treated as a failed response when checking a URL.
# 403 (Forbidden) is deliberately left out: such URLs typically still work in a browser.
BAD_HTTP_CODES = frozenset(
    (
        404,  # Not Found: the requested resource does not exist
        405,  # Method Not Allowed: the HTTP method is not supported for this endpoint
        410,  # Gone: the resource was permanently removed
        500,  # Internal Server Error: the server encountered an error
        502,  # Bad Gateway: the upstream server sent an invalid response
        503,  # Service Unavailable: the server is temporarily unable to handle the request
        504,  # Gateway Timeout: the upstream server did not respond in time
    )
)
URL_IGNORE_LIST = frozenset(
{
"localhost",
Expand All @@ -44,7 +55,6 @@
"storage.googleapis.com", # private GCS buckets
}
)

URL_PATTERN = re.compile(
r"\[([^]]+)]\(([^)]+)\)" # Matches Markdown links [text](url)
r"|"
Expand Down Expand Up @@ -85,7 +95,6 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):

if check:
requester = session or requests
bad_codes = {404, 410, 500, 502, 503, 504}
kwargs = {"timeout": timeout, "allow_redirects": True}
if not session:
kwargs["headers"] = REQUESTS_HEADERS
Expand All @@ -94,7 +103,7 @@ def is_url(url, session=None, check=True, max_attempts=3, timeout=2):
try:
# Try HEAD first, then GET if needed
for method in (requester.head, requester.get):
if method(url, stream=method == requester.get, **kwargs).status_code not in bad_codes:
if method(url, stream=method == requester.get, **kwargs).status_code not in BAD_HTTP_CODES:
return True
return False
except Exception:
Expand Down

0 comments on commit e0408a0

Please sign in to comment.