|
4 | 4 | import functools |
5 | 5 | import logging |
6 | 6 | import re |
| 7 | +import time |
7 | 8 | from collections.abc import Iterable |
8 | 9 | from re import Match |
9 | 10 | from typing import Any, Callable, Optional, Union |
10 | 11 |
|
11 | 12 | from urllib3.util import Retry |
12 | 13 |
|
| 14 | +try: |
| 15 | + # If urllib3~=2.0 is installed |
| 16 | + from urllib3 import BaseHTTPResponse |
| 17 | +except ImportError: |
| 18 | + # If urllib3~=1.0 is installed |
| 19 | + from urllib3 import HTTPResponse as BaseHTTPResponse |
| 20 | + |
| 21 | + |
13 | 22 | from ogr.abstract import AnyComment, Comment |
14 | 23 |
|
| 24 | +logger = logging.getLogger(__name__) |
| 25 | + |
15 | 26 |
|
class CustomRetry(Retry):
    """
    Custom Retry class that includes 403 in RETRY_AFTER_STATUS_CODES
    so that Retry-After headers are respected for 403 errors.

    Also handles GitHub rate limit headers (X-RateLimit-Reset) when
    Retry-After is not present.
    """

    # Include 403 in the list of status codes that respect Retry-After header
    RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503, 403])

    # Status codes for which the X-RateLimit-Reset header is consulted.
    _RATE_LIMIT_STATUS_CODES = frozenset([403, 429])

    @staticmethod
    def _has_remaining_quota(remaining: Optional[str]) -> bool:
        """
        Tell whether an X-RateLimit-Remaining value reports unused quota.

        Args:
            remaining: Raw header value, or None when the header is absent.

        Returns:
            True only if the value parses to a number greater than zero.
            A missing or unparsable value yields False, so that callers keep
            honouring X-RateLimit-Reset when the quota is unknown.
        """
        if remaining is None:
            return False
        try:
            return float(remaining) > 0
        except ValueError:
            return False

    def get_ratelimit_reset(self, response: BaseHTTPResponse) -> Optional[float]:
        """
        Get retry wait time from X-RateLimit-Reset header.

        Rate limit reset header (Unix timestamp) which is converted
        to seconds to wait, compatible with Retry-After format.

        The header is ignored when X-RateLimit-Remaining reports quota that
        is still available: GitHub's guidance is to wait for the reset time
        only when the remaining quota is 0, so a 403/429 with quota left
        (e.g. a plain permission error) must not block until the reset.

        Args:
            response: HTTP response object that may contain X-RateLimit-Reset header.

        Returns:
            Number of seconds to wait before retrying (never negative), or
            None if the response is not a 403/429, the header is not present
            or cannot be parsed, or the quota is not exhausted.
        """
        # Only check X-RateLimit-Reset for rate limit responses
        if response.status not in self._RATE_LIMIT_STATUS_CODES:
            return None

        # urllib3 HTTPHeaderDict does a case-insensitive lookup
        # https://github.com/urllib3/urllib3/blob/83f8643ffb5b7f197457379148e2fa118ab0fcdc/src/urllib3/_collections.py#L215-L217
        headers = response.headers
        rate_limit_reset = headers.get("X-RateLimit-Reset")
        if not rate_limit_reset:
            return None

        try:
            reset_timestamp = float(rate_limit_reset)
        except ValueError:
            reset_timestamp = float("nan")

        # float() also accepts "inf"/"nan"; neither is a usable timestamp and
        # an infinite wait would make time.sleep() raise. The chained
        # comparison is False for nan as well as for +/-inf.
        infinity = float("inf")
        if not -infinity < reset_timestamp < infinity:
            logger.error(
                f"Could not parse X-RateLimit-Reset header '{rate_limit_reset}'",
            )
            return None

        if self._has_remaining_quota(headers.get("X-RateLimit-Remaining")):
            return None

        # A reset time in the past means there is nothing left to wait for.
        return max(0.0, reset_timestamp - time.time())

    def sleep_for_retry(self, response: BaseHTTPResponse) -> bool:
        """
        Override to handle X-RateLimit-Reset header in addition to Retry-After.

        Choose between Retry-After and X-RateLimit-Reset header.
        If both are present, choose the longer wait time.

        Args:
            response: HTTP response object that may contain Retry-After or X-RateLimit-Reset header.

        Returns:
            True if this method slept for the header-provided wait time.
            False if neither header yields a positive wait time, or if the
            wait time is too large for time.sleep() to handle.
        """
        retry_after = self.get_retry_after(response)
        rate_limit_reset = self.get_ratelimit_reset(response)

        if not retry_after and not rate_limit_reset:
            return False

        # On a tie max() keeps the first candidate, i.e. Retry-After.
        wait_time, header = max(
            (
                (retry_after or 0, "Retry-After"),
                (rate_limit_reset or 0, "X-RateLimit-Reset"),
            ),
            key=lambda x: x[0],
        )
        logger.error(
            f"Rate limit hit (status {response.status}). "
            f"Waiting {wait_time}s until reset ({header} header)",
        )
        try:
            time.sleep(wait_time)
        except OverflowError:
            # An absurdly large header value (e.g. a timestamp in the wrong
            # unit) cannot be slept on; report "did not sleep" instead of
            # crashing in the middle of the retry handling.
            logger.error(
                f"Wait time {wait_time}s from {header} header is too large to sleep for",
            )
            return False
        return True
27 | 109 |
|
28 | 110 |
|
29 | 111 | def filter_comments( |
|
0 commit comments