Skip to content

Commit

Permalink
Switch from urllib to requests
Browse files Browse the repository at this point in the history
Rebased from alastair#199

Co-authored-by: Adrian Sampson <[email protected]>
Co-authored-by: Wieland Hoffmann <[email protected]>
  • Loading branch information
2 people authored and nikosmichas committed Nov 26, 2021
1 parent 8ed0187 commit dc12740
Show file tree
Hide file tree
Showing 18 changed files with 392 additions and 408 deletions.
7 changes: 7 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
0.7.2 (2021-11-26):
* Use requests to make HTTP requests (Adrian Sampson & Wieland Hoffmann)

Note that this changes the exception types of the `cause` attribute of
musicbrainzngs.musicbrainz.WebServiceError and its subclasses to instances
of requests.exceptions.RequestException and its subclasses.

0.7.1 (2020-01-11):
* include README file in pypi

Expand Down
26 changes: 9 additions & 17 deletions musicbrainzngs/caa.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
]

import json
import requests

from musicbrainzngs import compat
from musicbrainzngs import musicbrainz
Expand Down Expand Up @@ -63,32 +64,23 @@ def _caa_request(mbid, imageid=None, size=None, entitytype="release"):
))
musicbrainz._log.debug("GET request for %s" % (url, ))

# Set up HTTP request handler and URL opener.
httpHandler = compat.HTTPHandler(debuglevel=0)
handlers = [httpHandler]

opener = compat.build_opener(*handlers)
headers = {}
if musicbrainz._useragent != "":
headers['User-Agent'] = musicbrainz._useragent
musicbrainz._log.debug("requesting with UA %s" % musicbrainz._useragent)

# Make request.
req = musicbrainz._MusicbrainzHttpRequest("GET", url, None)
req = requests.Request("GET", url, headers=headers)
# Useragent isn't needed for CAA, but we'll add it if it exists
if musicbrainz._useragent != "":
req.add_header('User-Agent', musicbrainz._useragent)
musicbrainz._log.debug("requesting with UA %s" % musicbrainz._useragent)

resp = musicbrainz._safe_read(opener, req, None)
resp = musicbrainz._safe_read(req)

# TODO: The content type declared by the CAA for JSON files is
# 'applicaiton/octet-stream'. This is not useful to detect whether the
# content is JSON, so default to decoding JSON if no imageid was supplied.
# http://tickets.musicbrainz.org/browse/CAA-75
if imageid:
# If we asked for an image, return the image
return resp
return resp.content
else:
# Otherwise it's json
data = _unicode(resp)
return json.loads(data)
return resp.json()


def get_image_list(releaseid):
Expand Down
7 changes: 0 additions & 7 deletions musicbrainzngs/compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@

if is_py2:
from StringIO import StringIO
from urllib2 import HTTPPasswordMgr, HTTPDigestAuthHandler, Request,\
HTTPHandler, build_opener, HTTPError, URLError
from httplib import BadStatusLine, HTTPException
from urlparse import urlunparse
from urllib import urlencode, quote_plus

Expand All @@ -50,10 +47,6 @@
basestring = basestring
elif is_py3:
from io import StringIO
from urllib.request import HTTPPasswordMgr, HTTPDigestAuthHandler, Request,\
HTTPHandler, build_opener
from urllib.error import HTTPError, URLError
from http.client import HTTPException, BadStatusLine
from urllib.parse import urlunparse, urlencode, quote_plus

unicode = str
Expand Down
229 changes: 80 additions & 149 deletions musicbrainzngs/musicbrainz.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,20 @@
# This file is distributed under a BSD-2-Clause type license.
# See the COPYING file for more information.

import json
import re
import threading
import time
import logging
import socket
import hashlib
import locale
import sys
import json
import xml.etree.ElementTree as etree
from xml.parsers import expat
from warnings import warn
import requests
from requests.auth import HTTPDigestAuth

# Don't import Retry from urllib3 directly, this only works correctly the Retry
# class form the urllib3 package requests vendors in is used.
from requests.packages.urllib3.util.retry import Retry

from musicbrainzngs import mbxml
from musicbrainzngs import util
Expand All @@ -23,6 +25,28 @@
_version = "0.7.1"
_log = logging.getLogger("musicbrainzngs")


class MBRetry(Retry):
"""Retry class whose backoff time is::
{ number of observed errors } * 2.0
"""

def get_backoff_time(self):
""" Formula for computing the current backoff
:rtype: float
"""
if self._observed_errors == 0:
return 0

return self._observed_errors * 2.0


_retry = MBRetry(total=8,
status_forcelist=[500, 502, 503])

LUCENE_SPECIAL = r'([+\-&|!(){}\[\]\^"~*?:\\\/])'

# Constants for validation.
Expand Down Expand Up @@ -419,126 +443,9 @@ def __call__(self, *args, **kwargs):
self.remaining_requests -= 1.0
return self.fun(*args, **kwargs)

# From pymb2
class _RedirectPasswordMgr(compat.HTTPPasswordMgr):
def __init__(self):
self._realms = { }

def find_user_password(self, realm, uri):
# ignoring the uri parameter intentionally
try:
return self._realms[realm]
except KeyError:
return (None, None)

def add_password(self, realm, uri, username, password):
# ignoring the uri parameter intentionally
self._realms[realm] = (username, password)

class _DigestAuthHandler(compat.HTTPDigestAuthHandler):
def get_authorization (self, req, chal):
qop = chal.get ('qop', None)
if qop and ',' in qop and 'auth' in qop.split (','):
chal['qop'] = 'auth'

return compat.HTTPDigestAuthHandler.get_authorization (self, req, chal)

def _encode_utf8(self, msg):
"""The MusicBrainz server also accepts UTF-8 encoded passwords."""
encoding = sys.stdin.encoding or locale.getpreferredencoding()
try:
# This works on Python 2 (msg in bytes)
msg = msg.decode(encoding)
except AttributeError:
# on Python 3 (msg is already in unicode)
pass
return msg.encode("utf-8")

def get_algorithm_impls(self, algorithm):
# algorithm should be case-insensitive according to RFC2617
algorithm = algorithm.upper()
# lambdas assume digest modules are imported at the top level
if algorithm == 'MD5':
H = lambda x: hashlib.md5(self._encode_utf8(x)).hexdigest()
elif algorithm == 'SHA':
H = lambda x: hashlib.sha1(self._encode_utf8(x)).hexdigest()
# XXX MD5-sess
KD = lambda s, d: H("%s:%s" % (s, d))
return H, KD

class _MusicbrainzHttpRequest(compat.Request):
""" A custom request handler that allows DELETE and PUT"""
def __init__(self, method, url, data=None):
compat.Request.__init__(self, url, data)
allowed_m = ["GET", "POST", "DELETE", "PUT"]
if method not in allowed_m:
raise ValueError("invalid method: %s" % method)
self.method = method

def get_method(self):
return self.method


# Core (internal) functions for calling the MB API.

def _safe_read(opener, req, body=None, max_retries=8, retry_delay_delta=2.0):
"""Open an HTTP request with a given URL opener and (optionally) a
request body. Transient errors lead to retries. Permanent errors
and repeated errors are translated into a small set of handleable
exceptions. Return a bytestring.
"""
last_exc = None
for retry_num in range(max_retries):
if retry_num: # Not the first try: delay an increasing amount.
_log.info("retrying after delay (#%i)" % retry_num)
time.sleep(retry_num * retry_delay_delta)

try:
if body:
f = opener.open(req, body)
else:
f = opener.open(req)
return f.read()

except compat.HTTPError as exc:
if exc.code in (400, 404, 411):
# Bad request, not found, etc.
raise ResponseError(cause=exc)
elif exc.code in (503, 502, 500):
# Rate limiting, internal overloading...
_log.info("HTTP error %i" % exc.code)
elif exc.code in (401, ):
raise AuthenticationError(cause=exc)
else:
# Other, unknown error. Should handle more cases, but
# retrying for now.
_log.info("unknown HTTP error %i" % exc.code)
last_exc = exc
except compat.BadStatusLine as exc:
_log.info("bad status line")
last_exc = exc
except compat.HTTPException as exc:
_log.info("miscellaneous HTTP exception: %s" % str(exc))
last_exc = exc
except compat.URLError as exc:
if isinstance(exc.reason, socket.error):
code = exc.reason.errno
if code == 104: # "Connection reset by peer."
continue
raise NetworkError(cause=exc)
except socket.timeout as exc:
_log.info("socket timeout")
last_exc = exc
except socket.error as exc:
if exc.errno == 104:
continue
raise NetworkError(cause=exc)
except IOError as exc:
raise NetworkError(cause=exc)

# Out of retries!
raise NetworkError("retried %i times" % max_retries, last_exc)

# Get the XML parsing exceptions to catch. The behavior chnaged with Python 2.7
# and ElementTree 1.3.
if hasattr(etree, 'ParseError'):
Expand Down Expand Up @@ -607,6 +514,26 @@ def set_format(fmt="xml"):
raise ValueError("invalid format: %s" % fmt)


def _safe_read(request):
"""
:param request:
"""
# Make request (with retries).
with requests.Session() as session:
adapter = requests.adapters.HTTPAdapter(max_retries=_retry)
session.mount('http://', adapter)
session.mount('https://', adapter)
try:
resp = session.send(request.prepare(), allow_redirects=True)
resp.raise_for_status()
return resp
except requests.HTTPError as exc:
if exc.response.status_code == 401:
raise AuthenticationError(cause=exc)
raise ResponseError(cause=exc)
except requests.RequestException as exc:
raise NetworkError(cause=exc)

@_rate_limit
def _mb_request(path, method='GET', auth_required=AUTH_NO,
client_required=False, args=None, data=None, body=None):
Expand All @@ -633,6 +560,17 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO,
if ws_format != "xml":
args["fmt"] = ws_format

headers = {
"User-Agent": _useragent
}

if body:
headers['Content-Type'] = 'application/xml; charset=UTF-8'
else:
# Explicitly indicate zero content length if no request data
# will be sent (avoids HTTP 411 error).
headers['Content-Length'] = '0'

# Convert args from a dictionary to a list of tuples
# so that the ordering of elements is stable for easy
# testing (in this case we order alphabetically)
Expand All @@ -644,7 +582,7 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO,
newargs.append((key, value))

# Construct the full URL for the request, including hostname and
# query string.
# query string
url = compat.urlunparse((
'https' if https else 'http',
hostname,
Expand All @@ -653,12 +591,6 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO,
compat.urlencode(newargs),
''
))
_log.debug("%s request for %s" % (method, url))

# Set up HTTP request handler and URL opener.
httpHandler = compat.HTTPHandler(debuglevel=0)
handlers = [httpHandler]

# Add credentials if required.
add_auth = False
if auth_required == AUTH_YES:
Expand All @@ -673,26 +605,25 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO,
add_auth = True

if add_auth:
passwordMgr = _RedirectPasswordMgr()
authHandler = _DigestAuthHandler(passwordMgr)
authHandler.add_password("musicbrainz.org", (), user, password)
handlers.append(authHandler)
auth_handler = HTTPDigestAuth(user, password)
else:
auth_handler = None

opener = compat.build_opener(*handlers)
allowed_m = ["GET", "POST", "DELETE", "PUT"]
if method not in allowed_m:
raise ValueError("invalid method: %s" % method)

# Make request.
req = _MusicbrainzHttpRequest(method, url, data)
req.add_header('User-Agent', _useragent)
_log.debug("requesting with UA %s" % _useragent)
if body:
req.add_header('Content-Type', 'application/xml; charset=UTF-8')
elif not data and not req.has_header('Content-Length'):
# Explicitly indicate zero content length if no request data
# will be sent (avoids HTTP 411 error).
req.add_header('Content-Length', '0')
resp = _safe_read(opener, req, body)
req = requests.Request(
method,
url,
auth=auth_handler,
headers=headers,
data=body,
)

resp = _safe_read(req)

return parser_fun(resp)
return parser_fun(resp.content)


def _get_auth_type(entity, id, includes):
Expand Down Expand Up @@ -733,7 +664,7 @@ def _do_mb_query(entity, id, includes=[], params={}):
return _mb_request(path, 'GET', auth_required, args=args)

def _do_mb_search(entity, query='', fields={},
limit=None, offset=None, strict=False):
limit=None, offset=None, strict=False):
"""Perform a full-text search on the MusicBrainz search server.
`query` is a lucene query string when no fields are set,
but is escaped when any fields are given. `fields` is a dictionary
Expand All @@ -746,7 +677,7 @@ def _do_mb_search(entity, query='', fields={},
clean_query = util._unicode(query)
if fields:
clean_query = re.sub(LUCENE_SPECIAL, r'\\\1',
clean_query)
clean_query)
if strict:
query_parts.append('"%s"' % clean_query)
else:
Expand Down Expand Up @@ -984,7 +915,7 @@ def search_releases(query='', limit=None, offset=None, strict=False, **fields):

@_docstring_search("release-group")
def search_release_groups(query='', limit=None, offset=None,
strict=False, **fields):
strict=False, **fields):
"""Search for release groups and return a dict
with a 'release-group-list' key.
Expand Down
Loading

0 comments on commit dc12740

Please sign in to comment.