alastair · agateblue · Jun 18, 2016 · Jun 18, 2016
diff --git a/docs/cache.rst b/docs/cache.rst
@@ -0,0 +1,97 @@
+Cache
+~~~~~
+
+For various reasons, you'll probably want to reduce the number of queries you run against the public Musicbrainz API endpoint:
+
+- Since requests are throttled, if you send too much requests in a short time, you may end up with 503 errors
+- The public endpoint is sometimes under heavy load and can become unavailable
+- The public endpoint is used by a lot of people. Less requests imply a faster service for everybody
+
+In order to reduce the number of queries, you can setup a cache to store the result, avoiding requesting data you already fetched.
+
+The cache workflow is a follows:
+
+1. When requesting the API, if a cache is configured, a cache key is created using the request URL and parameters
+2. After the API respond, the result is store in the cache under this key, before being returned
+3. Next time you make a request, a lookup will be made in the cache. If the value is cached, it will be returned directly (avoiding an extra HTTP request). If not present, the HTTP request is send normally (and the result will be stored in the cache)
+
+
+Enabling the cache
+------------------
+
+Caching is disabled by default, so you'll need to configure the cache:
+
+.. code-block:: python
+
+    import musicbrainzngs.cache
+
+    cache = musicbrainzngs.cache.DictCache()
+    musicbrainzngs.set_cache(cache)
+
+    mbid = 'musicbrainz-artist-id'
+
+    # here, the HTTP request is sent to the Musicbrainz API
+    real_result = musicbrainzngs.get_artist_by_id(mbid)
+
+    # then, if we send the same request, the cache will be used instead
+    cached_result = musicbrainzngs.get_artist_by_id(mbid)
+
+Creating your own cache
+-----------------------
+
+The ``DictCache`` used in the previous example is a really simple cache implementation
+built using a Python dictionary. Is will be wiped if you leave the Python process.
+
+In production environment, you'll probably want to use a persistent cache, for example
+by storing the data in Redis or Memcached.
+
+To do so, you just need to create your own cache class. Here is a dummy example of such an implementation
+for redis:
+
+.. code-block:: python
+
+    # int this example, we use https://pypi.python.org/pypi/redis
+    import redis
+    import musicbrainzngs.cache
+
+    redis_client = redis.StrictRedis(host='localhost', port=6379, db=0)
+
+
+    class RedisCache(musicbrainzngs.cache.BaseCache):
+
+        def __init__(self, prefix='musicbrainz:', expires=60 * 60 * 6):
+            # cache key will look like 'musicbrainz:ab876bnb987'
+            self.prefix = prefix
+            # Cache values will expire after 6 hours
+            self.expires = expires
+
+        def build_key(self, url, args, method):
+            # We want to prefix the cache key with a custom value
+            return self.prefix + super().build_key(key, url, args, method)
+
+        def get(self, key, url, args, method):
+            """Called to get a value from the cache"""
+            from_cache = redis_client.get(key)
+            if not from_cache:
+                # It's important to raise the exception here
+                # So musicbrainz can send the actual HTTP request
+                # if no value is found
+                raise musicbrainzngs.cache.NotInCache(key)
+            return from_cache
+
+        def set(self, key, value, url, args, method):
+            """Called to store a value in the cache"""
+            redis_client.setex(key, value, self.expires)
+
+    redis_cache = RedisCache()
+
+    musicbrainzngs.set_cache(redis_cache)
+
+Disabling the cache
+-------------------
+
+If, for some reason, you want to disable the cache, just run:
+
+.. code-block:: python
+
+    musicbrainzngs.set_cache(None)
diff --git a/docs/index.rst b/docs/index.rst
@@ -18,6 +18,7 @@ Contents
     installation
     usage
     api
+    cache
 
 .. currentmodule:: musicbrainzngs.musicbrainz
 

diff --git a/musicbrainzngs/cache.py b/musicbrainzngs/cache.py
@@ -0,0 +1,61 @@
+import hashlib
+
+
+class NotInCache(Exception):
+    pass
+
+
+class BaseCache(object):
+    def get(self, key, url, args, method):
+        raise NotImplementedError
+
+    def set(self, key, value, url, args, method):
+        raise NotImplementedError
+
+    def build_key(self, url, args, method):
+        """Since the URL contains all the arguments, we can build
+        the cache key by simply hashing the url"""
+        return hashlib.sha1(url.encode('utf-8')).hexdigest()
+
+
+class DictCache(BaseCache):
+    """A really basic implementation of a cache that will store
+    the cached values in a dictionary"""
+
+    def __init__(self):
+        self._cache = {}
+
+    def get(self, key, url, args, method):
+        try:
+            return self._cache[key]
+        except KeyError:
+            raise NotInCache(key)
+
+    def set(self, key, value, url, args, method):
+        self._cache[key] = value
+
+
+def _get_from_cache(cache_obj, url, args, method):
+    if not cache_obj:
+        #  no cache is configured, just return
+        raise NotInCache('Cache is not configured')
+
+    if method != 'GET':
+        # we don't want to cache respons for requests that modify data
+        raise NotInCache('{0} method is not cachable'.format(method))
+
+    cache_key = cache_obj.build_key(url=url, args=args, method=method)
+    return cache_obj.get(key=cache_key, url=url, args=args, method=method)
+
+
+def _set_cache(cache_obj, resp, url, args, method):
+    if not cache_obj:
+        #  no cache is configured, just return
+        return
+
+    if method != 'GET':
+        # we don't want to cache respons for requests that modify data
+        return
+
+    cache_key = cache_obj.build_key(url=url, args=args, method=method)
+    return cache_obj.set(key=cache_key, value=resp, url=url, args=args, method=method)
diff --git a/musicbrainzngs/musicbrainz.py b/musicbrainzngs/musicbrainz.py
@@ -19,6 +19,7 @@
 from musicbrainzngs import mbxml
 from musicbrainzngs import util
 from musicbrainzngs import compat
+from musicbrainzngs import cache
 
 _version = "0.7dev"
 _log = logging.getLogger("musicbrainzngs")
@@ -293,6 +294,7 @@ def _decorator(func):
 
 user = password = ""
 hostname = "musicbrainz.org"
+_cache = None
 _client = ""
 _useragent = ""
 
@@ -324,6 +326,11 @@ def set_hostname(new_hostname):
     global hostname
     hostname = new_hostname
 
+def set_cache(new_cache):
+    """Set the cache (defaults to None)"""
+    global _cache
+    _cache = new_cache
+
 # Rate limiting.
 
 limit_interval = 1.0
@@ -660,6 +667,12 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO,
 
     opener = compat.build_opener(*handlers)
 
+    try:
+        cache_value = cache._get_from_cache(_cache, url=url, args=newargs, method=method)
+        return parser_fun(cache_value)
+    except cache.NotInCache:
+        pass
+
     # Make request.
     req = _MusicbrainzHttpRequest(method, url, data)
     req.add_header('User-Agent', _useragent)
@@ -672,8 +685,11 @@ def _mb_request(path, method='GET', auth_required=AUTH_NO,
         req.add_header('Content-Length', '0')
     resp = _safe_read(opener, req, body)
 
+    cache._set_cache(_cache, resp, url=url, args=newargs, method=method)
+
     return parser_fun(resp)
 
+
 def _get_auth_type(entity, id, includes):
     """ Some calls require authentication. This returns
     True if a call does, False otherwise

diff --git a/test/test_cache.py b/test/test_cache.py
@@ -0,0 +1,81 @@
+import unittest
+import mock
+import hashlib
+import musicbrainzngs.cache
+
+
+class DummyCache(musicbrainzngs.cache.BaseCache):
+    pass
+
+
+class CacheTest(unittest.TestCase):
+    """ Test that we can cache results"""
+
+    def setUp(self):
+        musicbrainzngs.set_useragent("a", "1")
+        musicbrainzngs.set_rate_limit(False)
+        self.cache = musicbrainzngs.cache.BaseCache()
+        musicbrainzngs.set_cache(self.cache)
+
+    def tearDown(self):
+        musicbrainzngs.set_cache(None)
+
+    @mock.patch('musicbrainzngs.musicbrainz._safe_read', return_value='return_value')
+    @mock.patch('musicbrainzngs.musicbrainz.parser_fun')
+    @mock.patch('musicbrainzngs.cache.BaseCache.get', side_effect=musicbrainzngs.cache.NotInCache)
+    def test_cache_is_set_after_mb_request(self, *mocks):
+        """ Check the cache is called when set on the client"""
+        expected_kwargs = {
+            'args': [],
+            'method': 'GET',
+            'url': 'http://musicbrainz.org/ws/2/artist/mbid',
+        }
+        expected_kwargs['key'] = self.cache.build_key(**expected_kwargs)
+        h = hashlib.sha1(expected_kwargs['url'].encode('utf-8')).hexdigest()
+
+        self.assertEqual(expected_kwargs['key'], h)
+
+        with mock.patch('musicbrainzngs.cache.BaseCache.set') as cache:
+            musicbrainzngs.get_artist_by_id('mbid')
+            cache.assert_called_once_with(value='return_value', **expected_kwargs)
+
+    @mock.patch('musicbrainzngs.musicbrainz.parser_fun', side_effect=lambda v: v)
+    @mock.patch('musicbrainzngs.cache.BaseCache.set')
+    def test_cache_get_is_called_before_request(self, *mocks):
+        expected_kwargs = {
+            'args': [],
+            'method': 'GET',
+            'url': 'http://musicbrainz.org/ws/2/artist/mbid',
+        }
+
+        expected_kwargs['key'] = self.cache.build_key(**expected_kwargs)
+        h = hashlib.sha1(expected_kwargs['url'].encode('utf-8')).hexdigest()
+
+        self.assertEqual(expected_kwargs['key'], h)
+
+        with mock.patch('musicbrainzngs.cache.BaseCache.get', return_value='value') as cache:
+            r = musicbrainzngs.get_artist_by_id('mbid')
+            cache.assert_called_once_with(**expected_kwargs)
+            self.assertEqual(r, 'value')
+
+    @mock.patch('musicbrainzngs.musicbrainz._safe_read', return_value='return_value')
+    @mock.patch('musicbrainzngs.musicbrainz.parser_fun', side_effect=lambda v: v)
+    def test_basic_dict_cache(self, parser, read):
+        cache = musicbrainzngs.cache.DictCache()
+        musicbrainzngs.set_cache(cache)
+
+        # first, we populate the cache
+        r = musicbrainzngs.get_artist_by_id('mbid')
+        self.assertEqual(r, 'return_value')
+        read.assert_called_once()
+
+
+        with mock.patch.object(cache, 'get', wraps=cache.get) as mocked:
+            # now we make the same request, so the cache is used
+            r = musicbrainzngs.get_artist_by_id('mbid')
+            self.assertEqual(r, 'return_value')
+
+            mocked.assert_called_once()
+
+            # read should not have been called again
+            read.assert_called_once()
diff --git a/tox.ini b/tox.ini
@@ -1,4 +1,5 @@
 [tox]
 envlist=py26,py27,py32,py33,py34,py35
 [testenv]
-commands=python setup.py test
+deps=mock
+commands=python setup.py test {posargs}
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,6 +18,7 @@ Contents @@
         installation
         usage
         api
+        cache
     .. currentmodule:: musicbrainzngs.musicbrainz
@@ Expand Down @@