In [1]:
import os
try:                  # get to root of project
    print(od)
except NameError:
    od = os.getcwd()
    
os.chdir(od + '/../..')
print(os.getcwd())

/home/mike/py/mylib/CacheGrab


In [2]:
import collections
from collections import Hashable
from hashlib import md5
import requests
import json
from time import sleep
# import decorator # basically obsolete
import functools

In [3]:
type(requests)

module

In [4]:
import cachegrab as cg

In [5]:
from cachegrab.utils import dummy

# Architecture

Basic idea is to create a caching decorator that allows any arbitrary function to be cached effortlessly and in an extensible manner

```
@cachegrab(someCacherProperties='whatever')
def my_complex_function(target, foo=5, bar='stuff'):
    do_a_slow_thing()
    return myValue
```    
In other words, like @lru_cache, but the backend is configurable. Or, like requests_cache, but the function is configurable. Best of both worlds. 

In order to do this, we must do several things: 

* connect to some caching engine
* throw some useful errors if this connection fails
* wrap an arbitrary function call
* freeze the signature of that call
* use a hashable variant (or some voodoo if strict=False) of the signature to key the cache
* optionally store the signature with the data
* sensibly return the value
* maybe even do some metrics along the way

Things we want to be able to configure:

* Backend storage format (json, sql, nosql, etc)
* Cache replacement policy (everything, LRU, random, etc)
* timeout
* blocking properties
* expiration/refresh time/conditions

Bonus round: 

* interoperate with iterables
* tqdm
* parallelize! 

Stages:

* "dumb" fully json-based disk caching
* better file management
* expiration
* more sophisticated engine

In [6]:
resp = dummy.get('foo', params=None)
print(type(resp))
resp.json()

<class 'requests.models.Response'>


{'RetrievedDate': '2018-08-15T19:07:52.833852',
 'done': True,
 'records': [{'AccountId': '1337BEEF',
   'Amount': 31415.926,
   'CreatedDate': '2015-07-13T07:30:24.000+0000',
   'IsClosed': True,
   'IsWon': False,
   'Name': 'FooBar Opportunity AAA',
   'attributes': {'type': 'Opportunity', 'url': '/services/data/foo/bar'}}],
 'totalSize': 1}

In [7]:
def try_json(*args, **kwargs):
    # Pack args and kwargs into a JSON. If this fails, it's because one or more of the arguments is not serializable
    print("Args: {}".format(args))
    print("kwArgs: {}".format(kwargs))
    data = {'args': args, 'kwargs': kwargs}
    j = json.dumps(data)
    return(j)

try_json('foo', 'bar', spam=2, eggs=True, pi=3.14)

Args: ('foo', 'bar')
kwArgs: {'pi': 3.14, 'spam': 2, 'eggs': True}


'{"args": ["foo", "bar"], "kwargs": {"pi": 3.14, "spam": 2, "eggs": true}}'

In [8]:
jtest = try_json('foo', 'bar', spam=2, eggs=True, pi=3.14)
jdict = json.loads(jtest)
print(jdict['args'])
print(jdict['kwargs'])

try_json(*jdict['args'], **jdict['kwargs'])

Args: ('foo', 'bar')
kwArgs: {'pi': 3.14, 'spam': 2, 'eggs': True}
['foo', 'bar']
{'pi': 3.14, 'spam': 2, 'eggs': True}
Args: ('foo', 'bar')
kwArgs: {'pi': 3.14, 'spam': 2, 'eggs': True}


'{"args": ["foo", "bar"], "kwargs": {"pi": 3.14, "spam": 2, "eggs": true}}'

In [9]:
class memoized(object):
    '''Basic Memoizing Decorator. Caches a function's return value each time it is called.
    If called later with the same arguments, the cached value is returned
    (not reevaluated).
    From the python website, https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
    '''

    def __init__(self, func):
        self.func = func
        self.cache = {}

    def __call__(self, *args):
        if not isinstance(args, collections.Hashable):
            # uncacheable. a list, for instance.
            # better to not cache than blow up.
            return self.func(*args)
        if args in self.cache:
            return self.cache[args]
        else:
            value = self.func(*args)
            self.cache[args] = value
            return value

    def __repr__(self):
        '''Return the function's docstring.'''
        return self.func.__doc__

    def __get__(self, obj, objtype):
        '''Support instance methods.'''
        return functools.partial(self.__call__, obj)


In [10]:
class hmemoized(object):
    '''Hashing Memoizing Decorator. Caches a function's return value each time it is called.
    If called later with the same arguments, the cached value is returned
    (not reevaluated).
    From the python website, https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
    '''

    def __init__(self, func):
        self.func = func
        print(self.func)
        self.cache = {}

    def __call__(self, *args, **kwargs):
        key = try_json(*args, **kwargs) # will raise a TypeError from JSON if any args or kwargs are not serializable
        if key in self.cache:
            return self.cache[key]
        else:
            value = self.func(*args, **kwargs)
            self.cache[key] = value
            return value

    def __repr__(self):
        '''Return the function's docstring.'''
        return self.func.__doc__

    def __get__(self, obj, objtype):
        '''Support instance methods.'''
        return functools.partial(self.__call__, obj)

In [11]:
dd = requests.Response()

In [12]:
dd._content = bytes('fooz', 'utf8') 

In [13]:
dd.text

'fooz'

In [14]:
x = 'fooz'

In [15]:
@memoized
def get(url, params=None, delay=1):
    """Some sort of docstring"""
    return dummy.get(url, params=params, delay=delay)

In [16]:
get('foo', None, 1).json()

{'RetrievedDate': '2018-08-15T19:07:53.382231',
 'done': True,
 'records': [{'AccountId': '1337BEEF',
   'Amount': 31415.926,
   'CreatedDate': '2015-07-13T07:30:24.000+0000',
   'IsClosed': True,
   'IsWon': False,
   'Name': 'FooBar Opportunity AAA',
   'attributes': {'type': 'Opportunity', 'url': '/services/data/foo/bar'}}],
 'totalSize': 1}

In [63]:
@hmemoized
def get(url, params=None, delay=1, **kwargs):
    """Some sort of docstring"""
    return dummy.get(url, params=params, delay=delay)

<function get at 0x7f88ed972730>


In [64]:
get(2, {'stuff':1} , spam=True, eggs='yes').json()

Args: (2, {'stuff': 1})
kwArgs: {'spam': True, 'eggs': 'yes'}


{'RetrievedDate': '2018-08-15T19:13:11.864617',
 'done': True,
 'records': [{'AccountId': '1337BEEF',
   'Amount': 31415.926,
   'CreatedDate': '2015-07-13T07:30:24.000+0000',
   'IsClosed': True,
   'IsWon': False,
   'Name': 'FooBar Opportunity AAA',
   'attributes': {'type': 'Opportunity', 'url': '/services/data/foo/bar'}}],
 'totalSize': 1}

In [19]:
# get({'stuff': collections})

In [20]:
for obj in collections.__all__:
    cont = getattr(collections, obj)
    try:
        data = cont()
    except:
        pass
    try:
        data = cont('foo', 'bar')
    except:
        pass
    print("{} {}".format(obj, isinstance(data, collections.Hashable)))

deque False
defaultdict False
namedtuple True
UserDict False
UserList False
UserString False
Counter False
OrderedDict False
ChainMap False
Awaitable False
Coroutine False
AsyncIterable False
AsyncIterator False
Hashable False
Iterable False
Iterator False
Generator False
Sized False
Container False
Callable False
Set False
MutableSet False
Mapping False
MutableMapping False
MappingView False
KeysView False
ItemsView False
ValuesView False
Sequence False
MutableSequence False
ByteString False


In [21]:
isinstance({'foo':'bar'}, collections.Hashable)

False

In [22]:
frozenset({'a': 1, 'b':2}.items())

frozenset({('a', 1), ('b', 2)})

In [23]:
dd = {'a': 1, 'b': 2, 'c': [1,2]}
# frozenset(dd.items()) # TypeError

In [65]:
def my_deco(f):
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        print('calling decorated fn. args: {} kwargs: {}'.format(args, frozenset(kwargs.items())))
        print(f.__name__)
        return f(*args, **kwargs)
    return wrapper

@my_deco
def example(*args, **kwargs):
    """does a thing"""
    print('calling example fn')
    
example(123, foo=2, bar=23)

calling decorated fn. args: (123,) kwargs: frozenset({('bar', 23), ('foo', 2)})
example
calling example fn


In [66]:
@my_deco
def get(url, params=None, delay=1, **kwargs):
    """Some sort of docstring"""
    return dummy.get(url, params=params, delay=delay)

get('')

calling decorated fn. args: ('',) kwargs: frozenset()
get


<Response [None]>

In [61]:
get('')

calling decorated fn. args: ('',) kwargs: frozenset()
get


<Response [None]>

In [28]:
def freeze_sig(f=None, *args, **kwargs):
    name = f.__name__ if f is not None else None
    return json.dumps({'@args': args, '@kwargs': kwargs, '@name': name})

def thaw_sig(string):
    dd = json.loads(string)
    __args = dd.get('@args') # sanitize???
    __kwargs = dd.get('@kwargs')
    __name = dd.get('@name')
    return __name, __args, __kwargs

def hash_md5(stringable, strict=True):
    """

    :param stringable:
    :return:
    """
    if strict and not isinstance(stringable, Hashable):
        raise TypeError('Argument of type {} is not hashable. '
                        'Use a hashable object, or set strict=False'.format(type(stringable)))

    
    md5_key = md5(str(stringable).encode()).hexdigest()
    return md5_key

def to_filename(args, kwargs):
    print('calling to filename. args: {} kwargs: {}'.format(args, kwargs))
#     print(hash_md5(2))

def baz( *args, bank=None, **kwargs):
    print('calling baz. args: {} kwargs: {}'.format(args, kwargs))

    try:
        print('foo={}'.format(foo))
    except NameError:
        print('no foo here')
    print('bank={}'.format(bank))
    to_filename(args, kwargs)
    
print(freeze_sig())
ice = freeze_sig(baz, 1,2,3, foo=23, bar=45, bank='$$$')
print(ice)
name, args, kwargs = thaw_sig(ice)
print(name)
baz(*args, **kwargs)


{"@kwargs": {}, "@name": null, "@args": []}
{"@kwargs": {"bank": "$$$", "foo": 23, "bar": 45}, "@name": "baz", "@args": [1, 2, 3]}
baz
calling baz. args: (1, 2, 3) kwargs: {'bar': 45, 'foo': 23}
no foo here
bank=$$$
calling to filename. args: (1, 2, 3) kwargs: {'bar': 45, 'foo': 23}


what we want is the ability to call baz with some keyword arguments

In [30]:
hash_md5(ice)

'b1ff24c757c6c7a35965bacb6c671be7'

In [72]:
cache = {}

def general_cacher(f):
    print('💰 cacher')
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        print('🍬 wrapper')
        sig = freeze_sig(f, *args, **kwargs)
        key = hash_md5(sig)
        if key in cache:
            data = cache[key]
        else:
            data = f(*args, **kwargs)
            cache[key] = data
        return data
    return wrapper



In [73]:
@general_cacher
def get(url, params=None, delay=1, **kwargs):
    """Some sort of docstring"""
    return dummy.get(url, params=params, delay=delay)

💰 cacher


In [76]:
get('').json()

🍬 wrapper


{'RetrievedDate': '2018-08-15T19:15:32.809675',
 'done': True,
 'records': [{'AccountId': '1337BEEF',
   'Amount': 31415.926,
   'CreatedDate': '2015-07-13T07:30:24.000+0000',
   'IsClosed': True,
   'IsWon': False,
   'Name': 'FooBar Opportunity AAA',
   'attributes': {'type': 'Opportunity', 'url': '/services/data/foo/bar'}}],
 'totalSize': 1}