/
caching.py
118 lines (95 loc) · 2.95 KB
/
caching.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
__author__ = 'maartenbreddels'
from cachetools import Cache, LRUCache
import os
import numpy as np
import logging
logger = logging.getLogger("vaex.caching")
class MemoryCache(LRUCache):
    """LRU cache that calls a hook right before an entry is deleted.

    :param maxsize: forwarded positionally to ``LRUCache.__init__``
    :param missing: forwarded positionally to ``LRUCache.__init__``
    :param getsizeof: forwarded positionally to ``LRUCache.__init__``
    :param delitem: callable invoked as ``delitem(key)`` at the start of every
        ``__delitem__`` call; the default hook does nothing
    """

    def __init__(self, maxsize, missing=None, getsizeof=None, delitem=lambda key: None):
        LRUCache.__init__(self, maxsize, missing, getsizeof)
        # Double underscore => name-mangled to ``_MemoryCache__delitem``, so
        # the hook cannot collide with an attribute of the base class.
        self.__delitem = delitem

    def __delitem__(self, key):
        """Run the ``delitem`` hook for ``key``, then let the base class remove it."""
        # NOTE(review): presumably evictions done by the base class also go
        # through here -- confirm against the cachetools version in use.
        notify_removed = self.__delitem
        notify_removed(key)
        super(MemoryCache, self).__delitem__(key)
class FileWrapped(LRUCache):
    """LRU cache with a deletion hook and an overridable item setter.

    The class derives from ``LRUCache`` because every method here relies on
    it: ``__init__`` runs ``LRUCache.__init__`` on ``self``, and
    ``__delitem__`` / ``__setitem__`` delegate to the base implementation.
    Deriving from plain ``object`` would make ``super().__delitem__`` raise
    ``AttributeError``.

    :param maxsize: forwarded positionally to ``LRUCache.__init__``
    :param missing: forwarded positionally to ``LRUCache.__init__``
    :param getsizeof: forwarded positionally to ``LRUCache.__init__``
    :param delitem: callable invoked as ``delitem(key)`` at the start of every
        ``__delitem__`` call; the default hook does nothing
    """

    def __init__(self, maxsize, missing=None, getsizeof=None, delitem=lambda key: None):
        LRUCache.__init__(self, maxsize, missing, getsizeof)
        # Name-mangled to ``_FileWrapped__delitem``; private to this class.
        self.__delitem = delitem

    def __delitem__(self, key):
        """Run the ``delitem`` hook for ``key``, then remove the entry."""
        self.__delitem(key)
        super(FileWrapped, self).__delitem__(key)

    def __setitem__(self, key, value, cache_setitem=Cache.__setitem__):
        """Store ``value`` under ``key``.

        ``cache_setitem`` is passed on to ``LRUCache.__setitem__`` rather than
        being called directly, so the base class can keep its usage-order
        bookkeeping in step with the stored items.
        """
        super(FileWrapped, self).__setitem__(key, value, cache_setitem)
# Size units used for the default cache limits below.
MB = 1024**2
GB = MB * 1024


class SelectionCache(object):
    """Mapping-style cache front end, currently backed by memory only.

    Lookups and stores go straight to ``memory_cache``. A disk tier
    (``file_cache``) is sketched in the commented-out lines of ``__init__``
    but is not created, so ``__contains__`` treats it as optional.

    :type memory_cache: LRUCache
    """

    def __init__(self, max_memory_size=1*GB, max_disk_size=4*GB):
        """
        :param max_memory_size: ``maxsize`` passed to the in-memory ``LRUCache``
        :param max_disk_size: accepted but unused while the disk tier is disabled
        """
        # Disabled two-tier design, kept for reference:
        #self.file_cache = MemoryCache(max_disk_size, getsizeof=self.getsizeof, delitem=self.on_delete_file)
        #self.memory_cache = MemoryCache(max_memory_size, missing=self.get_from_disk, delitem=self.on_delete_memory, )
        # NOTE(review): no ``getsizeof`` is given, so the limit is presumably
        # counted in entries rather than bytes -- confirm against cachetools.
        self.memory_cache = LRUCache(max_memory_size)

    def __getitem__(self, key):
        """Return the cached value for ``key`` from the memory cache."""
        return self.memory_cache[key]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` in the memory cache."""
        self.memory_cache[key] = value

    def __contains__(self, key):
        """Return True if ``key`` is in the memory cache or, when one exists, the file cache."""
        if key in self.memory_cache:
            return True
        # ``file_cache`` is never assigned in ``__init__``; reading it
        # unconditionally would raise AttributeError on every memory miss.
        file_cache = getattr(self, "file_cache", None)
        return file_cache is not None and key in file_cache
import collections
class NumpyFileDict(object):
    """Dict-like store that keeps every value in its own ``.npy`` file.

    Values are written with ``np.save`` and read back with ``np.load``. An
    in-memory dict (``key_to_path``) records which keys have been stored and
    the file each one lives in. File names are derived from the key and carry
    no directory component, so files land wherever relative paths resolve.
    """

    def __init__(self):
        # key -> path of the .npy file holding that key's value
        self.key_to_path = {}

    def __filename(self, key):
        """Return the file name for a tuple key, e.g. ``(1, 3)`` -> ``"1_3.npy"``."""
        return "_".join(map(str, key)) + ".npy"

    def path(self, key):
        """Return the file path under which ``key`` is stored.

        Tuple keys become their items joined by ``_``; any other key uses its
        ``repr``. Both forms end in ``.npy``.
        """
        if isinstance(key, tuple):
            return self.__filename(key)
        # np.save appends ".npy" when the name lacks it, so the suffix must be
        # part of the path here or later np.load / os.remove calls would look
        # for a file that does not exist.
        return repr(key) + ".npy"

    def __iter__(self):
        """Iterate over the stored keys (not the arrays)."""
        return iter(self.key_to_path)

    def __contains__(self, key):
        """Return True if a value has been stored under ``key``."""
        return key in self.key_to_path

    def __getitem__(self, key):
        """Load and return the array stored under ``key``.

        :raises KeyError: if ``key`` has not been stored
        """
        # The dict lookup raises KeyError(key) itself for unknown keys.
        return np.load(self.key_to_path[key])

    def __setitem__(self, key, value):
        """Save ``value`` to the file for ``key`` and record the key."""
        logger.debug("set %r", key)
        path = self.path(key)
        np.save(path, value)
        # Record the key only after the file was written successfully.
        self.key_to_path[key] = path

    def __delitem__(self, key):
        """Delete the file for ``key`` and forget the key.

        :raises KeyError: if ``key`` has not been stored
        """
        logger.debug("delete %r", key)
        # Look the key up first so an unknown key raises KeyError before
        # anything on disk is touched.
        path = self.key_to_path[key]
        os.remove(path)
        del self.key_to_path[key]
if __name__ == "__main__":
    # Manual demo: a size-2 cache whose values are kept in a NumpyFileDict.
    logger.setLevel("DEBUG")

    def f(key):
        """Build the array for a ``(start, stop)`` key."""
        start, stop = key
        return np.arange(start, stop)

    np_dict = NumpyFileDict()
    #np_dict[(1,2)] = np.arange(10)
    # NOTE(review): ``dict_value`` is not a keyword this file defines; confirm
    # that the installed ``Cache`` accepts it.
    cache = Cache(2, missing=f, dict_value=np_dict)
    print("cache[1,3] =", cache[(1, 3)], "...")
    print("cache[1,13] =", cache[(1, 13)], "...")
    # More lookups than the cache has room for.
    for demo_key in ((2, 3), (3, 4), (3, 5), (3, 6)):
        print(cache[demo_key])
    print("keys", cache.keys())
    # Iterating the file dict yields the keys it still holds.
    for stored_key in np_dict:
        print("-->", stored_key)