Skip to content

Commit

Permalink
multiple cache reconstruction methods, binary object persistence
Browse files Browse the repository at this point in the history
  • Loading branch information
wasimusu committed Jul 14, 2020
1 parent ec8308a commit a8efa34
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 13 deletions.
3 changes: 2 additions & 1 deletion description.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ And opens up cache design for testing.
- Write documentation
- Write in batches to the disk. Make provision for flush as well.
- Implement increment and decrement in redis like counter
- Write binary objects (image, pdfs, etc) to the disk
- Multiple methods of cache reconstruction and persistence.

##### TODO for Release/General
- Make some calls async after testing code on a single thread
Expand All @@ -47,7 +49,6 @@ the communication between client and server works flawlessly. Cache is fine. :)
- Allow users to change the communication protocol: TCP/UDP/Unix Sockets.

##### TODO for persistent storage
- Write binary objects (image, pdfs, etc) to the disk
- When key is not in the cache, check for it in the disk/database

##### TODO for Cache Design
Expand Down
22 changes: 19 additions & 3 deletions distcache/cache_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ class CacheServer:
"""
Implements cache client. It has different types of cache eviction policies at disposal.
It responds to queries of cache server.
By default all of the operations that the cache server carries out are logged and can be used to reconstruct
the cache in the event of error or server shutdown.
A snapshot is simply the cache state that replaying the logs would produce, but it may miss some of the
latest server operations. Rebuilding from a snapshot is faster, but saving snapshots is a time-consuming
operation.
"""

def __init__(self, host='localhost', port=2050, capacity=100, expire=0, filename=0):
Expand Down Expand Up @@ -45,7 +52,7 @@ def __init__(self, host='localhost', port=2050, capacity=100, expire=0, filename
self.LISTEN_CAPACITY = config.LISTEN_CAPACITY

# Logging
self.dbname = 'cache.json' if filename is None else filename
self.dbname = 'cache.db' if filename is None else filename
self.logger = logger.Logger(filename=self.dbname, mode='a', batch_size=1)

self.save_every_k_seconds = config.save_every_k_seconds
Expand All @@ -66,13 +73,22 @@ def snapshot(self):

def reconstruct(self):
    """
    Rebuild the in-memory cache from the most recent on-disk snapshot.

    If no snapshot file exists yet, the current cache is left untouched.
    :return: None
    """
    if not os.path.exists(self.dbname):
        return
    with open(self.dbname, mode='rb') as snapshot_file:
        self.cache = pickle.load(snapshot_file)

def replay_log(self):
    """
    Rebuild the cache by re-applying every logged operation as if it
    were a freshly received client request.
    :return: None
    """
    for entry in self.logger.read_logs():
        self.parse_message(entry)

def parse_message(self, message):
"""
Parse and execute the command
Expand All @@ -81,7 +97,7 @@ def parse_message(self, message):
"""
# This should run in a separate thread
message = pickle.loads(message)
self.logger.log(message)
self.logger.log_bytes(message)

if message[0] == "set":
return self.cache.set(message[1], message[2])
Expand Down
56 changes: 48 additions & 8 deletions distcache/logger.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import json
from collections import deque
import pickle


class Logger:
"""
Implements a simple logger
"""

def __init__(self, filename='cache.json', mode='a', batch_size=1):
def __init__(self, filename='cache.db', mode='ab', batch_size=1):
"""
Initializes Logger object:
Expand All @@ -16,24 +16,48 @@ def __init__(self, filename='cache.json', mode='a', batch_size=1):
To write objects in batches, increase the value of batch_size.
"""
self.filename = filename
self.mode = mode
self.batch_size = batch_size
self.logs = deque() # List of logs to be written to the file
self.file = open(file=filename, mode=mode)

def log(self, object):
def log_bytes(self, object):
    """
    Queue an already-serialized (bytes) object for writing to the log file.

    Entries are buffered in memory; once batch_size entries have
    accumulated, the whole batch is written to disk.

    :param object: any serialized object, for instance::

        obj = ("life", "is wonderful")
        byte_obj = pickle.dumps(obj)
        logger = Logger()
        logger.log_bytes(byte_obj)

    The same works for images or pdfs::

        byte_obj = open("some_image_file.png", mode='rb').read()
        logger = Logger()
        logger.log_bytes(byte_obj)

    Note: this method does not verify that the object really is bytes;
    the caller is responsible for that check.
    :returns: None
    """
    self.logs.append(object)
    batch_full = len(self.logs) == self.batch_size
    if batch_full:
        self.flush()  # TODO: make this asynchronous

def log(self, object):
    """
    Queue an arbitrary (non-bytes) object for writing to the log file.

    The object is pickled first, then buffered; once batch_size entries
    have accumulated, the whole batch is written to disk.

    :param object: basically anything picklable [int, str, list, etc],
        for instance::

            ("set", "hi", "greeting"),
            ("set", 1, 100),
            ("del", 1)
    :returns: None
    """
    self.logs.append(pickle.dumps(object))
    batch_full = len(self.logs) == self.batch_size
    if batch_full:
        self.flush()  # TODO: make this asynchronous

Expand All @@ -44,12 +68,28 @@ def flush(self):
"""
n = len(self.logs)
for i in range(n):
self.file.write(json.dumps(self.logs.popleft()))
self.file.write('\n')
pickle.dump(self.logs.popleft(), self.file)

def close(self):
    """
    Shut the logger down safely: write out any buffered entries first,
    then close the underlying log file.
    """
    self.flush()
    self.file.close()

def read_logs(self):
    """
    Read back every object previously logged to the log file.

    Buffered entries are flushed to disk first, so the returned list
    reflects all logged operations — without this, any entries still
    sitting in the in-memory batch would be silently missing, and a
    cache rebuilt via log replay would lose the newest operations.

    The log file is closed for reading and then reopened in binary
    append mode so that subsequent log calls keep working.

    :returns: list of logged objects, in the order they were written
    """
    self.flush()  # push pending batched entries to disk before reading
    self.file.close()
    objs = []
    with open(self.filename, mode='rb') as file:
        while True:
            try:
                objs.append(pickle.load(file))
            except EOFError:
                # pickle signals end-of-stream with EOFError
                break

    self.file = open(self.filename, mode='ab')  # reopen for further appends
    return objs
3 changes: 2 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ See [wiki](https://github.com/wasimusu/distcache/wiki) or [readthedocs](https://
4. Since distcache has a pure Python implementation, the installation process should be painless. That makes it easier to get up and running.
5. Its architecture assumes that the cache clients and servers can fail and plans for it. The impact is minimal on adding and removing cache servers.
6. Snapshot the servers at regular intervals to avoid cold starts upon crash or planned shutdowns
7. Thread safe increment and decrement operations on keys.
7. Log replays also available for slow but complete reconstruction of the cache upon server crash, error or shutdown.
8. Thread safe increment and decrement operations on keys.

### Coming Up
1. Health monitoring of clients and servers.
Expand Down

0 comments on commit a8efa34

Please sign in to comment.