In [None]:
#!pip install redis
#!sudo snap install redis

In [1]:
import pandas as pd
import numpy as np
import redis
import concurrent.futures
import pickle as pkl
from loguru import logger

In [2]:
USE_REDIS_CACHE = True

logger.info("Connect to redis")
r = redis.Redis(host="127.0.0.1", port=6379, db=1)

if r.ping():
    logger.info("Connecting to redis works!")
else:
    logger.error("Problem connect to redis")


def cache(key, use_cache=USE_REDIS_CACHE):
    def def_decorator(func):
        def wrapper(*args, **kwargs):
            overwrite_cache = kwargs.get("overwrite_cache", False)
            logger.info(overwrite_cache)
            if False == overwrite_cache and use_cache:
                buff = r.get(key)
                if buff:
                    return pkl.loads(buff)
                    logger.info("Finish load cache for {}".format(key))
            result = func(*args)
            logger.info("Finish load original for {}".format(key))
            if (False == use_cache) or (use_cache and overwrite_cache) or (not buff):
                r.set(key, pkl.dumps(result, pkl.HIGHEST_PROTOCOL))
                logger.info("Finish save cache for {}".format(key))
            return result
        return wrapper
    return def_decorator

def list_key(str_find):
    k=[]
    for key in r.scan_iter(str_find):
        k.append(str(key))
    return k

def find_user_id(data):
    list_user_id = []
    if data:
        list_user_id = [x.decode('UTF-8') for x in set(r.mget(data)) if x]
    return list_user_id


def get_from_redis(vals, max_workers=200):
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        return [x for x in executor.map(lambda x: find_user_id(x), vals)]
    
def get_cache_on_key(key):
    buff = r.get(key)
    if buff:
        return pkl.loads(buff)
    else:
        logger.info('Not found data in redis')

2023-02-05 10:20:52.240 | INFO     | __main__:<module>:3 - Connect to redis
2023-02-05 10:20:52.246 | INFO     | __main__:<module>:7 - Connecting to redis works!


In [3]:
def create_random_df():
    data = {'col1': np.random.randint(0,100,10),
            'col2': np.random.normal(100,10,10),
            'col3': np.random.choice(['A','B','C','D'],10)}
    return pd.DataFrame(data)
df=create_random_df()
df1=create_random_df()

In [4]:
df

Unnamed: 0,col1,col2,col3
0,75,103.665576,C
1,81,89.810697,D
2,49,106.664496,C
3,87,104.459514,D
4,85,115.248025,A
5,59,79.259704,D
6,0,90.992853,B
7,2,91.481207,B
8,95,101.146953,C
9,71,89.994226,A


In [5]:
df1

Unnamed: 0,col1,col2,col3
0,94,87.956357,D
1,87,92.874043,B
2,61,114.338332,B
3,57,93.173889,D
4,4,106.019589,B
5,18,80.831515,C
6,7,91.840946,B
7,57,85.623549,D
8,25,97.477843,A
9,73,115.181631,D


In [6]:
# create key
@cache(key="df.test.1")
def __get_day_sales(df, overwrite_cache=False):
    return df

In [7]:
# save to redis
__get_day_sales(df, overwrite_cache=True)

2023-02-05 10:21:00.624 | INFO     | __main__:wrapper:16 - True
2023-02-05 10:21:00.626 | INFO     | __main__:wrapper:23 - Finish load original for df.test.1
2023-02-05 10:21:00.632 | INFO     | __main__:wrapper:26 - Finish save cache for df.test.1


Unnamed: 0,col1,col2,col3
0,75,103.665576,C
1,81,89.810697,D
2,49,106.664496,C
3,87,104.459514,D
4,85,115.248025,A
5,59,79.259704,D
6,0,90.992853,B
7,2,91.481207,B
8,95,101.146953,C
9,71,89.994226,A


In [8]:
# list key in redis
str_find="df*"
list_key(str_find)

["b'df.test.1'"]

In [9]:
# get from redis
key="df.test.1"
df2=get_cache_on_key(key)
df2

Unnamed: 0,col1,col2,col3
0,75,103.665576,C
1,81,89.810697,D
2,49,106.664496,C
3,87,104.459514,D
4,85,115.248025,A
5,59,79.259704,D
6,0,90.992853,B
7,2,91.481207,B
8,95,101.146953,C
9,71,89.994226,A


In [10]:
# don't set to redis
__get_day_sales(df1, overwrite_cache=False)
# get on key
key="df.test.1"
df2=get_cache_on_key(key)
df2

2023-02-05 10:21:07.534 | INFO     | __main__:wrapper:16 - False


Unnamed: 0,col1,col2,col3
0,75,103.665576,C
1,81,89.810697,D
2,49,106.664496,C
3,87,104.459514,D
4,85,115.248025,A
5,59,79.259704,D
6,0,90.992853,B
7,2,91.481207,B
8,95,101.146953,C
9,71,89.994226,A


In [11]:
# save to redis
__get_day_sales(df1, overwrite_cache=True)
# get on key
key="df.test.1"
df2=get_cache_on_key(key)
df2

2023-02-05 10:21:10.059 | INFO     | __main__:wrapper:16 - True
2023-02-05 10:21:10.061 | INFO     | __main__:wrapper:23 - Finish load original for df.test.1
2023-02-05 10:21:10.063 | INFO     | __main__:wrapper:26 - Finish save cache for df.test.1


Unnamed: 0,col1,col2,col3
0,94,87.956357,D
1,87,92.874043,B
2,61,114.338332,B
3,57,93.173889,D
4,4,106.019589,B
5,18,80.831515,C
6,7,91.840946,B
7,57,85.623549,D
8,25,97.477843,A
9,73,115.181631,D
