In [2]:
import pandas as pd
import numpy as np
import scipy.sparse as sp

### Build the one-week train and test samples

In [3]:
# Load the selected interactions; `last_watch_dt` is parsed as a datetime so the
# window arithmetic below works on timestamps rather than strings.
data = pd.read_csv("../data/interim/selected_interactions.csv", parse_dates=['last_watch_dt'])

# Every window is anchored on the most recent interaction; compute it once.
last_watch_max = data['last_watch_dt'].max()

gap = pd.Timedelta(days=2)
start_test_date = last_watch_max - pd.Timedelta(days=7)
end_test_date = last_watch_max
start_train_date = last_watch_max - pd.Timedelta(days=14) - gap
end_train_date = last_watch_max - gap - pd.Timedelta(days=7)

## splitting
# Windows are half-open on the left and closed on the right: (start, end].
# The upper bound must be inclusive: with a strict `<` the rows carrying the
# latest `last_watch_dt` (== end_test_date) were dropped from the test sample.
# With (start, end] each window spans exactly 7 days and the distance between
# end_train_date and start_test_date is exactly `gap`.
train_sample = data[(data['last_watch_dt'] > start_train_date) & (data['last_watch_dt'] <= end_train_date)]
test_sample = data[(data['last_watch_dt'] > start_test_date) & (data['last_watch_dt'] <= end_test_date)]

## saving
train_sample.to_csv("../data/interim/one_week_train_sample.csv", index=False)
test_sample.to_csv("../data/interim/one_week_test_sample.csv", index=False)

### Build the user-item interaction matrix

In [4]:
# Constants that are not used inside this cell; presumably consumed by later
# cells - verify before changing.
top_N = 10
last_n_days = 7

# Positional index -> raw id, enumerated over the unique ids of the training sample.
users_inv_mapping = dict(enumerate(train_sample['user_id'].unique()))
items_inv_mapping = dict(enumerate(train_sample['item_id'].unique()))

# Reverse lookups: raw id -> positional index.
users_mapping = dict(zip(users_inv_mapping.values(), users_inv_mapping.keys()))
items_mapping = dict(zip(items_inv_mapping.values(), items_inv_mapping.keys()))

In [5]:
def get_coo_matrix(df, 
                   user_col='user_id', 
                   item_col='item_id', 
                   weight_col=None, 
                   users_mapping=None, 
                   items_mapping=None):
    """Build a sparse COO interaction matrix from a frame of (user, item) rows.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per interaction; must contain `user_col` and `item_col`
        (and `weight_col` when it is given).
    user_col, item_col : str
        Names of the columns holding the raw user / item ids.
    weight_col : str or None
        Column whose values (cast to float32) become the matrix entries.
        When None, every interaction gets weight 1.0.
    users_mapping, items_mapping : dict or None
        Raw id -> integer row / column index. None is treated as an empty
        mapping.

    Returns
    -------
    scipy.sparse.coo_matrix
        float32 matrix whose shape is derived from the mappings
        (largest index + 1 on each axis), not from the rows present in `df`,
        so matrices built from different frames with the same mappings are
        dimensionally compatible. An empty `df` yields an all-zero matrix.

    Raises
    ------
    ValueError
        If any row of `df` has a user or item id that is not a key of the
        corresponding mapping.
    """
    # Avoid mutable default arguments; None stands for "no mapping supplied".
    users_mapping = {} if users_mapping is None else users_mapping
    items_mapping = {} if items_mapping is None else items_mapping

    if weight_col is None:
        weights = np.ones(len(df), dtype=np.float32)
    else:
        weights = df[weight_col].to_numpy(dtype=np.float32)

    user_idx = df[user_col].map(users_mapping.get)
    item_idx = df[item_col].map(items_mapping.get)

    # Ids missing from a mapping come back as nulls; fail with a clear message
    # instead of letting them reach scipy as invalid indices.
    missing_users = user_idx.isna()
    missing_items = item_idx.isna()
    if missing_users.any() or missing_items.any():
        raise ValueError(
            f"{int(missing_users.sum())} row(s) have a '{user_col}' and "
            f"{int(missing_items.sum())} row(s) have an '{item_col}' "
            "that is absent from the supplied mappings"
        )

    # Explicit shape: without it scipy infers the dimensions from the largest
    # index present in `df` and cannot handle an empty frame at all.
    n_users = max(users_mapping.values(), default=-1) + 1
    n_items = max(items_mapping.values(), default=-1) + 1

    interaction_matrix = sp.coo_matrix(
        (
            weights,
            (
                user_idx.to_numpy(dtype=np.int64),
                item_idx.to_numpy(dtype=np.int64),
            ),
        ),
        shape=(n_users, n_items),
    )
    return interaction_matrix

In [6]:
# Interaction matrix for the training sample. No weight column is passed,
# so get_coo_matrix applies its default weighting.
coo_mat = get_coo_matrix(
    train_sample,
    users_mapping=users_mapping,
    items_mapping=items_mapping,
)