In [1]:
import random
import datetime
import numpy as np

import scipy.sparse as sp
import pandas as pd

from itertools import islice, cycle
from more_itertools import pairwise
from typing import List
from implicit.nearest_neighbours import TFIDFRecommender, BM25Recommender, CosineRecommender

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

import seaborn as sns
sns.set(style='whitegrid')
sns.set(rc={'figure.figsize':(17, 9)})

from IPython.core.display import display, HTML, clear_output
display(HTML('<style>.container { width:80% !important; }</style>'))
display(HTML('<style>.prompt { min-width:10ex !important; }</style>'))
display(HTML('<style>div#notebook { font-size:12px !important; }</style>'))

  from IPython.core.display import display, HTML, clear_output
  from IPython.core.display import display, HTML, clear_output


In [2]:
users_df = pd.read_csv('data/users.csv',)
items_df = pd.read_csv('data/items.csv',)
interactions_df = pd.read_csv('data/interactions.csv', parse_dates=['last_watch_dt'])

## Popular Model

In [3]:
class PopularRecommender():

	def __init__(self, days:int, max_k:int, item_col:str='item_id', dt_col:str='date'):
		self.item_col = item_col
		self.dt_col = dt_col
		self.days = days
		self.max_k = max_k
		self.recommendations = []
	
	def fit(self, df) -> None:
		min_date = df[self.dt_col] - pd.DateOffset(days=30)
		self.recommendations = df.loc[df[self.dt_col] > min_date, self.item_col].value_counts().head(self.max_k).index.values
    		
	def recommend(self, users:List[int], N=10):

		recs = self.recommendations[:N]
		return list(islice(cycle([recs]), len(users)))

In [4]:
interactions_df['last_watch_dt'].min(), interactions_df['last_watch_dt'].max()
split_date = interactions_df['last_watch_dt'].max() - pd.DateOffset(days=45) 

In [5]:
train_data = interactions_df[interactions_df['last_watch_dt'] <= split_date]
test_data = interactions_df[interactions_df['last_watch_dt'] > split_date]

In [6]:
from implicit.nearest_neighbours import TFIDFRecommender
model = TFIDFRecommender()

In [7]:
def get_coo_matrix(df, 
                   user_col='user_id', 
                   item_col='item_id', 
                   weight_col=None, 
                   users_mapping={}, 
                   items_mapping={}):
    
    if weight_col is None:
        weights = np.ones(len(df), dtype=np.float32)
    else:
        weights = df[weight_col].astype(np.float32)

    interaction_matrix = sp.coo_matrix((
        weights, 
        (
            df[user_col].map(users_mapping.get), 
            df[item_col].map(items_mapping.get)
        )
    ))
    return interaction_matrix

In [8]:
users_inv_mapping = dict(enumerate(interactions_df['user_id'].unique()))
users_mapping = {v: k for k, v in users_inv_mapping.items()}

items_inv_mapping = dict(enumerate(interactions_df['item_id'].unique()))
items_mapping = {v: k for k, v in items_inv_mapping.items()}

train_mat = get_coo_matrix(
	train_data,
	users_mapping=users_mapping,
	items_mapping=items_mapping,
).tocsr()

model = TFIDFRecommender(num_threads=8)
model.fit(train_mat.T)

  3%|▎         | 25667/962177 [00:45<28:14, 552.80it/s]

KeyboardInterrupt: 

  3%|▎         | 25677/962177 [01:00<28:14, 552.80it/s]