In [1]:
from pipeline import get_data, get_user_recommendation, train_and_evalute_model_pipeline, benchmark
import pandas as pd
import numpy as np
from surprise.prediction_algorithms.algo_base import AlgoBase
from surprise import SVD
from surprise.prediction_algorithms.knns import KNNBasic
from surprise.prediction_algorithms.matrix_factorization import NMF
from surprise.model_selection import cross_validate

# Load the dataset through the project's pipeline helper. The same `data`
# object is passed to cross_validate in the cross-validation cell.
# NOTE(review): `from_surprise=True` presumably asks for a Surprise-native
# dataset object rather than a raw DataFrame - confirm in pipeline.get_data.
data = get_data(from_surprise=True)

### Train the models manually, one by one

In [7]:
# Train and evaluate each algorithm on its own, one pipeline call per model.
# Every pipeline result is kept in `manual_results`, keyed by algorithm name,
# so a later run no longer overwrites the result of an earlier one.
# NOTE(review): the recorded output of this cell shows identical RMSE and MAE
# values - worth checking how the pipeline computes MAE.

# KNN settings; in Surprise's sim_options, 'user_based': False selects
# item-item similarities.
kwargs_KNN = {'k': 40, 'min_k': 1, 'sim_options': {'user_based': False, 'name': 'pearson'}}
# NMF settings.
kwargs_NMF = {'n_factors': 15, 'n_epochs': 50, 'verbose': False}
# SVD: empty kwargs, i.e. no overrides are passed to the pipeline.
kwargs_SVD = {}

manual_results = {}
for algo_name, algo_kwargs in (('KNN', kwargs_KNN), ('NMF', kwargs_NMF), ('SVD', kwargs_SVD)):
    manual_results[algo_name] = train_and_evalute_model_pipeline(
        algo_name, model_kwargs=algo_kwargs, from_surprise=True, test_size=0.2
    )

# Backward compatibility: `model` and `metrics_dict` still refer to the last
# run (SVD), exactly as they did when the three calls were written out.
model, metrics_dict = manual_results['SVD']


{'RMSE': 0.9349585947285977, 'MAE': 0.9349585947285977}

### Benchmark a sequence of models of your choice

In [2]:
# Benchmark configuration: one entry per display label. Each entry holds the
# algorithm key, the kwargs for that model and the evaluation settings, which
# are identical for every run and therefore defined once.
# NOTE(review): the labels say "user based", but 'user_based': False makes
# Surprise's KNN compute item-item similarities, and the NMF/SVD entries have
# no such option at all. The labels are kept unchanged because a later cell
# looks models up by these exact strings.
common_settings = {'from_surprise': True, 'test_size': 0.2}

model_configs = {
    'KNN user based cosine': {
        'algo': 'KNN',
        'model_kwargs': {'k': 40, 'min_k': 1, 'sim_options': {'user_based': False, 'name': 'cosine'}},
        **common_settings,
    },
    'KNN user based pearson': {
        'algo': 'KNN',
        'model_kwargs': {'k': 40, 'min_k': 1, 'sim_options': {'user_based': False, 'name': 'pearson'}},
        **common_settings,
    },
    'NMF user based': {
        'algo': 'NMF',
        'model_kwargs': {'n_factors': 15, 'n_epochs': 50, 'verbose': False},
        **common_settings,
    },
    'SVD user based': {
        'algo': 'SVD',
        'model_kwargs': {},
        **common_settings,
    },
}

# The configuration and the benchmark result live under different names:
# `model_configs` stays the input, `model_dict` is the label -> model mapping
# returned by benchmark (the name the following cells rely on).
df_res, model_dict = benchmark(model_configs)
display(df_res)

------------------processing KNN user based cosine ------------------
Computing the cosine similarity matrix...
Done computing similarity matrix.
------------------processing KNN user based pearson ------------------
Computing the pearson similarity matrix...
Done computing similarity matrix.
------------------processing NMF user based ------------------
------------------processing SVD user based ------------------


Unnamed: 0,model name,RMSE,MAE,fit_time(ms)
0,KNN user based cosine,1.02643,1.02643,1500.095129
1,KNN user based pearson,1.041104,1.041104,2044.845104
2,NMF user based,0.961725,0.961725,5158.902168
3,SVD user based,0.938988,0.938988,4610.15439


In [4]:
# Show the mapping of benchmark labels to model objects. Ending the cell with
# the bare name lets IPython pretty-print the dict across several lines
# instead of print()'s single long line.
model_dict

{'KNN user based cosine': <surprise.prediction_algorithms.knns.KNNBasic object at 0x7f978cbd3370>, 'KNN user based pearson': <surprise.prediction_algorithms.knns.KNNBasic object at 0x7f978cc1beb0>, 'NMF user based': <surprise.prediction_algorithms.matrix_factorization.NMF object at 0x7f978b6de370>, 'SVD user based': <surprise.prediction_algorithms.matrix_factorization.SVD object at 0x7f978cc1b400>}


### Cross validation

In [5]:
# 5-fold cross-validation of the cosine KNN model on `data`, measuring RMSE
# and MAE. verbose=True prints the per-fold table; the returned dict of
# scores and timings is the cell's output.
cross_validate(
    model_dict['KNN user based cosine'],
    data,
    measures=['RMSE', 'MAE'],
    cv=5,
    verbose=True,
)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    1.0230  1.0325  1.0274  1.0194  1.0286  1.0262  0.0045  
MAE (testset)     0.8094  0.8166  0.8128  0.8059  0.8137  0.8117  0.0037  
Fit time          1.51    1.38    1.39    1.40    1.40    1.42    0.05    
Test time         3.04    3.12    3.21    3.12    3.31    3.16    0.09    


{'test_rmse': array([1.02303874, 1.03246724, 1.02742041, 1.01943299, 1.02863252]),
 'test_mae': array([0.80944126, 0.81656332, 0.81277952, 0.80590468, 0.81365753]),
 'fit_time': (1.5068390369415283,
  1.3813419342041016,
  1.386152982711792,
  1.3997838497161865,
  1.4022619724273682),
 'test_time': (3.0384819507598877,
  3.1199748516082764,
  3.214918375015259,
  3.1224870681762695,
  3.3084840774536133)}

In [2]:
# Inspect the loaded dataset object; the recorded output shows only its type
# (surprise.dataset.DatasetAutoFolds) and memory address, not its contents.
data

<surprise.dataset.DatasetAutoFolds at 0x7fc3c467f610>