In [1]:
import numpy as np
import pandas as pd
import random

from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import KNNBasic
from surprise import KNNWithZScore
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate

from surprise.model_selection import GridSearchCV

In [2]:
my_seed = 0
random.seed(my_seed)
np.random.seed(my_seed)

In [3]:
# Train data set with Normalized Purchase
User_Prod_NP = pd.read_csv('./Files_Folder/User_Prod_NormPurc.csv')

# Statistics of each Product_ID in train data sets
train_Prod_sta = pd.read_csv('./Files_Folder/train_User_Prod_stat.csv') 

# Statistics of new Product_IDs; those are Prodcut_IDs in test data set but not in train data set
New_Prod_LUT = pd.read_csv('./Files_Folder/New_Product_Details.csv')

In [4]:
# A reader is still needed but only the rating_scale param is required ... Comment from Surprise library
reader = Reader(rating_scale=(1, 5))

In [5]:
# The columns must correspond to user id, item id and ratings (in that order) ... Comment from Surprise library
data = Dataset.load_from_df(User_Prod_NP[['User_ID', 'Product_ID', 'Normalized_Purchase']], reader)

In [6]:
# n_epochs as the name suggest, lr_all = learning rate, reg_all = regularization parameter
param_grid = {'n_epochs': [100, 105, 110, 115, 120, 125, 130], 'lr_all': [0.005],'reg_all': [0.1]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.8832248122355065
{'n_epochs': 130, 'lr_all': 0.005, 'reg_all': 0.1}


In [7]:
param_grid = {'n_epochs': [130, 135, 140, 145, 150, 155, 160], 'lr_all': [0.005],'reg_all': [0.1]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.8834163377840737
{'n_epochs': 130, 'lr_all': 0.005, 'reg_all': 0.1}


In [8]:
param_grid = {'n_epochs': [130], 'lr_all': [0.004, 0.005, 0.006],'reg_all': [0.09, 0.1, 0.11]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.8837866146036214
{'n_epochs': 130, 'lr_all': 0.006, 'reg_all': 0.11}


In [9]:
param_grid = {'n_epochs': [130], 'lr_all': [0.006, 0.007, 0.008],'reg_all': [0.11, 0.12, 0.13]}
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=3)

gs.fit(data)

# best RMSE score
print(gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

0.8836070171304421
{'n_epochs': 130, 'lr_all': 0.006, 'reg_all': 0.11}
