In [1]:
# ! pip install surprise --user

In [2]:
from sklearn.metrics import *

In [3]:
from surprise.model_selection import train_test_split
from surprise import BaselineOnly
from surprise import KNNBasic
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold
from surprise import Reader
from surprise import Dataset
import pandas as pd
import numpy as np
from surprise import accuracy
from surprise.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [4]:
np.random.seed(10)

In [5]:
# path to dataset file

utility_table = pd.read_csv('yelp_table_utility.csv')

# utility_table1 = pd.read_csv('C:/Users/vasha/OneDrive/unsupervised/Project/Data/yelp_table_utility1.csv')
#reader = Reader(name=None, line_format=u'user item rating', sep=None, rating_scale=(1, 5), skip_lines=0

# A reader is still needed but only the rating_scale param is requiered.
reader = Reader(rating_scale=(1, 5))

# The columns must correspond to user id, item id and ratings (in that order).
data = Dataset.load_from_df(utility_table[['user_id', 'name', 'stars']], reader)


In [6]:
utility_matrix = pd.read_csv('yelp_utility_matrix.csv')
utility_matrix

Unnamed: 0,1001 Nights Hookah & Restaurant,101 Asian Buffet,1130 The Restaurant,12 West Brewing,12th Fairway Bar & Grill,12th Street Barbers,16th Street Sports Bar & Grill,1865 Coffee,19th Donut Hole,1UP Barbershop,...,Zen Chinese Food,Zen's Cafe,Zev's New York Barber Shop,Zona Hotel & Suites Scottsdale,Zorba's Greek Cafe,barre3 Paradise Valley,barre3 Scottsdale,i-Tea,minibarbershop,teres : A Nail Bar
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31819,,,,,,,,,,,...,,,,,,,,,,
31820,,,,,,,,,,,...,,,,,,,,,,
31821,,,,,,,,,,,...,,,,,,,,,,
31822,,,,,,,,,,,...,,,,,,,,,,


In [7]:
utility_table

Unnamed: 0,user_id,name,stars
0,XPZVfP7DQCSL3Nb9t2vxsA,Boba Tea House,5.0
1,v7FPnMzdbl6J7U_8H1BWZA,Caribou Coffee And Einsteins Bros Bagels,1.0
2,U2sN2-HGvh27FyXKFlvkBg,Songbird Coffee & Tea House,1.0
3,qkQ5iVnEdqSxjd3faoRTpw,IL Forno Italian Restaurant,4.0
4,XQDfhRd54J7OgimNNxdAcg,Fleming's Prime Steakhouse & Wine Bar- Chandler,3.0
...,...,...,...
210328,g28M9mRqUH7nd97By6K0BA,Persian Garden Cafe,5.0
210329,xNGBY48SkzWcwSd88CYoOw,True Food Kitchen,5.0
210330,_PThbLGMJYcBxil2HZeaiw,El Sol Mexican Cafe,5.0
210331,O7ojHP59iSi32uhEDUR44A,La Bocca Urban Pizzeria + Wine Bar,4.0


In [7]:
# data.head(10)

# Build Recomender Systems

## Global Baseline

In [8]:
def results_func(gb_result_dict, method_name ) :
    gb_result_array = gb_result_dict['test_rmse']
    gb_result_array = np.concatenate([np.array([method_name] ) , np.around(gb_result_array ,decimals = 5 ) ] ) 
    gb_result_array = np.concatenate([gb_result_array , np.around( [gb_result_dict['test_rmse'].mean() ], decimals = 5 ) ]  )
    gb_result_array = np.concatenate([gb_result_array , np.around( [gb_result_dict['test_rmse'].std() ] , decimals = 5 ) ] )
    gb_result_array = np.reshape(gb_result_array, (1, 8))
    gb_result_df = pd.DataFrame(gb_result_array , columns = ['Model_Name','Fold 1' , 'Fold 2', 'Fold 3', 'Fold 4', 'Fold 5', 'mean', 'std. dev'])
    return gb_result_df
    
    

In [16]:
def baseline_func(utility_matrix):
    overall_mu = utility_matrix.mean().mean()
    business_mu = utility_matrix.mean()
    user_mu = utility_matrix.mean(axis = 1)
    bias_user =  overall_mu -  user_mu.values 
    bias_user = np.reshape(bias_user , (len(bias_user) , 1 ) )
    bias_business = overall_mu - business_mu.values 
    bias_business = np.reshape(bias_business , (1, len(bias_business)) )
    results_array = overall_mu + bias_user + bias_business
    return results_array

In [17]:
import math
def cv_algorithm(X, folds , print_option = True):
    n = X.shape[0]
    k = folds
    fold_n = math.floor(n/k)
    X_train_folds = {}
    val_errors = {}
    ### Divide data into k-folds
    for j in range(folds):
            X_train_folds[j] = X[(j)*fold_n : (j+1)*fold_n] 
            
    ### Run Experiments
    for i in range(k):
        X_train_total = np.empty((1 ,X.shape[1]))
        X_val_fold = X_train_folds[i]
        X_val_zeros = np.zeros(X_train_folds[i].shape)
        
        for m in range(k):
            if m != i:
                X_train_total = np.concatenate(([X_train_total,X_train_folds[m]] ) ,axis = 0)
                
        X_train_total = np.concatenate(([X_train_total, X_val_zeros]) ,axis = 0)
        X_val_preds = baseline_func(pd.DataFrame(X_train_total))
        X_val_preds_1 = X_val_preds[(k - 1 )*fold_n : (k)*fold_n]
        val_errors[i] = np.sqrt(mean_squared_error(X_val_fold, X_val_preds_1))
    val_errors_list = list(val_errors.values())
    avg_val_error = np.mean(np.array(val_errors_list))
    if print_option:
        print("average of {}-fold cross-validation error = {} \n" .format(k ,avg_val_error) )
    return avg_val_error, val_errors_list

In [18]:
overall_mu = utility_matrix.mean().mean()
# print(overall_mu )

fold_values = [5]
avg_val_errors_dict = {}
for l, fold in enumerate(fold_values):
    avg_val_errors_dict[fold] = cv_algorithm( utility_matrix.replace(np.nan, overall_mu ).values , fold)
    

average of 5-fold cross-validation error = 2.29076738009965 



In [15]:
avg_val_errors_df = pd.DataFrame(np.array(avg_val_errors_dict[5][1]),
                                 index = ['Fold 1','Fold 2','Fold 3','Fold 4','Fold 5'],
                                 columns = ['Baseline Only'] )
avg_val_errors_df    


Unnamed: 0,Baseline Only
Fold 1,0.218827
Fold 2,0.218833
Fold 3,0.221082
Fold 4,0.217928
Fold 5,0.215798


In [13]:
baseline_dict = {}
baseline_dict['test_rmse'] = np.squeeze(avg_val_errors_df.values ,axis = -1)
baseline_dict

{'test_rmse': array([3.81655858, 3.81659503, 3.81661849, 3.816497  , 3.81654677])}

In [18]:
pd_gb = results_func(baseline_dict, method_name = 'Baseline Correct' ) 
pd_gb

Unnamed: 0,Model_Name,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,mean,std. dev
0,Baseline Correct,2.29053,2.29079,2.29074,2.29093,2.29083,2.29077,0.00013


## Item Item Collaborative filtering

In [210]:
np.random.seed(10)

In [222]:



param_grid_iicf = {'k': [190, 200, 210], 'min_k':[9],'sim_options': {'name': ["pearson"], "min_support": [2,3, 4],'user_based': [False]} }






In [223]:
iicf_gs = GridSearchCV(KNNBasic, param_grid_iicf, measures=['rmse'], cv=5)

iicf_gs.fit(data)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.

In [213]:
print(iicf_gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(iicf_gs.best_params['rmse'])

1.2848934838710584
{'min_k': 9, 'k': 200, 'sim_options': {'min_support': 3, 'user_based': False, 'name': 'pearson'}}


In [214]:
item_item_cf = iicf_gs.best_estimator['rmse']
item_item_cf_dict = cross_validate(item_item_cf, data, cv = 5, verbose=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     1.0463  1.0491  1.0523  1.0477  1.0487  1.0488  0.0020  
RMSE (testset)    1.2824  1.2861  1.2901  1.2828  1.2857  1.2854  0.0028  
Fit time          0.76    0.81    0.81    0.80    0.82    0.80    0.02    
Test time         1.10    1.66    1.11    1.10    1.10    1.21    0.22    


In [215]:
item_item_cf_dict['test_rmse']

array([1.28238765, 1.28609499, 1.29008302, 1.28277166, 1.28570481])

In [216]:
pd_iicf = results_func(item_item_cf_dict, method_name = 'Item-Item CF' ) 
pd_iicf 

Unnamed: 0,Model_Name,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,mean,std. dev
0,Item-Item CF,1.28239,1.28609,1.29008,1.28277,1.2857,1.28541,0.00277


## Base line + item item collaborative filtering

In [26]:
param_grid_iicfgb = {'k': [190],'min_k':[10] , 'sim_options': {'name': ['cosine','pearson'],"min_support": [ 4],'user_based': [False]}, 'bsl_options': {'method': ['als'], 'n_epochs': [10]}}





In [27]:
iicfgb_gs = GridSearchCV(KNNBaseline, param_grid_iicfgb, measures=['rmse'], cv=3)

iicfgb_gs.fit(data)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Com

Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity 

In [28]:
print(iicfgb_gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(iicfgb_gs.best_params['rmse'])

1.1807207041810945
{'min_k': 10, 'bsl_options': {'n_epochs': 10, 'method': 'als'}, 'k': 190, 'sim_options': {'min_support': 4, 'user_based': False, 'name': 'pearson'}}


In [33]:
print(iicfgb_gs.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(iicfgb_gs.best_params['rmse'])

1.1807207041810945
{'min_k': 10, 'bsl_options': {'n_epochs': 10, 'method': 'als'}, 'k': 190, 'sim_options': {'min_support': 4, 'user_based': False, 'name': 'pearson'}}


In [71]:
item_item_base_line = iicfgb_gs.best_estimator['rmse']
iicfgb_result_dict = cross_validate(item_item_base_line, data, cv = 5, verbose=True)

Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the pearson similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
MAE (testset)     0.9451  0.9429  0.9454  0.9390  0.9403  0.9425  0.0025  
RMSE (testset)    1.1774  1.1751  1.1808  1.1718  1.1737  1.1758  0.0031  
Fit time          1.17    1.25    1.25    1.24    1.28    1.24    0.04    
Test time         1.65    1.62    1.29    1.31    1.30    1.43    0.16    


In [72]:
pd_iicfgb = results_func(iicfgb_result_dict, method_name = 'Item-Item+Baseline' ) 
pd_iicfgb 

Unnamed: 0,Model_Name,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,mean,std. dev
0,Item-Item+Baseline,1.17736,1.17511,1.1808,1.17178,1.17369,1.17575,0.00312


## SVD only


In [135]:
param_grid_svd_only = {'n_epochs': [20], 'n_factors':[1, 2], 'lr_all':[0],  'reg_all':[0] , 'verbose': ['False']}

In [136]:
gs_svd_only = GridSearchCV(SVD, param_grid_svd_only, measures=['rmse'], cv=5)

In [137]:
gs_svd_only.fit(data)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing

In [138]:
print(gs_svd_only.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs_svd_only.best_params['rmse'])

1.290771196414116
{'verbose': 'False', 'reg_all': 0, 'n_epochs': 20, 'lr_all': 0, 'n_factors': 1}


In [139]:
print(gs_svd_only.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs_svd_only.best_params['rmse'])

1.290771196414116
{'verbose': 'False', 'reg_all': 0, 'n_epochs': 20, 'lr_all': 0, 'n_factors': 1}


In [140]:
svd_only_best = gs_svd_only.best_estimator['rmse']
svd_only_dict = cross_validate(svd_only_best, data, cv = 5, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing

In [142]:
pd_svd_only= results_func(svd_only_dict, method_name = 'SVD' ) 
pd_svd_only

Unnamed: 0,Model_Name,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,mean,std. dev
0,SVD,1.29399,1.28812,1.29522,1.28383,1.29282,1.2908,0.00423


### SVD  + regularization

- Regularization parameter is set to '0.02' by default in surprise package.

In [81]:
import random
random.seed(10)

In [82]:
param_grid_svd_normal = {'n_epochs': [20], 'n_factors':[1, 2], 'verbose': ['False']}

In [83]:
gs_svd_normal = GridSearchCV(SVD, param_grid_svd_normal, measures=['rmse'], cv=5)

In [84]:
gs_svd_normal.fit(data)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing

In [85]:
print(gs_svd_normal.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs_svd_normal.best_params['rmse'])

1.1688789454444621
{'verbose': 'False', 'n_epochs': 20, 'n_factors': 1}


In [88]:
print(gs_svd_normal.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs_svd_normal.best_params['rmse'])

1.1688789454444621
{'verbose': 'False', 'n_epochs': 20, 'n_factors': 1}


In [86]:
svd_normal_best = gs_svd_normal.best_estimator['rmse']
svd_normal_dict = cross_validate(svd_normal_best, data, cv = 5, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing

In [134]:
pd_svd_reg= results_func(svd_normal_dict, method_name = 'SVD+reg' ) 
pd_svd_reg 

Unnamed: 0,Model_Name,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,mean,std. dev
0,SVD+reg,1.17011,1.1685,1.16599,1.17219,1.16735,1.16883,0.00216


## SVD + Base line

In [185]:
param_grid_svd_gb = {'n_epochs': [20], 'n_factors':[100],'lr_all': [0.01], 'reg_all':[0.2], 'biased': [True], 'verbose': ['False']}




In [186]:
gs_svd_gb = GridSearchCV(SVD, param_grid_svd_gb, measures=['rmse'], cv=5)

In [187]:
gs_svd_gb.fit(data)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing

In [184]:
print(gs_svd_gb.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs_svd_gb.best_params['rmse'])

1.1665189540801095
{'verbose': 'False', 'n_epochs': 20, 'reg_all': 0.2, 'biased': True, 'lr_all': 0.01, 'n_factors': 200}


In [188]:
print(gs_svd_gb.best_score['rmse'])

# combination of parameters that gave the best RMSE score
print(gs_svd_gb.best_params['rmse'])

1.1658087276481113
{'verbose': 'False', 'n_epochs': 20, 'reg_all': 0.2, 'biased': True, 'lr_all': 0.01, 'n_factors': 100}


In [189]:
svdgb_reg_best = gs_svd_gb.best_estimator['rmse']
svdgb_reg_dict = cross_validate(svdgb_reg_best, data, cv = 5, verbose=True)

Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing epoch 11
Processing epoch 12
Processing epoch 13
Processing epoch 14
Processing epoch 15
Processing epoch 16
Processing epoch 17
Processing epoch 18
Processing epoch 19
Processing epoch 0
Processing epoch 1
Processing epoch 2
Processing epoch 3
Processing epoch 4
Processing epoch 5
Processing epoch 6
Processing epoch 7
Processing epoch 8
Processing epoch 9
Processing epoch 10
Processing

In [190]:
pd_svdgb_reg= results_func(svdgb_reg_dict, method_name = 'SVD+Baseline+reg' ) 
pd_svdgb_reg

Unnamed: 0,Model_Name,Fold 1,Fold 2,Fold 3,Fold 4,Fold 5,mean,std. dev
0,SVD+Baseline+reg,1.16508,1.17069,1.16394,1.16605,1.16523,1.1662,0.00234


In [None]:
### 'lr_all': [0.01, 0.03,0.1], 'reg_all':[0.03, 0.05, 0.1, 0.3],

### Concat results to store in csv file

In [1]:
overall_result_df = pd.concat([pd_gb, pd_iicf , pd_iicfgb , pd_svd_only , pd_svd_reg, pd_svdgb_reg] ,axis = 0 , ignore_index = True)









NameError: name 'pd' is not defined

In [37]:
overall_result_df

NameError: name 'overall_result_df' is not defined

In [219]:
overall_result_df.to_csv(r'results_dataframe.csv')

## For concatenating with already obtained values (optional) - not required

In [None]:
input_results = pd.read_csv(r'results_dataframe.csv', index_col = 0)
input_results

In [None]:
overall_result_changed_df = pd.concat([input_results, pd_gb ] ,axis = 0)
overall_result_changed_df 

In [21]:
overall_result_changed_df.to_csv(r'results_dataframe.csv')

In [54]:
# input_results.iloc[0 , : ] = np.squeeze(pd_gb.values ,axis = 0)
# input_results