# Model-Based recommender system
## Plan
* Dataset of (WineID, UserID, Rating) triples
* Train using matrix factorization (SVD)
* Hyperparameter optimization
* Evaluate

In [1]:
# Imports
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error
from surprise import SVD, Dataset, Reader



In [2]:
# Data loading
# Forward slashes are used on purpose: the former backslash literal contained
# invalid escape sequences ('\.', '\d', '\m') that recent Python versions warn
# about, and backslash-separated paths only resolve on Windows. pandas accepts
# forward slashes on every platform. `base_path` stays a plain string so other
# cells can keep formatting it into file names.
base_path = '../../data/main'

# The test splits additionally load RatingID, which is used later to label the
# exported predictions; the training split only needs the rating triples.
train_columns = ['UserID', 'WineID', 'Rating']
test_columns = ['RatingID', 'UserID', 'WineID', 'Rating']

train = pd.read_csv(f'{base_path}/trainset.csv', usecols=train_columns)
test_uwarm_iwarm = pd.read_csv(f'{base_path}/testset_warm_user_warm_item.csv', usecols=test_columns)
test_uwarm_icold = pd.read_csv(f'{base_path}/testset_warm_user_cold_item.csv', usecols=test_columns)
test_ucold_iwarm = pd.read_csv(f'{base_path}/testset_cold_user_warm_item.csv', usecols=test_columns)
test_ucold_icold = pd.read_csv(f'{base_path}/testset_cold_user_cold_item.csv', usecols=test_columns)

In [3]:
# Report the (rows, columns) shape of the training split and of each test split.
for label, frame in (
    ('Train set shape:', train),
    ('Test set shape (warm user, warm item):', test_uwarm_iwarm),
    ('Test set shape (warm user, cold item):', test_uwarm_icold),
    ('Test set shape (cold user, warm item):', test_ucold_iwarm),
    ('Test set shape (cold user, cold item):', test_ucold_icold),
):
    print(label, frame.shape)

Train set shape: (16917894, 3)
Test set shape (warm user, warm item): (2036778, 4)
Test set shape (warm user, cold item): (35456, 4)
Test set shape (cold user, warm item): (506800, 4)
Test set shape (cold user, cold item): (16504, 4)


## SVD using Surprise library

In [4]:
# The raw ID columns are passed through unchanged because Surprise handles the
# encoding internally.


def _as_surprise_testset(frame):
    """Return the frame's (UserID, WineID, Rating) rows as a list of plain tuples."""
    return list(zip(frame['UserID'], frame['WineID'], frame['Rating']))


# Train split: wrap the rating triples in a Surprise Dataset and build the full trainset
reader = Reader(rating_scale=(1, 5))
train_triples = train[['UserID', 'WineID', 'Rating']]
train_data = Dataset.load_from_df(train_triples, reader)
train_surprise = train_data.build_full_trainset()

# Test splits: one list of (user, item, rating) tuples per warm/cold scenario
test_uwarm_iwarm_surprise = _as_surprise_testset(test_uwarm_iwarm)
test_uwarm_icold_surprise = _as_surprise_testset(test_uwarm_icold)
test_ucold_iwarm_surprise = _as_surprise_testset(test_ucold_iwarm)
test_ucold_icold_surprise = _as_surprise_testset(test_ucold_icold)

In [5]:
# Train SVD, predict every test split, export the predictions and report metrics.

# Surprise's SVD initialises its latent factors randomly; fixing the seed makes
# the predictions and the metrics below reproducible across kernel restarts.
SEED = 42

# pandas' to_csv does not create missing directories, so make sure the output
# folder exists *before* the (long) training run instead of failing after it.
output_dir = Path(base_path) / 'svd'
output_dir.mkdir(parents=True, exist_ok=True)


def predict_and_score(fitted_model, frame, testset, csv_path):
    """Predict one test split, export the predictions and compute error metrics.

    Parameters
    ----------
    fitted_model : fitted Surprise algorithm
        Must expose ``test(testset)`` returning objects with an ``est`` attribute.
    frame : pandas.DataFrame
        Test split with ``RatingID`` and ``Rating`` columns. A ``Prediction``
        column holding the estimated ratings is added (or overwritten) in place.
    testset : list of tuple
        ``(UserID, WineID, Rating)`` tuples in the same row order as ``frame``.
    csv_path : path-like
        Destination CSV; written with the two columns ``RatingID`` and
        ``Rating`` (the latter containing the predictions).

    Returns
    -------
    tuple
        ``(estimates, mse, rmse, mae)`` where ``estimates`` is the list of
        predicted ratings and the metrics compare them with ``frame['Rating']``.
    """
    estimates = [prediction.est for prediction in fitted_model.test(testset)]

    # Keep the predictions next to their RatingID so they can be exported.
    frame['Prediction'] = estimates
    frame.to_csv(csv_path, index=False, columns=['RatingID', 'Prediction'], header=['RatingID', 'Rating'])

    actual = frame['Rating']
    return (
        estimates,
        mean_squared_error(actual, estimates),
        root_mean_squared_error(actual, estimates),
        mean_absolute_error(actual, estimates),
    )


def print_metrics(label, mse, rmse, mae):
    """Print the MSE, RMSE and MAE of one test split, tagged with ``label``."""
    print(f'MSE ({label}):', mse)
    print(f'RMSE ({label}):', rmse)
    print(f'MAE ({label}):', mae)


# Train SVD
model = SVD(random_state=SEED)
model.fit(train_surprise)

# Predict, save and evaluate each warm/cold scenario
pred_uwarm_iwarm, mse_uwarm_iwarm, rmse_uwarm_iwarm, mae_uwarm_iwarm = predict_and_score(
    model, test_uwarm_iwarm, test_uwarm_iwarm_surprise, output_dir / 'svd_warm_user_warm_item.csv')
pred_uwarm_icold, mse_uwarm_icold, rmse_uwarm_icold, mae_uwarm_icold = predict_and_score(
    model, test_uwarm_icold, test_uwarm_icold_surprise, output_dir / 'svd_warm_user_cold_item.csv')
pred_ucold_iwarm, mse_ucold_iwarm, rmse_ucold_iwarm, mae_ucold_iwarm = predict_and_score(
    model, test_ucold_iwarm, test_ucold_iwarm_surprise, output_dir / 'svd_cold_user_warm_item.csv')
pred_ucold_icold, mse_ucold_icold, rmse_ucold_icold, mae_ucold_icold = predict_and_score(
    model, test_ucold_icold, test_ucold_icold_surprise, output_dir / 'svd_cold_user_cold_item.csv')

# Print results
print('SVD Results:')
print_metrics('warm user, warm item', mse_uwarm_iwarm, rmse_uwarm_iwarm, mae_uwarm_iwarm)
print('-' * 50)
print_metrics('warm user, cold item', mse_uwarm_icold, rmse_uwarm_icold, mae_uwarm_icold)
print('-' * 50)
print_metrics('cold user, warm item', mse_ucold_iwarm, rmse_ucold_iwarm, mae_ucold_iwarm)
print('-' * 50)
print_metrics('cold user, cold item', mse_ucold_icold, rmse_ucold_icold, mae_ucold_icold)


SVD Results:
MSE (warm user, warm item): 0.3198472927282139
RMSE (warm user, warm item): 0.5655504334082098
MAE (warm user, warm item): 0.412036745658953
--------------------------------------------------
MSE (warm user, cold item): 0.41533766974870867
RMSE (warm user, cold item): 0.644466965599253
MAE (warm user, cold item): 0.47336789039746074
--------------------------------------------------
MSE (cold user, warm item): 0.44334311057386555
RMSE (cold user, warm item): 0.6658401539212437
MAE (cold user, warm item): 0.49323154327054913
--------------------------------------------------
MSE (cold user, cold item): 0.5983717312186697
RMSE (cold user, cold item): 0.7735449122182045
MAE (cold user, cold item): 0.574148821209833
