In [None]:
from surprise import Dataset
from surprise.model_selection import train_test_split

# Load the built-in 'ml-100k' ratings dataset.
data = Dataset.load_builtin('ml-100k')

# Hold out 25% of the ratings for testing. The split shuffles the ratings, so
# the seed is fixed to keep every metric computed from trainset/testset
# reproducible across kernel restarts.
trainset, testset = train_test_split(data, test_size=0.25, random_state=42)

In [3]:
# Trainset built from every rating in the dataset (no hold-out).
full_trainset = data.build_full_trainset()

# build_testset() turns the trainset back into (user, item, rating) tuples;
# only the first ten are shown as a sanity check.
first_ten_ratings = full_trainset.build_testset()[:10]
print("First 10 raw ratings:")
print(first_ten_ratings)

First 10 raw ratings:
[('196', '242', 3.0), ('196', '393', 4.0), ('196', '381', 4.0), ('196', '251', 3.0), ('196', '655', 5.0), ('196', '67', 5.0), ('196', '306', 4.0), ('196', '238', 4.0), ('196', '663', 5.0), ('196', '111', 4.0)]


## a) Simple Collaborative Filtering Algorithms

In [4]:
from surprise import KNNBasic
from surprise import accuracy

# User-based KNN collaborative filtering (user_based=True).
algo_user = KNNBasic(sim_options={'user_based': True})
algo_user.fit(trainset)
predictions_user = algo_user.test(testset)
# verbose=False: accuracy.rmse otherwise prints its own "RMSE: ..." line,
# duplicating the labelled score printed here.
print('User-User CF RMSE:', accuracy.rmse(predictions_user, verbose=False))

# Item-based KNN collaborative filtering (user_based=False).
algo_item = KNNBasic(sim_options={'user_based': False})
algo_item.fit(trainset)
predictions_item = algo_item.test(testset)
print('Item-Item CF RMSE:', accuracy.rmse(predictions_item, verbose=False))


Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9794
User-User CF RMSE: 0.979360363804385
Computing the msd similarity matrix...
Done computing similarity matrix.
RMSE: 0.9758
Item-Item CF RMSE: 0.9758432915103421


## b) Matrix Factorization Algorithms (SVD, SVD++)

In [None]:
from surprise import SVD, SVDpp

# SVD matrix factorization. random_state is fixed so that a rerun on the same
# trainset reproduces the same score.
algo_svd = SVD(random_state=42)
algo_svd.fit(trainset)
predictions_svd = algo_svd.test(testset)
# verbose=False: accuracy.rmse otherwise prints its own "RMSE: ..." line,
# duplicating the labelled score printed here.
print('SVD RMSE:', accuracy.rmse(predictions_svd, verbose=False))

# SVD++ variant, seeded the same way.
algo_svdpp = SVDpp(random_state=42)
algo_svdpp.fit(trainset)
predictions_svdpp = algo_svdpp.test(testset)
print('SVD++ RMSE:', accuracy.rmse(predictions_svdpp, verbose=False))

RMSE: 0.9395
SVD RMSE: 0.9395352072563953
RMSE: 0.9219
SVD++ RMSE: 0.9219096621233217


## c) How to Compare Different Algorithms

A regular KFold split partitions individual ratings at random, so it does not guarantee that every user or item appears in both the training and test sets.

Some reasonable alternatives include leave-one-out cross-validation (for each user, hide one rating for testing) or a cold-start evaluation (testing specifically on users/items with few ratings).

## d) Experienced vs Less Active Users

In [None]:
from collections import defaultdict
from surprise import accuracy

# Collect every training rating under the raw id of the user who gave it.
user_ratings = defaultdict(list)
for inner_uid, _inner_iid, rating in trainset.all_ratings():
    raw_uid = trainset.to_raw_uid(inner_uid)
    user_ratings[raw_uid].append(rating)

# Split users by how many training ratings they have:
# 50 or more -> experienced, fewer than 50 -> less active.
experienced_users = []
less_active_users = []
for raw_uid, ratings in user_ratings.items():
    if len(ratings) >= 50:
        experienced_users.append(raw_uid)
    else:
        less_active_users.append(raw_uid)

In [14]:
# RMSE of the user-based KNN predictions, split by user activity group.

# Build sets once: testing membership against the original lists would scan
# the whole list for every single prediction.
experienced_ids = set(experienced_users)
less_active_ids = set(less_active_users)

experienced_predictions = []
less_active_predictions = []

for pred in predictions_user:
    uid = pred.uid
    if uid in experienced_ids:
        experienced_predictions.append(pred)
    elif uid in less_active_ids:
        less_active_predictions.append(pred)
    # A user found in neither group is left out of both buckets.


print("Overall RMSE:", accuracy.rmse(predictions_user, verbose=False))
print("Experienced Users RMSE:", accuracy.rmse(experienced_predictions, verbose=False))
print("Less Active Users RMSE:", accuracy.rmse(less_active_predictions, verbose=False))

Overall RMSE: 0.979360363804385
Experienced Users RMSE: 0.9664100806625855
Less Active Users RMSE: 1.0384448645328845


In [16]:
# RMSE of the SVD++ predictions, split by user activity group.

# Build sets once: testing membership against the original lists would scan
# the whole list for every single prediction.
experienced_ids = set(experienced_users)
less_active_ids = set(less_active_users)

experienced_predictions = []
less_active_predictions = []

for pred in predictions_svdpp:
    uid = pred.uid
    if uid in experienced_ids:
        experienced_predictions.append(pred)
    elif uid in less_active_ids:
        less_active_predictions.append(pred)
    # A user found in neither group is left out of both buckets.


print("Overall RMSE:", accuracy.rmse(predictions_svdpp, verbose=False))
print("Experienced Users RMSE:", accuracy.rmse(experienced_predictions, verbose=False))
print("Less Active Users RMSE:", accuracy.rmse(less_active_predictions, verbose=False))

Overall RMSE: 0.9219096621233217
Experienced Users RMSE: 0.9073056125178115
Less Active Users RMSE: 0.9880837842308781


## e) Coverage Evaluation

In [None]:
# How many different items were recommended?
def coverage(predictions, threshold=4.0):
    """Return the fraction of distinct predicted items that get recommended.

    An item counts as recommended when at least one of its predictions has an
    estimated rating of ``threshold`` or higher. The denominator is the number
    of distinct items present in ``predictions``, not the whole catalogue.

    Args:
        predictions: Iterable of 5-field records; the second field is the item
            id and the fourth is the estimated rating. Any iterable works,
            including one-shot iterators, because it is traversed only once.
        threshold: Minimum estimated rating for an item to count as
            recommended.

    Returns:
        A float between 0 and 1, or 0.0 when ``predictions`` is empty.
    """
    all_items = set()
    recommended_items = set()
    for _, iid, _, est, _ in predictions:
        all_items.add(iid)
        if est >= threshold:
            recommended_items.add(iid)

    # No predictions means nothing could be recommended; avoid dividing by zero.
    if not all_items:
        return 0.0
    return len(recommended_items) / len(all_items)

In [20]:
# Coverage of the user-based KNN predictions at the default threshold (4.0).
coverage(predictions_user)

0.2248968363136176

In [21]:
# Coverage of the SVD++ predictions at the default threshold (4.0).
coverage(predictions_svdpp)

0.4518569463548831