In [1]:
import os
import sys
sys.path.append('../')
import pickle
import pandas as pd
import numpy as np

# Evaluation of Collaborative Filtering (CF) Recommender Systems

In [2]:
# TEST SET: ground-truth ratings, renamed to the uid/iid column names used by the predictions
df_test = (
    pd.read_pickle("files/testset.pkl")[["reviewerID", "asin", "overall"]]
    .rename(columns={"reviewerID": "uid", "asin": "iid"})
)

# PREDICTIONS: nb is loaded from the kNN file, lf from the SVD file
# NOTE(review): unpickling executes arbitrary code -- only load trusted files.
nb = pd.read_pickle("files/preds_knn.pkl")
lf = pd.read_pickle("files/preds_svd.pkl")

# Detect users that have predictions but no rating in the test set
# (the message printed below refers to them as training-set users).
test_users = set(df_test["uid"])
nb_users = set(nb["uid"])
lf_users = set(lf["uid"])
nb_only_users = nb_users - test_users
lf_only_users = lf_users - test_users
# Compare the sets themselves: two equal sets are not guaranteed to iterate in the
# same order, so comparing list(set(...)) results could fail spuriously.
assert nb_only_users == lf_only_users, "kNN and SVD predictions cover different users"
# sorted() keeps these as lists and makes their order deterministic
nb_users_in_pred_but_not_in_test = sorted(nb_only_users)
lf_users_in_pred_but_not_in_test = sorted(lf_only_users)
print(f"There are {len(lf_users_in_pred_but_not_in_test)} users in the training set that are not in the test set.")

# Remove these users' predictions for evaluation
print("Lengths before removing preds not in test set:", len(nb), len(lf))
nb = nb[~nb.uid.isin(nb_users_in_pred_but_not_in_test)]
lf = lf[~lf.uid.isin(lf_users_in_pred_but_not_in_test)]
print("After removing:", len(nb), len(lf))

# RMSE over the (uid, iid) pairs present in both the predictions and the test set;
# the inner join silently skips pairs that appear on one side only.
nb_merge = nb.merge(df_test, how="inner", on=["uid", "iid"])
print("\nkNN RMSE:", np.sqrt(np.mean((nb_merge["overall"] - nb_merge["score"])**2)))

lf_merge = lf.merge(df_test, how="inner", on=["uid", "iid"])
print("SVD RMSE:", np.sqrt(np.mean((lf_merge["overall"] - lf_merge["score"])**2)))

There are 113 users in the training set that are not in the test set.
Lengths before removing preds not in test set: 1449029 1449029
After removing: 1359246 1359246

kNN RMSE: 1.1497250696788386
SVD RMSE: 0.9972278662314285


In [3]:
# Ranking metrics at several cut-offs k (implementations are in metrics.py)
from metrics import PatK, MAPatK, MRRatK, HRatK

ks = [5, 15]

print(" " * 12 + "NB | LF")

for k in ks:
    # Score both prediction frames (nb first, then lf) with every metric at this k
    P_nb = PatK(nb, df_test, k)
    P_lf = PatK(lf, df_test, k)
    MAP_nb = MAPatK(nb, df_test, k)
    MAP_lf = MAPatK(lf, df_test, k)
    MRR_nb = MRRatK(nb, df_test, k)
    MRR_lf = MRRatK(lf, df_test, k)
    MHR_nb = HRatK(nb, df_test, k)
    MHR_lf = HRatK(lf, df_test, k)

    # Labels are left-padded to three characters so the "@" signs line up
    table = [
        ("  P", P_nb, P_lf),
        ("MAP", MAP_nb, MAP_lf),
        ("MRR", MRR_nb, MRR_lf),
        (" HR", MHR_nb, MHR_lf),
    ]
    for label, score_nb, score_lf in table:
        print(f"{label}@{k:2g} = {score_nb:.4f}|{score_lf:.4f}")
    print()  # blank separator line after each cut-off


            NB | LF
  P@ 5 = 0.0023|0.0153
MAP@ 5 = 0.0070|0.0172
MRR@ 5 = 0.0070|0.0172
 HR@ 5 = 0.0117|0.0766

  P@15 = 0.0016|0.0092
MAP@15 = 0.0085|0.0264
MRR@15 = 0.0085|0.0264
 HR@15 = 0.0245|0.1373



## Error analysis

In [4]:
from metrics import relevant_column, first

# Training ratings, restricted to the user, item and rating columns
train = pd.read_pickle("files/train.pkl").loc[:, ["reviewerID", "asin", "overall"]]

# Identify users where the model performs badly: build one score per user from the
# kNN predictions by applying `first` to each user's rows.
# NOTE(review): the third argument (30) is presumably the cut-off k, and the exact
# meaning of `relevant_column` / `first` is defined in metrics.py -- confirm there.
merged = relevant_column(nb, df_test, 30)
per_user_columns = merged[["uid", "iid", "relevant"]]
scores = per_user_columns.groupby("uid").apply(first)

In [5]:
# Mean number of training reviews: users with a non-zero score vs. all users
ids = scores[scores != 0].index
reviews_per_user = train.groupby("reviewerID").size()
reviews_per_user[ids].mean(), reviews_per_user.mean()

(4.262376237623762, 5.576206140350878)

In [6]:
# Mean number of training reviews per item: items rated by the selected users vs. all items
good_asins = train.loc[train["reviewerID"].isin(ids), "asin"].unique()
reviews_per_item = train.groupby("asin").size()
reviews_per_item[good_asins].mean(), reviews_per_item.mean()

(18.58450704225352, 12.71375)

In [7]:
# Identify the users for the 'first' part of the project description's error analysis:
# the reviewers of the earliest and of the latest review in the test set.
dft = pd.read_pickle("files/testset.pkl")[["reviewerID", "asin", "overall", "unixReviewTime"]]
dft = dft.sort_values("unixReviewTime").reset_index(drop=True)

reviewers_by_time = dft["reviewerID"]
uid_first = reviewers_by_time.iloc[0]
uid_last = reviewers_by_time.iloc[-1]
uid_first, uid_last


('A2G0O4Y8QE10AE', 'A2SACTIFMC5DXO')

In [8]:
# Full training set; only rating, user and item are needed for the similarity matrix
df_train = pd.read_pickle("files/train.pkl")
df = df_train.loc[:, ["overall", "reviewerID", "asin"]]

# User x item rating matrix (one row per reviewerID, one column per asin)
df_with_nans = pd.pivot_table(df, values="overall", index="reviewerID", columns="asin")
# NOTE(review): unrated items are filled with 0 before correlating, so a missing
# rating is treated as a rating of 0 -- confirm this matches the kNN model's similarity.
df = df_with_nans.fillna(0)

# Pearson correlation between every pair of users (users become columns after transposing)
corrs = df.transpose().corr(method="pearson")

In [9]:
# Ten most correlated other users for each of the two target users.
# The user's own entry is dropped by label before ranking: slicing off the first of
# the 11 largest values assumes the self-correlation is ranked first, which fails
# when another user ties at 1.0 or the self-correlation has float round-off.
first_nbs = corrs.loc[uid_first].drop(uid_first).nlargest(10)
last_nbs = corrs.loc[uid_last].drop(uid_last).nlargest(10)
first_nbs

reviewerID
A3FY1GXS48WR8B    0.564608
A37MH7ICH80QOX    0.498747
A11JU33HMT5XPU    0.440051
ACQYIC13JXAOI     0.426855
ADY836HK6QSYR     0.414072
A2E1EFNIZL2FVA    0.406457
A1JMB7RDVEMN71    0.398215
A3L8XRYMLZZES6    0.362919
A1GNYV0RA0EQSS    0.362127
A2D66KSHQQHOSD    0.356498
Name: A2G0O4Y8QE10AE, dtype: float64

In [10]:
# Items each of the two target users reviewed in the training set
reviewed_asins_first = df_train.loc[df_train["reviewerID"] == uid_first, "asin"]
reviewed_asins_last = df_train.loc[df_train["reviewerID"] == uid_last, "asin"]
reviewed_asins_first, reviewed_asins_last

(2492    B0017I8NQM
 2407    B0014KJ6EQ
 Name: asin, dtype: object,
 7861     B00EDSI8HW
 12661    B00EFRMECQ
 7506     B00CTTEKJW
 8747     B00FZ0E0HE
 8732     B00FZ0FETC
 10851    B01019T6O0
 11569    B015IHWAZW
 Name: asin, dtype: object)

In [13]:
# Rating history of the neighbours in last_nbs: one frame of training reviews per
# neighbour, concatenated in neighbour order. Use first_nbs.index instead and rerun
# to get the rating history of the other user's neighbours.
history_columns = ["overall", "reviewerID", "asin", "reviewTime"]
nb_reviews = [
    df_train.loc[df_train["reviewerID"] == rid, history_columns]
    for rid in last_nbs.index
]
rating_history = pd.concat(nb_reviews).reset_index(drop=True)
rating_history

Unnamed: 0,overall,reviewerID,asin,reviewTime
0,5.0,A2KIIQX2TH70MR,B00FZ0FK0U,"10 30, 2016"
1,5.0,A2KIIQX2TH70MR,B0095CATEG,"10 30, 2016"
2,5.0,A2KIIQX2TH70MR,B00FZ0E0HE,"10 30, 2016"
3,1.0,A2KIIQX2TH70MR,B0144NYGJY,"10 30, 2016"
4,5.0,A2KIIQX2TH70MR,B00FZ0FETC,"10 30, 2016"
5,5.0,A2CHQH95XQUY4E,B00FZ0E0HE,"03 8, 2014"
6,5.0,A2CHQH95XQUY4E,B00FZ0FK0U,"12 16, 2014"
7,5.0,A2CHQH95XQUY4E,B00FZ0FETC,"12 16, 2014"
8,5.0,A2CHQH95XQUY4E,B00PG8FWS6,"04 4, 2015"
9,4.0,A2CHQH95XQUY4E,B01637ROB6,"03 12, 2016"


In [12]:
# Number of training ratings for each item that uid_first reviewed
is_first_user_item = df_train["asin"].isin(reviewed_asins_first)
df_train.loc[is_first_user_item].groupby("asin")["overall"].count()

asin
B0014KJ6EQ     4
B0017I8NQM    17
Name: overall, dtype: int64