In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
import matplotlib.pyplot as plt
import tqdm

In [2]:
random_state = 42
n_bootstraped_samples = 1000

In [3]:
def map_rating_class(rating: float):
    return round(rating / 0.5) * 0.5

In [4]:
def align_dfs(true_df, pred_df):
    if true_df.shape != pred_df.shape:
        raise ValueError("The input dataframe have different dimensions")
    df = pd.merge(true_df, pred_df, on="RatingID")
    print(df.head())
    return df

In [5]:
def bootstrap_rmse(y_true, y_pred, n_bootstrap=n_bootstraped_samples, ci=95, random_state=random_state):
    rng = np.random.default_rng(random_state)
    rmse_score = []

    n = len(y_true)
    for _ in tqdm.tqdm(range(n_bootstrap)):
        indices = rng.choice(n, size=n, replace=True)
        sample_true = y_true[indices]
        sample_pred = y_pred[indices]
        
        rmse = root_mean_squared_error(sample_true, sample_pred)
        rmse_score.append(rmse)

    # Convert to numpy for easy math
    rmse_score = np.array(rmse_score)

    # Confidence interval
    lower = np.percentile(rmse_score, (100 - ci) / 2)
    upper = np.percentile(rmse_score, 100 - (100 - ci) / 2)

    return {
        'rmse_distribution': rmse_score,
        'mean': rmse_score.mean(),
        'ci': (lower, upper)
    }

In [6]:
# read all true y datasets
true_cold_user_cold_item = pd.read_csv(r"../testset_cold_user_cold_item.csv")
true_cold_user_warm_item = pd.read_csv(r"../testset_cold_user_warm_item.csv")
true_warm_user_cold_item = pd.read_csv(r"../testset_warm_user_cold_item.csv")
true_warm_user_warm_item = pd.read_csv(r"../testset_warm_user_warm_item.csv")

# only keep ReviewID, Rating
true_cold_user_cold_item = true_cold_user_cold_item[["RatingID", "Rating"]]
true_cold_user_warm_item = true_cold_user_warm_item[["RatingID", "Rating"]]
true_warm_user_cold_item = true_warm_user_cold_item[["RatingID", "Rating"]]
true_warm_user_warm_item = true_warm_user_warm_item[["RatingID", "Rating"]]


In [7]:
# read all predicted y datasets
vincent_cold_user_cold_item = pd.read_csv(r"../Vincent_evaluation_results/predictions_CU_CI.csv")
vincent_cold_user_warm_item = pd.read_csv(r"../Vincent_evaluation_results/predictions_CU_WI.csv")
vincent_warm_user_cold_item = pd.read_csv(r"../Vincent_evaluation_results/predictions_WU_CI.csv")
vincent_warm_user_warm_item = pd.read_csv(r"../Vincent_evaluation_results/predictions_WU_WI.csv")

# round ratings to closest .5 value
vincent_cold_user_cold_item["PredictedRating"] = vincent_cold_user_cold_item["PredictedRating"].map(lambda x: round(x / 0.5) * 0.5)
vincent_cold_user_warm_item["PredictedRating"] = vincent_cold_user_warm_item["PredictedRating"].map(lambda x: round(x / 0.5) * 0.5)
vincent_warm_user_cold_item["PredictedRating"] = vincent_warm_user_cold_item["PredictedRating"].map(lambda x: round(x / 0.5) * 0.5)
vincent_warm_user_warm_item["PredictedRating"] = vincent_warm_user_warm_item["PredictedRating"].map(lambda x: round(x / 0.5) * 0.5)

### Vincents Model-based Recommender System

#### Hot user, Hot Wine

In [8]:
aligned = align_dfs(true_warm_user_warm_item, vincent_warm_user_warm_item)
y_true = aligned['Rating'].values
y_pred = aligned['PredictedRating'].values
if len(y_true) != len(y_pred):
    raise ValueError("Different amount of rating")
results = bootstrap_rmse(y_true, y_pred)
print(results["mean"])
print(results["ci"])

   RatingID  Rating  PredictedRating
0  17115002     4.5              4.0
1  17115008     3.0              4.0
2   8821172     4.0              4.0
3  15626850     4.0              3.5
4  15627422     4.0              4.5


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:34<00:00, 10.57it/s]

0.8076758159543219
(np.float64(0.8052589283937425), np.float64(0.8100114552991685))





#### Hot user, Cold Wine

In [9]:
aligned = align_dfs(true_warm_user_cold_item, vincent_warm_user_cold_item)
y_true = aligned['Rating'].values
y_pred = aligned['PredictedRating'].values
if len(y_true) != len(y_pred):
    raise ValueError("Different amount of rating")
results = bootstrap_rmse(y_true, y_pred)
print(results["mean"])
print(results["ci"])

   RatingID  Rating  PredictedRating
0  18311916     3.5              4.0
1  18303342     4.0              4.0
2  20547045     4.0              4.0
3  11900766     3.5              4.0
4  18273405     3.5              4.0


100%|████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1000.21it/s]

0.6982039367735612
(np.float64(0.6908314064682013), np.float64(0.7060356421755899))





#### Cold user, Hot Wine

In [10]:
aligned = align_dfs(true_cold_user_warm_item, vincent_cold_user_warm_item)
y_true = aligned['Rating'].values
y_pred = aligned['PredictedRating'].values
if len(y_true) != len(y_pred):
    raise ValueError("Different amount of rating")
results = bootstrap_rmse(y_true, y_pred)
print(results["mean"])
print(results["ci"])

   RatingID  Rating  PredictedRating
0  17618447     4.0              3.5
1  19286240     5.0              3.5
2  18939364     4.5              3.5
3  14312250     4.5              3.5
4  18967247     5.0              3.5


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:24<00:00, 40.95it/s]


1.0827310279498885
(np.float64(1.0767296820934482), np.float64(1.088779015407571))


#### Cold user, Cold Wine

In [11]:
aligned = align_dfs(true_cold_user_cold_item, vincent_cold_user_cold_item)
y_true = aligned['Rating'].values
y_pred = aligned['PredictedRating'].values
if len(y_true) != len(y_pred):
    raise ValueError("Different amount of rating")
results = bootstrap_rmse(y_true, y_pred)
print(results["mean"])
print(results["ci"])

   RatingID  Rating  PredictedRating
0  18274504     5.0              4.0
1  18317678     5.0              4.0
2  18317912     3.5              4.0
3  15708051     3.5              4.0
4   8822957     4.0              4.0


100%|████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:00<00:00, 1675.96it/s]

0.8644685161465561
(np.float64(0.8528238571153763), np.float64(0.8764994324254499))





### Denis Model-based Recommender Systems

#### LightGBM

#### SVD

#### XGBoost

# Compare bootstraped sampled difference for best performing models

In [12]:
for _ in range(1000):
    idx = rng.choice(len(y_true), size=len(y_true), replace=True)
    rmse_a = root_mean_squared_error(y_true[idx], y_pred_a[idx])
    rmse_b = root_mean_squared_error(y_true[idx], y_pred_b[idx])
    diff = mae_b - mae_a  # Positive means A is better
    diff_scores.append(diff)


NameError: name 'rng' is not defined