In [4]:

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, root_mean_squared_error, mean_absolute_error

base_path = '..\..\..\data\main'


In [20]:

def evaluate_topk_fast(df, k=10):
    # Pre-sort so top-k is at the top per user
    df = df.sort_values(['UserID', 'Rank_pred'], ascending=[True, True])

    # Assign group index per row (unique integer per user)
    user_index, user_pos = np.unique(df['UserID'], return_inverse=True)

    # Count items per user
    user_counts = np.bincount(user_pos)
    user_offsets = np.zeros(len(df), dtype=int)
    np.add.at(user_offsets, np.cumsum(user_counts)[:-1], 1)
    user_offsets = np.cumsum(user_offsets)

    # Mask to keep only top-k per user
    df['row_number'] = df.groupby('UserID').cumcount()
    topk_df = df[df['row_number'] < k].copy()

    # Precision@k
    precision = topk_df['Relevance'].groupby(topk_df['UserID']).mean().mean()

    # Recall@k
    relevant_per_user = df.groupby('UserID')['Relevance'].sum()
    hits_per_user = topk_df.groupby('UserID')['Relevance'].sum()
    recall = (hits_per_user / relevant_per_user).fillna(0).mean()

    # HitRate@k
    hits = (hits_per_user > 0).astype(int)
    hit_rate = hits.mean()

    # MAP@k
    def map_at_k_per_user(x):
        rels = x['Relevance'].values
        precisions = [(rels[:i + 1].sum() / (i + 1)) for i in range(len(rels)) if rels[i]]
        return np.mean(precisions) if precisions else 0
    mapk = topk_df.groupby('UserID').apply(map_at_k_per_user, include_groups=False).mean()

    # nDCG@k
    def dcg(rels):
        return np.sum(rels / np.log2(np.arange(2, len(rels) + 2)))
    def ndcg_per_user(x):
        dcg_val = dcg(x['Relevance'].values)
        ideal = x.sort_values('Relevance', ascending=False).head(k)
        idcg_val = dcg(ideal['Relevance'].values)
        return dcg_val / idcg_val if idcg_val > 0 else 0
    ndcg = topk_df.groupby('UserID').apply(ndcg_per_user, include_groups=False).mean()

    return {
        f'Precision@{k}': precision,
        f'Recall@{k}': recall,
        f'HitRate@{k}': hit_rate,
        f'MAP@{k}': mapk,
        f'nDCG@{k}': ndcg
    }

# LightGBM

In [8]:
# Load results
results_uwarm_iwarm = pd.read_csv(f'{base_path}\\lightgbm\\lightgbm_warm_user_warm_item.csv', header= 0, names=['RatingID','Rating_pred'])
results_uwarm_icold = pd.read_csv(f'{base_path}\\lightgbm\\lightgbm_warm_user_cold_item.csv', header= 0, names=['RatingID','Rating_pred'])
results_ucold_iwarm = pd.read_csv(f'{base_path}\\lightgbm\\lightgbm_cold_user_warm_item.csv', header= 0, names=['RatingID','Rating_pred'])
results_ucold_icold = pd.read_csv(f'{base_path}\\lightgbm\\lightgbm_cold_user_cold_item.csv', header= 0, names=['RatingID','Rating_pred'])

# Load the test set
test_uwarm_iwarm = pd.read_csv(f'{base_path}\\testset_warm_user_warm_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])
test_uwarm_icold = pd.read_csv(f'{base_path}\\testset_warm_user_cold_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])
test_ucold_iwarm = pd.read_csv(f'{base_path}\\testset_cold_user_warm_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])
test_ucold_icold = pd.read_csv(f'{base_path}\\testset_cold_user_cold_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])

# Merge the results with the test set
results_uwarm_iwarm = results_uwarm_iwarm.merge(test_uwarm_iwarm, on='RatingID', how='left')
results_uwarm_icold = results_uwarm_icold.merge(test_uwarm_icold, on='RatingID', how='left')
results_ucold_iwarm = results_ucold_iwarm.merge(test_ucold_iwarm, on='RatingID', how='left')
results_ucold_icold = results_ucold_icold.merge(test_ucold_icold, on='RatingID', how='left')


In [9]:
evaluated_results_lgbm = {
    'warm_user_warm_item': {},
    'warm_user_cold_item': {},
    'cold_user_warm_item': {},
    'cold_user_cold_item': {}
}

In [10]:
# MSE
evaluated_results_lgbm['warm_user_warm_item']['MSE'] = mean_squared_error(results_uwarm_iwarm['Rating'], results_uwarm_iwarm['Rating_pred'])
evaluated_results_lgbm['warm_user_cold_item']['MSE'] = mean_squared_error(results_uwarm_icold['Rating'], results_uwarm_icold['Rating_pred'])
evaluated_results_lgbm['cold_user_warm_item']['MSE'] = mean_squared_error(results_ucold_iwarm['Rating'], results_ucold_iwarm['Rating_pred'])
evaluated_results_lgbm['cold_user_cold_item']['MSE'] = mean_squared_error(results_ucold_icold['Rating'], results_ucold_icold['Rating_pred'])
# RMSE
evaluated_results_lgbm['warm_user_warm_item']['RMSE'] = root_mean_squared_error(results_uwarm_iwarm['Rating'], results_uwarm_iwarm['Rating_pred'])
evaluated_results_lgbm['warm_user_cold_item']['RMSE'] = root_mean_squared_error(results_uwarm_icold['Rating'], results_uwarm_icold['Rating_pred'])
evaluated_results_lgbm['cold_user_warm_item']['RMSE'] = root_mean_squared_error(results_ucold_iwarm['Rating'], results_ucold_iwarm['Rating_pred'])
evaluated_results_lgbm['cold_user_cold_item']['RMSE'] = root_mean_squared_error(results_ucold_icold['Rating'], results_ucold_icold['Rating_pred'])
# MAE
evaluated_results_lgbm['warm_user_warm_item']['MAE'] = mean_absolute_error(results_uwarm_iwarm['Rating'], results_uwarm_iwarm['Rating_pred'])
evaluated_results_lgbm['warm_user_cold_item']['MAE'] = mean_absolute_error(results_uwarm_icold['Rating'], results_uwarm_icold['Rating_pred'])
evaluated_results_lgbm['cold_user_warm_item']['MAE'] = mean_absolute_error(results_ucold_iwarm['Rating'], results_ucold_iwarm['Rating_pred'])
evaluated_results_lgbm['cold_user_cold_item']['MAE'] = mean_absolute_error(results_ucold_icold['Rating'], results_ucold_icold['Rating_pred'])

In [11]:
# Create Rank, Rank_pred and Relevance columns

# Warm user, warm item
results_uwarm_iwarm["Rank"] = results_uwarm_iwarm.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_uwarm_iwarm["Rank_pred"] = results_uwarm_iwarm.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)
# Warm user, cold item
results_uwarm_icold["Rank"] = results_uwarm_icold.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_uwarm_icold["Rank_pred"] = results_uwarm_icold.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)
# Cold user, warm item
results_ucold_iwarm["Rank"] = results_ucold_iwarm.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_ucold_iwarm["Rank_pred"] = results_ucold_iwarm.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)
# Cold user, cold item
results_ucold_icold["Rank"] = results_ucold_icold.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_ucold_icold["Rank_pred"] = results_ucold_icold.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)

# Calculate Relevance
results_uwarm_iwarm["Relevance"] = results_uwarm_iwarm["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)
results_uwarm_icold["Relevance"] = results_uwarm_icold["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)
results_ucold_iwarm["Relevance"] = results_ucold_iwarm["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)
results_ucold_icold["Relevance"] = results_ucold_icold["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)


In [None]:
# Run evaluation
ks = [10, 20, 50, 100]

for k in ks:
    evaluated_results_lgbm['warm_user_warm_item']['topk'] = evaluate_topk_fast(results_uwarm_iwarm, k=k)
    evaluated_results_lgbm['warm_user_cold_item']['topk'] = evaluate_topk_fast(results_uwarm_icold, k=k)
    evaluated_results_lgbm['cold_user_warm_item']['topk'] = evaluate_topk_fast(results_ucold_iwarm, k=k)
    evaluated_results_lgbm['cold_user_cold_item']['topk'] = evaluate_topk_fast(results_ucold_icold, k=k)

    # Print evaluation results
    for case, metrics in evaluated_results_lgbm.items():
        print(f"Evaluation on {case}:")
        print('-' * 25 + 'MSE, RMSE, MAE' + '-' * 25) 
        print(f"MSE: {metrics['MSE']:.4f}")
        print(f"RMSE: {metrics['RMSE']:.4f}")
        print(f"MAE: {metrics['MAE']:.4f}")
        print('-' * 25 + 'Top-K Metrics' + '-' * 25)
        print(f"Precision@{k}: {metrics['topk']['Precision@k']:.4f}")
        print(f"Recall@{k}: {metrics['topk']['Recall@k']:.4f}")
        print(f"HitRate@{k}: {metrics['topk']['HitRate@k']:.4f}")
        print(f"MAP@{k}: {metrics['topk']['MAP@k']:.4f}")
        print(f"nDCG@{k}: {metrics['topk']['nDCG@k']:.4f}")
        print('-' * 50)
    

  mapk = topk_df.groupby('UserID').apply(map_at_k_per_user).mean()
  ndcg = topk_df.groupby('UserID').apply(ndcg_per_user).mean()
  mapk = topk_df.groupby('UserID').apply(map_at_k_per_user).mean()
  ndcg = topk_df.groupby('UserID').apply(ndcg_per_user).mean()
  mapk = topk_df.groupby('UserID').apply(map_at_k_per_user).mean()
  ndcg = topk_df.groupby('UserID').apply(ndcg_per_user).mean()
  mapk = topk_df.groupby('UserID').apply(map_at_k_per_user).mean()


Evaluation on warm_user_warm_item:
-------------------------MSE, RMSE, MAE-------------------------
MSE: 0.3818
RMSE: 0.6179
MAE: 0.4561
-------------------------Top-K Metrics-------------------------
Precision@10: 0.8815
Recall@10: 0.9235
HitRate@10: 0.9526
MAP@10: 0.9236
nDCG@10: 0.9346
--------------------------------------------------
Evaluation on warm_user_cold_item:
-------------------------MSE, RMSE, MAE-------------------------
MSE: 0.4436
RMSE: 0.6660
MAE: 0.5046
-------------------------Top-K Metrics-------------------------
Precision@10: 0.8389
Recall@10: 0.8605
HitRate@10: 0.8605
MAP@10: 0.8508
nDCG@10: 0.8537
--------------------------------------------------
Evaluation on cold_user_warm_item:
-------------------------MSE, RMSE, MAE-------------------------
MSE: 0.4804
RMSE: 0.6931
MAE: 0.5139
-------------------------Top-K Metrics-------------------------
Precision@10: 0.8182
Recall@10: 0.9226
HitRate@10: 0.9929
MAP@10: 0.9103
nDCG@10: 0.9452
----------------------------

  ndcg = topk_df.groupby('UserID').apply(ndcg_per_user).mean()


# XGBoost

In [13]:
# Load results
results_uwarm_iwarm = pd.read_csv(f'{base_path}\\xgboost\\xgboost_warm_user_warm_item.csv', header= 0, names=['RatingID','Rating_pred'])
results_uwarm_icold = pd.read_csv(f'{base_path}\\xgboost\\xgboost_warm_user_cold_item.csv', header= 0, names=['RatingID','Rating_pred'])
results_ucold_iwarm = pd.read_csv(f'{base_path}\\xgboost\\xgboost_cold_user_warm_item.csv', header= 0, names=['RatingID','Rating_pred'])
results_ucold_icold = pd.read_csv(f'{base_path}\\xgboost\\xgboost_cold_user_cold_item.csv', header= 0, names=['RatingID','Rating_pred'])

# Load the test set
test_uwarm_iwarm = pd.read_csv(f'{base_path}\\testset_warm_user_warm_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])
test_uwarm_icold = pd.read_csv(f'{base_path}\\testset_warm_user_cold_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])
test_ucold_iwarm = pd.read_csv(f'{base_path}\\testset_cold_user_warm_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])
test_ucold_icold = pd.read_csv(f'{base_path}\\testset_cold_user_cold_item.csv', usecols=['RatingID', 'UserID', 'WineID', 'Rating'])

# Merge the results with the test set
results_uwarm_iwarm = results_uwarm_iwarm.merge(test_uwarm_iwarm, on='RatingID', how='left')
results_uwarm_icold = results_uwarm_icold.merge(test_uwarm_icold, on='RatingID', how='left')
results_ucold_iwarm = results_ucold_iwarm.merge(test_ucold_iwarm, on='RatingID', how='left')
results_ucold_icold = results_ucold_icold.merge(test_ucold_icold, on='RatingID', how='left')


In [14]:
evaluated_results_xgb = {
    'warm_user_warm_item': {},
    'warm_user_cold_item': {},
    'cold_user_warm_item': {},
    'cold_user_cold_item': {}
}

In [16]:
# MSE
evaluated_results_xgb['warm_user_warm_item']['MSE'] = mean_squared_error(results_uwarm_iwarm['Rating'], results_uwarm_iwarm['Rating_pred'])
evaluated_results_xgb['warm_user_cold_item']['MSE'] = mean_squared_error(results_uwarm_icold['Rating'], results_uwarm_icold['Rating_pred'])
evaluated_results_xgb['cold_user_warm_item']['MSE'] = mean_squared_error(results_ucold_iwarm['Rating'], results_ucold_iwarm['Rating_pred'])
evaluated_results_xgb['cold_user_cold_item']['MSE'] = mean_squared_error(results_ucold_icold['Rating'], results_ucold_icold['Rating_pred'])
# RMSE
evaluated_results_xgb['warm_user_warm_item']['RMSE'] = root_mean_squared_error(results_uwarm_iwarm['Rating'], results_uwarm_iwarm['Rating_pred'])
evaluated_results_xgb['warm_user_cold_item']['RMSE'] = root_mean_squared_error(results_uwarm_icold['Rating'], results_uwarm_icold['Rating_pred'])
evaluated_results_xgb['cold_user_warm_item']['RMSE'] = root_mean_squared_error(results_ucold_iwarm['Rating'], results_ucold_iwarm['Rating_pred'])
evaluated_results_xgb['cold_user_cold_item']['RMSE'] = root_mean_squared_error(results_ucold_icold['Rating'], results_ucold_icold['Rating_pred'])
# MAE
evaluated_results_xgb['warm_user_warm_item']['MAE'] = mean_absolute_error(results_uwarm_iwarm['Rating'], results_uwarm_iwarm['Rating_pred'])
evaluated_results_xgb['warm_user_cold_item']['MAE'] = mean_absolute_error(results_uwarm_icold['Rating'], results_uwarm_icold['Rating_pred'])
evaluated_results_xgb['cold_user_warm_item']['MAE'] = mean_absolute_error(results_ucold_iwarm['Rating'], results_ucold_iwarm['Rating_pred'])
evaluated_results_xgb['cold_user_cold_item']['MAE'] = mean_absolute_error(results_ucold_icold['Rating'], results_ucold_icold['Rating_pred'])

In [17]:
# Create Rank, Rank_pred and Relevance columns

# Warm user, warm item
results_uwarm_iwarm["Rank"] = results_uwarm_iwarm.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_uwarm_iwarm["Rank_pred"] = results_uwarm_iwarm.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)
# Warm user, cold item
results_uwarm_icold["Rank"] = results_uwarm_icold.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_uwarm_icold["Rank_pred"] = results_uwarm_icold.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)
# Cold user, warm item
results_ucold_iwarm["Rank"] = results_ucold_iwarm.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_ucold_iwarm["Rank_pred"] = results_ucold_iwarm.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)
# Cold user, cold item
results_ucold_icold["Rank"] = results_ucold_icold.groupby("UserID")["Rating"].rank(method="first", ascending=False)
results_ucold_icold["Rank_pred"] = results_ucold_icold.groupby("UserID")["Rating_pred"].rank(method="first", ascending=False)

# Calculate Relevance
results_uwarm_iwarm["Relevance"] = results_uwarm_iwarm["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)
results_uwarm_icold["Relevance"] = results_uwarm_icold["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)
results_ucold_iwarm["Relevance"] = results_ucold_iwarm["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)
results_ucold_icold["Relevance"] = results_ucold_icold["Rating"].apply(lambda x: 1 if x >= 3.5 else 0)


In [22]:
# Run evaluation
ks = [10, 20, 50, 100]

for k in ks:
    evaluated_results_xgb['warm_user_warm_item']['topk'] = evaluate_topk_fast(results_uwarm_iwarm, k=k)
    evaluated_results_xgb['warm_user_cold_item']['topk'] = evaluate_topk_fast(results_uwarm_icold, k=k)
    evaluated_results_xgb['cold_user_warm_item']['topk'] = evaluate_topk_fast(results_ucold_iwarm, k=k)
    evaluated_results_xgb['cold_user_cold_item']['topk'] = evaluate_topk_fast(results_ucold_icold, k=k)

    # Print evaluation results
    for case, metrics in evaluated_results_xgb.items():
        print(f"Evaluation on {case}:")
        print('-' * 25 + 'MSE, RMSE, MAE' + '-' * 25) 
        print(f"MSE: {metrics['MSE']:.4f}")
        print(f"RMSE: {metrics['RMSE']:.4f}")
        print(f"MAE: {metrics['MAE']:.4f}")
        print('-' * 25 + 'Top-K Metrics' + '-' * 25)
        print(f"Precision@{k}: {metrics['topk'][f'Precision@{k}']:.4f}")
        print(f"Recall@{k}: {metrics['topk'][f'Recall@{k}']:.4f}")
        print(f"HitRate@{k}: {metrics['topk'][f'HitRate@{k}']:.4f}")
        print(f"MAP@{k}: {metrics['topk'][f'MAP@{k}']:.4f}")
        print(f"nDCG@{k}: {metrics['topk'][f'nDCG@{k}']:.4f}")
        print('-' * 50)

Evaluation on warm_user_warm_item:
-------------------------MSE, RMSE, MAE-------------------------
MSE: 0.3757
RMSE: 0.6130
MAE: 0.4513
-------------------------Top-K Metrics-------------------------
Precision@10: 0.8817
Recall@10: 0.9236
HitRate@10: 0.9526
MAP@10: 0.9246
nDCG@10: 0.9353
--------------------------------------------------
Evaluation on warm_user_cold_item:
-------------------------MSE, RMSE, MAE-------------------------
MSE: 0.4668
RMSE: 0.6832
MAE: 0.4953
-------------------------Top-K Metrics-------------------------
Precision@10: 0.8389
Recall@10: 0.8605
HitRate@10: 0.8605
MAP@10: 0.8504
nDCG@10: 0.8533
--------------------------------------------------
Evaluation on cold_user_warm_item:
-------------------------MSE, RMSE, MAE-------------------------
MSE: 0.4778
RMSE: 0.6912
MAE: 0.5134
-------------------------Top-K Metrics-------------------------
Precision@10: 0.8186
Recall@10: 0.9229
HitRate@10: 0.9929
MAP@10: 0.9122
nDCG@10: 0.9464
----------------------------