In [1]:
import time
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from data_utils import *
from evaluate_cbf import *
from models import *
from train import *

%load_ext autoreload
%autoreload 2

In [2]:
# Initialize DataFrame for results

try:
    df_results = pd.read_csv('model_results.csv')
except FileNotFoundError:
    df_results = pd.DataFrame(columns=['Model', 'K', 'Recall', 'NDCG', 'ILD', 'F1'])

def update_results(model, k, recall, ndcg, ild, f1):
    global df_results
    df_results = df_results.append({
        'Model': model,
        'K': k,
        'Recall': recall,
        'NDCG': ndcg,
        'ILD': ild,
        'F1': f1
    }, ignore_index=True)
    df_results.to_csv('model_results.csv', index=False)


In [6]:
# Initialise global parameters

seed = 4242
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.set_num_threads(torch.get_num_threads())

emb_size = 100
lr = 0.001
dropout = 0.0
batch_size = 128
epochs = 500
device = "cpu"
top_k = [10,20,30,40,50,60,70,80,90,100]
log_name = "log"
num_categories = 368
num_visual_features = 512
embedding_dim = 64
hidden_dim = 64
diversity_param = 0


In [7]:
# Load data
user_num, item_num, train_dict, valid_dict, test_dict, train_data, valid_gt, test_gt, category_features, category_features_onehot, visual_features, train_user_profiles, valid_user_profiles, test_user_profiles = load_data()

Number of users: 506, Number of items: 1674
Training samples: 12358, Validation samples: 406, Test samples: 406
Category features shape: (1674, 368)
Visual features shape: (512,)


In [9]:
# Train CBF

train_cbf('ContentBasedModel', user_num, item_num, train_data, valid_dict, train_dict, category_features, category_features_onehot, visual_features, train_user_profiles, num_categories, num_visual_features, 
          hidden_dim=hidden_dim,
          top_k=top_k,
          epochs=epochs,
          batch_size=batch_size, 
          lr=lr, 
          device=device, 
          diversity_param=diversity_param)

Epoch 1, Loss: 0.695481602064113, Time elapsed: 28.55s
Top-10: Avg Recall: 0.0271, Avg NDCG: 0.0124, Avg ILD: 0.9808, Avg F1 Score: 0.0245
Top-20: Avg Recall: 0.0271, Avg NDCG: 0.0124, Avg ILD: 0.9954, Avg F1 Score: 0.0245
Top-30: Avg Recall: 0.0345, Avg NDCG: 0.0140, Avg ILD: 0.9868, Avg F1 Score: 0.0275
Top-40: Avg Recall: 0.0517, Avg NDCG: 0.0173, Avg ILD: 0.9832, Avg F1 Score: 0.0340
Top-50: Avg Recall: 0.0591, Avg NDCG: 0.0186, Avg ILD: 0.9835, Avg F1 Score: 0.0366
Top-60: Avg Recall: 0.0690, Avg NDCG: 0.0203, Avg ILD: 0.9788, Avg F1 Score: 0.0399
Top-70: Avg Recall: 0.0739, Avg NDCG: 0.0212, Avg ILD: 0.9790, Avg F1 Score: 0.0414
Top-80: Avg Recall: 0.0764, Avg NDCG: 0.0216, Avg ILD: 0.9801, Avg F1 Score: 0.0422
Top-90: Avg Recall: 0.0862, Avg NDCG: 0.0231, Avg ILD: 0.9833, Avg F1 Score: 0.0451
Top-100: Avg Recall: 0.0936, Avg NDCG: 0.0242, Avg ILD: 0.9841, Avg F1 Score: 0.0473
New best model saved with Recall: 0.027093596059113302, F1: 0.024470447078427788, model path: ./models/b

KeyboardInterrupt: 

In [None]:
# Test CBF model
cbf_model = ContentBasedModel(num_categories, num_visual_features, hidden_dim)
cbf_model.load_state_dict(torch.load('./models/best_model_CBF.pth'))
cbf_model.to(device)
cbf_model.eval()
recommends, results = metrics_cbf(cbf_model, top_k, train_dict, test_dict, test_user_profiles, category_features, category_features_onehot, visual_features, device, diversity_param, is_training=False)

for k in top_k:
    update_results('CBF', k, results[k]['Recall'], results[k]['NDCG'], results[k]['ILD'], results[k]['F1'])

In [None]:
# Test the CBF model with different diversity parameters
top_k = [10, 20, 50, 100]
diversity_params = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
f1_scores = {k: [] for k in top_k}

for diversity_param in diversity_params:
    model = ContentBasedModel(num_categories, num_visual_features, hidden_dim)
    model.load_state_dict(torch.load("./models/best_model-cbf-initial.pth"))
    model.to(device)
    _, results = metrics_cbf(model, top_k, train_dict, test_dict, test_user_profiles, category_features, category_features_onehot, visual_features, device, diversity_param, is_training=False)
    for k in top_k:
        f1_scores[k].append(results[k]['F1'])

plt.figure(figsize=(10, 6))

for k in top_k:
    plt.plot(diversity_params, f1_scores[k], label=f'Top-{k}')

plt.title('Diversity Parameter vs F1 Score for Different Top-K')
plt.xlabel('Diversity Parameter')
plt.ylabel('F1 Score')
plt.legend()
plt.grid(True)
plt.show()

In [10]:
def evaluate(model, top_k, train_dict, gt_dict, valid_dict, item_num, flag, device):
    recommends = []
    for i in range(len(top_k)):
        recommends.append([])

    with torch.no_grad():
        pred_list_all = []
        for i in gt_dict.keys():  # for each user
            if len(gt_dict[i]) != 0:  # if
                user = torch.full((item_num,), i, dtype=torch.int64).to(device)  # create n_item users for prediction
                item = torch.arange(0, item_num, dtype=torch.int64).to(device)
                prediction = model(user, item)
                prediction = prediction.detach().cpu().numpy().tolist()
                for j in train_dict[i]:  # mask train
                    prediction[j] -= float('inf')
                if flag == 1:  # mask validation
                    if i in valid_dict:
                        for j in valid_dict[i]:
                            prediction[j] -= float('inf')
                pred_list_all.append(prediction)

        predictions = torch.Tensor(pred_list_all).to(device)  # shape: (n_user,n_item)
        for idx in range(len(top_k)):
            _, indices = torch.topk(predictions, int(top_k[idx]))
            recommends[idx].extend(indices.tolist())
    return recommends

def metrics(model, top_k, train_dict, gt_dict, valid_dict, item_num, flag, item_categories, device):
    results = {}
    recommends = evaluate(model, top_k, train_dict, gt_dict, valid_dict, item_num, flag, device)

    for idx, k in enumerate(top_k):
        sumForRecall, sumForNDCG, sumForIld, user_length, total_diversity = 0, 0, 0, 0, 0
        k_recalls, k_ndcgs, k_ilds = [], [], []
        
        for user, recs in enumerate(recommends[idx]):
            if user not in gt_dict or not gt_dict[user]:
                continue

            gt_items = set(gt_dict[user])
            hit = sum(item in gt_items for item in recs)
            recall = hit / len(gt_items)
            k_recalls.append(recall)

            dcg = sum(1.0 / np.log2(i + 2) for i, item in enumerate(recs) if item in gt_items)
            idcg = sum(1.0 / np.log2(i + 2) for i in range(min(len(gt_items), k)))
            ndcg = dcg / idcg if idcg > 0 else 0
            k_ndcgs.append(ndcg)

            recommended_categories = [item_categories[item] for item in recs]
            ild = calculate_diversity(recommended_categories)
            sumForIld += ild
            k_ilds.append(ild)

            sumForRecall += recall
            sumForNDCG += ndcg
            user_length += 1

        avg_recall = sum(k_recalls) / len(k_recalls) if k_recalls else 0
        avg_ndcg = sum(k_ndcgs) / len(k_ndcgs) if k_ndcgs else 0
        avg_ild = sumForIld / user_length if user_length else 0
        avg_f1 = f1_score(avg_recall, avg_ild)

        results[k] = {'Recall': avg_recall, 'NDCG': avg_ndcg, 'ILD': avg_ild, 'F1': avg_f1}
        print(f"Top-{k}: Avg Recall: {avg_recall:.4f}, Avg NDCG: {avg_ndcg:.4f}, Avg ILD: {avg_ild:.4f}, Avg F1 Score: {avg_f1:.4f}")

    return recommends, results


In [11]:
# Load data
train_dataset = MFData(train_data, item_num, train_dict, True)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

model = MF(user_num, item_num, emb_size, dropout)

model.to(device)
loss_function = nn.BCEWithLogitsLoss() # pointwise loss
optimizer = optim.Adam(model.parameters(), lr=lr)

best_recall = 0
best_f1_score = 0
total_loss = 0

for epoch in range(epochs):
    # train
    model.train() # Enable dropout (if have).
    start_time = time.time()
    train_loader.dataset.ng_sample()

    # for each batch
    for idx, (user, item, label) in enumerate(train_loader):
        user, item, label = user.to(device), item.to(device), label.float().to(device)
        model.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}, Time elapsed: {time.time() - start_time:.2f}s")

    model.eval()
    recommends, results = metrics(model, top_k, train_dict, valid_dict, valid_dict, item_num, 0, category_features, device)

    first_k = top_k[0]

    # Update best F1 score and save model if necessary based on the first k value
    current_best_f1 = results[first_k]['F1']
    current_best_recall = results[first_k]['Recall']
    if current_best_recall > best_recall:
        best_recall = current_best_recall
        best_f1_score = current_best_f1
        # Save the model checkpoint
        torch.save(model.state_dict(), f'./models/best_model_{model.model_name}.pth')
        print(f"New best model saved with Recall: {best_recall}, F1: {best_f1_score}, model path: ./models/best_model_{model.model_name}.pth")
    print('---'*18)

print("Training completed.")
print("Best Recall: ", best_recall)
print("Best F1 score: ", best_f1_score)

Top-10: Avg Recall: 0.0372, Avg NDCG: 0.0208, Avg ILD: 0.9957, Avg F1 Score: 0.0716
Top-20: Avg Recall: 0.0464, Avg NDCG: 0.0230, Avg ILD: 1.0000, Avg F1 Score: 0.0888
Top-30: Avg Recall: 0.0588, Avg NDCG: 0.0257, Avg ILD: 1.0000, Avg F1 Score: 0.1111
Top-40: Avg Recall: 0.0681, Avg NDCG: 0.0275, Avg ILD: 1.0000, Avg F1 Score: 0.1275
Top-50: Avg Recall: 0.0774, Avg NDCG: 0.0292, Avg ILD: 1.0000, Avg F1 Score: 0.1437
Top-60: Avg Recall: 0.0805, Avg NDCG: 0.0297, Avg ILD: 1.0000, Avg F1 Score: 0.1490
Top-70: Avg Recall: 0.0836, Avg NDCG: 0.0303, Avg ILD: 1.0000, Avg F1 Score: 0.1543
Top-80: Avg Recall: 0.0898, Avg NDCG: 0.0313, Avg ILD: 1.0000, Avg F1 Score: 0.1648
Top-90: Avg Recall: 0.0991, Avg NDCG: 0.0327, Avg ILD: 1.0000, Avg F1 Score: 0.1803
Top-100: Avg Recall: 0.1084, Avg NDCG: 0.0341, Avg ILD: 1.0000, Avg F1 Score: 0.1955
New best model saved with Recall: 0.03715170278637771, F1: 0.07163060073474349, model path: ./models/best_model_MF.pth
----------------------------------------

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7ff4122e6c10>
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1479, in __del__
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 116, in spawn_main
    self._shutdown_workers()
  File "/opt/anaconda3/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1437, in _shutdown_workers
    if self._persistent_workers or self._workers_status[worker_id]:
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'
    exitcode = _main(fd, parent_sentinel)
  File "/opt/anaconda3/lib/python3.9/multiprocessing/spawn.py", line 126, in _main
    self = reduction.pickle.load(from_parent)
_pickle.UnpicklingError: pickle data was truncated


Top-10: Avg Recall: 0.0217, Avg NDCG: 0.0113, Avg ILD: 1.0000, Avg F1 Score: 0.0424
Top-20: Avg Recall: 0.0248, Avg NDCG: 0.0121, Avg ILD: 1.0000, Avg F1 Score: 0.0483
Top-30: Avg Recall: 0.0310, Avg NDCG: 0.0133, Avg ILD: 1.0000, Avg F1 Score: 0.0601
Top-40: Avg Recall: 0.0341, Avg NDCG: 0.0139, Avg ILD: 1.0000, Avg F1 Score: 0.0659
Top-50: Avg Recall: 0.0433, Avg NDCG: 0.0156, Avg ILD: 1.0000, Avg F1 Score: 0.0831
Top-60: Avg Recall: 0.0557, Avg NDCG: 0.0177, Avg ILD: 1.0000, Avg F1 Score: 0.1056
Top-70: Avg Recall: 0.0588, Avg NDCG: 0.0182, Avg ILD: 1.0000, Avg F1 Score: 0.1111
Top-80: Avg Recall: 0.0650, Avg NDCG: 0.0192, Avg ILD: 1.0000, Avg F1 Score: 0.1221
Top-90: Avg Recall: 0.0681, Avg NDCG: 0.0197, Avg ILD: 1.0000, Avg F1 Score: 0.1275
Top-100: Avg Recall: 0.0805, Avg NDCG: 0.0216, Avg ILD: 1.0000, Avg F1 Score: 0.1490
------------------------------------------------------
Top-10: Avg Recall: 0.0248, Avg NDCG: 0.0114, Avg ILD: 1.0000, Avg F1 Score: 0.0483
Top-20: Avg Recall: 

KeyboardInterrupt: 

In [None]:
# Test MF model

mf_model = MF(user_num, item_num, emb_size, dropout)
mf_model.load_state_dict(torch.load(f'./models/best_model_MF.pth'))
mf_model.to(device)
mf_model.eval()
recommends, results = metrics(model, (10,20,30,40,50,60,70,80,90,100), train_dict, test_dict, test_dict, item_num, 0, category_features, device)

for k in top_k:
    update_results(mf_model.model_name, k, results[k]['Recall'], results[k]['NDCG'], results[k]['ILD'], results[k]['F1'])
    

In [None]:
# Load data
train_dataset = MFData(train_data, item_num, train_dict, True)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

model = NCF(user_num, item_num, emb_size, dropout)

model.to(device)
loss_function = nn.BCEWithLogitsLoss() # pointwise loss
optimizer = optim.Adam(model.parameters(), lr=lr)

best_recall = 0
best_f1_score = 0

for epoch in range(epochs):
    # train
    model.train() # Enable dropout (if have).
    start_time = time.time()
    train_loader.dataset.ng_sample()

    # for each batch
    for idx, (user, item, label) in enumerate(train_loader):
        user, item, label = user.to(device), item.to(device), label.float().to(device)
        model.zero_grad()
        prediction = model(user, item)
        loss = loss_function(prediction, label)
        loss.backward()
        optimizer.step()

    model.eval()
    recommends, results = metrics(model, top_k, train_dict, valid_dict, valid_dict, item_num, 0, category_features, device)

    first_k = top_k[0]

    # Update best F1 score and save model if necessary based on the first k value
    current_best_f1 = results[first_k]['F1']
    current_best_recall = results[first_k]['Recall']
    if current_best_recall > best_recall:
        best_recall = current_best_recall
        best_f1_score = current_best_f1
        # Save the model checkpoint
        torch.save(model.state_dict(), f'./models/best_model_{model.model_name}.pth')
        print(f"New best model saved with Recall: {best_recall}, F1: {best_f1_score}, model path: ./models/best_model_{model.model_name}.pth")
    print('---'*18)

print("Training completed.")
print("Best Recall: ", best_recall)
print("Best F1 score: ", best_f1_score)

In [None]:
# Test NCF model

ncf_model = NCF(user_num, item_num, emb_size, dropout)
ncf_model.load_state_dict(torch.load(f'./models/best_model_NCF.pth'))
ncf_model.to(device)
ncf_model.eval()
recommends, results = metrics(ncf_model, top_k, train_dict, test_dict, test_dict, item_num, 0, category_features, device)

for k in top_k:
    update_results(ncf_model.model_name, k, results[k]['Recall'], results[k]['NDCG'], results[k]['ILD'], results[k]['F1'])

In [None]:
# Compare results of different models

# Load the data
df = pd.read_csv('model_results.csv')

# Splitting the DataFrame based on the model
data_cbf = df[df['Model'] == 'CBF']
data_mf = df[df['Model'] == 'MF']
data_ncf = df[df['Model'] == 'NCF']

# Define the metrics to plot
evaluation_metrics = ['Recall', 'NDCG', 'ILD', 'F1']

# Initialize a 2x2 subplot layout
fig, axs = plt.subplots(2, 2, figsize=(12, 10))

# Flattening the array of axes for easy iterating
axs = axs.flatten()

colors = {'CBF': 'magenta', 'MF': 'orange', 'NCF': 'blue'}

# Loop through each metric and plot
for i, metric in enumerate(evaluation_metrics):
    axs[i].plot(data_cbf['K'], data_cbf[metric], label='CBF', color=colors['CBF'], marker='o')
    axs[i].plot(data_mf['K'], data_mf[metric], label='MF', color=colors['MF'], marker='^')
    axs[i].plot(data_ncf['K'], data_ncf[metric], label='NCF', color=colors['NCF'], marker='s')
    
    axs[i].set_title(metric)
    axs[i].set_xlabel('Top K')
    axs[i].set_ylabel(metric)
    axs[i].legend()
    axs[i].grid(False)  # Enable grid

# Adjust layout to prevent overlap
plt.tight_layout()

# Display the plot
plt.show()
