In [5]:
import os
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.backends.cudnn as cudnn
import torch.nn.functional as F

from data_utils import * 
from evaluate_cbf import *
from models import *

import random
import numpy as np

from sklearn.preprocessing import OneHotEncoder

In [6]:
# Load data

train_dict = np.load('../data/training_dict.npy', allow_pickle=True).item()
valid_dict = np.load('../data/validation_dict.npy', allow_pickle=True).item()
test_dict = np.load('../data/testing_dict.npy', allow_pickle=True).item()
category_features = np.load(os.path.join('../data/category_feature.npy'), allow_pickle=True).item()
visual_features = np.load(os.path.join('../data/visual_feature.npy'), allow_pickle=True).item()

# Get the number of users and items

user_num = max(max(train_dict), max(valid_dict, default=-1), max(test_dict, default=-1)) + 1

item_num = max(
    max((max(items, default=-1) for items in train_dict.values()), default=-1),
    max((max(items, default=-1) for items in valid_dict.values()), default=-1),
    max((max(items, default=-1) for items in test_dict.values()), default=-1)
) + 1

print('Number of users: %d, Number of items: %d' % (user_num, item_num))

# Prepare training, validation, and test data

train_data = [[int(user), int(item)] for user, items in train_dict.items() for item in items]
valid_gt = [[int(user), int(item)] for user, items in valid_dict.items() for item in items]
test_gt = [[int(user), int(item)] for user, items in test_dict.items() for item in items]
print('Training samples: %d, Validation samples: %d, Test samples: %d' % (len(train_data), len(valid_gt), len(test_gt)))

# Load item features

category_feature_size = len(category_features)
unique_categories = set(category_features.values())
category_encoder = OneHotEncoder()
category_encoder.fit(np.array(list(unique_categories)).reshape(-1, 1))
category_features_onehot = category_encoder.transform(np.array(list(category_features.values())).reshape(-1, 1)).toarray()
print('Category features shape: %s' % str(category_features_onehot.shape))

visual_feature_size = len(visual_features)
example_key = next(iter(visual_features.keys()))
print('Visual features shape: %s' % str(visual_features[example_key].shape))

# Create user profiles

def create_user_profiles(interaction_dict, category_features, visual_features):

    user_profiles = {
        user_id: {
            'category_sum': np.zeros(category_features_onehot.shape[1]), 
            'visual_sum': np.zeros(visual_features[0].shape),
            'count': 0
        }
        for user_id in interaction_dict
    }

    for user_id, items in interaction_dict.items():
        for item_id in items:
            user_profiles[user_id]['category_sum'] += category_features[item_id]
            user_profiles[user_id]['visual_sum'] += visual_features[item_id]
            user_profiles[user_id]['count'] += 1

    # Averaging the features for each user profile
    for profile in user_profiles.values():
        if profile['count'] > 0:
            profile['category_sum'] /= profile['count']
            profile['visual_sum'] /= profile['count']

    return user_profiles

train_user_profiles = create_user_profiles(train_dict, category_features_onehot, visual_features)
valid_user_profiles = create_user_profiles(valid_dict, category_features_onehot, visual_features)
test_user_profiles = create_user_profiles(test_dict, category_features_onehot, visual_features)

Number of users: 506, Number of items: 1674
Training samples: 12358, Validation samples: 406, Test samples: 406
Category features shape: (1674, 368)
Visual features shape: (512,)


In [7]:
class ContentBasedModel(nn.Module):
    def __init__(self, num_categories, num_visual_features, hidden_dim):
        super(ContentBasedModel, self).__init__()
        # User category pathway
        self.model_name = 'ContentBasedModel'
        self.user_category_fc = nn.Linear(num_categories, hidden_dim)
        
        # Item category pathway
        self.item_category_fc = nn.Linear(num_categories, hidden_dim)
        
        # User visual pathway
        self.user_visual_fc = nn.Linear(num_visual_features, hidden_dim)
        
        # Item visual pathway
        self.item_visual_fc = nn.Linear(num_visual_features, hidden_dim)
        
        # Combined features for prediction
        self.combined_fc = nn.Linear(hidden_dim * 4, hidden_dim)  # *4 because we concatenate user+item category+visual features
        self.output_layer = nn.Linear(hidden_dim, 1)

    def forward(self, user_category, user_visual, item_category, item_visual):
        # Process features through respective pathways
        user_category_out = F.relu(self.user_category_fc(user_category))
        item_category_out = F.relu(self.item_category_fc(item_category))
        user_visual_out = F.relu(self.user_visual_fc(user_visual))
        item_visual_out = F.relu(self.item_visual_fc(item_visual))
        
        # Combine all pathways
        combined_features = torch.cat((user_category_out, item_category_out, user_visual_out, item_visual_out), dim=1)
        
        # Further processing for final prediction
        combined_out = F.relu(self.combined_fc(combined_features))
        output = torch.sigmoid(self.output_layer(combined_out))
        return output

In [8]:
# Initialise parameters

seed = 4242
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.set_num_threads(torch.get_num_threads())

data_path = "../data/"
model = "MF"
emb_size = 100
lr = 0.001
dropout = 0.0
batch_size = 100
epochs = 10
device = "cpu"
top_k = [10, 20, 50, 100]
log_name = "log"
model_path = "./models/"
num_categories = 368
num_visual_features = 512
embedding_dim = 32
hidden_dim = 32
diversity_param = 0.5

In [9]:
# Prepare the training data
train_dataset = CBFData(
    user_item_pairs=train_data, 
    num_items=item_num, 
    category_features=category_features_onehot,
    visual_features=visual_features,
    user_profiles=train_user_profiles, 
    train_dict=train_dict, 
    is_training=True
)

train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)


In [20]:
def calculate_diversity(recommended_categories):
    K = len(recommended_categories)
    diversity_score = 0
    
    for i in range(K):
        for j in range(i+1, K):
            if recommended_categories[i] != recommended_categories[j]:
                diversity_score += 1

    # Normalize by the number of possible pairs
    diversity_score /= (K * (K - 1) / 2.0)
    return diversity_score

# Function to calculate the F1 score for diversity and relevance
def f1_score(ndcg_score, ild_score):
    if not np.isnan(ndcg_score) and not np.isnan(ild_score):  
        f1 = 2 * (ndcg_score * ild_score) / (ndcg_score + ild_score)
    else:
        f1 = 0  
    return f1

def rerank_for_diversity(initial_recommendations, item_categories, diversity_param):
    reranked_items = []
    category_penalty = {}  # Dictionary to keep track of category penalties

    for item_id, score in initial_recommendations:
        category = item_categories[item_id]
        penalty = category_penalty.get(category, 0) # Get penalty for this category
        diversity_score = score * (1 - diversity_param) - penalty * diversity_param 
        reranked_items.append((item_id, diversity_score))
        category_penalty[category] = category_penalty.get(category, 0) + 1 # Update category penalty to 1

    # Sort items by diversity_score
    reranked_items.sort(key=lambda x: x[1], reverse=True)
    reranked_item_ids = [item_id for item_id, _ in reranked_items]

    return reranked_item_ids

def evaluate_cbf(model, top_k, train_dict, gt_dict, user_profiles, category_features, category_features_one_hot, visual_features, device, diversity_param, is_training):
    recommends = {k: [] for k in top_k}
    diversity_scores = {k: [] for k in top_k}
    
    with torch.no_grad():
        for user_id, true_items in gt_dict.items():
            if not true_items:  
                continue
            '''
            if model.model_name == 'CBF':
                user_category = torch.tensor(user_profiles[user_id]['category_sum'], dtype=torch.float32).unsqueeze(0)
                user_visual = torch.tensor(user_profiles[user_id]['visual_sum'], dtype=torch.float32).unsqueeze(0)
                interacted_items = train_dict[user_id] if user_id in train_dict else []
                item_ids = [i for i in range(len(category_features_one_hot)) if i not in interacted_items]
                item_category_tensor = torch.stack([torch.tensor(category_features_one_hot[i], dtype=torch.float32) for i in item_ids])
                item_visual_tensor = torch.stack([torch.tensor(visual_features[i], dtype=torch.float32) for i in item_ids])
                scores = model(user_category.repeat(len(item_ids), 1), user_visual.repeat(len(item_ids), 1), item_category_tensor, item_visual_tensor).squeeze()
            '''
            for user_id, true_items in gt_dict.items(): # Iterate over all users
                scores = np.zeros(len(category_features)) # Initialize scores for all items

                user_category = torch.tensor(user_profiles[user_id]['category_sum'], dtype=torch.float32).unsqueeze(0).to(device) # Get user profile
                user_visual = torch.tensor(user_profiles[user_id]['visual_sum'], dtype=torch.float32).unsqueeze(0).to(device) # Get user profile
                
                item_ids = [i for i in range(len(category_features)) if i not in train_dict.get(user_id, [])]
                for item_id in item_ids:

                    item_category = torch.tensor(category_features_one_hot[item_id], dtype=torch.float32).unsqueeze(0).to(device)
                    item_visual = torch.tensor(visual_features[item_id], dtype=torch.float32).unsqueeze(0).to(device)

                    # Score the user-item pair
                    score = model(user_category, user_visual, item_category, item_visual).squeeze().cpu().numpy()
                    scores[item_id] = score
                
                # Exclude scores for training items by setting them to -inf
                scores[list(train_dict.get(user_id, []))] = -np.inf
                # scores = torch.tensor(scores, dtype=torch.float32).to(device)
            
                

            # Generate recommendation pool for reranking
            # recommendation_pool_size = max(top_k) * 5
            # top_scores, top_indices = torch.topk(scores, recommendation_pool_size)
            # recommendation_pool = [(item_ids[i], scores[i].item()) for i in top_indices.cpu().numpy()]

            for k in top_k:
                # top_k_items = np.argsort(scores)[-k:]
                initial_recommendations = np.argsort(scores)[-k:]
                print('Initial Recommendations', initial_recommendations)
                final_recommendations = []

                if not is_training and diversity_param > 0:
                    rerank_input = [(item_id, score) for item_id, score in initial_recommendations]
                    final_recommendations = rerank_for_diversity(rerank_input, {i: category_features[i] for i, _ in rerank_input}, diversity_param)
                else:
                    final_recommendations = [item_id for item_id, _ in initial_recommendations]
                
                print('Final Recommendations', final_recommendations)

                # Compute ild_score based on final recommendations
                final_categories = [category_features[item_id] for item_id in final_recommendations]
                ild_score = calculate_diversity(final_categories)

                # Store the final recommendations and their diversity score
                recommends[k].append(final_recommendations)
                diversity_scores[k].append(ild_score)

    print('Recommendations', recommends)
    print('Diversity Scores', diversity_scores)

    return recommends, diversity_scores

def metrics_cbf(model, top_k, train_dict, gt_dict, user_profiles, category_features, category_features_one_hot, visual_features, device, diversity_param=0.5, is_training=True):
    
    recommends, diversity_scores = evaluate_cbf(model, top_k, train_dict, gt_dict, user_profiles, category_features, category_features_one_hot, visual_features, device, diversity_param, is_training)

    results = {} 
    
    for k in top_k:
        sumForRecall, sumForNDCG, user_length = 0, 0, 0 # Initialize variables for average calculation
        user_ids_list = list(gt_dict.keys())  # Ensure we have a consistent order
        
        for idx, user_id in enumerate(user_ids_list):
            true_items = gt_dict[user_id]
            if len(true_items) == 0:
                continue  # Skip users with no ground truth data
            
            if idx >= len(recommends[k]):  # Guard against index out of bounds
                continue
            
            recommended_items = recommends[k][idx]  # Access by index
            userhit, dcg, idcg = 0, 0, 0
            idcgCount = len(true_items)
            
            for index, item in enumerate(recommended_items[:k]):
                if item in true_items:
                    userhit += 1
                    dcg += 1.0 / np.log2(index + 2)
                if idcgCount > 0:
                    idcg += 1.0 / np.log2(index + 2)
                    idcgCount -= 1
            
            ndcg = dcg / idcg if idcg != 0 else 0
            recall = userhit / len(true_items) if len(true_items) > 0 else 0
            
            sumForRecall += recall
            sumForNDCG += ndcg
            user_length += 1
        
        avg_recall = sumForRecall / user_length if user_length > 0 else 0
        avg_ndcg = sumForNDCG / user_length if user_length > 0 else 0
        avg_ild = np.mean([diversity_scores[k]]) if k in diversity_scores and diversity_scores[k] else 0
        avg_f1 = np.mean([f1_score(avg_ndcg, avg_ild)])

        results[k] = {'Recall': avg_recall, 'NDCG': avg_ndcg, 'ILD': avg_ild, 'F1': avg_f1}
        print(f"Top-{k}: Avg Recall: {avg_recall:.4f}, Avg NDCG: {avg_ndcg:.4f}, Avg ILD: {avg_ild:.4f}, Avg F1 Score: {avg_f1:.4f}")
    
    return recommends, results


In [10]:
# Evaluation function

def evaluate_CBF(model, user_profiles, category_features, visual_features, gt_dict, train_dict, device, top_k=(10, 20, 50, 100)):
    model.eval()
    recalls = []
    ndcgs = []

    # No need to prepare these tensors in advance since we'll process each item individually
    with torch.no_grad():
        for K in top_k:
            recall_sum = 0.0
            ndcg_sum = 0.0
            num_users = len(gt_dict)

            for user_id, true_items in gt_dict.items():
                scores = np.zeros(len(category_features))

                user_category = torch.tensor(user_profiles[user_id]['category_sum'], dtype=torch.float32).unsqueeze(0).to(device)
                user_visual = torch.tensor(user_profiles[user_id]['visual_sum'], dtype=torch.float32).unsqueeze(0).to(device)
                
                for item_id in range(len(category_features)):
                    # Skip items the user has seen in training
                    if item_id in train_dict.get(user_id, []):
                        continue

                    item_category = torch.tensor(category_features[item_id], dtype=torch.float32).unsqueeze(0).to(device)
                    item_visual = torch.tensor(visual_features[item_id], dtype=torch.float32).unsqueeze(0).to(device)

                    # Score the user-item pair
                    score = model(user_category, user_visual, item_category, item_visual).squeeze().cpu().numpy()
                    scores[item_id] = score
                
                # Exclude scores for training items by setting them to -inf
                scores[list(train_dict.get(user_id, []))] = -np.inf

                # Get top-K items based on scores
                top_k_items = np.argsort(scores)[-K:]
                
                # Calculate metrics
                num_hits = len(set(top_k_items) & set(true_items))
                recall = num_hits / float(len(true_items))
                recall_sum += recall

                ndcg = calculate_ndcg(top_k_items, true_items, K)
                ndcg_sum += ndcg
            
            recalls.append(recall_sum / num_users)
            ndcgs.append(ndcg_sum / num_users)

    return recalls, ndcgs

def calculate_ndcg(predicted_items, true_items, K):
    dcg = 0.0
    idcg = 0.0
    for i, pred in enumerate(predicted_items[-K:]):
        if pred in true_items:
            dcg += 1.0 / np.log2(i + 2)
    for i in range(min(len(true_items), K)):
        idcg += 1.0 / np.log2(i + 2)
    return dcg / idcg if idcg > 0 else 0.0

In [21]:
# Instantiate and train the model

model = ContentBasedModel(num_categories, num_visual_features, hidden_dim)
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCELoss()

best_f1_score = 0
best_recall = 0

for epoch in range(epochs):
    model.train()
    start_time = time.time()
    total_loss = 0

    for batch in train_loader:
        user_category, user_visual, item_category, item_visual, labels = batch
        user_category, user_visual = user_category.to(device), user_visual.to(device)
        item_category, item_visual = item_category.to(device), item_visual.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        predictions = model(user_category, user_visual, item_category, item_visual).squeeze()
        loss = criterion(predictions, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}, Time elapsed: {time.time() - start_time:.2f}s")

    # Evaluation    
    recommends, results = metrics_cbf(model, top_k, train_dict, valid_dict, train_user_profiles, category_features, category_features_onehot, visual_features, device, diversity_param, is_training=True)

    first_k = top_k[0]

    # Update best F1 score and save model if necessary based on the first k value
    current_best_f1 = results[first_k]['F1']
    current_best_recall = results[first_k]['Recall']
    if current_best_recall > best_recall:
        best_recall = current_best_recall
        best_f1 = current_best_f1
        # Save the model checkpoint
        torch.save(model.state_dict(), f'./models/best_model_{model.model_name}.pth')
        print(f"New best model saved with Recall: {best_recall}, F1: {best_f1}, model path: ./models/best_model_{model.model_name}.pth")
    print('---'*18)

print("Training completed.")
print("Best Recall: ", best_recall)
print("Best F1 score: ", best_f1_score)

Epoch 1, Loss: 0.6876490488648415, Time elapsed: 28.66s


IndexError: list index out of range

In [None]:
# Testing the model

model = ContentBasedModel(num_categories, num_visual_features, hidden_dim)
model.load_state_dict(torch.load("./best_model.pth"))
model.to(device)

model.eval()
recalls, ndcgs = evaluate_CBF(model, test_user_profiles, category_features_onehot, visual_features, test_dict, train_dict, device, top_k=[10, 20, 50, 100])
print(f"[Test] Recall: {recalls}, NDCG: {ndcgs}")