In [1]:
# 1. Install required packages
!pip install numpy torch tqdm wget



In [2]:
# 2. Clone the repository
!git clone https://github.com/jaywonchung/BERT4Rec-VAE-Pytorch.git
%cd BERT4Rec-VAE-Pytorch

fatal: destination path 'BERT4Rec-VAE-Pytorch' already exists and is not an empty directory.
/content/BERT4Rec-VAE-Pytorch


In [3]:
# # 3. Download MovieLens-1M dataset
# !wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
# !unzip ml-1m.zip -d datasets/


import os
os.makedirs('Data/ml-1m', exist_ok=True)
!wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
!unzip ml-1m.zip -d Data/

# Verify the files are there
!ls Data/ml-1m/

--2025-06-25 09:40:42--  http://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5.6M) [application/zip]
Saving to: ‘ml-1m.zip.24’


2025-06-25 09:40:42 (11.0 MB/s) - ‘ml-1m.zip.24’ saved [5917549/5917549]

Archive:  ml-1m.zip
replace Data/ml-1m/movies.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Data/ml-1m/movies.dat   
replace Data/ml-1m/ratings.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Data/ml-1m/ratings.dat  
replace Data/ml-1m/README? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Data/ml-1m/README       
replace Data/ml-1m/users.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
  inflating: Data/ml-1m/users.dat    
movies.dat  ratings.dat  README  users.dat


In [4]:
# 4. Verify GPU availability
import torch
print("GPU Available:", torch.cuda.is_available())
print("GPU Device Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU")

GPU Available: True
GPU Device Name: Tesla T4


In [5]:
# 5. Create necessary directories
import os
os.makedirs('models/bert_genre', exist_ok=True)
os.makedirs('trainers', exist_ok=True)
os.makedirs('dataloaders', exist_ok=True)
os.makedirs('datasets', exist_ok=True)

In [6]:
# Update BERT genre model
%%writefile models/bert_genre.py
import torch
import torch.nn as nn
from .bert import BERT

class BERTGenreModel(nn.Module):
    @classmethod
    def code(cls):
        return 'bert_genre'

    def __init__(self, args):
        super().__init__()
        self.args = args
        self.bert = BERT(args)
        self.num_items = args.num_items
        self.genre_embedding_size = args.genre_embedding_size
        self.num_genres = args.num_genres

        # Genre embeddings
        self.genre_embeddings = nn.Embedding(self.num_genres, self.genre_embedding_size)

        # Output layer for genre-item predictions
        self.output_layer = nn.Linear(args.bert_hidden_units + self.genre_embedding_size, self.num_items)

        # Initialize weights
        self.init_weights()

    def init_weights(self):
        """Initialize weights for BERT and output layer"""
        self.bert.init_weights()
        nn.init.xavier_normal_(self.output_layer.weight)
        nn.init.zeros_(self.output_layer.bias)

    def forward(self, x, genre_matrix):
        batch_size = x.size(0)

        # BERT output: [batch_size, seq_len, hidden_dim]
        bert_output = self.bert(x)

        # Get final token embedding (CLS token)
        bert_output = bert_output[:, -1, :]  # [batch_size, hidden_dim]

        # Repeat genre embeddings: [batch_size, num_genres, genre_emb_dim]
        genre_embeddings = self.genre_embeddings.weight  # [num_genres, genre_emb_dim]
        genre_embeddings_expanded = genre_embeddings.unsqueeze(0).expand(batch_size, -1, -1)

        # Expand BERT output to match genre dimension: [batch_size, num_genres, hidden_dim]
        bert_output_expanded = bert_output.unsqueeze(1).expand(-1, self.num_genres, -1)

        # Concatenate: [batch_size, num_genres, hidden + genre_emb_dim]
        combined = torch.cat([bert_output_expanded, genre_embeddings_expanded], dim=-1)

        # Final output layer: [batch_size, num_genres, num_items]
        logits = self.output_layer(combined)

        return logits


    def get_topk_per_genre(self, x, genre_matrix, k=5):
        """Get top-k items for each genre"""
        logits = self.forward(x, genre_matrix)
        topk_per_genre = torch.topk(logits, k, dim=2).indices  # Shape: [batch_size, num_genres, k]
        return topk_per_genre

Overwriting models/bert_genre.py


In [7]:
# 7. Write BERT genre trainer
%%writefile trainers/bert_genre_trainer.py
from .base import AbstractTrainer as BaseTrainer
import torch
import torch.nn as nn

class BERTGenreTrainer(BaseTrainer):
    @classmethod
    def code(cls):
        return 'bert_genre'

    def __init__(self, args, model, train_dataloader, val_dataloader, test_dataloader, export_root):
        super().__init__(args, model, train_dataloader, val_dataloader, test_dataloader, export_root)
        self.criterion = nn.CrossEntropyLoss()

    def add_extra_loggers(self):
        pass

    def log_extra_train_info(self, log_data):
        pass

    def log_extra_val_info(self, log_data):
        pass

    def calculate_loss(self, batch):
        """
        batch = (seqs, labels, genre_matrix)
        logits: [batch_size, num_genres, num_items]
        labels: [batch_size, num_genres]
        """
        seqs, labels, genre_matrix = batch
        logits = self.model(seqs, genre_matrix)  # shape: [B, G, I]

        batch_size, num_genres, num_items = logits.shape
        logits = logits.view(-1, num_items)           # [B * G, I]
        labels = labels.view(-1)                      # [B * G]

        loss = self.criterion(logits, labels)         # CrossEntropyLoss across all genre-item targets
        return loss

    def calculate_metrics(self, batch):
        """
        Calculate Recall@5 per genre, averaged over all users
        """
        seqs, labels, genre_matrix = batch
        logits = self.model(seqs, genre_matrix)  # [B, G, I]

        # Top-5 item indices per genre
        top5 = torch.topk(logits, 5, dim=2).indices  # Expecting: [B, G, 5]

        # Add these debug prints
        print("top5 shape:", top5.shape)        # Should be [B, G, 5]
        print("labels shape:", labels.shape)    # Should be [B]

        # Ensure proper shape alignment: [B, G, 1]
        labels_expanded = labels.unsqueeze(1).expand(-1, top5.size(1)).unsqueeze(2)
        print("labels_expanded shape:", labels_expanded.shape)  # Should be [B, G, 1]

        # Check if label is in top-5 per genre
        hits = (top5 == labels_expanded).any(dim=2).float()  # [B, G]

        recall_5 = hits.mean().item()  # scalar average over all users and genres

        return {
            "Recall@5": recall_5
        }


Overwriting trainers/bert_genre_trainer.py


In [8]:
# Update BERT genre dataloader
%%writefile dataloaders/bert_genre.py
from .base import AbstractDataloader
import numpy as np
import torch

class BERTGenreDataloader(AbstractDataloader):
    @classmethod
    def code(cls):
        return 'bert_genre'

    def __init__(self, args, dataset):
        super().__init__(args, dataset)
        self.dataset = dataset
        self.max_len = args.bert_max_len
        self.mask_prob = args.bert_mask_prob
        self.CLOZE_MASK_TOKEN = self.dataset.get_num_items() + 1

    def _get_dataloader(self, data, shuffle):
        # Pre-allocate numpy arrays
        n_samples = len(data)
        sequences = np.zeros((n_samples, self.max_len), dtype=np.int32)
        labels = np.zeros(n_samples, dtype=np.int32)
        genre_matrices = []

        idx = 0
        for user_id, items in data.items():
            # Add items to sequence
            seq = np.zeros([self.max_len], dtype=np.int32)
            seq_idx = self.max_len - 1

            for i in reversed(items[:-1]):
                seq[seq_idx] = i
                seq_idx -= 1
                if seq_idx == -1: break

            sequences[idx] = seq

            # Get positive item and its genre matrix
            pos_id = items[-1]
            genre_matrix = self.dataset.get_genre_matrix([pos_id])
            genre_matrices.append(genre_matrix.numpy())

            labels[idx] = pos_id
            idx += 1

        # Convert to tensors
        sequences = torch.LongTensor(sequences)
        labels = torch.LongTensor(labels)
        genre_matrices = torch.FloatTensor(np.stack(genre_matrices))

        dataset = torch.utils.data.TensorDataset(sequences, labels, genre_matrices)

        return torch.utils.data.DataLoader(
            dataset,
            batch_size=self.args.train_batch_size,
            shuffle=shuffle,
            num_workers=self.args.workers
        )

    def get_pytorch_dataloaders(self):
        train = self._get_dataloader(self.dataset.train, shuffle=True)
        val = self._get_dataloader(self.dataset.val, shuffle=False)
        test = self._get_dataloader(self.dataset.test, shuffle=False)
        return train, val, test

Overwriting dataloaders/bert_genre.py


In [9]:
# Update templates.py
%%writefile templates.py
# Update templates.py
import argparse
import torch
import os
import random

def train_bert_genre():
    """Train the BERT genre model with genre-specific metrics"""
    parser = argparse.ArgumentParser(description='RecPlay')
    args = parser.parse_args([])

    # Experiment settings
    args.experiment_dir = os.path.join(os.getcwd(), 'experiments')
    args.experiment_description = 'genre_recommendation'

    # Model and dataset settings
    args.model_code = 'bert_genre'
    args.dataset_code = 'ml-1m'
    args.min_rating = 0 if args.dataset_code == 'ml-1m' else 4
    args.min_uc = 5
    args.min_sc = 0
    args.split = 'leave_one_out'

    # Model initialization
    args.model_init_seed = 0

    # Dataloader settings
    args.dataloader_code = 'bert_genre'
    args.train_negative_sampler_code = 'random'
    args.train_negative_sample_size = 0
    args.train_batch_size = 64
    args.val_batch_size = 64
    args.test_batch_size = 64
    args.train_negative_sampling_seed = 0
    args.test_negative_sampling_seed = 98765
    args.dataloader_random_seed = 98765
    args.workers = 0

    # Trainer settings
    args.trainer_code = 'bert_genre'
    args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    args.num_gpu = 1 if torch.cuda.is_available() else 0
    args.device_idx = 0
    args.optimizer = 'Adam'
    args.lr = 0.001
    args.num_epochs = 100   ###############################################################################  NUMBER OF EPOCHS ##################################
    args.metric_ks = [1, 5, 10]
    args.best_metric = 'NDCG@10'
    args.l2_reg = 0.0
    args.weight_decay = 0.0
    args.momentum = 0.9

    # Learning rate schedule
    args.enable_lr_schedule = True
    args.decay_step = 20
    args.gamma = 0.1

    # Logging settings
    args.log_period_as_iter = 100  # Log every 100 iterations
    args.log_period_as_epoch = 1   # Log every epoch

    # BERT genre-specific parameters
    args.bert_max_len = 100
    args.bert_num_blocks = 2
    args.bert_num_heads = 4
    args.bert_hidden_units = 256
    args.bert_dropout = 0.1
    args.bert_mask_prob = 0.15
    args.genre_embedding_size = 64
    args.num_items = 3706
    args.num_genres = 18

    return args

def set_template(args):
    if args.template == 'train_bert_genre':
        args = train_bert_genre()
    return args

Overwriting templates.py


In [10]:
# Update options.py
%%writefile options.py
import sys
import argparse
import torch
from templates import set_template

# Check if running in Colab
is_colab = 'ipykernel' in sys.argv[0]

# Create argument parser
parser = argparse.ArgumentParser(description='RecPlay')

# Add all arguments
parser.add_argument('--dataset_code', type=str, default='ml-1m', choices=['ml-1m', 'ml-20m'])
parser.add_argument('--min_rating', type=int, default=0, help='Minimum rating to include')
parser.add_argument('--min_uc', type=int, default=5, help='Filter threshold for users')
parser.add_argument('--min_sc', type=int, default=0, help='Filter threshold for items')
parser.add_argument('--split', type=str, default='leave_one_out', help='How to split the datasets')

parser.add_argument('--dataset_split_seed', type=int, default=98765)
parser.add_argument('--eval_set_size', type=int, default=500,
                    help='Size of val and test set when running evaluation')

# Dataloader arguments
parser.add_argument('--dataloader_random_seed', type=int, default=98765)
parser.add_argument('--train_batch_size', type=int, default=64)
parser.add_argument('--val_batch_size', type=int, default=64)
parser.add_argument('--test_batch_size', type=int, default=64)
parser.add_argument('--workers', type=int, default=0)  # Added workers argument

# Negative sampler arguments
parser.add_argument('--train_negative_sampler_code', type=str, default='random',
                    choices=['popular', 'random'], help='Negative sampling technique for training')
parser.add_argument('--train_negative_sample_size', type=int, default=0)
parser.add_argument('--train_negative_sampling_seed', type=int, default=0)
parser.add_argument('--test_negative_sampler_code', type=str, default='random',
                    choices=['popular', 'random'], help='Negative sampling technique for evaluation')
parser.add_argument('--test_negative_sample_size', type=int, default=100)
parser.add_argument('--test_negative_sampling_seed', type=int, default=98765)

# Model arguments
parser.add_argument('--model_code', type=str, default='bert_genre', choices=['bert', 'dae', 'vae', 'bert_genre'])
parser.add_argument('--model_init_seed', type=int, default=0)

# BERT arguments
parser.add_argument('--bert_max_len', type=int, default=100)
parser.add_argument('--bert_num_blocks', type=int, default=2)
parser.add_argument('--bert_num_heads', type=int, default=4)
parser.add_argument('--bert_hidden_units', type=int, default=256)
parser.add_argument('--bert_dropout', type=float, default=0.1)
parser.add_argument('--bert_mask_prob', type=float, default=0.15)

# Device configuration
parser.add_argument('--device', type=str, default='cuda' if torch.cuda.is_available() else 'cpu')
parser.add_argument('--device_idx', type=str, default='0')
parser.add_argument('--num_gpu', type=int, default=1 if torch.cuda.is_available() else 0)

# Experiment arguments
parser.add_argument('--experiment_dir', type=str, default='experiments')
parser.add_argument('--experiment_description', type=str, default='genre_recommendation')

# Add template argument
parser.add_argument('--template', type=str, default='train_bert_genre')

def is_colab_or_ipython():
    return any(word in sys.argv[0] for word in ['ipykernel', 'colab', 'kernel'])

if is_colab_or_ipython():
    # In Colab: Initialize args with default values
    args = parser.parse_args([])
else:
    args = parser.parse_args()
    set_template(args)

# Set device configuration
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
args.device_idx = '0'
args.num_gpu = 1 if torch.cuda.is_available() else 0

# Apply template settings
args = set_template(args)

Overwriting options.py


In [11]:
# 10. Update main.py with genre-specific training function
%%writefile main.py
# Update main.py
import torch
from options import args
from models import model_factory
from dataloaders import dataloader_factory
from trainers import trainer_factory
import os

def train_bert_genre():
    """Train the BERT genre model with genre-specific metrics"""
    export_root = setup_train(args)
    train_loader, val_loader, test_loader = dataloader_factory(args)
    model = model_factory(args)
    trainer = trainer_factory(args, model, train_loader, val_loader, test_loader, export_root)
    trainer.train()

def setup_train(args):
    """Set up training environment"""
    export_root = os.path.join(args.experiment_dir, args.experiment_description)
    os.makedirs(export_root, exist_ok=True)
    return export_root

if __name__ == '__main__':
    train_bert_genre()

Overwriting main.py


In [12]:
# 10. Update dataloaders/__init__.py
%%writefile dataloaders/__init__.py
from .base import AbstractDataloader
# from .bert import BERTDataloader
from .bert_genre import BERTGenreDataloader
from datasets import dataset_factory

DATALOADERS = {
    # 'bert': BERTDataloader,
    'bert_genre': BERTGenreDataloader
}

def dataloader_factory(args):
    dataset = dataset_factory(args)
    dataloader = DATALOADERS[args.dataloader_code]
    dataloader = dataloader(args, dataset)
    train, val, test = dataloader.get_pytorch_dataloaders()
    return train, val, test

Overwriting dataloaders/__init__.py


In [13]:
# 1. Update ML1MDataset
%%writefile datasets/ml1m_genre.py
from .base import AbstractDataset
import pandas as pd
import os
from datetime import datetime
import numpy as np
import torch

class ML1MDataset(AbstractDataset):
    @classmethod
    def code(cls):
        return 'ml-1m'

    @classmethod
    def url(cls):
        return 'http://files.grouplens.org/datasets/movielens/ml-1m.zip'

    @classmethod
    def zip_file_content_is_folder(cls):
        return True

    @classmethod
    def all_raw_file_names(cls):
        return ['ratings.dat', 'movies.dat']

    def __init__(self, args):
        super().__init__(args)
        self._load_dataset()
        self._set_num_items()
        self._split_dataset()

    def _load_dataset(self):
        df = self.load_ratings_df()
        movies_df = self.load_movies_df()

        # Create genre mapping
        all_genres = set()
        for genres in movies_df['genres']:
            all_genres.update(genres.split('|'))

        self.genre2id = {genre: idx for idx, genre in enumerate(all_genres)}
        self.id2genre = {idx: genre for genre, idx in self.genre2id.items()}

        # Create movie-genre mapping
        self.movie_genre_map = {}
        for _, row in movies_df.iterrows():
            movie_id = int(row['sid'])
            genres = row['genres'].split('|')
            self.movie_genre_map[movie_id] = [self.genre2id[genre] for genre in genres]

        # Add genre information to dataset
        df['genres'] = df['sid'].apply(lambda x: self.movie_genre_map.get(x, []))

        self.df = df

    def _set_num_items(self):
        """Set the number of items in the dataset"""
        self._num_items = len(self.movie_genre_map)
        print(f"Number of items: {self._num_items}")  # Debug print

    def _split_dataset(self):
        """Split the dataset into train, val, and test sets"""
        df = self.df.copy()

        # Sort by timestamp
        df = df.sort_values('timestamp')

        # Group by user
        user_groups = df.groupby('uid')

        # Split each user's history into train, val, and test
        train_data = {}
        val_data = {}
        test_data = {}

        for user_id, group in user_groups:
            items = group['sid'].tolist()
            if len(items) >= 3:  # Need at least 3 interactions for train/val/test
                train_data[user_id] = items[:-2]  # All but last 2 items
                val_data[user_id] = items[-2:-1]  # Second to last item
                test_data[user_id] = items[-1:]   # Last item
            else:
                # If user has less than 3 interactions, skip them
                continue

        self.train = train_data
        self.val = val_data
        self.test = test_data

    def load_ratings_df(self):
        folder_path = self._get_rawdata_folder_path()
        return pd.read_csv(os.path.join(folder_path, 'ratings.dat'),
                          sep='::',
                          header=None,
                          names=['uid', 'sid', 'rating', 'timestamp'],
                          engine='python',
                          encoding='ISO-8859-1')

    def load_movies_df(self):
        folder_path = self._get_rawdata_folder_path()
        return pd.read_csv(os.path.join(folder_path, 'movies.dat'),
                          sep='::',
                          header=None,
                          names=['sid', 'title', 'genres'],
                          engine='python',
                          encoding='ISO-8859-1')

    def get_genre_matrix(self, item_ids):
        """Get genre matrix for given item IDs"""
        genre_matrix = torch.zeros(len(item_ids), len(self.genre2id))
        for i, item_id in enumerate(item_ids):
            genres = self.movie_genre_map.get(item_id, [])
            for genre_id in genres:
                genre_matrix[i, genre_id] = 1
        return genre_matrix

    def get_num_items(self):
        return len(self.movie_genre_map)

    @property
    def num_items(self):
        return self.get_num_items()

Overwriting datasets/ml1m_genre.py


In [14]:
# Update datasets/__init__.py
%%writefile datasets/__init__.py
from .ml1m_genre import ML1MDataset
from .ml_20m import ML20MDataset

DATASETS = {
    ML1MDataset.code(): ML1MDataset,
    ML20MDataset.code(): ML20MDataset
}

def dataset_factory(args):
    dataset = DATASETS[args.dataset_code]
    return dataset(args)

Overwriting datasets/__init__.py


In [15]:
# Update models/__init__.py
%%writefile models/__init__.py
from .bert import BERTModel
from .bert_genre import BERTGenreModel

MODELS = {
    BERTModel.code(): BERTModel,
    BERTGenreModel.code(): BERTGenreModel
}

def model_factory(args):
    """Factory function to create model based on args"""
    model = MODELS[args.model_code]
    return model(args)

Overwriting models/__init__.py


In [16]:
# Update trainers/__init__.py
%%writefile trainers/__init__.py
from .bert import BERTTrainer
# from .dae import DAERecommenderTrainer
# from .vae import VAETrainer
from .bert_genre import BERTGenreTrainer  # Import our genre trainer

TRAINERS = {
    BERTTrainer.code(): BERTTrainer,
    # DAERecommenderTrainer.code(): DAERecommenderTrainer,
    # VAETrainer.code(): VAETrainer,
    BERTGenreTrainer.code(): BERTGenreTrainer  # Add our genre trainer
}

def trainer_factory(args, model, train_loader, val_loader, test_loader, export_root):
    trainer = TRAINERS[args.trainer_code]
    return trainer(args, model, train_loader, val_loader, test_loader, export_root)

Overwriting trainers/__init__.py


In [17]:
# Update BERT genre trainer
%%writefile trainers/bert_genre.py
from .base import AbstractTrainer
import torch
import torch.nn as nn
from tqdm import tqdm

class BERTGenreTrainer(AbstractTrainer):
    @classmethod
    def code(cls):
        return 'bert_genre'

    def __init__(self, args, model, train_loader, val_loader, test_loader, export_root):
        super().__init__(args, model, train_loader, val_loader, test_loader, export_root)
        self.args = args
        self.device = args.device
        self.model = model.to(self.device)
        self.criterion = nn.CrossEntropyLoss(ignore_index=0)
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        self.best_metric = args.best_metric
        self.num_items = args.num_items

    def add_extra_loggers(self):
        pass

    def log_extra_train_info(self, log_data):
        pass

    def log_extra_val_info(self, log_data):
        pass

    def calculate_loss(self, batch):
        seqs, labels, genre_matrices = batch
        seqs = seqs.to(self.device)
        labels = labels.to(self.device)
        genre_matrices = genre_matrices.to(self.device)

        logits = self.model(seqs, genre_matrices)  # [B, G, I]
        B, G, I = logits.size()

        # Fix shape: from [B, 1, G] → [B, G]
        if genre_matrices.shape[1] == 1:
            genre_matrices = genre_matrices.squeeze(1)

        genre_indices = torch.argmax(genre_matrices, dim=1)  # [B]
        gather_indices = genre_indices.view(B, 1, 1).expand(-1, 1, I)  # [B, 1, I]
        genre_logits = logits.gather(dim=1, index=gather_indices).squeeze(1)  # [B, I]

        return self.criterion(genre_logits, labels)

    def calculate_metrics(self, batch):
        seqs, labels, genre_matrices = batch
        seqs = seqs.to(self.device)
        labels = labels.to(self.device)
        genre_matrices = genre_matrices.to(self.device)

        logits = self.model(seqs, genre_matrices)  # [B, G, I]
        batch_size, num_genres, num_items = logits.size()

        top5 = torch.topk(logits, 5, dim=2).indices  # [B, G, 5]
        labels_expanded = labels.unsqueeze(1).expand(-1, num_genres).unsqueeze(2)  # [B, G, 1]
        hits = (top5 == labels_expanded).any(dim=2).float()  # [B, G]
        recall_per_genre = hits.mean(dim=0)  # [G]

        metrics = {f"Recall@5_Genre{g}": recall_per_genre[g].item() for g in range(num_genres)}
        metrics["Recall@5_Avg"] = recall_per_genre.mean().item()
        return metrics

    def train_epoch(self, epoch_idx):
        self.model.train()
        avg_loss = 0.0
        for batch_idx, batch in enumerate(tqdm(self.train_loader, desc=f'Epoch {epoch_idx}')):
            self.optimizer.zero_grad()
            loss = self.calculate_loss(batch)
            loss.backward()
            self.optimizer.step()
            avg_loss += loss.item()
        avg_loss /= (batch_idx + 1)
        return {'avg_loss': avg_loss}

    def validate_epoch(self, epoch_idx):
        self.model.eval()
        metrics = {}
        with torch.no_grad():
            for batch in tqdm(self.val_loader, desc='Validation'):
                batch_metrics = self.calculate_metrics(batch)
                for k, v in batch_metrics.items():
                    metrics[k] = metrics.get(k, 0) + v
        for k in metrics:
            metrics[k] /= len(self.val_loader)
        return metrics

    def test(self):
        self.model.eval()
        metrics = {}
        with torch.no_grad():
            for batch in tqdm(self.test_loader, desc='Testing'):
                batch_metrics = self.calculate_metrics(batch)
                for k, v in batch_metrics.items():
                    metrics[k] = metrics.get(k, 0) + v
        for k in metrics:
            metrics[k] /= len(self.test_loader)

        # Print and save metrics
        print("\n📊 Final Evaluation (Test Set):")
        for k, v in metrics.items():
            print(f"{k}: {v:.4f}")
        import json
        with open("final_recall_metrics.json", "w") as f:
            json.dump(metrics, f, indent=2)

        return metrics

    def get_genre_recommendations(self):
        self.model.eval()
        recommendations = {}  # {user_id: {genre: [item_ids]}}

        with torch.no_grad():
            for batch in tqdm(self.test_loader, desc='Generating Genre-Based Recommendations'):
                seqs, labels, genre_matrices = [x.to(self.device) for x in batch]
                logits = self.model(seqs, genre_matrices)  # [B, G, I]
                top5_items = torch.topk(logits, 5, dim=2).indices  # [B, G, 5]

                for i in range(seqs.size(0)):
                    user_id = i  # Replace with actual user ID if available
                    recommendations[user_id] = {}
                    for g in range(logits.size(1)):
                        recommendations[user_id][f'Genre_{g}'] = top5_items[i, g].cpu().tolist()

        import json
        with open('genre_recommendations.json', 'w') as f:
            json.dump(recommendations, f, indent=2)
        print("✅ Genre-based recommendations saved to genre_recommendations.json")

Overwriting trainers/bert_genre.py


In [19]:
from dataloaders import dataloader_factory
from models import model_factory
from trainers import trainer_factory
from options import args
import os

# Ensure working directory is correct (if not already done)
os.chdir('/content/BERT4Rec-VAE-Pytorch')

# Set template and required args
args.template = 'train_bert_genre'
args.metric = "Recall@5_Avg"
args.best_metric = "Recall@5_Avg"
args.monitor = "Recall@5_Avg"
args.num_items = 3953

# 🛠️ Add missing arguments
args.result_dir = './results'
args.run_name = 'bert_genre_test'

# Load data and model
train_loader, val_loader, test_loader = dataloader_factory(args)
model = model_factory(args).to(args.device)

# Create trainer
export_root = os.path.join(args.result_dir, args.run_name)
trainer = trainer_factory(args, model, train_loader, val_loader, test_loader, export_root)


Number of items: 3883
Already preprocessed. Skip preprocessing


In [20]:
trainer.train()

Val: N@1 0.000, N@5 0.000, N@10 0.000, R@1 0.000, R@5 0.000, R@10 0.000: 100%|██████████| 95/95 [00:02<00:00, 44.22it/s]


Update Best Recall@5_Avg Model at 1


Logging to Tensorboard: 100%|██████████| 95/95 [00:05<00:00, 17.69it/s]
Val: N@1 0.000, N@5 0.000, N@10 0.000, R@1 0.000, R@5 0.000, R@10 0.000: 100%|██████████| 95/95 [00:01<00:00, 48.86it/s]


Update Best Recall@5_Avg Model at 1


Epoch 2, loss 5.376 : 100%|██████████| 95/95 [00:05<00:00, 17.46it/s]
Val: N@1 0.000, N@5 0.000, N@10 0.000, R@1 0.000, R@5 0.000, R@10 0.000: 100%|██████████| 95/95 [00:01<00:00, 51.40it/s]
Logging to Tensorboard: 100%|██████████| 95/95 [00:05<00:00, 17.68it/s]
Val: N@1 0.000, N@5 0.000, N@10 0.000, R@1 0.000, R@5 0.000, R@10 0.000: 100%|██████████| 95/95 [00:02<00:00, 46.88it/s]
Logging to Tensorboard: 100%|██████████| 95/95 [00:05<00:00, 18.10it/s]
Val: N@1 0.000, N@5 0.000, N@10 0.000, R@1 0.000, R@5 0.000, R@10 0.000: 100%|██████████| 95/95 [00:01<00:00, 51.39it/s]
Logging to Tensorboard: 100%|██████████| 95/95 [00:05<00:00, 17.10it/s]
Val: N@1 0.000, N@5 0.000, N@10 0.000, R@1 0.000, R@5 0.000, R@10 0.000: 100%|██████████| 95/95 [00:01<00:00, 52.88it/s]
Logging to Tensorboard: 100%|██████████| 95/95 [00:05<00:00, 18.38it/s]
Val: N@1 0.000, N@5 0.000, N@10 0.000, R@1 0.000, R@5 0.000, R@10 0.000: 100%|██████████| 95/95 [00:01<00:00, 52.82it/s]
Epoch 7, loss 0.729 : 100%|██████████

In [21]:
trainer.get_genre_recommendations()

Generating Genre-Based Recommendations: 100%|██████████| 95/95 [00:04<00:00, 21.22it/s]

✅ Genre-based recommendations saved to genre_recommendations.json





In [22]:
results = trainer.test()

print("📊 Final Evaluation (Test Set):")
for k, v in results.items():
    print(f"{k}: {v:.4f}")


Testing: 100%|██████████| 95/95 [00:01<00:00, 47.52it/s]


📊 Final Evaluation (Test Set):
Recall@5_Genre0: 0.0133
Recall@5_Genre1: 0.0086
Recall@5_Genre2: 0.0154
Recall@5_Genre3: 0.0054
Recall@5_Genre4: 0.0018
Recall@5_Genre5: 0.0030
Recall@5_Genre6: 0.0081
Recall@5_Genre7: 0.0033
Recall@5_Genre8: 0.0035
Recall@5_Genre9: 0.0049
Recall@5_Genre10: 0.0036
Recall@5_Genre11: 0.0035
Recall@5_Genre12: 0.0021
Recall@5_Genre13: 0.0023
Recall@5_Genre14: 0.0008
Recall@5_Genre15: 0.0008
Recall@5_Genre16: 0.0016
Recall@5_Genre17: 0.0010
Recall@5_Avg: 0.0046
📊 Final Evaluation (Test Set):
Recall@5_Genre0: 0.0133
Recall@5_Genre1: 0.0086
Recall@5_Genre2: 0.0154
Recall@5_Genre3: 0.0054
Recall@5_Genre4: 0.0018
Recall@5_Genre5: 0.0030
Recall@5_Genre6: 0.0081
Recall@5_Genre7: 0.0033
Recall@5_Genre8: 0.0035
Recall@5_Genre9: 0.0049
Recall@5_Genre10: 0.0036
Recall@5_Genre11: 0.0035
Recall@5_Genre12: 0.0021
Recall@5_Genre13: 0.0023
Recall@5_Genre14: 0.0008
Recall@5_Genre15: 0.0008
Recall@5_Genre16: 0.0016
Recall@5_Genre17: 0.0010
Recall@5_Avg: 0.0046





In [None]:
import json

with open("genre_recall_results.json", "w") as f:
    json.dump(results, f, indent=2)

print("✅ Saved genre recall results to 'genre_recall_results.json'")


In [27]:
import os
import shutil

# Define list of modified/created files
modified_files = [
    "models/bert_genre.py",
    "trainers/bert_genre_trainer.py",
    "dataloaders/bert_genre.py",
    "templates.py",
    "options.py",
    "main.py",
    "dataloaders/__init__.py",
    "datasets/ml1m_genre.py",
    "datasets/__init__.py",
    "models/__init__.py",
    "trainers/__init__.py",
    "trainers/bert_genre.py",
    "genre_recommendations.json",               # ✅ Result file 1
    "genre_recall_results.json",                # ✅ Result file 2
    "final_recall_metrics.json"
    # "BERT_recommendation_model.ipynb"  # <-- Your notebook
]

# Create a directory to store files before zipping
os.makedirs("BERT4Rec_Submission", exist_ok=True)

# Copy each file to submission folder preserving folder structure
for file_path in modified_files:
    dest_path = os.path.join("BERT4Rec_Submission", file_path)
    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
    shutil.copy(file_path, dest_path)

# Create the zip archive
shutil.make_archive("BERT4Rec_Submission", 'zip', "BERT4Rec_Submission")


'/content/BERT4Rec-VAE-Pytorch/BERT4Rec_Submission.zip'

In [28]:
from google.colab import files
files.download("BERT4Rec_Submission.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>