In [8]:
# Import all required libraries
import torch
from datasets import load_dataset
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.general_recommender import BPR, Pop
from recbole.model.sequential_recommender import SASRec
from recbole.utils import init_seed, init_logger, get_trainer

# Set torch.load compatibility
torch.serialization.add_safe_globals([dict, list, tuple, set])

print("All libraries imported successfully!")

All libraries imported successfully!


In [9]:
# Load Amazon Reviews 2023 dataset using RecBole
print("=== Loading Amazon Reviews 2023 Dataset ===")

# Load raw dataset (optional, for comparison)
raw_dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_review_All_Beauty", trust_remote_code=True)
print(f"Raw dataset loaded, samples: {len(raw_dataset['full'])}")

# 在RecBole中使用5-core过滤
print("=== Using 5-core Filtering in RecBole ===")

config_5core = Config(
    model='SASRec',
    dataset='All_Beauty', 
    config_dict={
        'data_path': 'seq_rec_results/dataset/processed/',
        'load_col': {
            'inter': ['user_id', 'item_id_list', 'item_id']
        },
        'benchmark_filename': ['train', 'valid', 'test'],
        'alias_of_item_id': ['item_id_list'],
        'train_neg_sample_args': None,
        'loss_type': 'CE',
    }
)

# 创建5-core过滤后的数据集
print("Creating 5-core filtered dataset...")
dataset_5core = create_dataset(config_5core)
train_data_5core, valid_data_5core, test_data_5core = data_preparation(config_5core, dataset_5core)

print(f"\n📊 5-core Filtered Dataset Statistics:")
print(f"Users: {dataset_5core.user_num}")
print(f"Items: {dataset_5core.item_num}")
print(f"Interactions: {dataset_5core.inter_num}")

=== Loading Amazon Reviews 2023 Dataset ===
Raw dataset loaded, samples: 701528
=== Using 5-core Filtering in RecBole ===
Creating 5-core filtered dataset...

📊 5-core Filtered Dataset Statistics:
Users: 254
Items: 357
Interactions: 2282


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always beha

In [10]:
# Define unified model training function
def train_model(model_type, dataset_name='All_Beauty', epochs=10, **kwargs):
    """
    Unified function to train recommendation models
    
    Args:
        model_type: Model type ('BPR', 'SASRec', 'Pop')
        dataset_name: Dataset name
        epochs: Training epochs
        **kwargs: Additional model-specific parameters
    
    Returns:
        dict: Dictionary containing model, trainer, config and results
    """
    
    print(f"\n=== Training {model_type} Model ===")
    
    # Base configuration
    base_config = {
        'data_path': 'seq_rec_results/dataset/processed/',
        'benchmark_filename': ['train', 'valid', 'test'],
        'epochs': epochs,
        'stopping_step': 10,
        'eval_step': 1,
        'metrics': ['Recall', 'NDCG'],
        'topk': [10, 20],
        'valid_metric': 'NDCG@10',
        'checkpoint_dir': './checkpoints/',
        'show_progress': True
    }
    
    # Model-specific configurations
    if model_type == 'BPR':
        model_class = BPR
        model_config = {
            **base_config,
            'load_col': {'inter': ['user_id', 'item_id']},
            'train_neg_sample_args': {
                'distribution': 'uniform',
                'sample_num': 1,
                'alpha': 1.0,
                'dynamic': False,
                'candidate_num': 0
            },
            'loss_type': 'BPR',
            'learning_rate': 0.001,
            'train_batch_size': 2048,
        }
        
    elif model_type == 'SASRec':
        model_class = SASRec
        model_config = {
            **base_config,
            'load_col': {'inter': ['user_id', 'item_id_list', 'item_id']},
            'alias_of_item_id': ['item_id_list'],
            'train_neg_sample_args': None,
            'loss_type': 'CE',
            'learning_rate': 0.001,
            'train_batch_size': 256,
            'max_seq_length': 50,
            'hidden_size': 64,
            'n_layers': 2,
            'n_heads': 2,
            'inner_size': 256,
            'hidden_dropout_prob': 0.5,
            'attn_dropout_prob': 0.5,
        }
        
    elif model_type == 'Pop':
        model_class = Pop
        model_config = {
            **base_config,
            'load_col': {'inter': ['user_id', 'item_id']},
            'train_neg_sample_args': None,
        }
    else:
        raise ValueError(f"Unsupported model type: {model_type}")
    
    # Merge user-defined parameters
    model_config.update(kwargs)
    
    # Create config and dataset
    config = Config(
        model=model_type,
        dataset=dataset_name,
        config_dict=model_config
    )
    
    # Create dataset
    model_dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, model_dataset)
    
    print(f"{model_type} dataset stats:")
    print(f"Users: {model_dataset.user_num}")
    print(f"Items: {model_dataset.item_num}")
    print(f"Interactions: {model_dataset.inter_num}")
    
    # Initialize model and trainer
    init_seed(config['seed'], config['reproducibility'])
    model = model_class(config, model_dataset).to(config['device'])
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)
    
    print(f"Training {model_type} model...")
    
    # torch.load compatibility settings
    original_load = torch.load
    def safe_load(*args, **kwargs):
        kwargs['weights_only'] = False
        return original_load(*args, **kwargs)
    torch.load = safe_load
    
    try:
        # Train model
        best_valid_score, best_valid_result = trainer.fit(
            train_data, valid_data, saved=True, show_progress=True
        )
        
        print(f"{model_type} training completed!")
        print(f"Best validation result: {best_valid_result}")
        
        # Test model
        test_result = trainer.evaluate(test_data, load_best_model=True, show_progress=True)
        print(f"{model_type} test result: {test_result}")
        
        return {
            'model_type': model_type,
            'model': model,
            'trainer': trainer,
            'config': config,
            'dataset': model_dataset,
            'train_data': train_data,
            'valid_data': valid_data,
            'test_data': test_data,
            'best_valid_result': best_valid_result,
            'test_result': test_result
        }
        
    finally:
        # Restore original torch.load function
        torch.load = original_load

print("Unified training function defined!")

Unified training function defined!


In [11]:
# Train all models using unified function
print("=== Training All Models with Unified Function ===")

# Store all model results
model_results = {}

# Train all three models
models_to_train = [
    {'model_type': 'Pop', 'epochs': 1},  # Pop model trains quickly
    {'model_type': 'BPR', 'epochs': 5},  # BPR model
    {'model_type': 'SASRec', 'epochs': 5}  # SASRec model
]

for model_config in models_to_train:
    try:
        result = train_model(**model_config)
        model_results[model_config['model_type']] = result
        print(f"✅ {model_config['model_type']} training successful")
    except Exception as e:
        print(f"❌ {model_config['model_type']} training failed: {str(e)}")
        model_results[model_config['model_type']] = None

print(f"\nTraining completed! Successfully trained {len([r for r in model_results.values() if r is not None])} models")

=== Training All Models with Unified Function ===

=== Training Pop Model ===
Pop dataset stats:
Users: 254
Items: 352
Interactions: 2282


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)


Training Pop model...


  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|███████████████████████████| 1/1 [00:00<00:00, 17.91it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 146.25it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m


Pop training completed!
Best validation result: OrderedDict([('recall@10', 0.0), ('recall@20', 0.0124), ('ndcg@10', 0.0), ('ndcg@20', 0.0033)])


[1;35mEvaluate   [0m: 100%|██████████████████████████| 2/2 [00:00<00:00, 569.22it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)


Pop test result: OrderedDict([('recall@10', 0.0), ('recall@20', 0.0), ('ndcg@10', 0.0), ('ndcg@20', 0.0)])
✅ Pop training successful

=== Training BPR Model ===
BPR dataset stats:
Users: 254
Items: 352
Interactions: 2282
Training BPR model...


  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|███████████████████████████| 1/1 [00:00<00:00,  6.27it/s, [1;33mGPU RAM: 0.00 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 169.93it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mTrain     1[0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 135.82it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 674.74it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mTrain     2[0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 141.88it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13/13 [00:00<00:00, 661.28it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mTrain     3[0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 145.66it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|████████████████████████| 13

BPR training completed!
Best validation result: OrderedDict([('recall@10', 0.0068), ('recall@20', 0.0317), ('ndcg@10', 0.0039), ('ndcg@20', 0.011)])


[1;35mEvaluate   [0m: 100%|██████████████████████████| 2/2 [00:00<00:00, 576.22it/s, [1;33mGPU RAM: 0.02 G/79.14 G[0m][0m
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[field].fillna(value="", inplace=True)
The behavior will chang

BPR test result: OrderedDict([('recall@10', 0.0), ('recall@20', 0.1111), ('ndcg@10', 0.0), ('ndcg@20', 0.0319)])
✅ BPR training successful

=== Training SASRec Model ===
SASRec dataset stats:
Users: 254
Items: 357
Interactions: 2282
Training SASRec model...


  scaler = amp.GradScaler(enabled=self.enable_scaler)
[1;35mTrain     0[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 17.19it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 292.10it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mTrain     1[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 24.35it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 307.79it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mTrain     2[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 23.08it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 309.86it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mTrain     3[0m: 100%|███████████████████████████| 8/8 [00:00<00:00, 22.93it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m
[1;35mEvaluate   [0m: 100%|██████████████████████████| 

SASRec training completed!
Best validation result: OrderedDict([('recall@10', 0.1055), ('recall@20', 0.2109), ('ndcg@10', 0.0393), ('ndcg@20', 0.066)])


[1;35mEvaluate   [0m: 100%|██████████████████████████| 1/1 [00:00<00:00, 312.77it/s, [1;33mGPU RAM: 0.35 G/79.14 G[0m][0m

SASRec test result: OrderedDict([('recall@10', 0.0455), ('recall@20', 0.4091), ('ndcg@10', 0.0196), ('ndcg@20', 0.1104)])
✅ SASRec training successful

Training completed! Successfully trained 3 models





In [12]:
# Unified model performance comparison and analysis
def compare_models(model_results):
    """Compare all trained models"""
    
    print("\n" + "="*70)
    print("                Model Performance Comparison Report")
    print("="*70)
    
    # Extract test results
    results_summary = {}
    for model_type, result in model_results.items():
        if result is not None:
            results_summary[model_type] = result['test_result']
        else:
            results_summary[model_type] = {}
    
    # Create comparison table
    print(f"\n{'Metric':<15}", end="")
    model_names = list(results_summary.keys())
    for name in model_names:
        print(f" | {name:<12}", end="")
    print()
    print("-" * (15 + 15 * len(model_names)))
    
    metrics_to_compare = ['recall@10', 'ndcg@10', 'recall@20', 'ndcg@20']
    best_scores = {}
    
    for metric in metrics_to_compare:
        print(f"{metric:<15}", end="")
        metric_values = []
        
        for model_type in model_names:
            value = results_summary[model_type].get(metric, 'N/A')
            if value != 'N/A':
                print(f" | {value:<12.4f}", end="")
                metric_values.append((model_type, value))
            else:
                print(f" | {'N/A':<12}", end="")
        
        print()
        
        # Find best model
        if metric_values:
            best_model, best_score = max(metric_values, key=lambda x: x[1])
            best_scores[metric] = (best_model, best_score)
    
    print("-" * (15 + 15 * len(model_names)))
    
    # Analyze best models
    print(f"\n🏆 Best model for each metric:")
    for metric, (best_model, best_score) in best_scores.items():
        print(f"  {metric}: {best_model} ({best_score:.4f})")
    
    # Overall model ranking
    model_scores = {name: [] for name in model_names}
    for metric in metrics_to_compare:
        for model_type in model_names:
            value = results_summary[model_type].get(metric, 0)
            if value != 'N/A' and value != 0:
                model_scores[model_type].append(value)
    
    avg_scores = {name: sum(scores)/len(scores) if scores else 0 
                  for name, scores in model_scores.items()}
    
    print(f"\n📊 Overall model ranking:")
    sorted_models = sorted(avg_scores.items(), key=lambda x: x[1], reverse=True)
    for i, (model, score) in enumerate(sorted_models, 1):
        print(f"  {i}. {model}: {score:.4f} (avg score)")
    
    # Model characteristics analysis
    print(f"\n📝 Model characteristics:")
    model_analysis = {
        'Pop': 'Item popularity based, fast training, good for cold start',
        'BPR': 'Collaborative filtering, personalized, balanced performance',  
        'SASRec': 'Sequential recommendation, temporal patterns, rich historical data'
    }
    
    for model_type in model_names:
        if model_type in model_analysis:
            status = "✅ Success" if model_results[model_type] else "❌ Failed"
            print(f"  • {model_type}: {model_analysis[model_type]} [{status}]")
    
    return best_scores, sorted_models

# Execute model comparison
if model_results:
    best_scores, model_ranking = compare_models(model_results)
else:
    print("⚠️ No model results to compare, please run model training first")


                Model Performance Comparison Report

Metric          | Pop          | BPR          | SASRec      
------------------------------------------------------------
recall@10       | 0.0000       | 0.0000       | 0.0455      
ndcg@10         | 0.0000       | 0.0000       | 0.0196      
recall@20       | 0.0000       | 0.1111       | 0.4091      
ndcg@20         | 0.0000       | 0.0319       | 0.1104      
------------------------------------------------------------

🏆 Best model for each metric:
  recall@10: SASRec (0.0455)
  ndcg@10: SASRec (0.0196)
  recall@20: SASRec (0.4091)
  ndcg@20: SASRec (0.1104)

📊 Overall model ranking:
  1. SASRec: 0.1462 (avg score)
  2. BPR: 0.0715 (avg score)
  3. Pop: 0.0000 (avg score)

📝 Model characteristics:
  • Pop: Item popularity based, fast training, good for cold start [✅ Success]
  • BPR: Collaborative filtering, personalized, balanced performance [✅ Success]
  • SASRec: Sequential recommendation, temporal patterns, rich historical 