In [1]:
import mlflow
import pandas as pd
import numpy as np
from itertools import product
import seaborn as sns
import matplotlib.pyplot as plt
import yaml
from pathlib import Path
from src.models.model import MatrixFactorization
from src.data.preprocessing import load_ml1m_data, preprocess_ratings, split_data
from src.data.dataset import RecommenderDataset
from src.training.trainer import train_model
from torch.utils.data import DataLoader

In [2]:
# Search space for the grid search below: run_experiments trains one model per
# combination of these values (3 x 3 x 3 = 27 runs). The key order fixes the
# order in which combinations are generated.
# NOTE(review): 'dropout' is swept and logged, but the model construction in
# run_hyperparameter_experiment only reads 'embedding_dim' and 'reg_lambda'.
hyperparameter_grind = dict(
    embedding_dim=[50, 100, 150],
    reg_lambda=[0.001, 0.01, 0.1],
    dropout=[0.1, 0.2, 0.3],
)

In [3]:
def prepare_data(config, data_dir='../data/raw/ml-1m'):
    """Load the ratings and wrap the train/validation splits in DataLoaders.

    Args:
        config: Experiment configuration. ``config['training']['batch_size']``
            is used as the batch size of both loaders.
        data_dir: Directory handed to ``load_ml1m_data``. Defaults to the
            location this notebook has always used; a relative path is
            resolved against the kernel's working directory.

    Returns:
        A tuple ``(train_loader, val_loader, num_users, n_items)``. The two
        counts are the lengths of the user and item mappings returned by
        ``preprocess_ratings``.

    Raises:
        KeyError: If ``config`` has no ``['training']['batch_size']`` entry.
    """
    # Read the batch size first so a malformed config fails before any data
    # is loaded from disk.
    batch_size = config['training']['batch_size']

    # The second value returned by load_ml1m_data is not needed here.
    ratings_df, _ = load_ml1m_data(data_dir)
    processed_df, user_mapping, item_mapping = preprocess_ratings(ratings_df)
    train_data, val_data = split_data(processed_df)

    train_dataset = RecommenderDataset(train_data)
    val_dataset = RecommenderDataset(val_data)

    # Only the training loader is shuffled; validation keeps a fixed order.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, len(user_mapping), len(item_mapping)

In [4]:
def run_hyperparameter_experiment(config, hyperparams, train_loader, val_loader):
    """Build a MatrixFactorization model for one grid point and train it.

    Args:
        config: Experiment configuration; must contain ``num_users`` and
            ``n_items``, and is passed through unchanged to ``train_model``.
        hyperparams: Mapping with at least ``embedding_dim`` and
            ``reg_lambda``.
        train_loader: Training loader handed to ``train_model``.
        val_loader: Validation loader handed to ``train_model``.

    Returns:
        Whatever ``train_model`` returns for the freshly built model.
    """
    # NOTE(review): any other entry in `hyperparams` (e.g. 'dropout') is not
    # forwarded to the model here -- confirm whether that is intended.
    model_kwargs = {
        'num_users': config['num_users'],
        'n_items': config['n_items'],
        'embedding_dim': hyperparams['embedding_dim'],
        'reg_lambda': hyperparams['reg_lambda'],
    }
    candidate = MatrixFactorization(**model_kwargs)

    return train_model(candidate, train_loader, val_loader, config)


In [5]:
def run_experiments(config, param_grid=None):
    """Run a grid search, tracking each combination as a nested MLflow run.

    Args:
        config: Experiment configuration dict. It is updated in place with
            ``num_users`` and ``n_items`` taken from ``prepare_data``.
        param_grid: Optional mapping of hyperparameter name to a list of
            candidate values. When omitted, the notebook-level
            ``hyperparameter_grind`` is used.

    Returns:
        A list with one dict per combination, in generation order, holding
        ``'params'`` (the hyperparameter dict) and ``'model'`` (the value
        returned by ``run_hyperparameter_experiment``).
    """
    if param_grid is None:
        param_grid = hyperparameter_grind

    train_loader, val_loader, num_users, n_items = prepare_data(config)

    # Mutates the caller's config on purpose: the model construction reads
    # these two entries from it.
    config.update({
        'num_users': num_users,
        'n_items': n_items
    })

    # Close a run left open by an earlier (possibly interrupted) cell so the
    # parent run below can start. This stays best-effort, but only ordinary
    # errors are suppressed -- KeyboardInterrupt/SystemExit must propagate.
    try:
        mlflow.end_run()
    except Exception as exc:
        print(f"Warning: could not end the previously active MLflow run: {exc}")

    param_names = list(param_grid)
    results = []

    with mlflow.start_run(run_name="Hyperparameter_optimization"):
        for values in product(*param_grid.values()):
            hyperparams = dict(zip(param_names, values))
            print(f"Running experiment with parameters: {hyperparams}")

            with mlflow.start_run(nested=True):
                # Log the parameters before training so that a failed or
                # interrupted child run is still identifiable in MLflow.
                mlflow.log_params(hyperparams)

                model = run_hyperparameter_experiment(
                    config,
                    hyperparams,
                    train_loader,
                    val_loader
                )

            results.append({
                'params': hyperparams,
                'model': model,
            })

    return results

In [6]:
if __name__ == '__main__':
    # Read the YAML experiment configuration, then launch the grid search.
    # The path is relative to the kernel's working directory.
    config_path = Path('../config/config.yaml')
    with config_path.open('r') as config_file:
        config = yaml.safe_load(config_file)

    results = run_experiments(config)

Running experiment with parameters: {'embedding_dim': 50, 'reg_lambda': 0.001, 'dropout': 0.1}
Epoch 1/20 
Train Loss: 94.89088502786574
Val Loss: 0.8647535067392479
----------------------------------------
Epoch 2/20 
Train Loss: 0.8385709598594919
Val Loss: 0.8324703486422964
----------------------------------------
Epoch 3/20 
Train Loss: 0.8213156574269939
Val Loss: 0.8280007847115846
----------------------------------------
Epoch 4/20 
Train Loss: 0.817450431011433
Val Loss: 0.8273664490246498
----------------------------------------
Epoch 5/20 
Train Loss: 0.816259252036332
Val Loss: 0.8272934433690112
----------------------------------------
Epoch 6/20 
Train Loss: 0.815735714443491
Val Loss: 0.8274022708870399
----------------------------------------
Epoch 7/20 
Train Loss: 0.8155748762531795
Val Loss: 0.8280468696843944
----------------------------------------
Epoch 8/20 
Train Loss: 0.8154613123834967
Val Loss: 0.8277850437883147
----------------------------------------
Epoch