In [None]:
# General Stuff:
import numpy as np
import pandas as pd

# Our Stuff:
from models.AutoRecBase import AutoRecBase
from models.VarAutoRec import VarAutoRec
from models.NMF import NMF


from scripts.get_data import download_2_data_sets, ratings_to_train_test
from utils.evaluate import evaluate_model
from utils.loading_utils import load_model, save_model

import torch
from torch import nn
import pytorch_lightning as pl

# Visualization Stuff
import seaborn as sns
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Raise pandas' per-column display width to 240 characters.
pd.set_option('display.max_colwidth', 240)

# Select the compute device once: the first CUDA GPU when one is visible,
# otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
# A bare `device` expression is only rendered when it is the last line of a
# cell, so report the selection explicitly (this also covers CPU-only runs).
print('Using device:', device)

# AutoRec - AutoEncoders Meet Collaborative Filtering - PyTorch

<a id="toc"></a>
## Table of Contents
1. [Introduction](#introduction)
1. [Conclusions](#conclusions)

<a id="introduction"></a>

## Introduction

In this notebook we will review a collaborative filtering approach using autoencoders, as suggested by Sedhain et al. in their 2015 paper "AutoRec: Autoencoders Meet Collaborative Filtering".

We will then introduce several improvements and assess them.

[Table of contents](#toc)

In [2]:
# Run the project's data-download helper (imported from scripts.get_data).
# NOTE(review): presumably fetches two rating datasets, going by the helper's
# name — confirm what it downloads and where it stores the files.
download_2_data_sets()

  ratings = pd.read_table(


In [10]:
# Build the two loaders that the training and evaluation cells below consume.
# NOTE(review): the four positional arguments (1, 0, 1, 10) are opaque from here
# and the spacing "1,0, 1,10" looks unintentional — check them against
# ratings_to_train_test's signature and consider passing them by keyword.
train_loader, val_loader = ratings_to_train_test(1,0, 1,10)

In [11]:
# (Re)load the TensorBoard notebook extension, then open TensorBoard on the
# `lightning_logs` directory.
%reload_ext tensorboard
%tensorboard --logdir lightning_logs

Launching TensorBoard...

Go to:  [TensorBoard](http://localhost:6006)

In [12]:
# Experiment bookkeeping, both keyed by (model, epoch, lr):
models_dict = dict()   # -> loss
models_state = dict()  # -> model.state_dict()

# Search space: candidate model classes and learning rates.
models = [AutoRecBase, VarAutoRec, NMF]
lrs = [0.0005, 0.001, 0.002, 0.004]

Hyperparameter grids from the original paper (latent dimensions and regularisation strengths λ):

In [13]:
# Candidate latent (hidden-layer) sizes.
latent_dims = [
    10, 20, 40, 80, 100,
    200, 300, 400, 500,
]
# Candidate λ values.
lambdas = [
    0.001, 0.01, 0.1,
    1, 100, 1000,
]

In [18]:
# Hyperparameters for this single training run, named once so the constructor
# and the checkpoint tag below always agree.
reg_lambda = lambdas[0]
# Take the learning rate from the learning-rate grid rather than the λ grid.
# lrs[1] is 0.001, the same value this cell used before, so the run and the
# saved checkpoint's lr tag are unchanged.
lr = lrs[1]

# NOTE(review): the recorded run of this cell failed with
# "TypeError: __init__() got an unexpected keyword argument 'λ'". The keyword
# below must be renamed to whatever AutoRecBase.__init__ calls this parameter.
# NOTE(review): number_of_items=3706 is hard-coded; presumably the item count
# of the dataset loaded above — confirm and derive it from the data if possible.
model = AutoRecBase(number_of_items=3706,
                    hidden_size=25,
                    activation_function_1=nn.PReLU,
                    activation_function_2=nn.PReLU,
                    loss=nn.MSELoss(reduction='none'),
                    λ=reg_lambda,
                    lr=lr)
# training
trainer = pl.Trainer(gpus=0, max_epochs=10)
trainer.fit(model, train_loader, val_loader)
# NOTE(review): the model is built with hidden_size=25 but saved under
# hidden_size=latent_dims[0] (10). The tag is kept because the evaluation cell
# loads with latent_dims[0]; decide which value is intended and align both.
save_model(model, lr=lr, hidden_size=latent_dims[0])

TypeError: __init__() got an unexpected keyword argument 'λ'

In [None]:
# Evaluate each (model class, learning rate) checkpoint at every top-K cutoff.
# Layout: "row_<n>" -> [model class name, K, metric name, mean score].
models_eval_dict = {}
Ks = [5, 10]
i = 0

for model_class in models:
    for lr in lrs:
        # Instantiate with defaults, then let load_model hand back the model to evaluate.
        model = load_model(model_class(), lr=lr, hidden_size=latent_dims[0])
        model.eval()
        model_name = type(model).__name__
        for K in Ks:
            hits, ndcgs, mrrs = evaluate_model(model, val_loader, K)
            # One row per metric, in the fixed order HR, NDCG, MRR.
            for metric, values in (("HR", hits), ("NDCG", ndcgs), ("MRR", mrrs)):
                models_eval_dict[f"row_{i}"] = [model_name, K, metric, np.mean(values)]
                i += 1

In [None]:
# Long-format results table: one row per entry of models_eval_dict.
columns = ["model", "topk", "metric", "score"]
eval_df = pd.DataFrame.from_dict(
    data=models_eval_dict,
    orient="index",
    columns=columns,
)
eval_df

In [None]:
sns.set(style="darkgrid")
fig, axs = plt.subplots(ncols=3, figsize=(60, 20))

# One panel per metric: score by model, with bars split by top-K cutoff.
for ax, metric in zip(axs, ['HR', 'NDCG', 'MRR']):
    metric_rows = eval_df[eval_df["metric"] == metric]
    panel = sns.barplot(data=metric_rows, x="model", y="score", hue="topk", ax=ax, orient='v')
    panel.set(title=metric)
