In [1]:
import io
import os
import math
import copy
import pickle
import zipfile
from textwrap import wrap
from pathlib import Path
from itertools import zip_longest
from collections import defaultdict
from urllib.error import URLError
from urllib.request import urlopen

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
from torch import nn
from torch import optim
from torch.nn import functional as F 
from torch.optim.lr_scheduler import _LRScheduler

In [2]:
def read_data(path):
    files = {}
    for filename in path.glob('*'):
        if filename.suffix == '.csv':
            files[filename.stem] = pd.read_csv(filename)
        elif filename.suffix == '.dat':
            if filename.stem == 'ratings':
                columns = ['userId', 'movieId', 'rating', 'timestamp']
            else:
                columns = ['movieId', 'title', 'genres']
            data = pd.read_csv(filename, sep='::', names=columns, engine='python')
            files[filename.stem] = data
    return files['ratings'], files['movies']

In [3]:
download_path = '/Users/varunn/Documents/NLP-data'

In [4]:
ratings, movies = read_data(Path(os.path.join(download_path, 'ml-1m')))

In [5]:
print(ratings.shape)
ratings.head()

(1000209, 4)


Unnamed: 0,userId,movieId,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [6]:
# transform users and movies to categorical features
ratings['userId'] = ratings['userId'].astype('category')
ratings['movieId'] = ratings['movieId'].astype('category')

# use the category codes to avoid creating separate vocabularies
ratings['user_code'] = ratings['userId'].cat.codes.astype(int)
ratings['movie_code'] = ratings['movieId'].cat.codes.astype(int)

In [7]:
n_users = ratings['user_code'].max() + 1
n_movies = ratings['movie_code'].max() + 1

print('# users: ', n_users)
print('# movies: ', n_movies)

# users:  6040
# movies:  3706


In [8]:
data_df = ratings.sort_values(by='timestamp')
data_df.reset_index(drop=True, inplace=True)

In [9]:
user_ids = data_df['userId'].unique().tolist()
movie_ids = data_df['movieId'].unique().tolist()
user_id_mapping = {value: i for i, value in enumerate(user_ids)}
movie_id_mapping = {value: i for i, value in enumerate(movie_ids)}

n_users = len(user_id_mapping)
n_movies = len(movie_id_mapping)

print('# users: ', n_users)
print('# movies: ', n_movies)

data_df['new_userId'] = data_df['userId'].apply(
    lambda x: user_id_mapping[x])
data_df['new_movieId'] = data_df['movieId'].apply(
    lambda x: movie_id_mapping[x])

# users:  6040
# movies:  3706


In [10]:
data_df.head()

Unnamed: 0,userId,movieId,rating,timestamp,user_code,movie_code,new_userId,new_movieId
0,6040,858,4,956703932,6039,802,0,0
1,6040,2384,4,956703954,6039,2191,0,1
2,6040,593,5,956703954,6039,579,0,2
3,6040,1961,4,956703977,6039,1781,0,3
4,6040,2019,5,956703977,6039,1839,0,4


In [11]:
# more than 4 -> 1, less than 5 -> 0
data_df['preference'] = np.where(data_df['rating'] > 4, 1, 0)

# keep only ones and discard the others
data_df_cleaned = data_df[(data_df['preference'] == 1)]
print(data_df_cleaned.shape)
data_df_cleaned.head()

(226310, 9)


Unnamed: 0,userId,movieId,rating,timestamp,user_code,movie_code,new_userId,new_movieId,preference
2,6040,593,5,956703954,6039,579,0,2,1
4,6040,2019,5,956703977,6039,1839,0,4,1
6,6040,213,5,956704056,6039,207,0,6,1
7,6040,3111,5,956704056,6039,2895,0,7,1
11,6040,2503,5,956704191,6039,2309,0,11,1


In [12]:
data_df['user_code'].unique()

array([6039, 6038, 6037, ...,  348, 2909, 2946])

In [13]:
help(torch.nn.init.normal_)

Help on function normal_ in module torch.nn.init:

normal_(tensor, mean=0.0, std=1.0)
    Fills the input Tensor with values drawn from the normal
    distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`.
    
    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
    
    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.normal_(w)



In [14]:
embedding = nn.Embedding(n_movies, 128)

In [15]:
embedding.weight.data

tensor([[ 1.5789,  0.8428,  1.2063,  ...,  1.2183, -0.8995,  0.8056],
        [ 0.9189,  0.4767, -1.4105,  ..., -0.1423, -0.7803,  0.5009],
        [ 0.5333,  1.4665,  0.7449,  ...,  0.1174, -0.7504, -1.4096],
        ...,
        [-1.9330, -0.5867,  1.1772,  ...,  0.0532,  0.9101, -0.5424],
        [-0.0422,  0.6056, -0.4763,  ...,  1.3144, -0.0459,  0.9182],
        [ 1.4965, -0.0470,  0.2065,  ...,  1.0852, -0.7818,  0.6645]])

In [25]:
nn.init.normal_(embedding.weight.data)

tensor([[ 0.7677,  1.0578, -1.7912,  ..., -0.3434, -0.3171,  0.0614],
        [ 1.2797, -0.1544, -1.2456,  ..., -0.2985,  1.5844, -1.0965],
        [ 1.3789, -0.4688,  0.1579,  ...,  1.2603,  0.7772, -1.5381],
        ...,
        [-0.3001, -0.3375, -0.6527,  ...,  0.4167, -2.0371, -0.6036],
        [ 1.8442,  0.9047, -2.2226,  ...,  1.9515,  0.8001, -1.7166],
        [ 0.6924, -1.9342,  1.5229,  ..., -0.0569,  1.4001, -0.0700]])

In [27]:
bias = nn.Embedding(n_movies, 1, sparse=False)

In [29]:
bias.weight.data

tensor([[ 0.9953],
        [-0.6603],
        [-0.6756],
        ...,
        [ 1.1043],
        [-0.3431],
        [ 1.1553]])

In [30]:
nn.init.normal_(bias.weight.data)

tensor([[2.1364],
        [0.6402],
        [0.5094],
        ...,
        [0.0621],
        [0.6678],
        [0.5436]])

In [16]:
import torch
import torch.nn as nn

from torch import tensor


class SimpleCF(nn.Module):
    def __init__(self, n_users: int, n_items: int, factors: int = 16,
                 user_embeddings: torch.tensor = None,
                 freeze_users: bool = False,
                 item_embeddings: torch.tensor = None,
                 freeze_items: bool = False,
                 init: torch.nn.init = torch.nn.init.normal_,
                 binary: bool =False, **kwargs):
        super().__init__()
        self.binary = binary

        self.user_embeddings = self._create_embedding(
            n_users, factors, user_embeddings, freeze_users,
            init, **kwargs)
        self.item_embeddings = self._create_embedding(
            n_items, factors, item_embeddings, freeze_items,
            init, **kwargs)
        self.sigmoid = nn.Sigmoid()

    def forward(self, u: torch.tensor, i: torch.tensor) -> torch.tensor:
        user_embedding = self.user_embeddings(u)
        user_embedding = user_embedding[:, None, :]
        item_embedding = self.item_embeddings(i)
        item_embedding = item_embedding[:, None, :]
        rating = torch.matmul(user_embedding, item_embedding.transpose(
            1, 2))
        if self.binary:
            return self.sigmoid(rating)
        return rating

    def _create_embedding(self, n_items, factors, weights, freeze,
                          init, **kwargs):
        embedding = nn.Embedding(n_items, factors)
        init(embedding.weight.data, **kwargs)

        if weights is not None:
            embedding.load_state_dict({'weight': weights})
        if freeze:
            embedding.weight.requires_grad = False

        return embedding
    
    
class BaseModule(nn.Module):
    """
    Base module for explicit matrix factorization.
    """
    
    def __init__(self,
                 n_users,
                 n_items,
                 n_factors=40,
                 dropout_p=0,
                 sparse=False,
                 user_embeddings: torch.tensor = None,
                 user_biases: torch.tensor = None,
                 freeze_users: bool = False,
                 item_embeddings: torch.tensor = None,
                 item_biases: torch.tensor = None,
                 freeze_items: bool = False,
                 init: torch.nn.init = torch.nn.init.normal_,
                 **kwargs):
        """
        Parameters
        ----------
        n_users : int
            Number of users
        n_items : int
            Number of items
        n_factors : int
            Number of latent factors (or embeddings or whatever you want to
            call it).
        dropout_p : float
            p in nn.Dropout module. Probability of dropout.
        sparse : bool
            Whether or not to treat embeddings as sparse. NOTE: cannot use
            weight decay on the optimizer if sparse=True. Also, can only use
            Adagrad.
        """
        super(BaseModule, self).__init__()
        self.n_users = n_users
        self.n_items = n_items
        self.n_factors = n_factors
        self.user_embeddings, self.user_biases = self._create_embedding(
            n_users, n_factors, user_embeddings, user_biases,
            freeze_users, init, sparse, **kwargs)
        self.item_embeddings, self.item_biases = self._create_embedding(
            n_items, n_factors, item_embeddings, item_biases,
            freeze_items, init, sparse, **kwargs)
        
        self.dropout_p = dropout_p
        self.dropout = nn.Dropout(p=self.dropout_p)

        self.sparse = sparse
        
    def forward(self, users, items):
        """
        Forward pass through the model. For a single user and item, this
        looks like:
        user_bias + item_bias + user_embeddings.dot(item_embeddings)
        Parameters
        ----------
        users : np.ndarray
            Array of user indices
        items : np.ndarray
            Array of item indices
        Returns
        -------
        preds : np.ndarray
            Predicted ratings.
        """
        ues = self.user_embeddings(users)
        uis = self.item_embeddings(items)

        preds = self.user_biases(users)
        preds += self.item_biases(items)
        preds += (self.dropout(ues) * self.dropout(uis)).sum(
            dim=1, keepdim=True)

        return preds.squeeze()
    
    def __call__(self, *args):
        return self.forward(*args)

    def predict(self, users, items):
        return self.forward(users, items)
    
    def _create_embedding(self, n_items, n_factors, pre_weights,
                          pre_biases, freeze, init, sparse, **kwargs):
        
        bias = nn.Embedding(n_items, 1, sparse=sparse)
        embedding = nn.Embedding(n_items, n_factors, sparse=sparse)
        init(bias.weight.data, **kwargs)
        init(embedding.weight.data, **kwargs)

        if pre_weights is not None:
            embedding.load_state_dict({'weight': pre_weights})
            
        if pre_biases is not None:
            bias.load_state_dict({'weight': pre_biases})
        
        if freeze:
            embedding.weight.requires_grad = False
            bias.weight.requires_grad = False

        return embedding, bias


def bpr_loss(preds, vals):
    sig = nn.Sigmoid()
    return (1.0 - sig(preds)).pow(2).sum()

In [17]:
from torch.utils import data


class Interactions(data.Dataset):
    """
    Hold data in the form of an interactions matrix.
    Typical use-case is like a ratings matrix:
    - Users are the rows
    - Items are the columns
    - Elements of the matrix are the ratings given by a user for an item.
    """

    def __init__(self, mat):
        self.mat = mat.astype(np.float32).tocoo()
        self.n_users = self.mat.shape[0]
        self.n_items = self.mat.shape[1]

    def __getitem__(self, index):
        row = self.mat.row[index]
        col = self.mat.col[index]
        val = self.mat.data[index]
        return (row, col), val

    def __len__(self):
        return self.mat.nnz

In [18]:
import torch
from torch import nn
import numpy as np
import collections

from abc import ABCMeta
from abc import abstractmethod
from typing import Callable
from tqdm import tqdm


class StepBase:
    """Defines the interface that all step models here expose."""
    __metaclass__ = ABCMeta

    @abstractmethod
    def batch_fit(self, data_loader: torch.utils.data.DataLoader, epochs: int):
        """Trains the model on a batch of user-item interactions."""
        pass

    @abstractmethod
    def step(self, user: torch.tensor, item: torch.tensor,
             rating: torch.tensor, preference: torch.tensor):
        """Trains the model incrementally."""
        pass

    @abstractmethod
    def predict(self, user: torch.tensor, k: int):
        """Recommends the top-k items to a specific user."""
        pass

    @abstractmethod
    def save(self, path: str):
        """Saves the model parameters to the given path."""
        pass

    @abstractmethod
    def load(self, path: str):
        """Loads the model parameters from a given path."""
        pass


class Step(StepBase):
    """Incremental and batch training of recommender systems."""
    def __init__(self, model: torch.nn.Module,
                 loss_function=torch.nn.MSELoss(reduction='sum'),
                 optimizer = torch.optim.Adam,
                 lr = 0.01, weight_decay = 0.):
        self.model = model
        self.loss_function = loss_function
        self.lr = lr
        self.weight_decay = weight_decay
        self.optimizer = optimizer(self.model.parameters(),
                                   lr=self.lr,
                                   weight_decay=self.weight_decay)
        self.losses = []

        # check if the user has provided user and item embeddings
        assert self.model.user_embeddings, 'User embedding matrix could not be found.'
        assert self.model.item_embeddings, 'Item embedding matrix could not be found.'

    @property
    def user_embeddings(self):
        return self.model.user_embeddings

    @property
    def item_embeddings(self):
        return self.model.item_embeddings
    
    @property
    def user_biases(self):
        return self.model.user_biases
    
    @property
    def item_biases(self):
        return self.model.item_biases

    def batch_fit(self, data_loader: torch.utils.data.DataLoader,
                  data_size: int, epochs: int = 1):
        """Trains the model on a batch of user-item interactions."""
        
        self.model.train()
        for epoch in range(epochs):
            total_loss = torch.Tensor([0])
            with tqdm(total=len(data_loader)) as pbar:
                for _, ((row, col), val) in enumerate(data_loader):
                    self.optimizer.zero_grad()

                    row = row.long()
                    # TODO: turn this into a collate_fn like the data_loader
                    if isinstance(col, list):
                        col = tuple(c.long() for c in col)
                    else:
                        col = col.long()
                    val = val.float()

                    preds = self.model(row, col)
                    loss = self.loss_function(preds, val)
                    loss.backward()

                    self.optimizer.step()

                    total_loss += loss.item()
                    batch_loss = loss.item() / row.size()[0]

                    pbar.update(1)
                
            total_loss /= data_size
            self.losses.append(total_loss)
            
    def _validation_loss(self, data_loader: torch.utils.data.DataLoader,
                         data_size: int):
        self.model.eval()
        total_loss = torch.Tensor([0])
        for _, ((row, col), val) in enumerate(data_loader):
            row = row.long()
            if isinstance(col, list):
                col = tuple(c.long() for c in col)
            else:
                col = col.long()
            val = val.float()

            preds = self.model(row, col)
            loss = self.loss_function(preds, val)
            total_loss += loss.item()

        total_loss /= data_size
        return total_loss[0]

    def step(self, user: torch.tensor, item: torch.tensor,
             rating: torch.tensor = None):
        """Trains the model incrementally."""
        self.model.train()
        
        self.optimizer.zero_grad()
        
        pred = self.model(user, item)
        loss = self.loss_function(pred, rating)
        loss.backward()

        self.optimizer.step()
        
        batch_loss = loss.item()
        return batch_loss

    def recommend(self, user: torch.tensor, k:int = 10) -> torch.tensor:
        """Recommends the top-k items to a specific user."""
        self.model.eval()
        
        u_embed_one = self.user_embeddings(user)
        u_embed_one_reshaped = u_embed_one.reshape((
            1, u_embed_one.shape[0]))
        m_embed = self.item_embeddings.weight
        u_bias_one = self.user_biases(user)
        u_bias_one_reshaped = u_bias_one.reshape((
            1, u_bias_one.shape[0]))
        m_bias = self.item_biases.weight
        
        bias_sum = u_bias_one_reshaped + m_bias
        bias_sum = bias_sum.reshape((bias_sum.shape[1],
                                     bias_sum.shape[0]))

        preds = torch.matmul(u_embed_one_reshaped, m_embed.t())+bias_sum

        return preds.squeeze().argsort()[-k:]

    def save(self, path: str):
        """Saves the model parameters to the given path."""
        torch.save(self.model.state_dict(), path)

    def load(self, path: str):
        """Loads the model parameters from a given path."""
        self.model.load_state_dict(torch.load(path))

In [19]:
n_users, n_movies

(6040, 3706)

In [47]:
user_embeddings = nn.Embedding(n_users, 128, sparse=False)

user_biases = nn.Embedding(n_users, 1, sparse=False)

movie_embeddings = nn.Embedding(n_movies, 128, sparse=False)

movie_biases = nn.Embedding(n_movies, 1, sparse=False)

In [212]:
user_embeddings.embedding_dim

128

In [51]:
user = 0
user = torch.tensor(user)
print(user)

tensor(0)


In [57]:
u_embed_one = user_embeddings(user)
m_embed = movie_embeddings.weight
u_bias_one = user_biases(user)
m_bias = movie_biases.weight

In [96]:
print(u_embed_one.shape)
u_embed_one_reshaped = u_embed_one.reshape((1, u_embed_one.shape[0]))
print(u_embed_one_reshaped.shape)
#print(u_embed_one.transpose(0, 1))
print(m_embed.shape)
print(m_embed.t().shape)
print(u_bias_one.shape)
u_bias_one_reshaped = u_bias_one.reshape((1, u_bias_one.shape[0]))
print(u_bias_one_reshaped.shape)
bias_sum = u_bias_one_reshaped + m_bias
bias_sum = bias_sum.reshape((bias_sum.shape[1], bias_sum.shape[0]))
print(bias_sum.shape)
print(m_bias.shape)

torch.Size([128])
torch.Size([1, 128])
torch.Size([3706, 128])
torch.Size([128, 3706])
torch.Size([1])
torch.Size([1, 1])
torch.Size([1, 3706])
torch.Size([3706, 1])


In [94]:
tmp = torch.matmul(u_embed_one_reshaped, m_embed.t())
print(tmp.shape)
print(tmp)

torch.Size([1, 3706])
tensor([[ -7.5282,  15.1134,   5.4389,  ..., -13.9357, -14.3046, -21.9863]],
       grad_fn=<MmBackward>)


In [106]:
preds = tmp + bias_sum

In [107]:
preds.shape

torch.Size([1, 3706])

In [108]:
preds.view(preds.shape[1]).shape

torch.Size([3706])

In [111]:
preds.squeeze().argsort()[-10:]

tensor([3701, 2744, 1224, 2863, 3224, 1756,  393, 1635, 3496, 3248])

In [21]:
# split data_df into train and test
from sklearn.model_selection import train_test_split


train, test = train_test_split(data_df, test_size=0.1, random_state=1)
train.reset_index(drop=True, inplace=True)
test.reset_index(drop=True, inplace=True)

In [22]:
print(train.shape)
print(test.shape)

print('num users train: ', train['userId'].nunique())
print('num movies train: ', train['movieId'].nunique())

print('num users test: ', test['userId'].nunique())
print('num movies test: ', test['movieId'].nunique())

(900188, 9)
(100021, 9)
num users train:  6040
num movies train:  3698
num users test:  5953
num movies test:  3303


In [23]:
# divide train into train1 and train2
train.sort_values('timestamp', ascending=True, inplace=True)
train.reset_index(drop=True, inplace=True)
bootstrap_perc = 0.4
pct = int(train.shape[0] * bootstrap_perc)
train1 = train[:pct]
train2 = train[pct:]

train1.reset_index(drop=True, inplace=True)
train2.reset_index(drop=True, inplace=True)

print(train1.shape)
print(train2.shape)
print(train1.head())
print(train2.head())

(360075, 9)
(540113, 9)
  userId movieId  rating  timestamp  user_code  movie_code new_userId  \
0   6040     593       5  956703954       6039         579          0   
1   6040    2384       4  956703954       6039        2191          0   
2   6040    2019       5  956703977       6039        1839          0   
3   6040    3111       5  956704056       6039        2895          0   
4   6040     213       5  956704056       6039         207          0   

  new_movieId  preference  
0           2           1  
1           1           0  
2           4           1  
3           7           1  
4           6           1  
  userId movieId  rating  timestamp  user_code  movie_code new_userId  \
0   3380    1240       5  967588202       3379        1148       2660   
1   3380    2130       4  967588214       3379        1949       2660   
2   3377    1343       5  967588226       3376        1245       2663   
3   3377    2118       4  967588226       3376        1937       2663   
4   

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [24]:
# create interactions matrix
import scipy.sparse as sp

def get_interaction_matrix(df, n_users, n_movies, user2index,
                           item2index):
    interactions = np.zeros((n_users, n_movies))
    for row in df.itertuples():
        interactions[user2index[row[1]], item2index[row[2]]] = row[3]
    
    return sp.coo_matrix(interactions)

In [25]:
train1_interactions = get_interaction_matrix(train1, n_users, n_movies,
                                             user_id_mapping,
                                             movie_id_mapping)
train2_interactions = get_interaction_matrix(train2, n_users, n_movies,
                                             user_id_mapping,
                                             movie_id_mapping)
test_interactions = get_interaction_matrix(test, n_users, n_movies,
                                           user_id_mapping,
                                           movie_id_mapping)

In [26]:
print(train1_interactions.shape)
print(train2_interactions.shape)
print(test_interactions.shape)
print(train1_interactions.nnz)

(6040, 3706)
(6040, 3706)
(6040, 3706)
360075


## Experiments
1. Train on train1 (model1) and validate on test
2. Update model1 on train2 and validate on test
3. Incremental training of model1 on train2

### Experiment 1 - train on train1 and validate on test

In [27]:
train1_loader = data.DataLoader(Interactions(train1_interactions),
                                batch_size=512, shuffle=False)
test_loader = data.DataLoader(Interactions(test_interactions),
                              batch_size=512, shuffle=False)

In [31]:
len(train1_loader), train1_interactions.nnz, 360448//512

(704, 360075, 704)

In [175]:
net = BaseModule(n_users, n_movies, n_factors=128, dropout_p=0.02)

model = Step(net, lr=0.02, weight_decay=0.1)

In [176]:
import time

start = time.time()

model.batch_fit(train1_loader, train1_interactions.nnz, epochs=12)

print('time taken: %0.2f' % (time.time() - start))

100%|██████████| 704/704 [00:11<00:00, 54.79it/s]
100%|██████████| 704/704 [00:13<00:00, 52.24it/s]
100%|██████████| 704/704 [00:23<00:00, 29.55it/s]
100%|██████████| 704/704 [00:23<00:00, 29.93it/s]
100%|██████████| 704/704 [00:22<00:00, 30.94it/s]
100%|██████████| 704/704 [00:22<00:00, 30.99it/s]
100%|██████████| 704/704 [00:23<00:00, 29.91it/s]
100%|██████████| 704/704 [00:24<00:00, 29.30it/s]
100%|██████████| 704/704 [00:23<00:00, 29.71it/s]
100%|██████████| 704/704 [00:22<00:00, 29.83it/s]
100%|██████████| 704/704 [00:22<00:00, 32.89it/s]
100%|██████████| 704/704 [00:23<00:00, 29.71it/s]

time taken: 258.13





In [177]:
model.losses

[tensor([11.2205]),
 tensor([2.0262]),
 tensor([1.3138]),
 tensor([1.3812]),
 tensor([1.2426]),
 tensor([1.2377]),
 tensor([1.1876]),
 tensor([1.1864]),
 tensor([1.1596]),
 tensor([1.1610]),
 tensor([1.1479]),
 tensor([1.1544])]

In [178]:
print('loss of model on test: ',
      model._validation_loss(test_loader, test_interactions.nnz))

loss of model on test:  tensor(2.6761)


In [179]:
model.save('./model_train1_E12.pt')

In [198]:
model1 = Step(net, lr=6e-3, weight_decay=0.1)

In [199]:
model1.load('./model_train1_E12.pt')

In [201]:
print('loss of model1 on test: ',
      model1._validation_loss(test_loader, test_interactions.nnz))

loss of model1 on test:  tensor(2.6761)


### Experiment 2 - Update model1 on train2 and validate on test

In [204]:
train2_loader = data.DataLoader(Interactions(train2_interactions),
                                batch_size=10, shuffle=False)

In [205]:
model1.lr

0.006

In [208]:
%time model1.batch_fit(train2_loader, train2_interactions.nnz, epochs=20)

100%|██████████| 54012/54012 [25:13<00:00, 35.68it/s]
100%|██████████| 54012/54012 [28:20<00:00, 27.23it/s]
100%|██████████| 54012/54012 [28:10<00:00, 31.95it/s]
100%|██████████| 54012/54012 [27:02<00:00, 34.97it/s]
100%|██████████| 54012/54012 [27:25<00:00, 35.42it/s]
100%|██████████| 54012/54012 [25:27<00:00, 35.36it/s]
100%|██████████| 54012/54012 [25:43<00:00, 35.00it/s]
100%|██████████| 54012/54012 [40:20<00:00, 22.32it/s]  
100%|██████████| 54012/54012 [26:40<00:00, 33.74it/s]
100%|██████████| 54012/54012 [25:23<00:00, 35.66it/s]
100%|██████████| 54012/54012 [25:43<00:00, 34.99it/s]
100%|██████████| 54012/54012 [26:14<00:00, 34.31it/s]
100%|██████████| 54012/54012 [26:42<00:00, 33.71it/s]
100%|██████████| 54012/54012 [26:43<00:00, 33.67it/s]
100%|██████████| 54012/54012 [26:05<00:00, 34.49it/s]
100%|██████████| 54012/54012 [25:40<00:00, 35.03it/s]
100%|██████████| 54012/54012 [26:01<00:00, 34.59it/s]
100%|██████████| 54012/54012 [25:47<00:00, 35.19it/s]
100%|██████████| 54012/540

CPU times: user 8h 52min 45s, sys: 4min 7s, total: 8h 56min 53s
Wall time: 9h 33s


In [211]:
print('loss of model1_updated on test: ',
      model1._validation_loss(test_loader, test_interactions.nnz))

loss of model1_updated on test:  tensor(11.1351)


In [210]:
model1.losses

[tensor([7.1437]),
 tensor([7.1831]),
 tensor([7.1775]),
 tensor([7.1770]),
 tensor([7.1773]),
 tensor([7.1770]),
 tensor([7.1711]),
 tensor([7.1765]),
 tensor([7.1772]),
 tensor([7.1762]),
 tensor([7.1806]),
 tensor([7.1812]),
 tensor([7.1707]),
 tensor([7.1694]),
 tensor([7.1783]),
 tensor([7.1762]),
 tensor([7.1769]),
 tensor([7.1772]),
 tensor([7.1719]),
 tensor([7.1771])]

In [162]:
train_interactions = get_interaction_matrix(train, n_users, n_movies,
                                            user_id_mapping,
                                            movie_id_mapping)
train_loader = data.DataLoader(Interactions(train_interactions),
                               batch_size=512, shuffle=False)

In [163]:
net = BaseModule(n_users, n_movies, n_factors=128, dropout_p=0.02)

model2 = Step(net, lr=0.02, weight_decay=0.1)

In [165]:
%time model2.batch_fit(train_loader, train_interactions.nnz, epochs=32)

100%|██████████| 1759/1759 [00:33<00:00, 52.20it/s]
100%|██████████| 1759/1759 [00:37<00:00, 47.03it/s]
100%|██████████| 1759/1759 [00:39<00:00, 44.59it/s]
100%|██████████| 1759/1759 [00:42<00:00, 41.18it/s]
100%|██████████| 1759/1759 [00:51<00:00, 34.31it/s]
100%|██████████| 1759/1759 [00:55<00:00, 31.73it/s]
100%|██████████| 1759/1759 [01:00<00:00, 34.35it/s]
100%|██████████| 1759/1759 [00:49<00:00, 35.37it/s]
100%|██████████| 1759/1759 [00:44<00:00, 39.32it/s]
100%|██████████| 1759/1759 [00:48<00:00, 35.96it/s]
100%|██████████| 1759/1759 [00:44<00:00, 39.92it/s]
100%|██████████| 1759/1759 [00:43<00:00, 40.68it/s]
100%|██████████| 1759/1759 [00:42<00:00, 41.83it/s]
100%|██████████| 1759/1759 [00:41<00:00, 42.44it/s]
100%|██████████| 1759/1759 [00:41<00:00, 42.10it/s]
100%|██████████| 1759/1759 [00:42<00:00, 41.02it/s]
100%|██████████| 1759/1759 [00:43<00:00, 36.03it/s]
100%|██████████| 1759/1759 [00:44<00:00, 39.70it/s]
100%|██████████| 1759/1759 [00:44<00:00, 39.97it/s]
100%|███████

CPU times: user 23min 32s, sys: 11.1 s, total: 23min 43s
Wall time: 23min 42s





In [166]:
model2.losses

[tensor([5.5935]),
 tensor([1.4669]),
 tensor([1.3905]),
 tensor([1.3773]),
 tensor([1.3657]),
 tensor([1.3668]),
 tensor([1.3621]),
 tensor([1.3666]),
 tensor([1.3626]),
 tensor([1.3672]),
 tensor([1.3633]),
 tensor([1.3676]),
 tensor([1.3640]),
 tensor([1.3679]),
 tensor([1.3646]),
 tensor([1.3682]),
 tensor([1.3648]),
 tensor([1.3684]),
 tensor([1.3651]),
 tensor([1.3685]),
 tensor([1.3653]),
 tensor([1.3687]),
 tensor([1.3654]),
 tensor([1.3688]),
 tensor([1.3654]),
 tensor([1.3688]),
 tensor([1.3654]),
 tensor([1.3689]),
 tensor([1.3655]),
 tensor([1.3688]),
 tensor([1.3656]),
 tensor([1.3689])]

In [167]:
print('loss of model2_updated on test: ',
      model2._validation_loss(test_loader, test_interactions.nnz))

loss of model1_updated on test:  tensor(1.2263)


In [213]:
from itertools import chain, islice

In [219]:
train_loader = data.DataLoader(Interactions(train_interactions),
                               batch_size=2, shuffle=False)

In [221]:
for i, batch in enumerate(islice(train_loader, 8)):
    print(i, '\t', batch)

0 	 [[tensor([0, 0], dtype=torch.int32), tensor([1, 2], dtype=torch.int32)], tensor([4., 5.])]
1 	 [[tensor([0, 0], dtype=torch.int32), tensor([4, 5], dtype=torch.int32)], tensor([5., 3.])]
2 	 [[tensor([0, 0], dtype=torch.int32), tensor([6, 7], dtype=torch.int32)], tensor([5., 5.])]
3 	 [[tensor([0, 0], dtype=torch.int32), tensor([11, 12], dtype=torch.int32)], tensor([5., 5.])]
4 	 [[tensor([0, 0], dtype=torch.int32), tensor([13, 14], dtype=torch.int32)], tensor([5., 5.])]
5 	 [[tensor([0, 0], dtype=torch.int32), tensor([15, 16], dtype=torch.int32)], tensor([5., 5.])]
6 	 [[tensor([0, 0], dtype=torch.int32), tensor([17, 18], dtype=torch.int32)], tensor([4., 5.])]
7 	 [[tensor([0, 0], dtype=torch.int32), tensor([19, 20], dtype=torch.int32)], tensor([5., 4.])]
