<a href="https://colab.research.google.com/github/williamrobotma/mlstm4reco/blob/master/mLSTMnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install git+https://github.com/maciejkula/spotlight.git@master#egg=spotlight

Collecting spotlight
  Cloning https://github.com/maciejkula/spotlight.git (to revision master) to /tmp/pip-install-i9fwtbid/spotlight_8e5baf80a739438ba62a27be95cd8707
  Running command git clone -q https://github.com/maciejkula/spotlight.git /tmp/pip-install-i9fwtbid/spotlight_8e5baf80a739438ba62a27be95cd8707
Building wheels for collected packages: spotlight
  Building wheel for spotlight (setup.py) ... [?25l[?25hdone
  Created wheel for spotlight: filename=spotlight-0.1.6-py3-none-any.whl size=33929 sha256=3185e6b1b10fc90f10ec1e58c97b013a8f2fb293416fa15c0e201e0a8ded701a
  Stored in directory: /tmp/pip-ephem-wheel-cache-6m98rkf2/wheels/d8/8b/76/508de2a4f4d2dc273e47fd34f78bda690f62661bf9d1e43bb1
Successfully built spotlight
Installing collected packages: spotlight
Successfully installed spotlight-0.1.6


In [2]:
import math

import torch
from torch.nn import Parameter
from torch.nn.modules.rnn import RNNBase, LSTMCell
from torch.nn import functional as F
from torch import nn
import scipy.stats as ss

import scipy.sparse as sp

from sklearn.utils import murmurhash3_32

import sys
import os
import shutil
import pickle
import time
import numpy as np


# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model

In [3]:
class ZeroEmbedding(nn.Embedding):
    """
    Embedding layer whose weights all start out at zero.
    Used for biases.
    """

    def reset_parameters(self):
        """
        Set every embedding weight to zero.
        """

        weights = self.weight.data
        weights.zero_()

        # The padding row is explicitly reset as well when one is configured.
        pad = self.padding_idx
        if pad is not None:
            weights[pad].fill_(0)


class ScaledEmbedding(nn.Embedding):
    """
    Embedding layer initialised from a zero-mean normal distribution
    whose standard deviation is the inverse of the embedding dimension.
    """

    def reset_parameters(self):
        """
        Draw fresh weights and zero the padding row, if there is one.
        """

        scale = 1.0 / self.embedding_dim
        weights = self.weight.data
        weights.normal_(0, scale)

        pad = self.padding_idx
        if pad is not None:
            weights[pad].fill_(0)

In [4]:
class mLSTM(RNNBase):
    """
    Single-layer multiplicative LSTM over batch-first sequences.

    At every timestep an intermediate state is formed as the element-wise
    product of a linear map of the current input and a linear map of the
    previous hidden state. That intermediate state is passed to an
    ``LSTMCell`` in place of the hidden state.

    Parameters
    ----------
    input_size: int
        Number of features of each sequence element.
    hidden_size: int
        Size of the hidden and cell states.
    bias: bool, optional
        Forwarded to ``RNNBase`` and to the inner ``LSTMCell``.
    """

    def __init__(self, input_size, hidden_size, bias=True):
        super(mLSTM, self).__init__(
            mode='LSTM', input_size=input_size, hidden_size=hidden_size,
            num_layers=1, bias=bias, batch_first=True,
            dropout=0, bidirectional=False)

        # Uninitialised storage; reset_parameters() below fills it in.
        # Registration order (w_im, b_im, w_hm, b_hm) determines the order
        # in which reset_parameters() draws random values.
        self.w_im = Parameter(torch.Tensor(hidden_size, input_size))
        self.b_im = Parameter(torch.Tensor(hidden_size))
        self.w_hm = Parameter(torch.Tensor(hidden_size, hidden_size))
        self.b_hm = Parameter(torch.Tensor(hidden_size))

        self.lstm_cell = LSTMCell(input_size, hidden_size, bias)
        self.reset_parameters()

    def reset_parameters(self):
        """
        Re-draw every parameter of the module uniformly from
        ``[-1/sqrt(hidden_size), 1/sqrt(hidden_size)]``.
        """
        bound = 1.0 / math.sqrt(self.hidden_size)
        for param in self.parameters():
            param.data.uniform_(-bound, bound)

    def forward(self, input, hx):
        """
        Run the cell over every timestep of ``input``.

        Parameters
        ----------
        input: tensor
            Three-dimensional tensor; the second dimension is iterated
            over as the sequence dimension.
        hx: tuple (hidden state, cell state)
            Initial states.

        Returns
        -------
        tensor
            The cell states stacked along dimension 1: the initial cell
            state followed by the cell state after each timestep.
        """
        _, n_steps, _ = input.size()

        hidden, cell = hx
        cell_states = [cell]
        for t in range(n_steps):
            x_t = input[:, t, :]
            m_t = F.linear(x_t, self.w_im, self.b_im) * F.linear(hidden, self.w_hm, self.b_hm)
            hidden, cell = self.lstm_cell(x_t, (m_t, cell))
            cell_states.append(cell)

        return torch.stack(cell_states, dim=1)

In [5]:
PADDING_IDX = 0


class mLSTMNet(nn.Module):
    """
    Module representing users by running a multiplicative LSTM over their
    item sequence and using the state returned by the recurrent layer at
    each timestep as the sequence representation, a'la [2]_
    During training, representations for all timesteps of the sequence are
    computed in one go. Loss functions using the outputs will therefore
    be aggregating both across the minibatch and across time in the sequence.
    Parameters
    ----------
    num_items: int
        Number of items to be represented.
    embedding_dim: int, optional
        Embedding dimension of the embedding layer, and the number of hidden
        units in the mLSTM layer.
    item_embedding_layer: an embedding layer, optional
        If supplied, will be used as the item embedding layer
        of the network.
    sparse: bool, optional
        Passed as ``sparse`` to the embedding layers created here.
    References
    ----------
    .. [2] Hidasi, Balazs, et al. "Session-based recommendations with
       recurrent neural networks." arXiv preprint arXiv:1511.06939 (2015).
    """

    def __init__(self, num_items, embedding_dim=32,
                 item_embedding_layer=None, sparse=False):

        super(mLSTMNet, self).__init__()

        self.embedding_dim = embedding_dim

        if item_embedding_layer is None:
            item_embedding_layer = ScaledEmbedding(num_items, embedding_dim,
                                                   padding_idx=PADDING_IDX,
                                                   sparse=sparse)
        self.item_embeddings = item_embedding_layer

        self.item_biases = ZeroEmbedding(num_items, 1, sparse=sparse,
                                         padding_idx=PADDING_IDX)

        # Learnable start state, used for both the hidden and the cell state.
        initial_state = torch.zeros(embedding_dim).normal_(
            0, 1.0 / self.embedding_dim)
        self.h_init = nn.Parameter(initial_state, requires_grad=True)

        self.mlstm = mLSTM(input_size=embedding_dim,
                           hidden_size=embedding_dim)

    def user_representation(self, item_sequences):
        """
        Compute user representation from a given sequence.
        Returns
        -------
        tuple (all_representations, final_representation)
            The first element contains all representations from step
            -1 (no items seen) to t - 1 (all but the last items seen).
            The second element contains the final representation
            at step t (all items seen). This final state can be used
            for prediction or evaluation.
        """
        item_vectors = self.item_embeddings(item_sequences)

        # Broadcast the learned start state over the minibatch.
        n_rows = item_vectors.size(0)
        state_dim = self.h_init.size(0)
        start_state = self.h_init.expand(n_rows, state_dim)

        states = self.mlstm(item_vectors, (start_state, start_state))

        # Move the timestep axis last: (batch, state, step).
        states = states.permute(0, 2, 1)

        history = states[:, :, :-1]
        final = states[:, :, -1]
        return history, final

    def forward(self, user_representations, targets):
        """
        Compute predictions for target items given user representations.
        Parameters
        ----------
        user_representations: tensor
            Result of the user_representation method.
        targets: tensor
            A minibatch of item sequences of shape
            (minibatch_size, sequence_length).
        Returns
        -------
        predictions: tensor
            of shape (minibatch_size, sequence_length)
        """

        # NOTE(review): the argument-less squeeze() calls drop every
        # size-1 dimension, so a minibatch of size one loses its batch
        # dimension -- confirm that callers tolerate this.
        embedded = self.item_embeddings(targets).permute(0, 2, 1).squeeze()
        bias = self.item_biases(targets).squeeze()

        scores = (user_representations * embedded).sum(1).squeeze()

        return bias + scores

## Spotlight Stuff

In [6]:
### Spotlight Losses

def hinge_loss(positive_predictions, negative_predictions, mask=None):
    """
    Pairwise hinge loss with a margin of one.
    Parameters
    ----------
    positive_predictions: tensor
        Predictions for known positive items.
    negative_predictions: tensor
        Predictions for sampled negative items.
    mask: tensor, optional
        Binary tensor; entries of the loss where it is zero are
        excluded from the average.
    Returns
    -------
    loss, float
        The mean of the loss over all entries, or over the unmasked
        entries when a mask is given.
    """

    margin_violation = negative_predictions - positive_predictions + 1.0
    loss = torch.clamp(margin_violation, min=0.0)

    if mask is None:
        return loss.mean()

    weights = mask.float()
    return (loss * weights).sum() / weights.sum()


def adaptive_hinge_loss(positive_predictions, negative_predictions, mask=None):
    """
    Adaptive hinge pairwise loss function. From several sets of
    predictions for implicitly negative items it keeps, per position,
    the highest one, i.e. the negative closest to violating the ranking
    implied by the user's interactions, and feeds it to ``hinge_loss``.
    Approximates the idea of weighted approximate-rank pairwise loss
    introduced in [2]_
    Parameters
    ----------
    positive_predictions: tensor
        Tensor containing predictions for known positive items.
    negative_predictions: tensor
        Predictions for sampled negative items; the maximum is taken
        over its first dimension. More negatives increase the
        likelihood of finding ranking-violating pairs, but risk
        overfitting.
    mask: tensor, optional
        A binary tensor used to zero the loss from some entries
        of the loss tensor.
    Returns
    -------
    loss, float
        The mean value of the loss function.
    References
    ----------
    .. [2] Weston, Jason, Samy Bengio, and Nicolas Usunier. "Wsabie:
       Scaling up to large vocabulary image annotation." IJCAI.
       Vol. 11. 2011.
    """

    # torch.max over a dimension returns (values, indices); keep the values.
    hardest_negatives = torch.max(negative_predictions, 0)[0].squeeze()

    return hinge_loss(positive_predictions, hardest_negatives, mask=mask)

### Spotlight eval

def sequence_mrr_score(predictions, targets, exclude_preceding=False,
                       sequences=None):
    """
    Compute mean reciprocal rank (MRR) scores from precomputed
    predictions, one score per row.

    Ranks are obtained with ``scipy.stats.rankdata``, which assigns
    rank 1 to the smallest value. Lower values in ``predictions``
    therefore mean better ranks.
    NOTE(review): callers presumably pass negated model scores --
    confirm at the call site.
    Parameters
    ----------
    predictions: array of shape (num_sequences, num_items)
        Per-item values for every sequence.
    targets: array
        ``targets[i]`` holds the item id(s) whose reciprocal rank is
        averaged for sequence ``i``.
    exclude_preceding: boolean, optional
        When true, items listed in ``sequences[i]`` are pushed to the
        worst rank before ranking row ``i``.
    sequences: array, optional
        ``sequences[i]`` holds the item ids that precede the target of
        row ``i``. Required when ``exclude_preceding`` is true.
    Returns
    -------
    mrr scores: numpy array of shape (num_sequences,)
        Array of MRR scores, one per entry of ``targets``.
    Raises
    ------
    ValueError
        If ``exclude_preceding`` is true but ``sequences`` is missing.
    """

    if exclude_preceding and sequences is None:
        raise ValueError('sequences must be provided when '
                         'exclude_preceding is True.')

    worst_value = np.finfo(np.float32).max
    mrrs = []

    for i in range(len(targets)):

        row = predictions[i]

        if exclude_preceding:
            # Mask a copy so the caller's predictions are left untouched.
            row = np.array(row, copy=True)
            row[sequences[i]] = worst_value

        ranks = ss.rankdata(row)
        mrrs.append((1.0 / ranks[targets[i]]).mean())

    return np.array(mrrs)

### Spotlight Implicit Sequence functions

def get_negative_prediction(shape, user_representation, net, random_state=None):
    """
    Score uniformly sampled item ids against the given representations.

    Relies on the module-level names ``num_items`` (exclusive upper bound
    of the sampled ids) and ``device`` (where the sampled ids are moved).

    Parameters
    ----------
    shape: tuple of int
        Shape of the array of sampled item ids.
    user_representation: tensor
        Passed to ``net`` as its first argument.
    net: callable
        Called as ``net(user_representation, sampled_ids)``.
    random_state: np.random.RandomState, optional
        Source of randomness; a fresh one is created when omitted.

    Returns
    -------
    The value returned by ``net`` for the sampled ids.
    """
    rng = np.random.RandomState() if random_state is None else random_state

    sampled_ids = rng.randint(0, num_items, shape, dtype=np.int64)
    sampled_var = torch.from_numpy(sampled_ids).to(device)

    return net(user_representation, sampled_var)

def get_multiple_negative_predictions(shape, user_representation, net,n=5, random_state=None):
    """
    Score ``n`` independent sets of sampled negative items.

    Parameters
    ----------
    shape: tuple (batch_size, sliding_window)
        Shape of a single set of negatives.
    user_representation: tensor
        Repeated ``n`` times along its first dimension before scoring.
    net: callable
        Forwarded to ``get_negative_prediction``.
    n: int, optional
        Number of negative sets to draw.
    random_state: np.random.RandomState, optional
        Source of randomness; a fresh one is created when omitted.

    Returns
    -------
    tensor
        Predictions viewed as (n, batch_size, sliding_window).
    """

    if random_state is None:
        random_state = np.random.RandomState()

    batch_size, sliding_window = shape
    # Repeat only along the first dimension; all others are kept as they are.
    size = (n,) + (1,) * (user_representation.dim() - 1)
    # Forward the random state so that seeded runs sample reproducibly;
    # previously it was accepted here but never passed on.
    negative_prediction = get_negative_prediction(
        (n * batch_size, sliding_window),
        user_representation.repeat(*size), net,
        random_state=random_state)

    return negative_prediction.view(n, batch_size, sliding_window)
    


def check_input(item_ids, num_items):
    """
    Validate that item ids fit into a model with ``num_items`` items.

    Parameters
    ----------
    item_ids: int or array
        A single id, or an object exposing ``max()``.
    num_items: int
        Exclusive upper bound for valid ids.

    Raises
    ------
    ValueError
        If the largest id is greater than or equal to ``num_items``.
    """

    largest_id = item_ids if isinstance(item_ids, int) else item_ids.max()

    if largest_id >= num_items:
        raise ValueError('Maximum item id greater '
                         'than number of items in model.')

def predict(net, sequences, num_items, item_ids=None):
    """
    Make predictions: given a sequence of interactions, predict
    the next item in the sequence.

    Switches ``net`` to evaluation mode and leaves it there. Uses the
    module-level ``device`` to place the input tensors.

    Parameters
    ----------

    net: module
        Network exposing ``user_representation`` and a forward call
        taking (representations, item ids).
    sequences: array, (1 x max_sequence_length)
        Array containing the indices of the items in the sequence.
    num_items: int
        Number of items known to the model; used to validate the
        inputs and to build the default ``item_ids``.
    item_ids: array (num_items x 1), optional
        Array containing the item ids for which prediction scores
        are desired. If not supplied, predictions for all items
        will be computed.

    Returns
    -------

    predictions: array
        Predicted scores for all items in item_ids.

    Raises
    ------
    ValueError
        If an id in ``sequences`` or ``item_ids`` is not below ``num_items``.
    """

    net.train(False)

    sequences = np.atleast_2d(sequences)

    if item_ids is None:
        item_ids = np.arange(num_items).reshape(-1, 1)

    check_input(item_ids, num_items)
    check_input(sequences, num_items)

    sequences = torch.from_numpy(sequences.astype(np.int64).reshape(1, -1))
    item_ids = torch.from_numpy(item_ids.astype(np.int64))

    sequence_var = sequences.to(device)
    item_var = item_ids.to(device)

    # Inference only: skip building the autograd graph over all items.
    with torch.no_grad():
        _, sequence_representations = net.user_representation(sequence_var)
        # Reuse the single final representation for every candidate item.
        size = (len(item_var),) + sequence_representations.size()[1:]
        out = net(sequence_representations.expand(*size),
                  item_var)

    return out.cpu().detach().numpy().flatten()

## It seems this functionality could be replaced by a PyTorch DataLoader.

def shuffle(*arrays, **kwargs):
    """
    Apply one shared random permutation to all given arrays.

    Parameters
    ----------
    *arrays: arrays of equal length
        Objects supporting ``len`` and indexing with an index array.
    random_state: np.random.RandomState, optional (keyword only)
        Source of randomness; a fresh one is created when omitted.

    Returns
    -------
    The permuted array when a single array is given, otherwise a tuple
    of permuted arrays in the order they were passed.

    Raises
    ------
    ValueError
        If the inputs do not all share the same length.
    """

    rng = kwargs.get('random_state')

    lengths = {len(arr) for arr in arrays}
    if len(lengths) != 1:
        raise ValueError('All inputs to shuffle must have '
                         'the same length.')

    if rng is None:
        rng = np.random.RandomState()

    order = np.arange(len(arrays[0]))
    rng.shuffle(order)

    shuffled = tuple(arr[order] for arr in arrays)
    return shuffled[0] if len(shuffled) == 1 else shuffled


def minibatch(*tensors, **kwargs):
    """
    Yield consecutive slices of at most ``batch_size`` elements.

    Parameters
    ----------
    *tensors: sliceable objects
        With a single input, plain slices are yielded. With several,
        each step yields a tuple holding the matching slice of every
        input; the length of the first input drives the iteration.
    batch_size: int, optional (keyword only)
        Slice length, 128 by default.
    """

    batch_size = kwargs.get('batch_size', 128)

    if len(tensors) == 1:
        (only,) = tensors
        for start in range(0, len(only), batch_size):
            yield only[start:start + batch_size]
        return

    for start in range(0, len(tensors[0]), batch_size):
        stop = start + batch_size
        yield tuple(t[start:stop] for t in tensors)

# Data

## Interactions Data

In [7]:
def _sliding_window(tensor, window_size, step_size=1):

    for i in range(len(tensor), 0, -step_size):
        yield tensor[max(i - window_size, 0):i]


def _generate_sequences(user_ids, item_ids,
                        indices,
                        max_sequence_length,
                        step_size):
    """
    Yield ``(user id, window)`` pairs for every user.

    ``indices[k]`` is the position in ``item_ids`` where the items of
    ``user_ids[k]`` start; they run up to the next start position, or to
    the end of ``item_ids`` for the last user. Each user's items are cut
    into windows by ``_sliding_window``.
    """

    last = len(indices) - 1

    for pos, start_idx in enumerate(indices):

        # The last user's items extend to the end of the array.
        stop_idx = None if pos >= last else indices[pos + 1]
        user_items = item_ids[start_idx:stop_idx]

        for seq in _sliding_window(user_items,
                                   max_sequence_length,
                                   step_size):

            yield (user_ids[pos], seq)


class Interactions(object):
    """
    Container for user-item interactions. At a minimum it holds pairs
    of user and item ids; ratings, timestamps and interaction weights
    can be attached as well.
    For *implicit feedback* scenarios, user ids and item ids should
    only be provided for user-item pairs where an interaction was
    observed. All pairs that are not provided are treated as missing
    observations, and often interpreted as (implicit) negative
    signals.
    For *explicit feedback* scenarios, user ids, item ids, and
    ratings should be provided for all user-item-rating triplets
    that were observed in the dataset.
    Parameters
    ----------
    user_ids: array of np.int32
        array of user ids of the user-item pairs
    item_ids: array of np.int32
        array of item ids of the user-item pairs
    ratings: array of np.float32, optional
        array of ratings
    timestamps: array of np.int32, optional
        array of timestamps
    weights: array of np.float32, optional
        array of weights
    num_users: int, optional
        Number of distinct users in the dataset.
        Must be larger than the maximum user id
        in user_ids. Derived from user_ids when omitted.
    num_items: int, optional
        Number of distinct items in the dataset.
        Must be larger than the maximum item id
        in item_ids. Derived from item_ids when omitted.
    Attributes
    ----------
    user_ids: array of np.int32
        array of user ids of the user-item pairs
    item_ids: array of np.int32
        array of item ids of the user-item pairs
    ratings: array of np.float32, optional
        array of ratings
    timestamps: array of np.int32, optional
        array of timestamps
    weights: array of np.float32, optional
        array of weights
    num_users: int, optional
        Number of distinct users in the dataset.
    num_items: int, optional
        Number of distinct items in the dataset.
    """

    def __init__(self, user_ids, item_ids,
                 ratings=None,
                 timestamps=None,
                 weights=None,
                 num_users=None,
                 num_items=None):

        # Fall back to "largest id + 1" when a count is not supplied.
        if not num_users:
            num_users = int(user_ids.max() + 1)
        if not num_items:
            num_items = int(item_ids.max() + 1)

        self.num_users = num_users
        self.num_items = num_items

        self.user_ids = user_ids
        self.item_ids = item_ids
        self.ratings = ratings
        self.timestamps = timestamps
        self.weights = weights

        self._check()

    def __repr__(self):

        template = ('<Interactions dataset ({num_users} users x {num_items} items '
                    'x {num_interactions} interactions)>')

        return template.format(num_users=self.num_users,
                               num_items=self.num_items,
                               num_interactions=len(self))

    def __len__(self):

        return len(self.user_ids)

    def _check(self):
        """
        Validate the declared counts and the lengths of all arrays.
        """

        if self.user_ids.max() >= self.num_users:
            raise ValueError('Maximum user id greater '
                             'than declared number of users.')
        if self.item_ids.max() >= self.num_items:
            raise ValueError('Maximum item id greater '
                             'than declared number of items.')

        expected_length = len(self.user_ids)

        per_interaction = (('item IDs', self.item_ids),
                           ('ratings', self.ratings),
                           ('timestamps', self.timestamps),
                           ('weights', self.weights))

        for name, value in per_interaction:
            # Optional arrays that were not supplied are not checked.
            if value is not None and len(value) != expected_length:
                raise ValueError('Invalid {} dimensions: length '
                                 'must be equal to number of interactions'
                                 .format(name))

    def tocoo(self):
        """
        Transform to a scipy.sparse COO matrix of shape
        (num_users x num_items). Entries are the ratings, or ones
        when no ratings are available.
        """

        if self.ratings is None:
            data = np.ones(len(self))
        else:
            data = self.ratings

        return sp.coo_matrix((data, (self.user_ids, self.item_ids)),
                             shape=(self.num_users, self.num_items))

    def tocsr(self):
        """
        Transform to a scipy.sparse CSR matrix.
        """

        coo = self.tocoo()
        return coo.tocsr()

    def to_sequence(self, max_sequence_length=10, min_sequence_length=None, step_size=None):
        """
        Transform to sequence form.
        User-item interaction pairs are sorted by their timestamps,
        and sequences of up to max_sequence_length events are arranged
        into a (zero-padded from the left) matrix with dimensions
        (num_sequences x max_sequence_length).
        Valid subsequences of users' interactions are returned. For
        example, if a user interacted with items [1, 2, 3, 4, 5], the
        returned interactions matrix at sequence length 5 and step size
        1 will be given by:
        .. code-block:: python
           [[1, 2, 3, 4, 5],
            [0, 1, 2, 3, 4],
            [0, 0, 1, 2, 3],
            [0, 0, 0, 1, 2],
            [0, 0, 0, 0, 1]]
        At step size 2:
        .. code-block:: python
           [[1, 2, 3, 4, 5],
            [0, 0, 1, 2, 3],
            [0, 0, 0, 0, 1]]
        Parameters
        ----------
        max_sequence_length: int, optional
            Maximum sequence length. Subsequences shorter than this
            will be left-padded with zeros.
        min_sequence_length: int, optional
            If set, only sequences with at least min_sequence_length
            non-padding elements will be returned.
        step_size: int, optional
            The returned subsequences are the effect of moving a
            sliding window over the input. This parameter
            governs the stride of that window. Increasing it will
            result in fewer subsequences being returned. Defaults to
            max_sequence_length.
        Returns
        -------
        sequence interactions: :class:`~SequenceInteractions`
            The resulting sequence interactions.
        Raises
        ------
        ValueError
            If no timestamps are available, or if 0 occurs as an item id.
        """

        if self.timestamps is None:
            raise ValueError('Cannot convert to sequences, '
                             'timestamps not available.')

        if 0 in self.item_ids:
            raise ValueError('0 is used as an item id, conflicting '
                             'with the sequence padding value.')

        if step_size is None:
            step_size = max_sequence_length

        # lexsort uses the last key as primary: user id, then timestamp.
        order = np.lexsort((self.timestamps,
                            self.user_ids))

        sorted_users = self.user_ids[order]
        sorted_items = self.item_ids[order]

        unique_users, first_positions, counts = np.unique(sorted_users,
                                                          return_index=True,
                                                          return_counts=True)

        # One window per started stride of each user's history.
        num_subsequences = int(np.ceil(counts / float(step_size)).sum())

        sequences = np.zeros((num_subsequences, max_sequence_length),
                             dtype=np.int32)
        sequence_users = np.empty(num_subsequences,
                                  dtype=np.int32)

        windows = _generate_sequences(unique_users,
                                      sorted_items,
                                      first_positions,
                                      max_sequence_length,
                                      step_size)
        for row, (uid, seq) in enumerate(windows):
            # Right-align the window; the zeros on the left act as padding.
            sequences[row][-len(seq):] = seq
            sequence_users[row] = uid

        if min_sequence_length is not None:
            # Rows are left-padded, so a non-zero entry this far from the
            # right edge means the row is long enough.
            keep = sequences[:, -min_sequence_length] != 0
            sequences = sequences[keep]
            sequence_users = sequence_users[keep]

        return SequenceInteractions(sequences,
                                    user_ids=sequence_users,
                                    num_items=self.num_items)


class SequenceInteractions(object):
    """
    Interactions encoded as a sequence matrix.
    Parameters
    ----------
    sequences: array of np.int32 of shape (num_sequences x max_sequence_length)
        The interactions sequence matrix, as produced by
        :func:`~Interactions.to_sequence`
    user_ids: array, optional
        Stored unchanged on the instance.
    num_items: int, optional
        The number of distinct items in the data. When omitted it is
        taken to be the largest entry of ``sequences`` plus one.
    Attributes
    ----------
    sequences: array of np.int32 of shape (num_sequences x max_sequence_length)
        The interactions sequence matrix, as produced by
        :func:`~Interactions.to_sequence`
    user_ids: array or None
        The value passed as ``user_ids``.
    max_sequence_length: int
        Second dimension of ``sequences``.
    num_items: int
        Number of items, as supplied or derived from ``sequences``.
    """

    def __init__(self,
                 sequences,
                 user_ids=None, num_items=None):

        self.sequences = sequences
        self.user_ids = user_ids
        self.max_sequence_length = sequences.shape[1]
        self.num_items = sequences.max() + 1 if num_items is None else num_items

    def __repr__(self):

        rows, columns = self.sequences.shape

        template = ('<Sequence interactions dataset ({num_sequences} '
                    'sequences x {sequence_length} sequence length)>')

        return template.format(num_sequences=rows,
                               sequence_length=columns)

## Data helpers

In [8]:

def _index_or_none(array, shuffle_index):

    if array is None:
        return None
    else:
        return array[shuffle_index]

def user_based_train_test_split(interactions,
                                test_percentage=0.2,
                                random_state=None):
    """
    Split interactions between a train and a test set based on
    user ids, so that a given user's entire interaction history
    is either in the train, or the test set.

    Users are assigned by hashing their id with a randomly drawn seed
    and comparing the hash, reduced to [0, 1) in steps of 0.01, against
    ``test_percentage``.
    Parameters
    ----------
    interactions: :class:`spotlight.interactions.Interactions`
        The interactions to split.
    test_percentage: float, optional
        The fraction of users to place in the test set.
    random_state: np.random.RandomState, optional
        The random state used to draw the hash seed.
    Returns
    -------
    (train, test): (:class:`spotlight.interactions.Interactions`,
                    :class:`spotlight.interactions.Interactions`)
         A tuple of (train data, test data)
    """

    if random_state is None:
        random_state = np.random.RandomState()

    bounds = np.iinfo(np.uint32)
    seed = random_state.randint(bounds.min, bounds.max, dtype=np.int64)

    hashed = murmurhash3_32(interactions.user_ids,
                            seed=seed,
                            positive=True)
    in_test = (hashed % 100 / 100.0) < test_percentage
    in_train = np.logical_not(in_test)

    def _subset(mask):
        # Build an Interactions object from the rows selected by ``mask``,
        # keeping the user and item counts of the full dataset.
        return Interactions(interactions.user_ids[mask],
                            interactions.item_ids[mask],
                            ratings=_index_or_none(interactions.ratings,
                                                   mask),
                            timestamps=_index_or_none(interactions.timestamps,
                                                      mask),
                            weights=_index_or_none(interactions.weights,
                                                   mask),
                            num_users=interactions.num_users,
                            num_items=interactions.num_items)

    train = _subset(in_train)
    test = _subset(in_test)

    return train, test

In [9]:
# from spotlight.sequence.implicit import ImplicitSequenceModel
# from spotlight.cross_validation import user_based_train_test_split
from spotlight.datasets.goodbooks import get_goodbooks_dataset
from spotlight.datasets.amazon import get_amazon_dataset
from spotlight.datasets.movielens import get_movielens_dataset
# from spotlight.evaluation import sequence_mrr_score
# from spotlight.torch_utils import set_seed
# import hyperopt
# from hyperopt import Trials, hp, fmin, STATUS_OK, STATUS_FAIL


# Run

In [10]:
# Dataset identifiers; the first entry is the default for Args.dataset.
DATASETS = ['1m', '10m', 'amazon', 'goodbooks']
# Model names; the first entry is the default for Args.model.
MODELS = ['mlstm', 'lstm']

In [11]:
from dataclasses import dataclass


@dataclass
class Args:
    """Run configuration; every field defaults to the first listed option."""
    # Defaults to the first entry of DATASETS, like `dataset`.
    variant: str = DATASETS[0]
    # Selects which dataset loader is used.
    dataset: str = DATASETS[0]
    num_trials: int = 100
    # Defaults to the first entry of MODELS.
    model: str = MODELS[0]

In [12]:
# Use the default settings (first entries of DATASETS and MODELS).
args = Args()

In [13]:
# Fixed hyper-parameters for this run, keyed by name.
common_space = dict(
    batch_size=240,
    learn_rate=1.25e-2,
    l2=5.90e-06,
    n_iter=40,
    loss='adaptive_hinge',
    embedding_dim=120,
)

# `space` is the very same dict object; it is echoed under 'hyper' in the
# result summaries printed by the training cell.
space = common_space

# Unpack the settings into module-level names, coercing the integer-valued
# ones to int.
batch_size, n_iter, embedding_dim = (
    int(space[key]) for key in ('batch_size', 'n_iter', 'embedding_dim')
)
learn_rate, l2, loss = (space[key] for key in ('learn_rate', 'l2', 'loss'))

# Settings that are not part of the hyper-parameter dict.
num_negative_samples, verbose, seed = 5, True, 72

In [18]:
# Train an mLSTMNet on the dataset selected by `args.dataset`, then report the
# mean reciprocal rank (MRR) of the last item of every validation and test
# sequence. Module-level names such as `train`, `test`, `valid`,
# `representation`, `sequence_var`, `positive_prediction` and
# `user_representation` are left behind for inspection in later cells.
print("device is {}!".format(device))

# Seed torch and create the NumPy RandomState that is passed to the splits,
# the per-epoch shuffle and the negative sampler.
torch.manual_seed(seed)
random_state = np.random.RandomState(seed)

# Sequence-window settings; the Amazon dataset uses shorter windows.
max_sequence_length = 100
min_sequence_length = 20

if args.dataset == 'amazon':
    max_sequence_length = 50
    min_sequence_length = 5
    dataset = get_amazon_dataset()
elif args.dataset == 'goodbooks':
    dataset = get_goodbooks_dataset()
else:
    # Any other key (e.g. '1m', '10m') is treated as a MovieLens variant.
    dataset = get_movielens_dataset(args.dataset.upper())

# The step between consecutive windows equals the window length.
step_size = max_sequence_length

args.variant = args.dataset

# 80% train; the remaining 20% is halved into test and validation. The two
# calls share `random_state`, so their order matters for reproducibility.
train, rest = user_based_train_test_split(
    dataset,
    test_percentage=0.2,
    random_state=random_state)
test, valid = user_based_train_test_split(
    rest,
    test_percentage=0.5,
    random_state=random_state)

# Convert every split to sequences with identical window settings.
sequence_kwargs = dict(
    max_sequence_length=max_sequence_length,
    min_sequence_length=min_sequence_length,
    step_size=step_size)
train = train.to_sequence(**sequence_kwargs)
test = test.to_sequence(**sequence_kwargs)
valid = valid.to_sequence(**sequence_kwargs)

print('model: {}, data: {}'.format(args.model, train))


num_items=train.num_items

representation = mLSTMNet(
    num_items,
    embedding_dim=embedding_dim)

representation.to(device)

optimizer = torch.optim.Adam(
                representation.parameters(),
                weight_decay=l2,
                lr=learn_rate
            )

sequences = train.sequences.astype(np.int64)

check_input(sequences, num_items)


# Set when training aborts with a ValueError so the final summary cannot
# report status 'OK' for a failed run.
training_failed = False

start = time.perf_counter()
try:
    for epoch_num in range(n_iter):

        sequences = shuffle(sequences,
                            random_state=random_state)

        sequences_tensor = torch.from_numpy(sequences)
        sequences_tensor = sequences_tensor.to(device)

        epoch_loss = 0.0

        ## TODO: replace minibatch, shuffle with pytorch dataloader
        ## TODO: do same with validation set
        ## TODO: implement validation loss
        for minibatch_num, batch_sequence in enumerate(minibatch(sequences_tensor,
                                                                  batch_size=batch_size)):

            sequence_var = batch_sequence

            user_representation, _ = representation.user_representation(
                sequence_var
            )

            positive_prediction = representation(user_representation,
                                            sequence_var)


            negative_prediction = get_multiple_negative_predictions(
                sequence_var.size(),
                user_representation,
                net=representation,
                n=num_negative_samples,
                random_state=random_state)


            optimizer.zero_grad()

            # Named `batch_loss` (not `loss`) so the `loss` config string set
            # in the hyper-parameter cell is not overwritten by a tensor.
            # Padding positions are masked out of the loss.
            batch_loss = adaptive_hinge_loss(positive_prediction,
                                             negative_prediction,
                                             mask=(sequence_var != PADDING_IDX))
            epoch_loss += batch_loss.item()

            batch_loss.backward()

            optimizer.step()

        # Mean loss over the minibatches of this epoch.
        epoch_loss /= minibatch_num + 1

        if verbose:
            print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))

        if np.isnan(epoch_loss) or epoch_loss == 0.0:
            raise ValueError('Degenerate epoch loss: {}'
                              .format(epoch_loss))


    # Training time only; evaluation below is not included.
    elapsed = time.perf_counter() - start

    # Predict from each sequence's prefix (everything but the last item); the
    # held-out last item is the ranking target. Scores are negated before
    # being handed to sequence_mrr_score -- presumably because it ranks in
    # ascending order; TODO confirm against its definition.
    val_predictions = [-predict(representation, sequence, num_items)
                       for sequence in valid.sequences[:, :-1]]
    # Fixed: this previously iterated over `valid.sequences`, so the test MRR
    # compared validation predictions with test targets.
    test_predictions = [-predict(representation, sequence, num_items)
                        for sequence in test.sequences[:, :-1]]

    validation_mrr = sequence_mrr_score(
        val_predictions,
        valid.sequences[:, -1:],
        exclude_preceding=True
    ).mean()
    test_mrr = sequence_mrr_score(
        test_predictions,
        test.sequences[:, -1:],
        exclude_preceding=True
    ).mean()
except ValueError as err:
    # Raised by the degenerate-loss check above; a ValueError from any callee
    # lands here too, so surface the message instead of discarding it.
    elapsed = time.perf_counter() - start
    training_failed = True
    validation_mrr = 0.0
    test_mrr = 0.0
    print('Training aborted: {}'.format(err))
    print( {'loss': 0.0,
            'status': 'FAIL',
            'validation_mrr': 0.0,
            'test_mrr': 0.0,
            'elapsed': elapsed,
            'hyper': space})


print('MRR {} {}'.format(validation_mrr, test_mrr))

# A run fails if training aborted or the validation MRR is NaN.
if training_failed or np.isnan(validation_mrr):
    status = 'FAIL'
else:
    status = 'OK'

# Negated validation MRR is reported under 'loss'.
print({'loss': -validation_mrr,
        'status': status,
        'validation_mrr': validation_mrr,
        'test_mrr': test_mrr,
        'elapsed': elapsed,
        'hyper': space})


device is cuda!
model: mlstm, data: <Sequence interactions dataset (9869 sequences x 100 sequence length)>
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
torch.Size([240, 120, 100])
torch.Size([240, 100])
tor

KeyboardInterrupt: ignored

In [22]:
# Debug inspection: shape of the last minibatch bound by the training loop.
sequence_var.shape

torch.Size([240, 100])

In [23]:
# Debug inspection: shape of the scores computed for that last minibatch.
positive_prediction.shape

torch.Size([240, 100])

In [24]:
# Debug inspection: shape of the user representation for the last minibatch.
user_representation.shape

torch.Size([240, 120, 100])

In [20]:
# Debug inspection: the test split after conversion to sequences.
test

<Sequence interactions dataset (1266 sequences x 100 sequence length)>

In [19]:
# Debug inspection: scores for the first sequence of the last minibatch.
positive_prediction[0]

tensor([-8.9897e-02,  8.3161e-01,  1.9490e+00,  1.6316e+00,  1.3015e+00,
         1.4422e+00,  1.1159e+00,  1.2603e+00,  1.7145e+00,  7.8968e-01,
         2.0591e+00,  1.1350e+00,  1.3748e+00,  1.0087e+00,  1.2679e+00,
         1.0245e+00,  1.4537e+00,  1.0151e+00,  6.9320e-01,  8.3160e-01,
         1.9181e-01,  7.3736e-01,  4.2203e-01,  5.1625e-01,  1.1969e+00,
         1.8219e+00,  2.8332e+00,  1.2577e+00,  1.9564e+00,  9.8569e-01,
         2.1016e+00,  2.4810e+00,  1.5026e+00,  2.1306e-01, -2.2869e-01,
         1.8180e-01,  1.2645e+00, -8.7922e-02,  5.1651e-02,  1.7179e-01,
         1.0909e-03,  4.3184e-01,  4.9093e-01,  1.1351e+00,  1.0513e+00,
         1.4443e+00,  1.5232e+00,  1.1074e+00,  3.4924e-01, -1.6031e-01,
         2.3361e-01, -2.0497e-01,  1.6062e-01,  2.2768e-01,  5.7506e-03,
         1.9216e-01, -1.4283e-01,  1.9063e+00,  4.8740e-01,  2.6123e+00,
         2.7423e+00,  3.3496e+00,  3.2346e+00,  3.0395e+00,  3.3466e+00,
         3.6757e+00,  2.8670e+00,  3.5267e+00,  3.3

In [25]:
# Debug inspection: item ids of the first sequence of the last minibatch.
sequence_var[0]

tensor([ 877,  516,    5,  232,  279, 1559,  355,  316,  436,  364,   69,  335,
         220,  878, 2379,  940,  339, 1136,  552,  295,  975,  501,  312, 2160,
          44,  351,  135,  264,  244,  446,   39,   51,  306, 2140,  673, 2251,
         219,  224, 1674, 2629, 2579, 1236, 2628,  277,  310, 2515, 1121, 2578,
         363, 2927, 2118, 1670, 1288, 2519, 2030, 2930,   45,  670, 1480,  125,
         435,  214,   49,    6,  715,  210,  547,  218, 1110,  200,   98,  720,
         133,  140,  216,  187,   93, 1081,  137,  217,    7,  152,  211,  157,
         690,  273, 1285, 1229,   79, 1252,  749, 1155,   65,  209,  644,  647,
         721,   63,  265,  729], device='cuda:0')