In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parameter as P
import torch.optim as optim
from sklearn.model_selection import train_test_split
import numpy as np
import random
import time

In [2]:
torch.cuda.empty_cache()
use_gpu = True
if use_gpu:
    device = 'cuda'
else:
    device = 'cpu'
print('Using device: ', device)

Using device:  cuda


In [3]:
class Dataset(torch.utils.data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, data):
        'Initialization'
        self.data = data

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.data)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        data_item = self.data[index]

        # Load data and get label
        X = data_item[:2]
        X = torch.tensor(X).to(device)
        y = data_item[2]
        y = torch.tensor(y).to(device).float()

        return X, y

In [4]:
userIds = set()
movieIds = set()
triplets = []



file1 = open('../../ml-100k/u.data', 'r')
for line in file1.readlines():
    uid, mid, rating, timestamp = line.split('	')
    userIds.add(int(uid))
    movieIds.add(int(mid))
    triplets.append([uid,mid,rating])

    
random.shuffle(triplets)

triplets = np.array(triplets, dtype='int')

train_val_split = int(len(triplets)*0.8)
train_triplets = triplets[: train_val_split]
val_triplets = triplets[train_val_split:]

params = {'batch_size': 64,
          'shuffle': True,
         'drop_last': True}



training_set = Dataset(train_triplets)
val_set = Dataset(val_triplets)

train_gen = torch.utils.data.DataLoader(training_set, **params)
val_gen = torch.utils.data.DataLoader(val_set, **params)

numUsers = max(userIds)
numItems = max(movieIds)

In [5]:
def weights_init_uniform(m):
    classname = m.__class__.__name__
    
    # for every Linear layer in a model..
    if classname.find('Linear') != -1:
        weight_range = 4.0 * pow(6, 0.5) / pow(m.in_features + m.out_features, 0.5)
        # apply a paper distribution to the weights and a bias=0
        m.weight.data.uniform_(-1 * weight_range, weight_range)
        m.bias.data.fill_(0)


In [6]:
class FCMF(nn.Module):
    '''
    Base class for Fully-Connected Matrix Factorization networks
    '''

    def __init__ (self, N, M, D, D_, K, layers):
        '''
        variable definitions taken from paper: https://arxiv.org/pdf/1511.06443.pdf
        
        @param N:  Number of users
        @param M:  Number of items
        @param D:  size of latent-feature vectors
        @param D_: num rows in latent-features matrices
        @param K:  num cols in latent-feature matrices
        
        @param layers: list of hidden layer sizes; does not include input or output
        '''
        
        assert (min(N,M,D,D_,K) > 0), "Params must be nonzero and positive"
        assert (len(layers) > 0),     "Must have nonzero hidden layers"
        
        ########################################################################
        
        super(FCMF, self).__init__()

        
        self.N, self.M, self.D, self.D_, self.K = N, M, D, D_, K

        
        self.userLatentVectors = P.Parameter(torch.rand(N,D, requires_grad=True))
        self.itemLatentVectors = P.Parameter(torch.rand(M,D, requires_grad=True))

        
        self.userLatentMatrices = P.Parameter(torch.rand(N,D_,K, requires_grad=True))
        self.itemLatentMatrices = P.Parameter(torch.rand(M,D_,K, requires_grad=True))

        
        linear_inputs = [2*D + D_] + layers
        linear_outputs = layers + [1]

        self.layers = nn.ModuleList([nn.Linear(i,o) for (i,o) in zip(linear_inputs, linear_outputs)])
        #Initialize weights as specified in paper
        self.apply(weights_init_uniform)
        

        
    def forward(self, x):
        '''
        @param x: let this be a tensor of size (X, 2): (user index, item index)
        
        WARNING: 
            - forward currently does not account for user/items outside of training data
            - mitigations include returning smart averages    
        '''   
        #Get 100 user and item indices
        userIndices, itemIndices = x[:,0].long(), x[:,1].long()
        
        
        #Select the 10 dimensional rows for each user and item        
        userLatMats = self.userLatentMatrices[userIndices]
        itemLatMats = self.itemLatentMatrices[itemIndices]

        #Take the product of these and sum it to get the feature U'n,1 * V'm,1 + ... + U'n,D * V'm,D
        latentDotProducts = torch.sum(userLatMats * itemLatMats, dim=-1)
        
        x = torch.cat([
            #D
            self.userLatentVectors[userIndices],
            #D
            self.itemLatentVectors[itemIndices],
            #D'
            latentDotProducts
        ], dim=1)
        
        

        
        for l in self.layers[:-1]:
            x = torch.sigmoid(l(x))
        
        # TODO: should last layer go through a sigmoid? NO!
        out = self.layers[-1](x)
        return out
    
    def gradAll(self):
        self._setGrads(True, True, True, True, True)
    
    def gradNetwork(self):
        self._setGrads(False, False, False, False, True)

    def gradLatent(self):
        self._setGrads(True, True, True, True, False)

    def _setGrads(self, userVec, itemVec, userMat, itemMat, net):
        self.userLatentVectors.requires_grad = userVec
        self.itemLatentVectors.requires_grad = itemVec
        
        self.userLatentMatrices.requires_grad = userMat
        self.itemLatentMatrices.requires_grad = itemMat
        
        self.layers.requires_grad = net
        
        
        

        

In [7]:
def getBatches(X, Y, usersPerBatch=100):
    '''
    batchSize = min(N - start, usersPerBatch) * M
    '''
    N = X.shape[0]
    
    start = 0
    while start < N:

        if start+usersPerBatch + 1 < N:
            batch_x = torch.tensor(X[start:start+usersPerBatch]).to(device)
            batch_y = torch.tensor(Y[start:start+usersPerBatch]).to(device).float()
            start += usersPerBatch
            yield (batch_x, batch_y)

        else:
            batch_x = torch.tensor(X[start:]).to(device)
            batch_y = torch.tensor(Y[start:]).to(device).float()
            start += usersPerBatch
            yield (batch_x, batch_y)
            
def trainEpoch(opt, criterion, model):
    model.train()
    opt.zero_grad()
    totalLoss = 0

    for batch_x, batch_y in train_gen:
        optimizer.zero_grad()
        pred_y = model(batch_x)
        #RMSE in order to compare to paper
        loss = pow(criterion(batch_y, pred_y.flatten()), 0.5)
        #reg_loss = torch.norm(model.userLatentVectors) + torch.norm(model.itemLatentVectors) + torch.norm(model.userLatentMatrices) + torch.norm(model.itemLatentMatrices)
        #loss = pred_loss + 50 * reg_loss
        totalLoss += loss
        loss.backward()
        optimizer.step()

    return totalLoss / len(train_gen)

def evaluate(criterion, model):
    model.eval()
    loss = 0
    accuracy = 0
    for batch_x, batch_y in val_gen:
        pred_y = model(batch_x)
        loss += pow(criterion(batch_y, pred_y.flatten()), 0.5)
        matching = (torch.round(pred_y.detach()).flatten() == batch_y.flatten()).type(torch.uint8).sum()
        matching = matching
        accuracy += matching
    return loss / len(val_gen), accuracy.item()/len(val_set)


In [None]:
fc3 = FCMF(numUsers+1, numItems+1 ,10,60,1,[50, 50, 50]).to(device)
# Paper uses RMSE as objective and RMSProp optimizer
print(fc3)
criterion = nn.MSELoss()
optimizer = optim.RMSprop(fc3.parameters(), lr=0.001)
min_val_loss = float('inf')
path = "NNMF/best_model.pt"

epochs = 0
max_epochs = 50
while epochs < max_epochs:
    start = time.time()
    fc3.gradAll()
    fc3.gradLatent()
    loss = trainEpoch(optimizer, criterion, fc3)
    fc3.gradNetwork()
    loss += trainEpoch(optimizer, criterion, fc3)
    val_loss, val_acc = evaluate(criterion, fc3)
    print("Epoch {} Train Loss: {} Val Loss: {} Val Acc: {} Duration: {}".format(epochs,loss, val_loss, val_acc, time.time()-start))
    epochs+=1
    if val_loss < min_val_loss:
        torch.save(fc3.state_dict(), path)
        print(f"New min val loss: {val_loss}. Saving model weights")
        min_val_loss = val_loss




FCMF(
  (layers): ModuleList(
    (0): Linear(in_features=80, out_features=50, bias=True)
    (1): Linear(in_features=50, out_features=50, bias=True)
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): Linear(in_features=50, out_features=1, bias=True)
  )
)
Epoch 0 Train Loss: 1.9776184558868408 Val Loss: 0.9701220393180847 Val Acc: 0.38525 Duration: 25.26687240600586
New min val loss: 0.9701220393180847. Saving model weights
Epoch 1 Train Loss: 1.852799654006958 Val Loss: 0.9524884223937988 Val Acc: 0.4197 Duration: 28.216583013534546
New min val loss: 0.9524884223937988. Saving model weights
Epoch 2 Train Loss: 1.8104047775268555 Val Loss: 0.9467328786849976 Val Acc: 0.4234 Duration: 27.286761045455933
New min val loss: 0.9467328786849976. Saving model weights
Epoch 3 Train Loss: 1.7738680839538574 Val Loss: 0.9399663805961609 Val Acc: 0.41945 Duration: 28.7211012840271
New min val loss: 0.9399663805961609. Saving model weights
Epoch 4 Train Loss: 1.7352707386016846 

In [None]:
evaluate(optimizer, criterion)