In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parameter as P

import numpy as np

In [39]:
class FCMF(nn.Module):
    '''
    Base class for Fully-Connected Matrix Factorization networks
    '''
    
    def __init__ (self, N, M, D, D_, K, layers):
        '''
        variable definitions taken from paper: https://arxiv.org/pdf/1511.06443.pdf
        
        @param N:  Number of users
        @param M:  Number of items
        @param D:  size of latent-feature vectors
        @param D_: num rows in latent-features matrices
        @param K:  num cols in latent-feature matrices
        
        @param layers: list of hidden layer sizes; does not include input or output
        '''
        
        assert (min(N,M,D,D_,K) > 0), "Params must be nonzero and positive"
        assert (len(layers) > 0),     "Must have nonzero hidden layers"
        
        ########################################################################
        
        super(FCMF, self).__init__()
        
        self.N, self.M, self.D, self.D_, self.K = N, M, D, D_, K
        
        self.userLatentVectors = P.Parameter(torch.rand(N,D, requires_grad=True))
        self.itemLatentVectors = P.Parameter(torch.rand(M,D, requires_grad=True))
        
        self.userLatentMatrices = P.Parameter(torch.rand(N,D_,K, requires_grad=True))
        self.itemLatentMatrices = P.Parameter(torch.rand(M,D_,K, requires_grad=True))
        
        linear_inputs = [2*D + D_] + layers
        linear_outputs = layers + [1]
        
        self.layers = nn.ModuleList([nn.Linear(i,o) for (i,o) in zip(linear_inputs, linear_outputs)])
        
    def forward(self, x):
        '''
        @param x: let this be a tensor of size (X, 2): (user index, item index)
        
        WARNING: 
            - forward currently does not account for user/items outside of training data
            - mitigations include returning smart averages    
        '''        
        userIndices, itemIndices = x[:,0].long(), x[:,1].long()
                
        userLatMats = self.userLatentMatrices[userIndices]
        itemLatMats = self.itemLatentMatrices[itemIndices]
        latentDotProducts = torch.sum(userLatMats * itemLatMats, dim=-1)        
        
        x = torch.hstack([
            self.userLatentVectors[userIndices],
            self.itemLatentVectors[itemIndices],
            latentDotProducts
        ])
        
        for l in self.layers[:-1]:
            x = F.relu(l(x))
        
        # TODO: should last layer go through a sigmoid?
        return self.layers[-1](x)
    
    def gradAll(self):
        self._setGrads(True, True, True, True, True)
    
    def gradNetwork(self):
        self._setGrads(False, False, False, False, True)

    def gradLatent(self):
        self._setGrads(True, True, True, True, False)

    def _setGrads(self, userVec, itemVec, userMat, itemMat, net):
        self.userLatentVectors.requires_grad_(userVec)
        self.itemLatentVectors.requires_grad_(itemVec)
        
        self.userLatentMatrices.requires_grad_(userMat)
        self.itemLatentMatrices.requires_grad_(itemMat)
        
        self.layers.requires_grad = net
        

In [40]:
def getBatches(mat, usersPerBatch=100):
    '''
    batchSize = min(N - start, usersPerBatch) * M
    '''
    N, M = mat.shape
    
    start = 0
    while start < N:
        batchSize = min(N - start, usersPerBatch) * M
                
        batch_x = torch.empty(batchSize, 2)
        batch_y = torch.empty(batchSize, 1)
        
        for userId, ratings in enumerate(mat[start: start+N]):
            for movieId, stars in enumerate(ratings):
                
                curId = userId * M + movieId
                                
                batch_x[curId][0] = userId
                batch_x[curId][1] = movieId
                batch_y[curId][0] = stars
                    
        start += N
        
        yield (batch_x, batch_y)
        
    

def trainEpoch(opt, criterion, model, mat):
    opt.zero_grad()
    loss = 0
    for batch_x, batch_y in getBatches(mat):
        pred_y = model(batch_x)
        loss += criterion(batch_y, pred_y)
    loss.backward()
    optimizer.step()

In [41]:
numUsers = 5
numItems = 5

def randMat():
    return np.random.randint(6, size=(numUsers, numItems))

testMatrix = randMat()
testMatrix

array([[0, 3, 3, 1, 3],
       [2, 3, 0, 5, 5],
       [1, 1, 3, 3, 4],
       [4, 1, 5, 2, 2],
       [3, 3, 0, 0, 4]])

In [42]:
fc3 = FCMF(numUsers, numItems ,2,2,1,[5])

NameError: name 'P' is not defined

In [43]:
import torch.optim as optim

# Paper uses RMSE as objective and RMSProp optimizer
criterion = nn.MSELoss()
optimizer = optim.RMSprop(fc3.parameters(), lr=0.01)

In [44]:
trainEpoch(optimizer, criterion, fc3, testMatrix)

In [45]:
fc3.gradLatent()
trainEpoch(optimizer, criterion, fc3, randMat())

In [46]:
fc3.gradNetwork()
trainEpoch(optimizer, criterion, fc3, randMat())

In [47]:
fc3.gradAll()
trainEpoch(optimizer, criterion, fc3, randMat())

In [49]:
list(fc3.named_parameters())

[('userLatentVectors',
  Parameter containing:
  tensor([[0.6560, 1.2778],
          [0.0103, 1.1705],
          [0.0426, 0.8050],
          [0.3098, 0.9659],
          [0.4452, 0.7394]], requires_grad=True)),
 ('layers.0.weight',
  Parameter containing:
  tensor([[-0.2738,  0.0099, -0.1069,  0.2414, -0.1543, -0.2926],
          [-0.3537,  0.2577, -0.3879, -0.2134, -0.2290, -0.3225],
          [ 0.1727,  0.7911,  0.7274,  0.3876,  0.5268,  0.8661],
          [-0.3029, -0.2514, -0.2051,  0.0914,  0.0065, -0.4467],
          [-0.3514, -0.2216, -0.1388, -0.4857,  0.3218,  0.0411]],
         requires_grad=True)),
 ('layers.0.bias',
  Parameter containing:
  tensor([-0.2979, -0.3259,  0.4670,  0.1606,  0.3767], requires_grad=True)),
 ('layers.1.weight',
  Parameter containing:
  tensor([[ 0.1442, -0.1540,  0.8235, -0.3451, -0.0610]], requires_grad=True)),
 ('layers.1.bias',
  Parameter containing:
  tensor([0.7270], requires_grad=True))]