In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parameter as P
import torch.optim as optim

import numpy as np

In [2]:
userIds = set()
movieIds = set()
triplets = []

file1 = open('../../ml-100k/u.data', 'r')
for line in file1.readlines():
    uid, mid, rating, timestamp = line.split('	')
    userIds.add(int(uid))
    movieIds.add(int(mid))
    triplets.append([uid,mid,rating])

triplets = np.array(triplets, dtype='int')
numUsers = max(userIds)
numItems = max(movieIds)
X = triplets[:,0:2]
Y = triplets[:,2]

trainX = X[:-10000]
valX = X[-10000:]
trainY = Y[:-10000]
valY = Y[-10000:]

In [None]:
class Dataset(torch.utils.data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, list_IDs, labels):
        'Initialization'
        self.labels = labels
        self.list_IDs = list_IDs

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]

        # Load data and get label
        X = torch.load('data/' + ID + '.pt')
        y = self.labels[ID]

        return X, y

In [3]:
def weights_init_uniform(m):
    classname = m.__class__.__name__
    
    # for every Linear layer in a model..
    if classname.find('Linear') != -1:
        weight_range = 4.0 * pow(6, 0.5) / pow(m.in_features + m.out_features, 0.5)
        # apply a paper distribution to the weights and a bias=0
        m.weight.data.uniform_(-1 * weight_range, weight_range)
        m.bias.data.fill_(0)


In [None]:
train_dataloader = DataLoader(
        train_dataset,  # The training samples.
        sampler=RandomSampler(train_dataset),  # Select batches randomly
        batch_size=batch_size  # Trains with this batch size.
    )

In [4]:
class FCMF(nn.Module):
    '''
    Base class for Fully-Connected Matrix Factorization networks
    '''

    def __init__ (self, N, M, D, D_, K, layers):
        '''
        variable definitions taken from paper: https://arxiv.org/pdf/1511.06443.pdf
        
        @param N:  Number of users
        @param M:  Number of items
        @param D:  size of latent-feature vectors
        @param D_: num rows in latent-features matrices
        @param K:  num cols in latent-feature matrices
        
        @param layers: list of hidden layer sizes; does not include input or output
        '''
        
        assert (min(N,M,D,D_,K) > 0), "Params must be nonzero and positive"
        assert (len(layers) > 0),     "Must have nonzero hidden layers"
        
        ########################################################################
        
        super(FCMF, self).__init__()

        
        self.N, self.M, self.D, self.D_, self.K = N, M, D, D_, K

        
        self.userLatentVectors = P.Parameter(torch.rand(N,D, requires_grad=True))
        self.itemLatentVectors = P.Parameter(torch.rand(M,D, requires_grad=True))

        
        self.userLatentMatrices = P.Parameter(torch.rand(N,D_,K, requires_grad=True))
        self.itemLatentMatrices = P.Parameter(torch.rand(M,D_,K, requires_grad=True))

        
        linear_inputs = [2*D + D_] + layers
        linear_outputs = layers + [1]

        self.layers = nn.ModuleList([nn.Linear(i,o) for (i,o) in zip(linear_inputs, linear_outputs)])
        #Initialize weights as specified in paper
        self.apply(weights_init_uniform)
        

        
    def forward(self, x):
        '''
        @param x: let this be a tensor of size (X, 2): (user index, item index)
        
        WARNING: 
            - forward currently does not account for user/items outside of training data
            - mitigations include returning smart averages    
        '''   
        #Get 100 user and item indices
        userIndices, itemIndices = x[:,0].long(), x[:,1].long()
        
        
        #Select the 10 dimensional rows for each user and item        
        userLatMats = self.userLatentMatrices[userIndices]
        itemLatMats = self.itemLatentMatrices[itemIndices]

        #Take the product of these and sum it to get the feature U'n,1 * V'm,1 + ... + U'n,D * V'm,D
        latentDotProducts = torch.sum(userLatMats * itemLatMats, dim=-1)
        
        x = torch.hstack([
            #D
            self.userLatentVectors[userIndices],
            #D
            self.itemLatentVectors[itemIndices],
            #D'
            latentDotProducts
        ])
        

        
        for l in self.layers[:-1]:
            x = torch.sigmoid(l(x))
        
        # TODO: should last layer go through a sigmoid? NO!
        out = self.layers[-1](x)
        return out
    
    def gradAll(self):
        self._setGrads(True, True, True, True, True)
    
    def gradNetwork(self):
        self._setGrads(False, False, False, False, True)

    def gradLatent(self):
        self._setGrads(True, True, True, True, False)

    def _setGrads(self, userVec, itemVec, userMat, itemMat, net):
        self.userLatentVectors.requires_grad = userVec
        self.itemLatentVectors.requires_grad = itemVec
        
        self.userLatentMatrices.requires_grad = userMat
        self.itemLatentMatrices.requires_grad = itemMat
        
        self.layers.requires_grad = net
        
        
        

        

In [17]:
def getBatches(X, Y, usersPerBatch=100):
    '''
    batchSize = min(N - start, usersPerBatch) * M
    '''
    N = X.shape[0]
    
    start = 0
    while start < N:

        if start+usersPerBatch + 1 < N:
            batch_x = torch.tensor(X[start:start+usersPerBatch]).to(device)
            batch_y = torch.tensor(Y[start:start+usersPerBatch]).to(device).float()
            start += usersPerBatch
            yield (batch_x, batch_y)

        else:
            batch_x = torch.tensor(X[start:]).to(device)
            batch_y = torch.tensor(Y[start:]).to(device).float()
            start += usersPerBatch
            yield (batch_x, batch_y)
            
def trainEpoch(opt, criterion, model, X,Y):
    model.train()
    opt.zero_grad()
    totalLoss = 0
    print(X.shape)
    count = 0
    for batch_x, batch_y in getBatches(X,Y):
        count += 1
        optimizer.zero_grad()
        pred_y = model(batch_x)
        #RMSE in order to compare to paper
        loss = pow(criterion(batch_y, pred_y.flatten()), 0.5)
        #print(loss)
        totalLoss += loss
        loss.backward()
        optimizer.step()
        
    return totalLoss/count

def evaluate(criterion, model, X,Y):
    model.eval()
    loss = 0
    accuracy = 0
    count = 0
    for batch_x, batch_y in getBatches(X,Y):
        count += 1
        pred_y = model(batch_x)
        loss += pow(criterion(batch_y, pred_y.squeeze()), 0.5)
        matching = (torch.round(pred_y.detach()).flatten() == batch_y.flatten()).type(torch.uint8).sum()
        matching = matching
        print(matching)
        accuracy += matching
    return loss/count, accuracy/X.shape[0]


In [18]:
# device = 'cuda'
device = 'cpu'

In [None]:
fc3 = FCMF(numUsers+1, numItems+1 ,10,10,5,[5]).to(device)
# Paper uses RMSE as objective and RMSProp optimizer
criterion = nn.MSELoss()
optimizer = optim.RMSprop(fc3.parameters(), lr=0.001)

epochs = 0
max_epochs = 50
while epochs < max_epochs:
    fc3.gradAll()
    fc3.gradLatent()
    loss = trainEpoch(optimizer, criterion, fc3, trainX,trainY)
    fc3.gradNetwork()
    loss += trainEpoch(optimizer, criterion, fc3, trainX,trainY)
    val_loss, val_acc = evaluate(criterion, fc3, valX, valY)
    print("Epoch {} Train Loss: {} Val Loss: {} Val Acc: {}".format(epochs,loss, val_loss, val_acc))
    epochs+=1




(90000, 2)
(90000, 2)
tensor(44)
tensor(36)
tensor(41)
tensor(44)
tensor(35)
tensor(36)
tensor(29)
tensor(44)
tensor(48)
tensor(41)
tensor(32)
tensor(42)
tensor(35)
tensor(33)
tensor(30)
tensor(35)
tensor(33)
tensor(37)
tensor(36)
tensor(36)
tensor(43)
tensor(29)
tensor(33)
tensor(49)
tensor(28)
tensor(35)
tensor(29)
tensor(37)
tensor(38)
tensor(42)
tensor(38)
tensor(39)
tensor(36)
tensor(39)
tensor(24)
tensor(34)
tensor(37)
tensor(40)
tensor(33)
tensor(37)
tensor(37)
tensor(29)
tensor(43)
tensor(35)
tensor(34)
tensor(34)
tensor(35)
tensor(42)
tensor(38)
tensor(39)
tensor(37)
tensor(45)
tensor(45)
tensor(36)
tensor(38)
tensor(49)
tensor(29)
tensor(37)
tensor(43)
tensor(36)
tensor(29)
tensor(30)
tensor(33)
tensor(25)
tensor(38)
tensor(34)
tensor(37)
tensor(35)
tensor(37)
tensor(27)
tensor(37)
tensor(37)
tensor(31)
tensor(37)
tensor(46)
tensor(44)
tensor(38)
tensor(31)
tensor(37)
tensor(35)
tensor(30)
tensor(39)
tensor(33)
tensor(39)
tensor(35)
tensor(32)
tensor(34)
tensor(33)
tensor(34)

(90000, 2)
tensor(38)
tensor(42)
tensor(33)
tensor(47)
tensor(38)
tensor(38)
tensor(44)
tensor(41)
tensor(40)
tensor(43)
tensor(48)
tensor(42)
tensor(49)
tensor(35)
tensor(42)
tensor(35)
tensor(34)
tensor(34)
tensor(35)
tensor(50)
tensor(36)
tensor(35)
tensor(44)
tensor(55)
tensor(40)
tensor(50)
tensor(43)
tensor(33)
tensor(40)
tensor(44)
tensor(39)
tensor(42)
tensor(36)
tensor(39)
tensor(31)
tensor(42)
tensor(40)
tensor(41)
tensor(39)
tensor(42)
tensor(40)
tensor(37)
tensor(42)
tensor(45)
tensor(40)
tensor(37)
tensor(38)
tensor(44)
tensor(42)
tensor(33)
tensor(34)
tensor(45)
tensor(37)
tensor(38)
tensor(41)
tensor(42)
tensor(33)
tensor(50)
tensor(46)
tensor(35)
tensor(35)
tensor(37)
tensor(33)
tensor(36)
tensor(47)
tensor(43)
tensor(44)
tensor(37)
tensor(33)
tensor(38)
tensor(42)
tensor(40)
tensor(28)
tensor(42)
tensor(50)
tensor(45)
tensor(36)
tensor(39)
tensor(42)
tensor(35)
tensor(42)
tensor(42)
tensor(43)
tensor(39)
tensor(46)
tensor(39)
tensor(33)
tensor(36)
tensor(45)
tensor(40)

(90000, 2)
tensor(35)
tensor(43)
tensor(27)
tensor(48)
tensor(41)
tensor(38)
tensor(37)
tensor(41)
tensor(42)
tensor(42)
tensor(42)
tensor(37)
tensor(42)
tensor(38)
tensor(39)
tensor(36)
tensor(40)
tensor(33)
tensor(35)
tensor(49)
tensor(37)
tensor(41)
tensor(44)
tensor(46)
tensor(37)
tensor(41)
tensor(41)
tensor(28)
tensor(43)
tensor(43)
tensor(40)
tensor(42)
tensor(36)
tensor(36)
tensor(32)
tensor(44)
tensor(39)
tensor(42)
tensor(42)
tensor(45)
tensor(35)
tensor(37)
tensor(30)
tensor(43)
tensor(38)
tensor(35)
tensor(36)
tensor(43)
tensor(36)
tensor(37)
tensor(39)
tensor(44)
tensor(36)
tensor(33)
tensor(42)
tensor(33)
tensor(42)
tensor(43)
tensor(49)
tensor(39)
tensor(31)
tensor(41)
tensor(34)
tensor(35)
tensor(44)
tensor(38)
tensor(38)
tensor(33)
tensor(32)
tensor(42)
tensor(41)
tensor(39)
tensor(30)
tensor(44)
tensor(50)
tensor(42)
tensor(35)
tensor(46)
tensor(38)
tensor(37)
tensor(31)
tensor(40)
tensor(43)
tensor(42)
tensor(46)
tensor(39)
tensor(30)
tensor(37)
tensor(43)
tensor(42)

(90000, 2)
tensor(33)
tensor(42)
tensor(29)
tensor(48)
tensor(40)
tensor(38)
tensor(33)
tensor(41)
tensor(38)
tensor(37)
tensor(36)
tensor(35)
tensor(43)
tensor(40)
tensor(31)
tensor(32)
tensor(39)
tensor(35)
tensor(34)
tensor(43)
tensor(34)
tensor(30)
tensor(37)
tensor(38)
tensor(42)
tensor(43)
tensor(40)
tensor(33)
tensor(39)
tensor(41)
tensor(37)
tensor(35)
tensor(34)
tensor(37)
tensor(35)
tensor(39)
tensor(38)
tensor(38)
tensor(40)
tensor(44)
tensor(40)
tensor(33)
tensor(33)
tensor(40)
tensor(39)
tensor(32)
tensor(36)
tensor(36)
tensor(28)
tensor(35)
tensor(29)
tensor(43)
tensor(42)
tensor(33)
tensor(35)
tensor(35)
tensor(38)
tensor(41)
tensor(43)
tensor(35)
tensor(30)
tensor(35)
tensor(30)
tensor(34)
tensor(37)
tensor(38)
tensor(34)
tensor(31)
tensor(32)
tensor(40)
tensor(41)
tensor(37)
tensor(32)
tensor(40)
tensor(50)
tensor(41)
tensor(37)
tensor(43)
tensor(31)
tensor(37)
tensor(36)
tensor(35)
tensor(44)
tensor(38)
tensor(39)
tensor(35)
tensor(31)
tensor(43)
tensor(43)
tensor(39)

(90000, 2)


In [None]:
evaluate(optimizer, criterion, testX, testY)