In [1]:
import tensorflow as tf
import array
import gzip
import random
from tensorflow.keras import Model
from collections import defaultdict
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parameter as P
import torch.optim as optim
import random
import numpy as np
import os
tf.enable_eager_execution()



In [2]:
userIds = {}
movieIds = {}
triplets = []

# Parameters
params = {'batch_size': 64,
          'shuffle': True,
         'drop_last': True}

file1 = open('../../ml-100k/u.data', 'r')
for line in file1.readlines():
    uid, mid, rating, timestamp = line.split('	')
    uid, mid = int(uid), int(mid)
    if not uid in userIds: userIds[uid] = len(userIds)
    if not mid in movieIds: movieIds[mid] = len(movieIds)
    triplets.append([uid,mid,rating])
    
random.shuffle(triplets)

triplets = np.array(triplets, dtype='int32')

train_val_split = int(len(triplets)*0.8)
train_triplets = triplets[: train_val_split]
val_triplets = triplets[train_val_split:]


numUsers = max(userIds)
numItems = max(movieIds)

In [3]:

train_dataset = tf.data.Dataset.from_tensor_slices((train_triplets[:,0], train_triplets[:,1], train_triplets[:,2]))
val_dataset = tf.data.Dataset.from_tensor_slices((val_triplets[:,0], val_triplets[:,1], val_triplets[:,2]))

In [4]:
#print(len(train_dataset))

In [5]:
# mean rating, just for initialization
mu = sum([r for _,_,r in train_triplets]) / len(train_triplets)

In [6]:
# Gradient descent optimizer, experiment with learning rate
optimizer = tf.keras.optimizers.RMSprop(0.001)

In [7]:
class LatentFactorModel(tf.keras.Model):
    def __init__(self, mu, K, lamb):
        super(LatentFactorModel, self).__init__()
        # Initialize to average
        self.alpha = tf.Variable(mu)
        self.alpha = tf.dtypes.cast(self.alpha, tf.float32)
        # Initialize to small random values
        self.betaU = tf.Variable(tf.random.normal([len(userIds)],stddev=0.001))
        self.betaI = tf.Variable(tf.random.normal([len(movieIds)],stddev=0.001))
        self.gammaU = tf.Variable(tf.random.normal([len(userIds),K],stddev=0.001))
        self.gammaI = tf.Variable(tf.random.normal([len(movieIds),K],stddev=0.001))
        self.lamb = lamb

    # Prediction for a single instance (useful for evaluation)
    def predict(self, u, i):
        p = self.alpha + self.betaU[u] + self.betaI[i] +\
            tf.tensordot(self.gammaU[u], self.gammaI[i], 1)
        return p

    # Regularizer
    def reg(self):
        return self.lamb * tf.reduce_sum(self.betaU**2) +\
                           tf.reduce_sum(self.betaI**2) +\
                           tf.reduce_sum(self.gammaU**2) +\
                           tf.reduce_sum(self.gammaI**2)
    
    # Prediction for a sample of instances
    def predictSample(self, sampleU, sampleI):
        #u = tf.convert_to_tensor(sampleU, dtype=tf.int32)
        #i = tf.convert_to_tensor(sampleI, dtype=tf.int32)
        u = sampleU
        i = sampleI
        beta_u = tf.nn.embedding_lookup(self.betaU, u)
        beta_i = tf.nn.embedding_lookup(self.betaI, i)
        gamma_u = tf.nn.embedding_lookup(self.gammaU, u)
        gamma_i = tf.nn.embedding_lookup(self.gammaI, i)
        pred = self.alpha + beta_u + beta_i +\
               tf.reduce_sum(tf.multiply(gamma_u, gamma_i), 1)
        return pred
    
    # Loss
    def call(self, sampleU, sampleI, sampleR):
        pred = self.predictSample(sampleU, sampleI)
        #r = tf.convert_to_tensor(sampleR, dtype=tf.float32)
        r = sampleR
        loss = pow(tf.keras.losses.mean_squared_error(r, pred), 0.5)
        return loss
    

In [8]:
# Experiment with number of factors and regularization rate

model = LatentFactorModel(mu, 10, 0)


In [9]:
def trainingStep(interactions):
    Nsamples = 500
    with tf.GradientTape() as tape:
        sampleU, sampleI, sampleR = [], [], []
        for _ in range(Nsamples):
            u,i,r = random.choice(interactions)
            sampleU.append(userIds[u])
            sampleI.append(movieIds[i])
            sampleR.append(r)

        loss = model(sampleU,sampleI,sampleR)
        loss += model.reg()
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients((grad, var) for
                              (grad, var) in zip(gradients, model.trainable_variables)
                              if grad is not None)
    return loss.numpy()

In [10]:
def trainingStep(interactions):
    totalLoss = 0
    count = 0
    for x in train_dataset.batch(64, True):
        with tf.GradientTape() as tape:
            loss = model(x[0], x[1],x[2])
            totalLoss += loss
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients((grad, var) for
                                  (grad, var) in zip(gradients, model.trainable_variables)
                                  if grad is not None)
        count += 1
    

    return totalLoss/count

In [11]:
def valStep(interactions):
    totalLoss = 0
    count = 0
    accuracy = 0
    for x in val_dataset.batch(64, True):
        loss = model(x[0], x[1],x[2])
        totalLoss += loss
        count += 1
        preds = tf.math.round(model.predictSample(x[0], x[1]))
        preds = tf.cast(preds, tf.int32)
        matching = tf.math.equal(preds, x[2])
        matching = tf.reduce_sum(tf.cast(matching, tf.int32))
        accuracy += matching.numpy()
        
    return totalLoss/count, accuracy/len(val_triplets)

In [12]:
checkpoint_path = "BiasedMF/best_model.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
min_val_loss = float('inf')


# 10 iterations of gradient descent
for i in range(50):
    #print(valStep(val_triplets))
    train_loss = trainingStep(train_triplets)
    val_loss, val_acc = valStep(val_triplets)
    train_loss, val_loss, val_acc = train_loss.numpy(), val_loss.numpy(), val_acc
    
    
    print("Epoch " + str(i) + ", Train Loss = " + str(train_loss) + ", Val Loss = " + str(val_loss) + ", Val Acc = " + str(val_acc))
    if val_loss < min_val_loss:
        # Save the weights using the `checkpoint_path` format
        model.save_weights(checkpoint_path)
        print(f"New min val loss: {val_loss}. Saving model weights")
        min_val_loss = val_loss

Epoch 0, Train Loss = 1.0850519, Val Loss = 1.0441837, Val Acc = 0.3902
New min val loss: 1.0441837310791016. Saving model weights
Epoch 1, Train Loss = 1.0141151, Val Loss = 0.9890019, Val Acc = 0.40225
New min val loss: 0.9890019297599792. Saving model weights
Epoch 2, Train Loss = 0.96937627, Val Loss = 0.96233904, Val Acc = 0.4121
New min val loss: 0.9623390436172485. Saving model weights
Epoch 3, Train Loss = 0.94414777, Val Loss = 0.94791657, Val Acc = 0.42025
New min val loss: 0.947916567325592. Saving model weights
Epoch 4, Train Loss = 0.92731094, Val Loss = 0.939288, Val Acc = 0.4251
New min val loss: 0.9392880201339722. Saving model weights
Epoch 5, Train Loss = 0.9149323, Val Loss = 0.933774, Val Acc = 0.42865
New min val loss: 0.9337739944458008. Saving model weights
Epoch 6, Train Loss = 0.9051913, Val Loss = 0.93002933, Val Acc = 0.43095
New min val loss: 0.9300293326377869. Saving model weights
Epoch 7, Train Loss = 0.89705265, Val Loss = 0.9273134, Val Acc = 0.4324
New

KeyboardInterrupt: 