#User-Movie Mapping Sparse Matrix Dataset
**No need to execute if dataset_matrix.npz already present**

In [None]:
import numpy as np
from scipy import sparse
def save_sparse_matrix(filename, x):
    x_coo = x.tocoo()
    row = x_coo.row
    col = x_coo.col
    data = x_coo.data
    shape = x_coo.shape
    np.savez(filename, row=row, col=col, data=data, shape=shape)

def load_sparse_matrix(filename):
    y = np.load(filename)
    z = sparse.coo_matrix((y['data'], (y['row'], y['col'])), shape=y['shape'])
    return z

In [None]:
import pandas as pd
ratings_df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/Data/rating_updated_clean.csv')

In [None]:
movies_df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/Data/movies_clean.csv')
movies_df.iloc[0].movieId

1

In [None]:
from scipy.sparse import lil_matrix
from tqdm import tqdm
user_movie = lil_matrix((ratings_df['userId'].unique().shape[0]+1, movies_df.shape[0]))
for i in tqdm(range(0,movies_df.shape[0])):
  movies = ratings_df[ratings_df.movieId == movies_df.iloc[i]['movieId']]
  userIdList = movies['userId'].values
  for j in userIdList:
        user_movie[j,i] = 1

In [None]:
save_sparse_matrix('tmp/dataset_matrix',user_movie)

In [None]:
z = load_sparse_matrix('/content/drive/My Drive/Colab Notebooks/Data/dataset_matrix.npz').tolil()
z[1]

<1x62000 sparse matrix of type '<class 'numpy.float64'>'
	with 70 stored elements in List of Lists format>



---


#<h2>Model Code</h2>

---
Download Required:

[PreTrained Weights](https://drive.google.com/drive/folders/1-6nMVziieH2K4SqKcJQeiHIwmzV8oRr0?usp=sharing)

[User-Movie Sparse Matrix dataset.npz](https://drive.google.com/file/d/1onaqEkTF-Fo7iHTztcJUrep1l5Ht5rz6/view)

[Embed Matrix](https://drive.google.com/file/d/1YF4BGBIklBRso-7rAmYkccT9REVTNXbK/view)

[Movies Dataset for movie title and index](https://drive.google.com/file/d/1-BvShIGsXyWzvQ_ssXqp9E5wnbnxSXA7/view)

Instruction to Execute:
1. Download necessary dataset and matrices. Update below cell and fix path according to downloaded data path. 
2. Execute First Cell to load required datasest, matrices and define VAE model architecture and loss function
3. Load PreSaved Weights from checkpoint
4. Generate Predictions 

### Mount


In [1]:
cd

/root


In [2]:
from google.colab import drive
drive.flush_and_unmount()

Drive not mounted, so nothing to flush and unmount.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### <h1>Complete HVAE CODE </h1>

In [4]:
import numpy as np
import os
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from sklearn.utils import shuffle
# Load the TensorBoard notebook extension.
%load_ext tensorboard

'''
  Load Movies Dataset (62000k)
  Load [User -> Movie Map] : [162k,62K] Sparse Matrix 
  Load [Embed Movie Feature Vector] : [62k, 3] Embedding generated from MVAE
'''
movies_df = pd.read_csv('/content/drive/My Drive/Colab Notebooks/Data/movies_clean.csv')

embed_movie_feature = np.load('/content/drive/My Drive/Colab Notebooks/Data/embed_movie.npy')

import numpy as np
from scipy import sparse
def save_sparse_matrix(filename, x):
    x_coo = x.tocoo()
    row = x_coo.row
    col = x_coo.col
    data = x_coo.data
    shape = x_coo.shape
    np.savez(filename, row=row, col=col, data=data, shape=shape)

def load_sparse_matrix(filename):
    y = np.load(filename)
    z = sparse.coo_matrix((y['data'], (y['row'], y['col'])), shape=y['shape'], dtype='int8')   
    z = shuffle(z)
    return z




'''
  BATCH GENERATOR FOR FIT
'''
def nn_batch_generator(x,  batch_size, samples_per_epoch):
    movie_indices = np.array([range(0,embed_movie_feature.shape[0])])
    movie_indices = np.repeat(movie_indices, batch_size, axis = 0)
    number_of_batches = samples_per_epoch/batch_size
    counter=0
    shuffle_index = np.arange(np.shape(x)[0])
    np.random.shuffle(shuffle_index)
    x =  x[shuffle_index, :]
    # y =  y[shuffle_index, :]
    while 1:
        index_batch = shuffle_index[batch_size*counter:batch_size*(counter+1)]
        x_batch = np.array(x[index_batch,:].todense()).astype('float32')
        # x_new_batch = x_batch
        
        counter += 1
        yield (x_batch, x_batch)
        if (counter >= number_of_batches):
            counter=0

'''
  Generate Predictions
'''
def generate_predictions(vae, x_train,movies_df,k=20):
  INPUT_DIM = x_train.shape[1]

  user_ID_rand = int(np.random.randint(x_train.shape[0], size=1))

  test = np.array(x_train[user_ID_rand].todense()).reshape((1,INPUT_DIM))
  # _,_,z=vae.encoder(test)
  test_reconstructed = vae.predict(test)
  # test_reconstructed = test_reconstructed.numpy()
  top_rated_movies_idx = [i for i, x in enumerate(test[0].tolist()) if x == 1.0]
  np.random.shuffle(top_rated_movies_idx)
  
  print(f'User liked {len(top_rated_movies_idx)} movies')

  if len(top_rated_movies_idx) == 0:
    print('Emptylist')
  else:
    # print(top_rated_movies_idx)
    sorted_ratings = test_reconstructed[0].tolist()

    top_predicted_movies_idx = sorted(range(len(sorted_ratings)), key=lambda i: sorted_ratings[i])[-k:]
    # print(top_predicted_movies_idx)

  print('Liked')
  count=0
  for i in top_rated_movies_idx:
    print(movies_df.iloc[i]['movieId'], end= ' ')
    count += 1
    if count >20:
      break
  print() 
  print('Predicted')
  for i in top_predicted_movies_idx:
    print(movies_df.iloc[i]['movieId'], end= ' ')
  print() 

  count=0
  for i in top_rated_movies_idx:

    print(movies_df[movies_df.movieId == movies_df.iloc[i]['movieId']]['title'].values, end= '->')
    print(movies_df[movies_df.movieId ==  movies_df.iloc[i]['movieId']]['genres'].values)
    count += 1
    if count >20:
      break
    # print(movies_df[movies_df.movieId ==  movie_dataset_df.columns[i]].head())
    # print(i)
    # print(movie_dataset_df.columns[i])
  print('*'*100)
  for i in top_predicted_movies_idx:
    print(movies_df[movies_df.movieId == movies_df.iloc[i]['movieId']]['title'].values, end= '->')
    print(movies_df[movies_df.movieId ==  movies_df.iloc[i]['movieId']]['genres'].values)


def recallatk(x_test, x_test_reconstructed, k):
    recall_values = []
    total_recall = 0.0
    for i in tqdm(range(len(x_test))):
        top_rated_movies_idx = [i for i, x in enumerate(x_test[i].tolist()) if x == 1.0]
        if len(top_rated_movies_idx) == 0:
            continue

        sorted_ratings = x_test_reconstructed[i].tolist()
        top_predicted_movies_idx = sorted(range(len(sorted_ratings)), key=lambda i: sorted_ratings[i])[-k:]
        
        sum = 0.0
        for i in range(0, k):
            if top_predicted_movies_idx[i] in top_rated_movies_idx:
                sum+=1.0
        recall = sum/float(min(k, len(top_rated_movies_idx)))
        total_recall += recall
        recall_values.append(recall)
    return total_recall/float(len(recall_values))

def ndcgatk(x_test, x_test_reconstructed, k):
    ndcg_values = []
    total_ndcg = 0.0
    best  = 0.0
    for i in tqdm(range(len(x_test))):
        top_rated_movies_idx = [i for i, x in enumerate(x_test[i].tolist()) if x == 1.0]
        
        if len(top_rated_movies_idx) == 0:
            continue
        sorted_ratings = x_test_reconstructed[i].tolist()
        top_predicted_movies_idx = sorted(range(len(sorted_ratings)), key=lambda i: sorted_ratings[i])[-k:]
        sum_ndcg = 0
        for i in range(0, k):
            if top_predicted_movies_idx[i] in top_rated_movies_idx:
                ndcg = 1/(np.math.log(i+2))
            else:
                ndcg = 0
            sum_ndcg += ndcg

        total_ndcg += sum_ndcg
        ndcg_values.append(sum_ndcg)

    ndcg_values = np.array(ndcg_values)
    max_ndcg = ndcg_values.max()
    ndcg_values = ndcg_values / max_ndcg 
    total_ndcg = np.sum(ndcg_values)

    return total_ndcg/float(len(ndcg_values))


import bottleneck as bn
import numpy as np

def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    Normalized Discounted Cumulative Gain@k for binary relevance
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance
    '''
    batch_users = X_pred.shape[0]
    idx_topk_part = bn.argpartition(-X_pred, k, axis=1)
    topk_part = X_pred[np.arange(batch_users)[:, np.newaxis],
                       idx_topk_part[:, :k]]
    idx_part = np.argsort(-topk_part, axis=1)

    idx_topk = idx_topk_part[np.arange(batch_users)[:, np.newaxis], idx_part]

    tp = 1. / np.log2(np.arange(2, k + 2))

    DCG = (heldout_batch[np.arange(batch_users)[:, np.newaxis],
                         idx_topk].toarray() * tp).sum(axis=1)
    IDCG = np.array([(tp[:min(n, k)]).sum()
                     for n in heldout_batch.getnnz(axis=1)])
    return DCG / IDCG


def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    batch_users = X_pred.shape[0]

    idx = bn.argpartition(-X_pred, k, axis=1)
    X_pred_binary = np.zeros_like(X_pred, dtype=bool)
    X_pred_binary[np.arange(batch_users)[:, np.newaxis], idx[:, :k]] = True

    X_true_binary = (heldout_batch > 0).toarray()
    tmp = (np.logical_and(X_true_binary, X_pred_binary).sum(axis=1)).astype(
        np.float32)
    recall = tmp / np.minimum(k, X_true_binary.sum(axis=1))
    return recall




In [5]:
ui_matrix = load_sparse_matrix('/content/drive/My Drive/Colab Notebooks/Data/dataset_matrix.npz')
# x_train_coo = x_train
# Convert to CSR format from stored COO format remove initial empty 
# x_train = x_train.tocsr()
# x_train = x_train[1:]



train_size = ui_matrix.shape[0]-20000
train_dataset = ui_matrix[0:train_size]
validation_dataset = ui_matrix[train_size: train_size+10000]
test_dataset = ui_matrix[train_size+10000:]
test_dataset_sparse = test_dataset
y_train = test_dataset

In [6]:
print(train_size)
print(ui_matrix.shape)


print(test_dataset.shape)

142542
(162542, 62000)
(10000, 62000)


## MULTVAE

In [202]:
import tensorflow as tf
layers = tf.keras.layers


class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.random.normal(shape=(batch, dim))
        # epsilon = tf.keras.backend.random_normal(shape=(batch, dim))

        return z_mean + tf.exp(0.5 * z_log_var) 


def model_encoder(input_dim, latent_dim, dims, vocab_size=1000, embed_dim=3, seq_length=1000, weights=[], **kwargs):
  encoder_inputs = keras.Input(shape=(input_dim,), dtype="int32")
  # embed = layers.Embedding(vocab_size,embed_dim ,weights=weights, input_length=seq_length, trainable=True)(encoder_inputs)
  # flat_embed = layers.Flatten()(embed)

  x=layers.Dense(dims[0], activation='tanh',
                  name="encoder_{}".format(dims[0]),
                  kernel_initializer=tf.initializers.GlorotUniform(),
                  bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.001),
                  # kernel_regularizer='l2'
                  ) (encoder_inputs)

  for d in dims[1:]:
    x = layers.Dense(
                  d,
                  activation='tanh',
                  name="encoder_{}".format(d),
                  kernel_initializer=tf.initializers.GlorotUniform(),
                  bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.001),
                  # kernel_regularizer='l2'
                  )(x)
  z_mean = layers.Dense(latent_dim, name="z_mean")(x)
  z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
  z = Sampling()([z_mean, z_log_var])
  encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
  # encoder.summary()
  return encoder

def model_decoder(input_dim,latent_dim,dims):
  latent_inputs = keras.Input(shape=(latent_dim,))
  x=layers.Dense(dims[0], activation='tanh',
                  name="decoder_{}".format(dims[0]),
                  kernel_initializer=tf.initializers.GlorotUniform(),
                  bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.001),
                  ) (latent_inputs)
  for d in dims[1:]:
    x = layers.Dense(
                  d,
                  activation='tanh',
                  name="decoder_{}".format(d),
                  kernel_initializer=tf.initializers.GlorotUniform(),
                  bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.001),
                  )(x)

  decoder_outputs=layers.Dense(input_dim,
                  activation='relu',
                  name="decoder_output_{}".format(input_dim),
                  kernel_initializer=tf.initializers.GlorotUniform(),
                  bias_initializer=tf.keras.initializers.truncated_normal(stddev=0.001),
                ) (x)
  decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
  # decoder.summary()
  return decoder




class MULTVAE(keras.Model):
    def __init__(self, encoder, decoder,lam=3e-2,
                 total_anneal_steps=200000,
                 anneal_cap=0.2, **kwargs):
        super(MULTVAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.lam = lam
        self.total_anneal_steps = total_anneal_steps
        self.anneal_cap = anneal_cap
        self.update_count=0
     
    def call(self, inputs, training=False):
        # if isinstance(inputs,tf.sparse.SparseTensor):
        #     inp = tf.sparse.to_dense(inputs)
        # else:
        #     inp = inputs
        inp = inputs
        z_mean, z_log_var, z = self.encoder(inp)
        # if training:
        #   logits=self.decoder(z)
        # else:
        #   logits=self.decoder(z_mean)
        logits = self.decoder(z)
        # Add KL divergence regularization loss.
        kl_loss = tf.reduce_mean(-0.5 * tf.reduce_sum(
            z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1
        ,axis=1))


      
        if self.total_anneal_steps>0:
          anneal= min(self.anneal_cap, 1.* (self.update_count/self.total_anneal_steps))
        else:
          anneal = self.anneal_cap

        if training:
          self.update_count += 1

        # softmax=tf.nn.log_softmax(logits)
        # per-user average negative log-likelihood part of loss
        # ll_loss = -tf.reduce_sum(tf.gather_nd(softmax, inputs.indices)) / batch_size
        loss = 62000* tf.reduce_mean(tf.keras.losses.binary_crossentropy(inp, logits))
        # loss = -tf.reduce_mean(tf.reduce_sum(tf.nn.log_softmax(logits)*inputs ,axis=1))
        
        # loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(tf.cast(inp,"float32"), logits), axis=1))

        # regularization part of loss
        reg_loss = 2 * tf.reduce_sum(self.losses)

        # anneal * kl_loss
        loss =  loss +  kl_loss
        # +  self.lam * reg_loss 
        self.add_loss(loss)        
        return logits

In [204]:
y_train = train_dataset
print(y_train.shape)

(142542, 62000)


In [205]:
VOCAB_SIZE = y_train.shape[1]
EMBED_DIM = 3
input_dim=y_train.shape[1]
latent_dim = 32
encoder_dims=[64]
encoder = model_encoder(input_dim,
                        latent_dim,
                        encoder_dims,
                        vocab_size=VOCAB_SIZE,
                embed_dim=EMBED_DIM,
                weights=[embed_movie_feature],
                seq_length=VOCAB_SIZE)
encoder_dims.reverse()
decoder = model_decoder(input_dim, latent_dim, encoder_dims)

# training_steps = len(range(0, y_train.shape[0], BATCH_SIZE))
BATCH_SIZE=128
TRAIN_SAMPLES_PER_EPOCH = y_train.shape[0]
TRAIN_STEPS_PER_EPOCH = np.math.ceil(TRAIN_SAMPLES_PER_EPOCH/BATCH_SIZE)

EPOCHS=5
ANNEAL_CAP = 0.2
TOTAL_ANNEAL_STEPS = (
        TRAIN_STEPS_PER_EPOCH * (EPOCHS - int(EPOCHS * 0.2))
    ) / ANNEAL_CAP

VAL_SAMPLES_PER_EPOCH = validation_dataset.shape[0]
VAL_BATCH_SIZE = BATCH_SIZE
VAL_STEPS_PER_EPOCH = np.math.ceil(VAL_SAMPLES_PER_EPOCH/VAL_BATCH_SIZE)


vae = MULTVAE(encoder, decoder,total_anneal_steps=TOTAL_ANNEAL_STEPS,
                 anneal_cap=ANNEAL_CAP)
# out = vae(np.random.random((10,10)).astype('float32'))
# vae.summary()
# out.shape
vae.compile(optimizer='adam')
# dataset = np.random.random((10000,1000)).astype('float32')


In [206]:
vae.encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_32 (InputLayer)           [(None, 62000)]      0                                            
__________________________________________________________________________________________________
encoder_64 (Dense)              (None, 64)           3968064     input_32[0][0]                   
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 32)           2080        encoder_64[0][0]                 
__________________________________________________________________________________________________
z_log_var (Dense)               (None, 32)           2080        encoder_64[0][0]                 
____________________________________________________________________________________________

In [207]:
# vae.decoder.save('/content/drive/My Drive/Colab Notebooks/Data/tmp/model/decoder.h5')

In [208]:
vae.decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_33 (InputLayer)        [(None, 32)]              0         
_________________________________________________________________
decoder_64 (Dense)           (None, 64)                2112      
_________________________________________________________________
decoder_output_62000 (Dense) (None, 62000)             4030000   
Total params: 4,032,112
Trainable params: 4,032,112
Non-trainable params: 0
_________________________________________________________________


### Train

In [209]:
checkpoint_path = '/content/drive/My Drive/Colab Notebooks/Data/tmp/training/cp-latest-MULTVAE-TRAIN-WITH_SOFTMAX.h5'

cp_callback = keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                              save_weights_only=True, 
                                              verbose=1, save_freq='epoch',
            monitor='loss',
            mode='auto',
            save_best_only=True)


In [210]:






vae.fit(nn_batch_generator(y_train, BATCH_SIZE, TRAIN_SAMPLES_PER_EPOCH) ,
        steps_per_epoch=TRAIN_STEPS_PER_EPOCH, 
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_data = nn_batch_generator(validation_dataset, VAL_BATCH_SIZE, VAL_SAMPLES_PER_EPOCH),
        validation_steps=VAL_STEPS_PER_EPOCH,
        # callbacks=[cp_callback]
        )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fadb532a080>

In [120]:
# vae.save_weights('/content/drive/My Drive/Colab Notebooks/Data/tmp/training/cp-latest-MULTVAE.h5')

In [211]:
# vae.save('/content/drive/My Drive/Colab Notebooks/Data/tmp/model/MultVae')
vae.save('/content/tmp/model/New')

INFO:tensorflow:Assets written to: /content/tmp/model/New/assets


### LOAD WEIGHTS

In [None]:
import tensorflow as tf

new_vae = tf.keras.models.load_model('/content/drive/My Drive/Colab Notebooks/Data/tmp/model/MultVae')

In [None]:
new_vae.predict(np.array(x_train[0].todense()).reshape((1,x_train.shape[1])).astype('float32'))

In [None]:







# vae.load_weights('/content/drive/My Drive/Colab Notebooks/Data/tmp/training/cp-latest-MULTVAE.h5')
vae.load_weights('/content/drive/My Drive/Colab Notebooks/Data/tmp/training/cp-latest-MULTVAE-TRAIN-2WITHSOFTTMAX.h5')

### Generate Predictions

In [114]:

'''
  Generate Predictions
'''
def generate_predictions(vae, x_train,movies_df,k=20):
  INPUT_DIM = x_train.shape[1]

  user_ID_rand = int(np.random.randint(x_train.shape[0], size=1))


  test = np.array(x_train[user_ID_rand].todense()).reshape((1,INPUT_DIM))
  # _,_,z=vae.encoder(test)
  test_reconstructed = vae.predict(test)
  # test_reconstructed = test_reconstructed.numpy()
  top_rated_movies_idx = x_train[user_ID_rand].indices
  shuffle(top_rated_movies_idx)
  
  print(f'User liked {len(top_rated_movies_idx)} movies')

  if len(top_rated_movies_idx) == 0:
    print('Emptylist')
  else:
    # print(top_rated_movies_idx)
    sorted_ratings = test_reconstructed[0].tolist()

    top_predicted_movies_idx = sorted(range(len(sorted_ratings)), key=lambda i: sorted_ratings[i])[-k:]
    # print(top_predicted_movies_idx)

  print('Liked')
  count=0
  for i in top_rated_movies_idx:
    print(movies_df.iloc[i]['movieId'], end= ' ')
    count += 1
    if count >20:
      break
  print() 
  print('Predicted')
  for i in top_predicted_movies_idx:
    print(movies_df.iloc[i]['movieId'], end= ' ')
  print() 

  count=0
  for i in top_rated_movies_idx:

    print(movies_df[movies_df.movieId == movies_df.iloc[i]['movieId']]['title'].values, end= '->')
    print(movies_df[movies_df.movieId ==  movies_df.iloc[i]['movieId']]['genres'].values)
    count += 1
    if count >20:
      break
    # print(movies_df[movies_df.movieId ==  movie_dataset_df.columns[i]].head())
    # print(i)
    # print(movie_dataset_df.columns[i])
  print('*'*100)
  for i in top_predicted_movies_idx:
    if i in top_rated_movies_idx:
      print("****IN USER PREFERENCE****", end="->")
      print(movies_df[movies_df.movieId == movies_df.iloc[i]['movieId']]['title'].values, end= '->')
      print(movies_df[movies_df.movieId ==  movies_df.iloc[i]['movieId']]['genres'].values)

    else:
      print(movies_df[movies_df.movieId == movies_df.iloc[i]['movieId']]['title'].values, end= '->')
      print(movies_df[movies_df.movieId ==  movies_df.iloc[i]['movieId']]['genres'].values)


In [212]:
test = np.array(y_train[1].todense()).reshape((1,62000))
pred = vae.predict(test)

print(test)
print(pred)

[[0 0 0 ... 0 0 0]]
[[0.4455937  0.15677905 0.         ... 0.         0.         0.        ]]


In [213]:

max(pred[0])

1.0266626

In [216]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 54 movies
Liked
10 19 21 34 39 47 50 78 110 111 150 153 161 165 185 208 225 231 253 288 292 
Predicted
110 329 292 10 356 434 454 165 380 161 590 153 349 457 592 
['GoldenEye (1995)']->['Action|Adventure|Thriller']
['Ace Ventura: When Nature Calls (1995)']->['Comedy']
['Get Shorty (1995)']->['Comedy|Crime|Thriller']
['Babe (1995)']->['Children|Drama']
['Clueless (1995)']->['Comedy|Romance']
['Seven (a.k.a. Se7en) (1995)']->['Mystery|Thriller']
['Usual Suspects, The (1995)']->['Crime|Mystery|Thriller']
['Crossing Guard, The (1995)']->['Action|Crime|Drama|Thriller']
['Braveheart (1995)']->['Action|Drama|War']
['Taxi Driver (1976)']->['Crime|Drama|Thriller']
['Apollo 13 (1995)']->['Adventure|Drama|IMAX']
['Batman Forever (1995)']->['Action|Adventure|Comedy|Crime']
['Crimson Tide (1995)']->['Drama|Thriller|War']
['Die Hard: With a Vengeance (1995)']->['Action|Crime|Thriller']
['Net, The (1995)']->['Action|Crime|Thriller']
['Waterworld (1995)']->['Action|Adventure|Sci-Fi']
['Disc

In [217]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 22 movies
Liked
1 318 356 480 1866 1892 1902 2335 2553 2675 2743 2939 3896 4002 4344 4615 4697 4753 4863 4940 4950 
Predicted
356 480 2939 1720 3558 1902 527 318 593 4970 5926 3773 3445 2553 3012 
['Toy Story (1995)']->['Adventure|Animation|Children|Comedy|Fantasy']
['Shawshank Redemption, The (1994)']->['Crime|Drama']
['Forrest Gump (1994)']->['Comedy|Drama|Romance|War']
['Jurassic Park (1993)']->['Action|Adventure|Sci-Fi|Thriller']
['Big Hit, The (1998)']->['Action|Comedy|Crime']
['Perfect Murder, A (1998)']->['Thriller']
['Dream for an Insomniac (1996)']->['Drama|Romance']
['Waterboy, The (1998)']->['Comedy']
['Village of the Damned (1960)']->['Horror|Sci-Fi|Thriller']
['Twice Upon a Yesterday (a.k.a. Man with Rain in His Shoes, The) (1998)']->['Comedy|Drama|Romance']
['Native Son (1986)']->['Drama']
['Niagara (1953)']->['Drama|Thriller']
['Way of the Gun, The (2000)']->['Crime|Thriller']
['Planes, Trains & Automobiles (1987)']->['Comedy']
['Swordfish (2001)']->['Action|C

In [218]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 287 movies
Liked
2 10 17 21 32 34 47 48 50 70 111 163 196 235 247 253 260 273 293 296 313 
Predicted
1251 296 2697 110 1263 1031 1232 2838 1202 260 589 2013 593 2743 1207 
['Jumanji (1995)']->['Adventure|Children|Fantasy']
['GoldenEye (1995)']->['Action|Adventure|Thriller']
['Sense and Sensibility (1995)']->['Drama|Romance']
['Get Shorty (1995)']->['Comedy|Crime|Thriller']
['Twelve Monkeys (a.k.a. 12 Monkeys) (1995)']->['Mystery|Sci-Fi|Thriller']
['Babe (1995)']->['Children|Drama']
['Seven (a.k.a. Se7en) (1995)']->['Mystery|Thriller']
['Pocahontas (1995)']->['Animation|Children|Drama|Musical|Romance']
['Usual Suspects, The (1995)']->['Crime|Mystery|Thriller']
['From Dusk Till Dawn (1996)']->['Action|Comedy|Horror|Thriller']
['Taxi Driver (1976)']->['Crime|Drama|Thriller']
['Desperado (1995)']->['Action|Romance|Western']
['Species (1995)']->['Horror|Sci-Fi']
['Ed Wood (1994)']->['Comedy|Drama']
['Heavenly Creatures (1994)']->['Crime|Drama']
['Interview with the Vampire: The V

In [219]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 25 movies
Liked
1670 58246 59014 59832 62439 68572 72731 78776 101680 101880 102417 102878 104280 106094 108885 109529 110568 111852 111924 112279 116345 
Predicted
116413 114875 133735 98294 58246 106234 112279 81537 161054 109529 89371 78776 108885 83613 122567 
['Welcome to Sarajevo (1997)']->['Drama|War']
['Grace Is Gone (2007)']->['Drama']
['Superhero Movie (2008)']->['Action|Comedy|Sci-Fi']
['Where the Sidewalk Ends (1950)']->['Crime|Drama|Film-Noir']
["My Best Friend's Girl (2008)"]->['Comedy|Romance']
['Kids in the Hall: Same Guys, New Dresses (2001)']->['Comedy|Documentary']
['Lovely Bones, The (2009)']->['Crime|Drama|Fantasy|Horror|Thriller']
['Hannah Free (2009)']->['Drama']
['Copper Mountain (1983)']->['Comedy|Musical']
['Siberian Education (Educazione siberiana) (2013)']->['Drama']
['Sicily! (Sicilia!) (1999)']->['Drama']
['Global Affair, A (1964)']->['Comedy']
['Black Room, The (1935)']->['Crime|Horror|Thriller']
['Godzilla: Tokyo S.O.S. (Gojira tai Mosura tai 

In [220]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 100 movies
Liked
39 318 364 500 527 597 1024 1189 1670 1892 2044 2066 2129 2554 2652 3862 4425 4863 4873 5592 5595 
Predicted
69542 134437 62439 97457 98294 68572 58246 83613 135127 161054 89371 122567 109529 81537 78776 
['Clueless (1995)']->['Comedy|Romance']
['Shawshank Redemption, The (1994)']->['Crime|Drama']
['Lion King, The (1994)']->['Adventure|Animation|Children|Drama|Musical|IMAX']
['Mrs. Doubtfire (1993)']->['Comedy|Drama']
["Schindler's List (1993)"]->['Drama|War']
['Pretty Woman (1990)']->['Comedy|Romance']
['Three Caballeros, The (1945)']->['Animation|Children|Musical']
['Thin Blue Line, The (1988)']->['Documentary']
['Welcome to Sarajevo (1997)']->['Drama|War']
['Perfect Murder, A (1998)']->['Thriller']
['Devil and Max Devlin, The (1981)']->['Comedy|Fantasy']
['Out of the Past (1947)']->['Film-Noir']
['Saltmen of Tibet, The (Salzmänner von Tibet, Die) (1997)']->['Documentary']
['Children of the Damned (1963)']->['Horror|Sci-Fi|Thriller']
['Curse of Frankenstei

In [221]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 96 movies
Liked
1 19 50 110 150 161 296 344 356 377 380 480 592 593 607 670 733 776 1252 1568 1574 
Predicted
6513 1189 1263 2743 480 3012 2939 356 260 7126 5926 318 4970 593 2553 
['Toy Story (1995)']->['Adventure|Animation|Children|Comedy|Fantasy']
['Ace Ventura: When Nature Calls (1995)']->['Comedy']
['Usual Suspects, The (1995)']->['Crime|Mystery|Thriller']
['Braveheart (1995)']->['Action|Drama|War']
['Apollo 13 (1995)']->['Adventure|Drama|IMAX']
['Crimson Tide (1995)']->['Drama|Thriller|War']
['Pulp Fiction (1994)']->['Comedy|Crime|Drama|Thriller']
['Ace Ventura: Pet Detective (1994)']->['Comedy']
['Forrest Gump (1994)']->['Comedy|Drama|Romance|War']
['Speed (1994)']->['Action|Romance|Thriller']
['True Lies (1994)']->['Action|Adventure|Comedy|Romance|Thriller']
['Jurassic Park (1993)']->['Action|Adventure|Sci-Fi|Thriller']
['Batman (1989)']->['Action|Crime|Thriller']
['Silence of the Lambs, The (1991)']->['Crime|Horror|Thriller']
['Century (1993)']->['Drama']
['World of

In [222]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 520 movies
Liked
1 5 10 19 39 47 48 95 110 145 153 161 185 204 208 231 260 316 318 349 350 
Predicted
80290 4863 4284 45335 5435 1202 1568 6513 116413 45003 3979 53582 999 1284 69945 
['Toy Story (1995)']->['Adventure|Animation|Children|Comedy|Fantasy']
['Father of the Bride Part II (1995)']->['Comedy']
['GoldenEye (1995)']->['Action|Adventure|Thriller']
['Ace Ventura: When Nature Calls (1995)']->['Comedy']
['Clueless (1995)']->['Comedy|Romance']
['Seven (a.k.a. Se7en) (1995)']->['Mystery|Thriller']
['Pocahontas (1995)']->['Animation|Children|Drama|Musical|Romance']
['Broken Arrow (1996)']->['Action|Adventure|Thriller']
['Braveheart (1995)']->['Action|Drama|War']
['Bad Boys (1995)']->['Action|Comedy|Crime|Drama|Thriller']
['Batman Forever (1995)']->['Action|Adventure|Comedy|Crime']
['Crimson Tide (1995)']->['Drama|Thriller|War']
['Net, The (1995)']->['Action|Crime|Thriller']
['Under Siege 2: Dark Territory (1995)']->['Action']
['Waterworld (1995)']->['Action|Adventure|Sci-Fi

In [223]:
generate_predictions(vae,y_train,  movies_df, 15)

User liked 344 movies
Liked
29 70 110 213 260 266 293 296 356 509 518 539 588 589 594 595 596 615 713 779 853 
Predicted
27664 3483 4950 4970 853 527 919 1168 1272 776 747 3287 260 541 1705 
['City of Lost Children, The (Cité des enfants perdus, La) (1995)']->['Adventure|Drama|Fantasy|Mystery|Sci-Fi']
['From Dusk Till Dawn (1996)']->['Action|Comedy|Horror|Thriller']
['Braveheart (1995)']->['Action|Drama|War']
['Burnt by the Sun (Utomlyonnye solntsem) (1994)']->['Drama']
['Star Wars: Episode IV - A New Hope (1977)']->['Action|Adventure|Sci-Fi']
['Legends of the Fall (1994)']->['Drama|Romance|War|Western']
['Léon: The Professional (a.k.a. The Professional) (Léon) (1994)']->['Action|Crime|Drama|Thriller']
['Pulp Fiction (1994)']->['Comedy|Crime|Drama|Thriller']
['Forrest Gump (1994)']->['Comedy|Drama|Romance|War']
['Piano, The (1993)']->['Drama|Romance']
['Road to Wellville, The (1994)']->['Comedy']
['Sleepless in Seattle (1993)']->['Comedy|Drama|Romance']
['Aladdin (1992)']->['Adventure|

### Metrics 

In [224]:
from tqdm import tqdm
size = 1000
ndcg= []
recall=[]
for i in tqdm(range(10)):
  x_test_matrix_sparse = test_dataset_sparse[i*size:(i+1)*size]
  x_test_matrix = np.array(x_test_matrix_sparse.todense() ) 
  x_test_reconstructed = vae.predict(x_test_matrix)  # float values per user

  ndcg.append(np.mean(NDCG_binary_at_k_batch(x_test_reconstructed, x_test_matrix_sparse, 100)))
  recall.append(np.mean(Recall_at_k_batch(x_test_reconstructed, x_test_matrix_sparse,20)))
 



100%|██████████| 10/10 [00:18<00:00,  1.86s/it]


In [225]:
import numpy as np
print("RECALL: "+str(np.mean(recall)))
print("NDCG: "+str(np.mean(ndcg)))

RECALL: 0.5094955263157895
NDCG: 0.49558543182374015


In [226]:
batch_size=128
x_test_matrix_sparse = test_dataset_sparse[4000:5000]
x_test_matrix = np.array(x_test_matrix_sparse.todense() ) 


x_test_reconstructed = vae.predict(x_test_matrix)  # float values per user
print(x_test_matrix.shape)
print(x_test_reconstructed.shape)

(1000, 62000)
(1000, 62000)


In [227]:
ndcgatk(x_test_matrix, x_test_reconstructed, 100)

100%|██████████| 1000/1000 [00:18<00:00, 54.63it/s]


0.33885060703488

In [228]:
recallatk(x_test_matrix, x_test_reconstructed,20)

100%|██████████| 1000/1000 [00:18<00:00, 53.88it/s]


0.5298000000000004

In [229]:
ndcg = NDCG_binary_at_k_batch(x_test_reconstructed, x_test_matrix_sparse, 100)

In [230]:
recall = Recall_at_k_batch(x_test_reconstructed, x_test_matrix_sparse,20)

In [231]:
print(np.mean(ndcg))
print(np.mean(recall))


0.5104991262707413
0.5297999999999999


## TFJS CONVERSSION

In [54]:
pip install tensorflowjs

Collecting tensorflowjs
[?25l  Downloading https://files.pythonhosted.org/packages/67/4e/f9a147cbf1694b76ac5d2689bbaecf4f70a98712908474ea733d2f545f61/tensorflowjs-3.1.0-py3-none-any.whl (63kB)
[K     |████████████████████████████████| 71kB 3.4MB/s 
Collecting tensorflow-hub<0.10,>=0.7.0
[?25l  Downloading https://files.pythonhosted.org/packages/ac/83/a7df82744a794107641dad1decaad017d82e25f0e1f761ac9204829eef96/tensorflow_hub-0.9.0-py2.py3-none-any.whl (103kB)
[K     |████████████████████████████████| 112kB 7.5MB/s 
Installing collected packages: tensorflow-hub, tensorflowjs
  Found existing installation: tensorflow-hub 0.11.0
    Uninstalling tensorflow-hub-0.11.0:
      Successfully uninstalled tensorflow-hub-0.11.0
Successfully installed tensorflow-hub-0.9.0 tensorflowjs-3.1.0


In [55]:
import tensorflowjs as tfjs

In [232]:
# tfjs.converters.convert_tf_saved_model('/content/drive/My Drive/Colab Notebooks/Data/tmp/model/MultVae', '/content/drive/My Drive/Colab Notebooks/Data/tmp/model/MultVae1/js')
tfjs.converters.convert_tf_saved_model('/content/tmp/model/New', '/content/drive/MyDrive/Colab Notebooks/Data/tmp/model/VAEModel/js')

Writing weight file /content/drive/MyDrive/Colab Notebooks/Data/tmp/model/VAEModel/js/model.json...
