In [1]:
import time
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF

In [2]:
%%time
# Load the MovieLens 100k ratings file: tab-separated, no header row, one
# (user, item, rating, timestamp) record per line.
dataset = pd.read_csv(
  'http://files.grouplens.org/datasets/movielens/ml-100k/u.data',
  names=["user_id", "item_id", "rating", "timestamp"],
  sep="\t",
)

# np.unique returns the distinct values already sorted ascending, so no
# intermediate Python list or separate sort is needed.
uq_users = np.unique(dataset["user_id"].to_numpy())
uq_items = np.unique(dataset["item_id"].to_numpy())
n_users = len(uq_users)
n_items = len(uq_items)

CPU times: user 54.2 ms, sys: 24.1 ms, total: 78.3 ms
Wall time: 362 ms


In [3]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Embedding, Flatten, Input, Dot, Dense, Dropout, Concatenate
from tensorflow.keras import layers, regularizers, optimizers

# Ratings are divided by MAX_RATING so the regression target is a fraction of
# the maximum rating.
MAX_RATING = 5

# Keep the untouched ratings in `rating_raw` and always derive the scaled column
# from it: re-running this cell then yields the same values instead of dividing
# an already-scaled column again.
if 'rating_raw' not in dataset.columns:
  dataset['rating_raw'] = dataset['rating']
dataset['rating'] = dataset['rating_raw'] / MAX_RATING

# Number of recommendations kept per user, and the matching rank labels 1..topk.
topk = 10
rank_list = list(range(1, topk + 1))

In [4]:
class MLP(Model):
  """MLP recommender scoring (user, item) pairs from concatenated embeddings.

  The user id and item id are each looked up in their own embedding table, the
  two vectors are concatenated, and the result is passed through three ReLU
  dense layers (with dropout in between) and a final 1-unit sigmoid layer.

  Args:
    num_users: Number of rows in the user embedding table (`input_dim`). Every
      user index fed to the model must be in `[0, num_users)`.
    num_items: Number of rows in the item embedding table (`input_dim`). Every
      item index fed to the model must be in `[0, num_items)`.
    layers: Sequence of at least four sizes. `layers[0]` is the width of each
      embedding; `layers[1]`, `layers[2]`, `layers[3]` are the unit counts of
      the three hidden dense layers.
    reg_layers: Sequence of at least four L2 coefficients. `reg_layers[0]`
      regularizes both embedding tables; `reg_layers[1..3]` regularize the
      kernels of the three hidden dense layers.
  """

  # Tuple defaults replace the former list defaults so the default objects can
  # never be mutated between instantiations; indexing behaves identically.
  # Note: inside __init__ the `layers` parameter shadows the
  # `tensorflow.keras.layers` module imported by this notebook.
  def __init__(self, num_users, num_items, layers=(50, 100, 50, 1), reg_layers=(1e-6, 1e-6, 1e-6, 1e-6)):
    super().__init__()
    self.MLP_Embedding_User = Embedding(
      input_dim=num_users,
      output_dim=layers[0],
      name='user_embedding',
      embeddings_initializer='random_uniform',
      embeddings_regularizer=regularizers.l2(reg_layers[0])
    )
    self.MLP_Embedding_Item = Embedding(
      input_dim=num_items,
      output_dim=layers[0],
      name='item_embedding',
      embeddings_initializer='random_uniform',
      embeddings_regularizer=regularizers.l2(reg_layers[0]))
    self.flatten = Flatten()
    self.vector = Concatenate(axis=-1)
    # One Dropout instance is reused at every dropout point in call().
    self.dropout = Dropout(0.2)
    self.layer1 = Dense(
      layers[1],
      activation='relu',
      name='layer1',
      kernel_regularizer=regularizers.l2(reg_layers[1]),
    )
    self.layer2 = Dense(
      layers[2],
      activation='relu',
      name='layer2',
      kernel_regularizer=regularizers.l2(reg_layers[2]),
    )
    # NOTE(review): with the default layers[3] == 1 this is a single ReLU unit
    # feeding the sigmoid head; if that unit stops activating, the model output
    # becomes the same value for every input. Confirm this bottleneck is
    # intended.
    self.layer3 = Dense(
      layers[3],
      activation='relu',
      name='layer3',
      kernel_regularizer=regularizers.l2(reg_layers[3]),
    )
    self.layer4 = Dense(
      1,
      name='prediction',
      activation='sigmoid',
      kernel_initializer='lecun_uniform'
    )

  @tf.function
  def call(self, inputs, training=None):
    """Score a batch of (user, item) pairs.

    Args:
      inputs: Indexable pair; `inputs[0]` holds user indices and `inputs[1]`
        holds item indices.
      training: Keras training flag, forwarded to the dropout layer so dropout
        is only active while training. Keras supplies it during `fit` /
        `predict`; it may be omitted by direct callers.

    Returns:
      The output of the final 1-unit sigmoid layer, one score per input pair.
    """
    # Embedding
    MLP_Embedding_User = self.MLP_Embedding_User(inputs[0])
    MLP_Embedding_Item = self.MLP_Embedding_Item(inputs[1])

    # flatten
    user_latent = self.flatten(MLP_Embedding_User)
    item_latent = self.flatten(MLP_Embedding_Item)

    # concatenation of embedding layers
    x = self.vector([user_latent, item_latent])
    x = self.dropout(x, training=training)

    # MLP
    x = self.layer1(x)
    x = self.dropout(x, training=training)
    x = self.layer2(x)
    x = self.dropout(x, training=training)
    x = self.layer3(x)
    output = self.layer4(x)

    return output

In [5]:
# The raw MovieLens ids are fed to the embeddings as indices, and an Embedding
# with input_dim == n only accepts indices 0..n-1. Size each table from the
# largest id (+1) rather than from the number of distinct ids, otherwise the
# highest user/item id falls outside its table.
n_user_rows = int(dataset.user_id.max()) + 1
n_item_rows = int(dataset.item_id.max()) + 1
model = MLP(n_user_rows, n_item_rows)

# `learning_rate` is the supported name; the `lr` alias is deprecated and emits
# a warning when the optimizer is constructed.
model.compile(optimizer=optimizers.Adam(learning_rate=0.001), loss='mean_squared_error')
history = model.fit([dataset.user_id, dataset.item_id], dataset.rating, epochs=10)

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/10


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
%%time
# Build the top-k recommendation table: for every user, score all items the
# user has not rated yet and keep the `topk` highest-scoring ones.

# Items each user has already rated, computed once up front instead of
# boolean-filtering the whole ratings frame on every loop iteration.
rated_items_by_user = dataset.groupby('user_id')['item_id'].unique()

# Per-user results are collected in a list and concatenated once at the end;
# growing a DataFrame with DataFrame.append inside the loop copies the whole
# frame each time, and that method no longer exists in pandas >= 2.0.
recommend_frames = []
for user_id in uq_users:
  # Candidates = catalogue items minus the ones this user already rated.
  candidate_items = np.setdiff1d(uq_items, rated_items_by_user.loc[user_id])
  if candidate_items.size == 0:
    # Nothing left to recommend for a user who rated every item.
    continue

  user_column = np.full(candidate_items.shape, user_id)
  scores = model.predict([user_column, candidate_items], verbose=0)

  df_predict = pd.DataFrame({
    'user_id': user_column,
    'item_id': candidate_items,
    'score': np.ravel(scores),
  })
  df_recommend = df_predict.sort_values('score', ascending=False).head(topk).copy()
  # Slice the rank labels so users with fewer than `topk` candidates do not
  # trigger a length-mismatch error.
  df_recommend['rank'] = rank_list[:len(df_recommend)]
  recommend_frames.append(df_recommend)

result_columns = ['user_id', 'item_id', 'score', 'rank']
if recommend_frames:
  df_recommend_list = pd.concat(recommend_frames, ignore_index=True)[result_columns]
else:
  df_recommend_list = pd.DataFrame(columns=result_columns)

CPU times: user 2min 31s, sys: 14.8 s, total: 2min 46s
Wall time: 2min 44s


In [7]:
# Display the per-user top-k recommendation table (user_id, item_id, score, rank).
# NOTE(review): the saved output of this cell shows the same score on every
# visible row, so the ranking among those rows is arbitrary. This suggests the
# trained model is emitting a constant prediction. Confirm before relying on
# these recommendations.
df_recommend_list

Unnamed: 0,user_id,item_id,score,rank
0,1,273,0.705599,1
1,1,1330,0.705599,2
2,1,1218,0.705599,3
3,1,1217,0.705599,4
4,1,1216,0.705599,5
...,...,...,...,...
9425,943,1177,0.705599,6
9426,943,1176,0.705599,7
9427,943,1175,0.705599,8
9428,943,1174,0.705599,9
