In [1]:
import time
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.decomposition import NMF

In [2]:
%%time
dataset = pd.read_csv('http://files.grouplens.org/datasets/movielens/ml-100k/u.data', names=["user_id", "item_id", "rating", "timestamp"], sep="\t")
uq_users = np.sort(dataset.user_id.unique().tolist())
uq_items = np.sort(dataset.item_id.unique().tolist())
n_users = len(uq_users)
n_items = len(uq_items)

CPU times: user 30.1 ms, sys: 31.2 ms, total: 61.3 ms
Wall time: 1.08 s


In [3]:
import tensorflow as tf
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Embedding, Flatten, Input, Dot, Dense, Dropout, Concatenate
from tensorflow.keras import layers, regularizers, optimizers

dataset['rating'] /= 5
topk = 10
rank_list = [i+1 for i in range(topk)]

In [4]:
class NeuMF(Model):
  def __init__(self, num_users, num_items, mf_dim=50, layers=[50, 100, 50, 1], reg_layers=[1e-6, 1e-6, 1e-6, 1e-6], reg_mf=[1e-6, 1e-6]):
    super(NeuMF, self).__init__()
    self.MF_Embedding_User = Embedding(
      input_dim=num_users,
      output_dim=mf_dim,
      name='mf_embedding_user',
      embeddings_initializer='random_uniform',
      embeddings_regularizer=regularizers.l2(reg_mf[0]),
    )
    self.MF_Embedding_Item = Embedding(
      input_dim=num_items,
      output_dim=mf_dim,
      name='mf_embedding_item',
      embeddings_initializer='random_uniform',
      embeddings_regularizer=regularizers.l2(reg_mf[1]),
    )
    self.MLP_Embedding_User = Embedding(
      input_dim=num_users,
      output_dim=int(layers[0] / 2),
      name='mlp_embedding_user',
      embeddings_initializer='random_uniform',
      embeddings_regularizer=regularizers.l2(reg_layers[0]),
    )
    self.MLP_Embedding_Item = Embedding(
      input_dim=num_items,
      output_dim=int(layers[0] / 2),
      name='mlp_embedding_item',
      embeddings_initializer='random_uniform',
      embeddings_regularizer=regularizers.l2(reg_layers[0]),
    )
    self.flatten = Flatten()
    self.mf_vector = Dot(axes=1)
    self.mlp_vector = Concatenate(axis=-1)
    self.dropout = Dropout(0.2)
    self.layer1 = Dense(
      layers[1],
      name='layer1',
      activation='relu',
      kernel_regularizer=regularizers.l2(reg_layers[1]),
    )
    self.layer2 = Dense(
      layers[2],
      name='layer2',
      activation='relu',
      kernel_regularizer=regularizers.l2(reg_layers[2]),
    )
    self.layer3 = Dense(
      layers[3],
      name='layer3',
      activation='relu',
      kernel_regularizer=regularizers.l2(reg_layers[3]),
    )
    self.predict_vector = Concatenate(axis=-1)
    self.layer4 = Dense(
      1,
      activation='sigmoid',
      kernel_initializer='lecun_uniform',
      name='prediction'
    )

  @tf.function
  def call(self, inputs):
    # Embedding
    MF_Embedding_User = self.MF_Embedding_User(inputs[0])
    MF_Embedding_Item = self.MF_Embedding_Item(inputs[1])
    MLP_Embedding_User = self.MLP_Embedding_User(inputs[0])
    MLP_Embedding_Item = self.MLP_Embedding_Item(inputs[1])

    # MF(GMF)
    mf_user_latent = self.flatten(MF_Embedding_User)
    mf_item_latent = self.flatten(MF_Embedding_Item)
    mf_vector = self.mf_vector([mf_user_latent, mf_item_latent])

    # MLP
    mlp_user_latent = self.flatten(MLP_Embedding_User)
    mlp_item_latent = self.flatten(MLP_Embedding_Item)
    mlp_vector = self.mlp_vector([mlp_user_latent, mlp_item_latent])
    mlp_vector = self.dropout(mlp_vector)
    mlp_vector = self.layer1(mlp_vector)
    mlp_vector = self.dropout(mlp_vector)
    mlp_vector = self.layer2(mlp_vector)
    mlp_vector = self.dropout(mlp_vector)
    mlp_vector = self.layer3(mlp_vector)

    # NeuMF
    vector = self.predict_vector([mf_vector, mlp_vector])
    output = self.layer4(vector)

    return output

In [5]:
%%time
model = NeuMF(n_users, n_items)
model.compile(optimizer=optimizers.Adam(lr=0.001), loss='mean_squared_error')
history = model.fit([dataset.user_id, dataset.item_id], dataset.rating, epochs=10)

  super(Adam, self).__init__(name, **kwargs)


Epoch 1/10


  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)
  "shape. This may consume a large amount of memory." % value)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 2min 14s, sys: 14.7 s, total: 2min 28s
Wall time: 3min 27s


In [6]:
%%time
df_recommend_list = pd.DataFrame(columns=['user_id', 'item_id', 'score', 'rank'])
for user_id in uq_users:
  i_list = list(set(uq_items) - set(dataset[dataset['user_id']==user_id]['item_id'].tolist()))
  u_list = [user_id] * len(i_list)
  df_predict = pd.DataFrame()
  df_predict['user_id'] = u_list
  df_predict['item_id'] = i_list
  prediction = model.predict([df_predict.user_id, df_predict.item_id])
  df_predict['score'] = prediction
  df_recommend = df_predict.sort_values('score', ascending=False)[:topk]
  df_recommend['rank'] = rank_list
  df_recommend_list = df_recommend_list.append(df_recommend, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


CPU times: user 2min 23s, sys: 15 s, total: 2min 38s
Wall time: 2min 37s


In [7]:
df_recommend_list

Unnamed: 0,user_id,item_id,score,rank
0,1,285,0.993349,1
1,1,313,0.990448,2
2,1,430,0.990061,3
3,1,462,0.984830,4
4,1,408,0.984652,5
...,...,...,...,...
9425,943,1177,0.632554,6
9426,943,1176,0.632554,7
9427,943,1175,0.632554,8
9428,943,1174,0.632554,9
