In [1]:
# Code adapted from https://medium.com/@jdwittenauer/deep-learning-with-keras-recommender-systems-e7b99cb29929
%matplotlib inline

import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from keras.models import Model, model_from_json, load_model, Sequential
from keras.layers import Input, Reshape, Dot, Add, Activation, Lambda, Concatenate, Dense, Multiply, Conv1D, Flatten
from keras.layers.embeddings import Embedding
from keras.optimizers import adam_v2
from keras.regularizers import l2

In [2]:
# Load the ratings table and derive the sizes the model needs.
ratings = pd.read_csv('ratings.csv')

# Raw ids are passed straight to the Embedding layers as row indices, so both
# tables have to be sized from the LARGEST id, not from the number of distinct
# ids. If the id space has gaps, nunique() < max() and the highest ids would
# index past the end of the table. When ids are contiguous starting at 1 the
# two values coincide, so this is a no-op in that case.
n_users = int(ratings['userId'].max())
n_movies = int(ratings['movieId'].max())

# Keras trains in float32; cast the target once up front.
ratings['rating'] = ratings['rating'].astype(np.float32)

# Bounds of the rating scale; handed to MF() to rescale the sigmoid output.
min_rating = 1.0
max_rating = 5.0

n_users, n_movies, min_rating, max_rating

(6038, 3952, 1.0, 5.0)

In [3]:
# Build the (userId, movieId) feature matrix and the rating target, then
# split them reproducibly (fixed random_state).
# NOTE(review): test_size=0.8 holds out 80% of the rows, so the model is
# trained on only 20% of the data -- confirm this is intentional and not a
# train/test fraction mix-up.
X = ratings[['userId', 'movieId']].to_numpy()
y = ratings['rating'].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.8,
    random_state=42,
)
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((199949, 2), (799796, 2), (199949,), (799796,))

In [4]:
# n_factors = length of each user and each movie embedding vector
n_factors = 25

# The model has two separate inputs, so hand Keras one array per input:
# column 0 of X holds the user ids, column 1 the movie ids.
X_train_array = [X_train[:, col] for col in (0, 1)]
X_test_array = [X_test[:, col] for col in (0, 1)]

In [5]:
# Embedding layer class
class EmbeddingLayer:
    """Callable helper that attaches an embedding lookup to a Keras tensor.

    Every call creates a brand-new ``Embedding`` layer (he_normal init,
    L2 regularisation of 1e-6) followed by a ``Reshape`` to
    ``(n_factors,)``, so two calls never share weights.

    Args:
        n_items: number of rows in the embedding table.
        n_factors: length of each embedding vector.
    """

    def __init__(self, n_items, n_factors):
        self.n_items, self.n_factors = n_items, n_factors

    def __call__(self, x):
        """Look up ``x`` in a fresh embedding table and return the reshaped result."""
        lookup = Embedding(
            self.n_items,
            self.n_factors,
            embeddings_initializer='he_normal',
            embeddings_regularizer=l2(1e-6),
        )
        embedded = lookup(x)
        # Reshape the lookup result to a flat (n_factors,) vector per sample.
        return Reshape((self.n_factors,))(embedded)

def MF(n_users, n_movies, n_factors, min_rating, max_rating):
    """Build and compile a biased matrix-factorisation rating model.

    The predicted rating is ``sigmoid(u . m + b_u + b_m)`` rescaled linearly
    from (0, 1) to (min_rating, max_rating), where ``u``/``m`` are the user
    and movie embedding vectors and ``b_u``/``b_m`` are 1-d bias embeddings.

    Args:
        n_users: number of rows in the user embedding tables.
        n_movies: number of rows in the movie embedding tables.
        n_factors: length of the user and movie embedding vectors.
        min_rating: lower end of the output rating range.
        max_rating: upper end of the output rating range.

    Returns:
        A Keras ``Model`` taking ``[user_ids, movie_ids]`` and compiled with
        MAE loss and Adam (learning rate 0.001).
    """

    def _to_rating_scale(unit_score):
        # Stretch the sigmoid output onto the rating range.
        return unit_score * (max_rating - min_rating) + min_rating

    # User side: latent vector plus a scalar bias, both looked up by user id.
    user_in = Input(shape=(1,))
    user_vec = EmbeddingLayer(n_users, n_factors)(user_in)
    user_bias = EmbeddingLayer(n_users, 1)(user_in)

    # Movie side: same structure, looked up by movie id.
    movie_in = Input(shape=(1,))
    movie_vec = EmbeddingLayer(n_movies, n_factors)(movie_in)
    movie_bias = EmbeddingLayer(n_movies, 1)(movie_in)

    interaction = Dot(axes=1)([user_vec, movie_vec])
    logit = Add()([interaction, user_bias, movie_bias])
    unit_score = Activation('sigmoid')(logit)
    rating = Lambda(_to_rating_scale)(unit_score)

    model = Model(inputs=[user_in, movie_in], outputs=rating)
    model.compile(loss='mae', optimizer=adam_v2.Adam(learning_rate=0.001))
    return model

In [7]:
# Build the model and train it.
# The embedding tables get one extra row (+1) because the raw ids are used
# directly as indices -- presumably the ids are 1-based; verify against the data.
model = MF(n_users + 1, n_movies + 1, n_factors, min_rating, max_rating)
model.summary()

# The test split doubles as the validation set that is reported every epoch.
history = model.fit(
    x=X_train_array,
    y=y_train,
    epochs=5,
    batch_size=64,
    verbose=1,
    validation_data=(X_test_array, y_test),
)

2022-02-13 18:49:03.301515: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_2 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding (Embedding)          (None, 1, 25)        150975      ['input_1[0][0]']                
                                                                                                  
 embedding_2 (Embedding)        (None, 1, 25)        98825       ['input_2[0][0]']                
                                                                                              

In [8]:
# Score the trained model on the test split; the value shown is the
# compiled loss (MAE).
model.evaluate(
    x=X_test_array,
    y=y_test,
    batch_size=64,
)



0.7455586194992065