In [27]:
# Code adapted from https://medium.com/@jdwittenauer/deep-learning-with-keras-recommender-systems-e7b99cb29929
# Relevant libraries: tensorflow, numpy, pandas
%matplotlib inline

import tensorflow as tf
import numpy as np
import pandas as pd

from keras.models import Model, model_from_json, load_model
from keras.layers import Input, Reshape, Dot, Add, Activation, Lambda, Concatenate, Dense
from keras.layers.embeddings import Embedding
from keras.optimizers import adam_v2
from keras.regularizers import l2

In [22]:
# Reads past data
ratings = pd.read_csv('ratings.csv')

In [23]:
# Sets up some variables
k = 1
n_users = ratings['userId'].nunique() + k
n_movies = ratings['movieId'].max()
ratings['rating'] = ratings['rating'].values.astype(np.float32)
min_rating = 1
max_rating = 5
print(ratings)

n_users, n_movies, min_rating, max_rating

        userId  movieId  rating  timestamp
0            1     1193     5.0  978300760
1            1      661     3.0  978302109
2            1      914     3.0  978301968
3            1     3408     4.0  978300275
4            1     2355     5.0  978824291
...        ...      ...     ...        ...
999740    6038     1387     2.0  956707005
999741    6038     2700     1.0  956715051
999742    6038     2716     3.0  956707604
999743    6038     3396     3.0  956706827
999744    6038     1079     5.0  956707547

[999745 rows x 4 columns]


(6039, 3952, 1, 5)

In [24]:
# Sets up data
X = np.array(ratings['userId'].values), np.array(ratings['movieId'].values)
y = ratings['rating'].values
n_factors = 25

In [28]:
# Embedding layer class
class EmbeddingLayer:
    def __init__(self, n_items, n_factors):
        self.n_items = n_items
        self.n_factors = n_factors
    
    def __call__(self, x):
        x = Embedding(self.n_items, self.n_factors, embeddings_initializer='he_normal',
                      embeddings_regularizer=l2(1e-6))(x)
        x = Reshape((self.n_factors,))(x)
        return x

# Graph of Neural Network
# user --> u, u_1d
# movie --> m, m_1d
# sigmoid(u * m + u_1d + m_1d) --> output
def NeuralNetwork(n_users, n_movies, n_factors, min_rating, max_rating):
    user = Input(shape=(1,))
    u = EmbeddingLayer(n_users, n_factors)(user)
    u_1d = EmbeddingLayer(n_users, 1)(user)
    
    movie = Input(shape=(1,))
    m = EmbeddingLayer(n_movies, n_factors)(movie)
    m_1d = EmbeddingLayer(n_movies, 1)(movie)
    
    x = Dot(axes=1)([u, m])
    x = Add()([x, u_1d, m_1d])
    x = Activation('sigmoid')(x)
    x = Lambda(lambda x: x * (max_rating - min_rating) + min_rating)(x)
    model = Model(inputs=[user, movie], outputs=x)
    opt = adam_v2.Adam(learning_rate=0.001)
    model.compile(loss='mae', optimizer=opt)
    return model

In [26]:
# Instantiate model and fit to data
model = NeuralNetwork(n_users+1, n_movies+1, n_factors, min_rating, max_rating)
model.summary()
history = model.fit(x=X, y=y, batch_size = 64,epochs=4,
                    verbose=1)
# model.evaluate(x=X, y=y, batch_size=64)

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_7 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_8 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding_18 (Embedding)       (None, 1, 25)        151000      ['input_7[0][0]']                
                                                                                                  
 embedding_20 (Embedding)       (None, 1, 25)        98825       ['input_8[0][0]']                
                                                                                            

KeyboardInterrupt: 

In [47]:
# Evaluate model (on training set)
model.evaluate(x=X, y=y, batch_size=64)



0.5941396355628967

In [48]:
# Save model to mf.h5
f = 3
tf.keras.models.save_model(model, 'mf' + str(f) + '.h5')

  layer_config = serialize_layer_fn(layer)


In [29]:
# Load in a model
g = 3
upload_model = tf.keras.models.load_model('mf' + str(g) + '.h5', custom_objects={'EmbeddingLayer': EmbeddingLayer})

In [30]:
# Evaluate loaded model (on training set)
upload_model.evaluate(x=X, y=y, batch_size=64)



0.5941396355628967

In [62]:
# Fit test data
# Website STEP 1

# Given data about new user (used in training)
# Do this for website 
new_ratings = pd.read_csv('new_user_ratings.csv')
X_new = np.array(new_ratings['userId'].values), np.array(new_ratings['movieId'].values)
y_new = new_ratings['rating'].values

# Evaluative data about new user
# Not relevant for website
new_ratings_eval = pd.read_csv('new_user_test.csv')
X_eval = np.array(new_ratings_eval['userId'].values), np.array(new_ratings_eval['movieId'].values)
y_eval = new_ratings_eval['rating'].values

# the number of epochs can be adjusted, 10 seems to work well
# IMPORTANT, this fits the existing model with the data about user 6039
upload_model.fit(x=X_new, y=y_new,epochs=10, validation_data = (X_eval, y_eval), verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x151fd1c10>

In [58]:
# Evaluate loaded model on non-training set
# Not relevant for website
new_ratings_eval = pd.read_csv('new_user_test.csv')
X_eval = np.array(new_ratings_eval['userId'].values), np.array(new_ratings_eval['movieId'].values)
y_eval = new_ratings_eval['rating'].values
upload_model.evaluate(x=X_eval, y=y_eval)



0.5715410113334656

In [1]:
# Code for making a specific prediction
# This returns the prediction for user new_user and movie new_movie
# Website STEP 2
model.predict([np.array([new_user]), np.array([new_movie])])

NameError: name 'model' is not defined

In [2]:
# A brief guide:
# The code in the second part of the program (mainly evaluation) is separate from the part of the program that trains the model
# It is not necessary to train the model with the train data every single time (it can be uploaded from a file)
# However, to upload from a file, you obviously need to download it at some point
# To do this: run all of the code parts from the start up to model.fit part and then run the save model code
# The train data consists of users 1-6038, the code tests user 6039
# Here, I have included the partial ratings for 6039 that we could use to make recommendations for user 6039 in new_user_ratings
# The last part of the code evaluates the model on the 6039 test in new_user_test
# In your website, you would want to use the predict function and iterate over all movies and see which ones have the highest predicted ratings
# In have labelled STEPS 1 and STEPS 2 which will hopefully help