In [7]:
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate

import pandas as pd

In [8]:
# Load the complete ratings table and count how many distinct users and movies it holds
df_ratings = pd.read_csv(filepath_or_buffer='data/preprocessed/ratings.csv')

# `unique()` returns one entry per distinct value, so its length is the distinct count
number_of_unique_users = len(df_ratings['user_id'].unique())
number_of_unique_movies = len(df_ratings['movie_id'].unique())

print('Unique users: ', number_of_unique_users)
print('Unique movies: ', number_of_unique_movies)

Unique users:  1820
Unique movies:  35196


In [9]:
# Define the neural net architecture
# The authors specify that they use a 4-layer MLP and a 16-dimensional user and item embedding.
# There are no other specifics about the architecture, so we assume a simple funnel to begin with:
# three ReLU hidden layers (64 -> 32 -> 16) followed by one linear output unit.

# Seed TensorFlow's global random generator before any layer is created, so that a
# Restart & Run All starts from the same initial weights.
tf.random.set_seed(42)

# Define the embedding dimension
embedding_dim = 16

# Size the embedding tables.
# An Embedding layer only accepts indices in [0, input_dim), so input_dim must be at least
# (largest id + 1). The count of distinct ids is only large enough when the ids are
# contiguous and start at 0; taking the max of both keeps the original size in that case
# and avoids out-of-range lookups when ids are 1-based or have gaps.
# NOTE(review): assumes user_id / movie_id are non-negative integers - confirm against preprocessing.
user_vocab_size = max(number_of_unique_users, int(df_ratings['user_id'].max()) + 1)
movie_vocab_size = max(number_of_unique_movies, int(df_ratings['movie_id'].max()) + 1)

# Define the input layers (one id per sample for each input)
user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))

# Create user and movie embeddings; Flatten removes the length-1 sequence axis
user_embedding = Embedding(user_vocab_size, embedding_dim)(user_input)
user_embedding = Flatten()(user_embedding)

movie_embedding = Embedding(movie_vocab_size, embedding_dim)(movie_input)
movie_embedding = Flatten()(movie_embedding)

# Concatenate user and movie embeddings into a single feature vector
concat = Concatenate()([user_embedding, movie_embedding])

# Create a 4-layer MLP
layer1 = Dense(64, activation='relu')(concat)
layer2 = Dense(32, activation='relu')(layer1)
layer3 = Dense(16, activation='relu')(layer2)
output = Dense(1)(layer3)  # Output layer for rating prediction (no activation)

# Create the model
model = Model(inputs=[user_input, movie_input], outputs=output)

In [10]:
# Configure training: optimise with Adam, using mean squared error as the loss
model.compile(
    optimizer='adam',
    loss='mean_squared_error',
)

In [11]:
# Read the training ratings file into a DataFrame
df_train = pd.read_csv(filepath_or_buffer='data/preprocessed/ratings-train.csv')

In [14]:
# Train the model for 10 epochs with a batch size of 64

# Pull the model inputs (user and movie ids) and the target (rating) out as NumPy arrays
train_user_ids = df_train['user_id'].to_numpy()
train_movie_ids = df_train['movie_id'].to_numpy()
train_ratings = df_train['rating'].to_numpy()

# Keep the returned History object instead of discarding it: `history.history` holds the
# per-epoch loss values, and binding the result stops the cell from echoing an object repr.
history = model.fit(
    x=[train_user_ids, train_movie_ids],
    y=train_ratings,
    epochs=10,
    batch_size=64,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1e038166f90>

In [15]:
model.save('saved-models/ncf.h5')