We follow the same procedure as in the base NCF model, but additionally feed each user's most salient personality trait into the neural model.
<br>
According to the paper, 'In this model, we introduce a 4-dimensional personality vector for each of the five types of personalities, which are learned during training. We treat the most salient personality as the user’s personality label and concatenate the corresponding personality vector with the user’s latent vector.'

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate

In [2]:
# Read the preprocessed ratings and personality tables, then join them on
# user_id (inner join, which is the pandas merge default).
df_ratings = pd.read_csv('data/preprocessed/ratings.csv')
df_personality = pd.read_csv('data/preprocessed/personality.csv')
df = pd.merge(left=df_ratings, right=df_personality, on='user_id')

In [20]:
# Count the distinct users and movies present in the ratings table. These
# counts are used as the vocabulary sizes of the user / movie Embedding layers.
# NOTE(review): that is only valid if user_id and movie_id are contiguous
# 0-based integers (max id == count - 1) — TODO confirm against preprocessing.
df_ratings = pd.read_csv('data/preprocessed/ratings.csv')
number_of_unique_users = len(df_ratings['user_id'].unique())
number_of_unique_movies = len(df_ratings['movie_id'].unique())

# One class per personality trait column (openness, agreeableness,
# neuroticism, conscientiousness, extraversion).
number_of_personality_dims = 5

In [25]:
# Build the NCF model extended with a learned "most salient personality"
# vector: user, movie and personality embeddings are concatenated and passed
# through an MLP that regresses the rating.

# Size of the user and movie latent vectors
embedding_dim = 16
# Size of each learned personality vector; the paper quoted at the top of this
# notebook uses a 4-dimensional vector per personality type.
personality_embedding_dim = 4

# Define the input layers. Each input carries one integer per sample:
# a user id, a movie id, and the index (0..number_of_personality_dims-1) of the
# user's most salient personality trait.
user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))
personality_input = Input(shape=(1,))

# Create user and movie embeddings
user_embedding = Embedding(number_of_unique_users, embedding_dim)(user_input)
user_embedding = Flatten()(user_embedding)

movie_embedding = Embedding(number_of_unique_movies, embedding_dim)(movie_input)
movie_embedding = Flatten()(movie_embedding)

# Create personality embedding.
# The most salient trait is already reduced to an index before it is fed to
# the model, so it is looked up directly. Applying tf.argmax(..., axis=1) to
# this (batch, 1) input would always yield 0 (the axis has a single element),
# collapsing every sample onto the same personality vector.
personality_embedding = Embedding(number_of_personality_dims, personality_embedding_dim)(personality_input)
personality_embedding = Flatten()(personality_embedding)

# Concatenate user, movie, and personality embeddings
concat = Concatenate()([user_embedding, movie_embedding, personality_embedding])

# MLP: three ReLU hidden layers followed by a linear output unit
layer1 = Dense(64, activation='relu')(concat)
layer2 = Dense(32, activation='relu')(layer1)
layer3 = Dense(16, activation='relu')(layer2)
output = Dense(1)(layer3)  # Output layer for rating prediction

# Create the model
model = Model(inputs=[user_input, movie_input, personality_input], outputs=output)

In [26]:
# Compile the model
# Rating prediction is treated as regression: minimise mean squared error
# with the Adam optimizer (default settings).
model.compile(optimizer='adam', loss='mean_squared_error')

In [29]:
# Load the training split and extract the arrays that are fed to the model.
df_train = pd.read_csv('data/preprocessed/ratings-train.csv')

train_user_ids = df_train['user_id'].to_numpy()
train_movie_ids = df_train['movie_id'].to_numpy()
train_ratings = df_train['rating'].to_numpy()

# Personality score columns; their order defines the trait index used below
# (0 = openness, 1 = agreeableness, 2 = neuroticism, 3 = conscientiousness,
# 4 = extraversion).
personality_columns = ['openness', 'agreeableness', 'neuroticism', 'conscientiousness', 'extraversion']
train_personality = df_train.loc[:, personality_columns].to_numpy()

# Most salient trait per row = index of the highest-scoring column
# (ties resolve to the first maximum).
train_personality_input = train_personality.argmax(axis=1)

In [30]:
# Train the model
# Fit for 10 epochs with mini-batches of 64 samples. Inputs are passed in the
# same order as the model's input list: user id, movie id, personality index.
model.fit(
    x=[train_user_ids, train_movie_ids, train_personality_input],
    y=train_ratings,
    batch_size=64,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x20acca04590>

In [31]:
# Persist the trained model to disk in HDF5 format (chosen by the .h5 suffix).
model.save(filepath='saved-models/ncf-most-salient-personality.h5')