In [1]:
from tensorflow.keras.layers import Softmax, Multiply
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Reshape

In [2]:
# Load the preprocessed ratings and personality tables, then join them on
# user_id (inner join: only user_ids present in both tables are kept)
df_ratings = pd.read_csv('data/preprocessed/ratings.csv')
df_personality = pd.read_csv('data/preprocessed/personality.csv')
df = pd.merge(df_ratings, df_personality, how='inner', on='user_id')

In [6]:
# Size the embedding tables from the ratings frame loaded from
# data/preprocessed/ratings.csv in the data-loading cell.
#
# An Embedding layer is indexed by the raw ID, so its input_dim has to be
# (largest ID + 1), not the number of distinct IDs. The two only coincide
# when the IDs form a gap-free 0..N-1 range; with 1-based or sparse IDs a
# table sized by the distinct count is too small for the largest IDs.
num_users = int(df_ratings['user_id'].max()) + 1
num_movies = int(df_ratings['movie_id'].max()) + 1

# Number of personality scores fed to the model for each example
num_personality_dims = 5

In [7]:
# Width of each learned user / movie embedding vector
embedding_dim = 16

# Inputs: one ID per example for the user and the movie, plus the
# personality scores (num_personality_dims values per example)
user_input = Input(shape=(1,))
movie_input = Input(shape=(1,))
personality_input = Input(shape=(num_personality_dims,))

# Look up the embeddings and drop the length-1 axis:
# (batch, 1, embedding_dim) -> (batch, embedding_dim)
user_embedding = Embedding(num_users, embedding_dim)(user_input)
user_embedding = Flatten()(user_embedding)

movie_embedding = Embedding(num_movies, embedding_dim)(movie_input)
movie_embedding = Flatten()(movie_embedding)

# Rescale each row of personality scores so that the row sums to 100.
# divide_no_nan yields 0 where the row sum is 0; a plain division would
# produce NaN there, and a single NaN input turns the loss (and then every
# weight) into NaN during training.
personality_sum = tf.reduce_sum(personality_input, axis=1, keepdims=True)
personality_normalized = tf.math.divide_no_nan(personality_input, personality_sum) * 100

# Concatenate the user embedding, the movie embedding and the rescaled
# personality scores into one feature vector per example.
# (The personality tensor is already 2-D, so Flatten leaves its shape as is.)
personality_vector = Flatten()(personality_normalized)
concat = Concatenate()([user_embedding, movie_embedding, personality_vector])

# MLP head: three ReLU hidden layers followed by a single linear output unit
layer1 = Dense(64, activation='relu')(concat)
layer2 = Dense(32, activation='relu')(layer1)
layer3 = Dense(16, activation='relu')(layer2)
output = Dense(1)(layer3)  # Linear output: the predicted rating

# Assemble the model; inputs must be fed in this order: user, movie, personality
model = Model(inputs=[user_input, movie_input, personality_input], outputs=output)

In [8]:
# Configure training: Adam optimizer minimizing the mean squared error
model.compile(optimizer='adam', loss='mean_squared_error')

In [10]:
# Load the training split and pull out the arrays that will feed the model
df_train = pd.read_csv('data/preprocessed/ratings-train.csv')

train_user_ids = df_train['user_id'].values
train_movie_ids = df_train['movie_id'].values
train_ratings = df_train['rating'].values

# All five personality scores, one column each, in a fixed column order
personality_columns = ['openness', 'agreeableness', 'neuroticism', 'conscientiousness', 'extraversion']
train_personality = df_train[personality_columns].values

# .values already returns an ndarray, so keep this name as an alias rather
# than duplicating the whole array with np.array(...)
train_personality_input = train_personality

In [11]:
# Fit for 10 epochs in mini-batches of 64; the input arrays are passed as
# user IDs, movie IDs, personality scores, with the ratings as the target
model.fit(
    x=[train_user_ids, train_movie_ids, train_personality],
    y=train_ratings,
    batch_size=64,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x273085ba0d0>

In [12]:
  # Persist the trained model to an .h5 file (path is relative to the working directory)
  model.save(filepath='ncf-hard-labeled-personality.h5')