In [3]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Input, Dense
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization, Dense
import keras_nlp

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import copy
# Suppress every warning for the rest of the session (no category is given,
# so this also hides deprecation and runtime warnings from all libraries).
warnings.simplefilter(action='ignore')

1. If movie 2 is watched after movie 1 (not necessarily immediately after), movie 2's rating is N(1, 1). Otherwise, movie 2's rating is N(5, 1).
2. If movie 4 is watched right after movie 3, movie 4's rating is N(1, 1). If movie 3 is watched right after movie 4, movie 3's rating is N(1, 1).
3. If movie 5 is watched last, its rating is N(5, 1).
4. In every other case, the rating is N(3, 1).

In [5]:
# Simulation settings
N = 10000  # Number of users
M = 5  # Number of movies
sigma = 1  # Standard deviation for ratings

np.random.seed(42)


def _mean_rating(order, position):
    """Return the mean of the rating distribution for the movie at `position`.

    `order` is one user's viewing order (movie ids 1..M) and `position` is the
    0-based slot of the movie being rated. The mean depends on the movie id and
    on what was watched before it.
    """
    current = order[position]
    previous = order[position - 1] if position > 0 else None

    if current == 2:
        # Movie 2 is penalised when movie 1 appears anywhere earlier.
        return 1 if 1 in order[:position] else 5
    if current == 4:
        # Movie 4 is penalised only when movie 3 was the immediately preceding movie.
        return 1 if previous == 3 else 3
    if current == 3:
        # Movie 3 is penalised only when movie 4 was the immediately preceding movie.
        return 1 if previous == 4 else 3
    if current == 5:
        # Movie 5 gets a bonus when it is the last movie watched.
        return 5 if position == M - 1 else 3
    # Every remaining movie uses the neutral default.
    return 3


orders = []  # Viewing orders for all users
ratings = []  # Ratings for all users, aligned with `orders`

for user_idx in range(N):
    # Draw the viewing order first, then one rating per slot, left to right,
    # so the sequence of random draws is: permutation, rating_0, ..., rating_{M-1}.
    viewing_order = np.random.permutation(M) + 1
    orders.append(viewing_order)
    ratings.append([
        np.random.normal(_mean_rating(viewing_order, slot), sigma)
        for slot in range(len(viewing_order))
    ])

ratings = np.array(ratings)
orders = np.array(orders)

# Example output
print("Example user order:", orders[0])
print("Ratings for the user:", ratings[0])

Example user order: [2 5 3 1 4]
Ratings for the user: [5.47386083 4.36845012 2.08317316 2.87585282 0.98903711]


In [6]:
# Flatten the per-user arrays into long format: one record per (user, movie) event.
# User ids and timestamps are 1-based; the timestamp is the position in the viewing order.
records = [
    {
        "user_id": user_idx + 1,
        "movie_id": watched_movie,
        "rating": ratings[user_idx][step - 1],
        "timestamp": step,
    }
    for user_idx in range(N)
    for step, watched_movie in enumerate(orders[user_idx], start=1)
]

data = pd.DataFrame(records)

In [7]:
# Display the long-format ratings table (one row per user/movie viewing event).
data

Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,2,5.473861,1
1,1,5,4.368450,2
2,1,3,2.083173,3
3,1,1,2.875853,4
4,1,4,0.989037,5
...,...,...,...,...
49995,10000,4,2.627712,1
49996,10000,1,3.590464,2
49997,10000,2,0.972798,3
49998,10000,3,4.267548,4


In [8]:
# Split by user so that no user contributes rows to both sets:
# user ids up to 7500 are used for training, ids from 7501 upward for testing.
train_user_rows = data['user_id'] <= 7500
test_user_rows = data['user_id'] >= 7501

data_train = data.loc[train_user_rows].reset_index(drop=True)
data_test = data.loc[test_user_rows].reset_index(drop=True)

In [9]:
def preprocess_data(df, mask_rating_token=None):
    """Expand each user's history into one masked-prediction sample per event.

    For a user with ``n`` rows (taken in the order they appear in ``df``), the
    sample for position ``idx`` contains:

    * movie:    the first ``idx + 1`` movie ids, right-padded with 0 to length ``n``
                e.g. ``[30, 0, 0]``, ``[30, 29, 0]``
    * rating:   the first ``idx`` ratings, then the mask token at position ``idx``,
                right-padded with 0 to length ``n``
                e.g. ``[mask, 0, 0]``, ``[4, mask, 0]``
    * length:   ``idx + 1`` (number of non-padded positions)
    * pred_idx: ``idx`` (position of the mask token)
    * target:   the true rating at position ``idx``

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``user_id``, ``movie_id`` and ``rating`` columns. Rows of a
        user are used in their existing order; no sorting is performed here.
    mask_rating_token : scalar, optional
        Value written at the position to predict. When omitted it is
        ``df['rating'].max() + 1``, i.e. it is derived from the frame passed in,
        so separate calls on different frames generally yield different tokens.
        Pass the same value to every call to keep the encoding consistent.

    Returns
    -------
    tuple of lists
        ``(movie, rating, length, pred_idx, target)``, all of equal length, with
        users in order of first appearance in ``df``.
    """
    movie = []
    rating = []
    length = []
    pred_idx = []
    target = []

    if mask_rating_token is None:
        mask_rating_token = df['rating'].max() + 1

    # A single groupby pass replaces one full-frame boolean filter per user.
    # sort=False keeps users in first-appearance order and leaves each user's
    # rows in their original order, matching the per-user filtering it replaces.
    for _, user_rows in df.groupby('user_id', sort=False):
        movie_list = list(user_rows['movie_id'])
        rating_list = list(user_rows['rating'])
        n_events = len(movie_list)
        target += rating_list

        for idx in range(n_events):
            padding = [0] * (n_events - idx - 1)
            movie.append(movie_list[:idx + 1] + padding)
            # Ratings strictly before idx are visible; the one at idx is masked.
            rating.append(rating_list[:idx] + [mask_rating_token] + padding)
            length.append(idx + 1)
            pred_idx.append(idx)

    return movie, rating, length, pred_idx, target

In [10]:
# Expand the training users into per-event masked samples.
(
    movie_train,
    rating_train,
    length_train,
    pred_idx_train,
    target_train,
) = preprocess_data(data_train)

# Same expansion for the held-out test users.
(
    movie_test,
    rating_test,
    length_test,
    pred_idx_test,
    target_test,
) = preprocess_data(data_test)

In [11]:
# Carve a 25% validation set out of the training samples. The split is done
# per sample, and the *_train names are overwritten with the reduced training
# portion, so re-running this cell alone shrinks the training set again.
(
    movie_train, movie_val,
    rating_train, rating_val,
    length_train, length_val,
    pred_idx_train, pred_idx_val,
    target_train, target_val,
) = train_test_split(
    movie_train,
    rating_train,
    length_train,
    pred_idx_train,
    target_train,
    test_size=0.25,
    random_state=42,
)

In [12]:
# Longest context (number of non-padded positions) seen in any of the three splits.
max_ctx = max(
    n_valid
    for split_lengths in (length_train, length_test, length_val)
    for n_valid in split_lengths
)

In [13]:
def _fit_to_width(sequences, width):
    """Return copies of `sequences` that each have exactly `width` items.

    Shorter sequences are right-padded with 0; sequences of `width` items or
    more are cut to their first `width` items.
    """
    return [
        seq + [0] * (width - len(seq)) if len(seq) < width else seq[:width]
        for seq in sequences
    ]


# Bring every movie sequence to the common context length.
movie_train = _fit_to_width(movie_train, max_ctx)
movie_val = _fit_to_width(movie_val, max_ctx)
movie_test = _fit_to_width(movie_test, max_ctx)

# Same treatment for the rating sequences.
rating_train = _fit_to_width(rating_train, max_ctx)
rating_val = _fit_to_width(rating_val, max_ctx)
rating_test = _fit_to_width(rating_test, max_ctx)

In [14]:
# Convert each split's Python lists to NumPy arrays. Within every group the
# order is: context movies, context ratings, length of the context,
# index of the masked token, target rating.
(
    movie_train_data,
    rating_train_data,
    length_train_data,
    pred_idx_train_data,
    target_train_data,
) = [
    np.array(values)
    for values in (movie_train, rating_train, length_train, pred_idx_train, target_train)
]

(
    movie_val_data,
    rating_val_data,
    length_val_data,
    pred_idx_val_data,
    target_val_data,
) = [
    np.array(values)
    for values in (movie_val, rating_val, length_val, pred_idx_val, target_val)
]

(
    movie_test_data,
    rating_test_data,
    length_test_data,
    pred_idx_test_data,
    target_test_data,
) = [
    np.array(values)
    for values in (movie_test, rating_test, length_test, pred_idx_test, target_test)
]

In [15]:
# Width of each embedding vector (movie and rating tables use the same width).
embedding_size = 32
# One row per distinct movie id plus one extra row; the movie sequences use 0 as padding.
movie_input_dim = data['movie_id'].nunique() + 1
# One row per distinct rating value plus two extra rows
# (presumably for the 0 padding value and the mask token - TODO confirm).
# NOTE(review): ratings here are continuous floats, so this table is sized by the
# number of distinct float values, and the float rating tensor below is passed to an
# Embedding layer, which looks rows up by integer index. Presumably the values are
# cast to integers before lookup (and negative ratings may map to invalid or wrapped
# indices) - confirm against the installed Keras version.
rating_input_dim = data['rating'].nunique() + 2

# Per-sample sequences of length max_ctx: movie ids (int32) and ratings (float32).
movie_input = layers.Input(shape=(max_ctx,), dtype=tf.int32, name="movie_input")  
rating_input = layers.Input(shape=(max_ctx,), dtype=tf.float32, name="rating_input")  

# Embed both sequences and join the two embeddings with Concatenate.
movie_embedding = layers.Embedding(input_dim=movie_input_dim, output_dim=embedding_size)(movie_input)
rating_embedding = layers.Embedding(input_dim=rating_input_dim, output_dim=embedding_size)(rating_input)
combined_embedding = layers.Concatenate()([movie_embedding, rating_embedding])

# Add a position embedding (keras_nlp PositionEmbedding) to the combined embedding.
position_embedding = keras_nlp.layers.PositionEmbedding(sequence_length=max_ctx)(combined_embedding)
final_embedding = combined_embedding + position_embedding

# One integer per sample; it is passed to DynamicMaskLayer to build the attention mask.
length_input = layers.Input(shape=(1,), dtype=tf.int32, name="length_input")

class DynamicMaskLayer(layers.Layer):
    """Build a float mask from per-sample lengths.

    Called with ``[sequence_tensor, lengths]``. The number of positions is read
    from axis 1 of ``sequence_tensor``. Position ``t`` of a sample is 1.0 when
    ``t < length`` and 0.0 otherwise, and two singleton axes are inserted right
    after the batch axis of the result.
    """

    def call(self, inputs):
        sequence, lengths = inputs[0], inputs[1]
        n_positions = tf.shape(sequence)[1]
        # Row vector 0..n_positions-1, broadcast against the per-sample lengths.
        positions = tf.range(n_positions, dtype=tf.int32)[tf.newaxis, :]
        is_valid = positions < tf.cast(lengths, dtype=tf.int32)
        mask = tf.cast(is_valid, dtype=tf.float32)
        return mask[:, tf.newaxis, tf.newaxis, :]

# Mask built from the per-sample lengths; the same mask is passed to both attention layers.
attention_mask = DynamicMaskLayer()([final_embedding, length_input])

# First self-attention block: query, key and value are all the embedded sequence.
attention_layer_1 = MultiHeadAttention(num_heads=2, key_dim=16, name="multi_head_attention_1")
attn_output_1 = attention_layer_1(
    query=final_embedding,
    value=final_embedding,
    key=final_embedding,
    attention_mask=attention_mask
)

# Residual connection around the first attention block
# (no normalization or feed-forward sub-layer is applied here).
attn_output_1 = final_embedding + attn_output_1

# Second self-attention block, applied to the output of the first one.
attention_layer_2 = MultiHeadAttention(num_heads=2, key_dim=16, name="multi_head_attention_2")
attn_output_2 = attention_layer_2(
    query=attn_output_1,
    value=attn_output_1,
    key=attn_output_1,
    attention_mask=attention_mask
)

# Residual connection around the second attention block.
context_embedding = attn_output_1 + attn_output_2

# One integer per sample; it is passed to GatherLayer to pick a position from context_embedding.
pred_idx_input = layers.Input(shape=(1,), dtype=tf.int32, name="pred_idx_input")

class GatherLayer(layers.Layer):
    """Pick, for every sample, the sequence entry at that sample's own index.

    Called with ``[sequence_tensor, indices]``; the last axis of ``indices`` is
    squeezed away before the lookup.
    """

    def call(self, inputs):
        sequence, positions = inputs
        # Drop the trailing singleton axis so each sample carries one scalar index.
        flat_positions = tf.squeeze(positions, axis=-1)
        # batch_dims=1 pairs sample i with index i rather than gathering across the batch.
        return tf.gather(sequence, indices=flat_positions, batch_dims=1)

# Take the contextual vector at each sample's prediction index.
extracted_embeddings = GatherLayer()([context_embedding, pred_idx_input])

# Regression head: one ReLU hidden layer followed by a single linear output.
hidden_layer = layers.Dense(32, activation='relu')(extracted_embeddings)
output_layer_exp = layers.Dense(1, activation='linear')(hidden_layer)

# The model takes four inputs, in this order: movies, ratings, lengths, prediction indices.
model = Model(inputs=[movie_input, rating_input, length_input, pred_idx_input], 
              outputs=output_layer_exp)

# Mean squared error against the target rating, optimised with Adam.
model.compile(optimizer=Adam(learning_rate = 1e-4), loss='mean_squared_error')

# Stop when validation loss has not improved for 10 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',    
    patience=10,  
    restore_best_weights=True 
)

# Fit the model with validation data and early stopping.
# epochs=1000 is an upper bound; the early-stopping callback may end training sooner.
history = model.fit(
    [movie_train_data, rating_train_data, length_train_data, pred_idx_train_data], 
    target_train_data,        
    validation_data=([movie_val_data, rating_val_data, length_val_data, pred_idx_val_data], target_val_data),  
    epochs=1000,                 
    batch_size=256,            
    callbacks=[early_stopping] 
)

Epoch 1/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - loss: 9.5333 - val_loss: 2.6430
Epoch 2/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 2.3686 - val_loss: 2.2069
Epoch 3/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 2.1736 - val_loss: 2.1124
Epoch 4/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 2.1090 - val_loss: 2.0377
Epoch 5/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 2.0846 - val_loss: 1.9762
Epoch 6/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.9897 - val_loss: 1.9170
Epoch 7/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.9502 - val_loss: 1.8684
Epoch 8/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 1.9157 - val_loss: 1.8271
Epoch 9/1000
[1

[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.0237 - val_loss: 1.0001
Epoch 68/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.0162 - val_loss: 0.9996
Epoch 69/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.0188 - val_loss: 1.0035
Epoch 70/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.0229 - val_loss: 1.0016
Epoch 71/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 1.0234 - val_loss: 0.9982
Epoch 72/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - loss: 1.0394 - val_loss: 0.9999
Epoch 73/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.0400 - val_loss: 0.9983
Epoch 74/1000
[1m110/110[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1.0037 - val_loss: 0.9980
Epoch 75/1000
[1m110/

In [16]:
# Print the layer-by-layer summary of the fitted model.
model.summary()

In [17]:
# Evaluate on the held-out users; inputs are given in the order the model was built with
# (movies, ratings, lengths, prediction indices).
test_inputs = [
    movie_test_data,
    rating_test_data,
    length_test_data,
    pred_idx_test_data,
]
test_loss = model.evaluate(test_inputs, target_test_data)
print(f"Test Loss: {test_loss}")

[1m391/391[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.0394
Test Loss: 1.032595157623291
