In [1]:
!pip install tensorflow





In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from sklearn.model_selection import train_test_split
import os
import utils
import random




In [3]:
train_path = os.path.join('data', 'ua.base')
valid_path = os.path.join('data', 'ua.test')

# Load data and shuffle
train_data_list = utils.load_csv_data(train_path, '\t')

random.shuffle(train_data_list)
validation_data_list = utils.load_csv_data(valid_path, '\t')
random.shuffle(validation_data_list)

In file data\ua.base, there are 90570 ratings
The ratings have mean: 3.52, median: 4.0, and variance: 1.27
There are 943 unique users and 1680 unique movies
In file data\ua.test, there are 9430 ratings
The ratings have mean: 3.59, median: 4.0, and variance: 1.25
There are 943 unique users and 1129 unique movies


In [4]:
len(train_data_list)

90570

## Predict movies ratings using embedding and neural network

In [5]:
# Convert train_data_list and validation_data_list to arrays for easier manipulation
train_data_array = np.array([(data['in0'][0], data['in1'][0], data['label']) for data in train_data_list])
validation_data_array = np.array([(data['in0'][0], data['in1'][0], data['label']) for data in validation_data_list])

# Determine the number of unique users and items
num_users = len(np.unique(train_data_array[:, 0]))
num_items = len(np.unique(train_data_array[:, 1]))

# Define the embedding dimension
embedding_dim = 50



In [6]:
# Define the input layers for user and item IDs
user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')

# Define embedding layers for users and items
user_embedding = Embedding(input_dim=90570, output_dim=embedding_dim, input_length=1, name='user_embedding')(user_input)

item_embedding = Embedding(input_dim=90570, output_dim=embedding_dim, input_length=1, name='item_embedding')(item_input)

# Flatten the embeddings
user_flat = Flatten()(user_embedding)
item_flat = Flatten()(item_embedding)
print(user_flat)
# Concatenate user and item embeddings
concat = Concatenate()([user_flat, item_flat])

# Define a neural network architecture
dense1 = tf.keras.layers.Dense(64, activation='relu')(concat)
dense2 = tf.keras.layers.Dense(32, activation='relu')(dense1)
output = tf.keras.layers.Dense(1, activation='linear')(dense2)

# Define the model
model = Model(inputs=[user_input, item_input], outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Split the data into training and validation sets
train_data, val_data = train_test_split(train_data_array, test_size=0.1, random_state=42)

# Train the model
history = model.fit([train_data[:, 0], train_data[:, 1]], train_data[:, 2], 
                    batch_size=64, epochs=10, validation_data=([val_data[:, 0], val_data[:, 1]], val_data[:, 2]))

# Evaluate the model
val_loss = model.evaluate([validation_data_array[:, 0], validation_data_array[:, 1]], validation_data_array[:, 2])
print("Validation Loss:", val_loss)



KerasTensor(type_spec=TensorSpec(shape=(None, 50), dtype=tf.float32, name=None), name='flatten/Reshape:0', description="created by layer 'flatten'")

Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation Loss: 0.965259313583374


In [7]:
predictions_ = model.predict([validation_data_array[:, 0], validation_data_array[:, 1]])




In [8]:
predictions= [round(value[0]) for value in predictions_]

In [9]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Calculate Mean Absolute Error (MAE)
mae = mean_absolute_error(validation_data_array[:, 2], predictions)
print("Mean Absolute Error (MAE):", mae)

# Calculate Mean Squared Error (MSE)
mse = mean_squared_error(validation_data_array[:, 2], predictions)
print("Mean Squared Error (MSE):", mse)

# Calculate Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
print("Root Mean Squared Error (RMSE):", rmse)


Mean Absolute Error (MAE): 0.730965005302227
Mean Squared Error (MSE): 1.0493107104984094
Root Mean Squared Error (RMSE): 1.0243586825416229


In [10]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [11]:
threshold = 3.5
recommended_items = []
for i in range(len(predictions)):
    if predictions[i] >= threshold:
        recommended_items.append(1)  # Recommended
    else:
        recommended_items.append(0)  # Not recommended

# Evaluate the recommendation performance
true_labels = [1 if rating >= threshold else 0 for rating in validation_data_array[:, 2]]

# Calculate evaluation metrics
accuracy = accuracy_score(true_labels, recommended_items)
precision = precision_score(true_labels, recommended_items)
recall = recall_score(true_labels, recommended_items)
f1 = f1_score(true_labels, recommended_items)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Accuracy: 0.6954400848356309
Precision: 0.7292954264524104
Recall: 0.7551654781495704
F1-score: 0.7420050305425799


## Recommend movies based on classification using embedding and neural network

In [12]:
threshold = 3.5 

# Convert ratings to binary values based on the threshold
train_data_array = np.array([1 if data['label'] >= threshold else 0 for data in train_data_list])

# Extract user and item IDs and include binary ratings
train_data_array = np.array([(data['in0'][0], data['in1'][0], rating) for data, rating in zip(train_data_list, train_data_array)])

# Display the training data
print(train_data_array)
len(train_data_array)

[[296 462   1]
 [297 752   1]
 [653 200   1]
 ...
 [586 161   1]
 [458 526   1]
 [589 304   1]]


90570

In [14]:
validation_data_array = np.array([(data['in0'][0], data['in1'][0], data['label']) for data in validation_data_list])

# Determine the number of unique users and items
num_users = len(np.unique(train_data_array[:, 0]))
num_items = len(np.unique(train_data_array[:, 1]))

# Define the embedding dimension
embedding_dim = 50

In [15]:
from tensorflow.keras.layers import Activation

user_input = Input(shape=(1,), name='user_input')
item_input = Input(shape=(1,), name='item_input')


user_embedding = Embedding(input_dim=90570, output_dim=embedding_dim, input_length=1, name='user_embedding')(user_input)
item_embedding = Embedding(input_dim=90570, output_dim=embedding_dim, input_length=1, name='item_embedding')(item_input)


user_flat = Flatten()(user_embedding)
item_flat = Flatten()(item_embedding)


concat = Concatenate()([user_flat, item_flat])

# Define a neural network architecture
dense1 = tf.keras.layers.Dense(64, activation='relu')(concat)
dense2 = tf.keras.layers.Dense(32, activation='relu')(dense1)

# Output layer with sigmoid activation for binary classification
output = tf.keras.layers.Dense(1, activation='sigmoid')(dense2)

# Define the model
model = Model(inputs=[user_input, item_input], outputs=output)

# Compile the model with binary cross-entropy loss
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Split the data into training and validation sets
train_data, val_data = train_test_split(train_data_array, test_size=0.1, random_state=42)

# Train the model
history = model.fit([train_data[:, 0], train_data[:, 1]], train_data[:, 2], 
                    batch_size=64, epochs=10, validation_data=([val_data[:, 0], val_data[:, 1]], val_data[:, 2]))



Epoch 1/10

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
predictions_ = model.predict([validation_data_array[:, 0], validation_data_array[:, 1]])



In [17]:
prediction_binary=[1 if data >= 0.5 else 0 for data in predictions_]

In [18]:
validation_data_array[:, 2]

array([4., 3., 3., ..., 5., 5., 1.])

In [19]:

prediction_original = np.array([1 if data >= threshold else 0 for data in validation_data_array[:, 2]])

In [20]:
prediction_binary

[1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,


In [21]:
# Evaluate the model
val_loss, val_accuracy = model.evaluate([validation_data_array[:, 0], validation_data_array[:, 1]], prediction_original)
print("Validation Loss:", val_loss)
print("Validation Accuracy:", val_accuracy)

Validation Loss: 0.794411301612854
Validation Accuracy: 0.6741251349449158


In [None]:
accuracy = accuracy_score(prediction_original, prediction_binary)
precision = precision_score(prediction_original, prediction_binary)
recall = recall_score(prediction_original, prediction_binary)
f1 = f1_score(prediction_original, prediction_binary)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)