# Recommendation System

<div dir=rtl style="text-align: left">
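هدف: ساخت یک سیستم توصیه‌گر که با استفاده از Embedding کاربر و فیلم و یک شبکهٔ MLP، امتیاز (rating) هر کاربر به هر فیلم را پیش‌بینی می‌کند.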
</div>


In [19]:
# =====================
# import libraries
# =====================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder  
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from keras import layers, models
import tensorflow as tf
from tensorflow import keras
# from tensorflow.keras import layers


In [20]:
# =====================
# load dataset
# =====================

# The file is tab-separated; because explicit column names are supplied,
# pandas treats every line of the file (including the first) as data.
RATING_COLUMNS = ['user_id', 'movie_id', 'rating', 'timestamp']
df = pd.read_csv('u.data', sep='\t', names=RATING_COLUMNS)

In [21]:
# =====================
# print info
# =====================

# Each label is printed on its own line, followed by the value it describes.
for label, value in (('first rows', df.head()), ('shape', df.shape)):
    print(label)
    print(value)

# The label reads 'columns'; the value displayed as the cell result is the
# per-column dtype Series.
print('columns')
df.dtypes

first rows
   user_id  movie_id  rating  timestamp
0      196       242       3  881250949
1      186       302       3  891717742
2       22       377       1  878887116
3      244        51       2  880606923
4      166       346       1  886397596
shape
(100000, 4)
columns


user_id      int64
movie_id     int64
rating       int64
timestamp    int64
dtype: object

In [22]:
# =====================
# preprocessing
# =====================

# Embedding layers look rows up by integer position, so the raw ids are
# remapped to consecutive integer codes (one code per distinct id).
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

user_encoder.fit(df['user_id'])
item_encoder.fit(df['movie_id'])

df['user'] = user_encoder.transform(df['user_id'])
df['item'] = item_encoder.transform(df['movie_id'])

# Each encoder holds one class per distinct id it saw, so the number of
# classes equals the number of distinct encoded values.
num_users = len(user_encoder.classes_)
num_items = len(item_encoder.classes_)

# Preview the frame with the two new encoded columns
print(df.head())

   user_id  movie_id  rating  timestamp  user  item
0      196       242       3  881250949   195   241
1      186       302       3  891717742   185   301
2       22       377       1  878887116    21   376
3      244        51       2  880606923   243    50
4      166       346       1  886397596   165   345


In [23]:
# Sanity check of the encoding: one row per (user_id, user) pair, ordered by
# the raw id, limited to the first 15 pairs.
df.drop_duplicates(subset=['user_id', 'user'])[['user_id', 'user']].sort_values(by='user_id').head(15)


Unnamed: 0,user_id,user
202,1,0
700,2,1
1257,3,2
1250,4,3
172,5,4
9,6,5
39,7,6
90,8,7
1892,9,8
40,10,9


In [24]:
# Plain-text preview of the first five rows of the frame
print(df.head(5))

   user_id  movie_id  rating  timestamp  user  item
0      196       242       3  881250949   195   241
1      186       302       3  891717742   185   301
2       22       377       1  878887116    21   376
3      244        51       2  880606923   243    50
4      166       346       1  886397596   165   345


In [25]:
# =====================
# Train-Test Split
# =====================

# Only the encoded ids and the rating are carried into the model data;
# the raw ids and the timestamp are left behind.
interactions = df[['user', 'item', 'rating']]

# 80/20 random split with a fixed random_state so the partition is repeatable
train, test = train_test_split(interactions, test_size=0.2, random_state=42)

# Report how many rows ended up on each side
print(f'Train size: {len(train)}, Test size: {len(test)}')


Train size: 80000, Test size: 20000


In [26]:
# =====================
# Generate Network
# =====================

# Seed Python, NumPy and TensorFlow before any layer is created, so the
# initial embedding/dense weights (and the batch shuffling later done by fit)
# are repeatable from a fresh kernel. The value matches the random_state
# used for the train/test split.
SEED = 42
keras.utils.set_random_seed(SEED)

# Input layers: a single integer id per sample for the user and for the item
user_input = keras.Input(shape=(1,), name='user_input')
item_input = keras.Input(shape=(1,), name='item_input')

# Embedding layers
embedding_dim = 32  # Size of the embedding vectors

# input_dim is the vocabulary size, i.e. the number of distinct encoded ids
user_embedding = layers.Embedding(input_dim=num_users, output_dim=embedding_dim, name='user_embedding')(user_input)
item_embedding = layers.Embedding(input_dim=num_items, output_dim=embedding_dim, name='item_embedding')(item_input)

# Flatten the embeddings: drop the length-1 sequence axis left by the
# (1,)-shaped inputs so each side is a flat vector of size embedding_dim
user_vec = layers.Flatten()(user_embedding)
item_vec = layers.Flatten()(item_embedding)

# Concatenate user and item embeddings into one feature vector
concat = layers.Concatenate()([user_vec, item_vec])

# MLP layers (Multi-Layer Perceptron) applied to the joined embeddings
x = layers.Dense(64, activation='relu')(concat)
x = layers.Dense(32, activation='relu')(x)
x = layers.Dense(16, activation='relu')(x)

# Output layer: one unbounded value per sample (the predicted rating)
output = layers.Dense(1, activation='linear', name='prediction')(x)

# Define the model
model = keras.Model(inputs=[user_input, item_input], outputs=output)

# Compile the model: regression set-up, optimising MSE and reporting MAE
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Show model summary
model.summary()


In [27]:
# =====================
# Train Model
# =====================

# Pull the three training columns out as plain arrays
train_user = train['user'].values
train_item = train['item'].values
train_rating = train['rating'].values

# Tunable training settings, kept together in one place
fit_options = dict(
    epochs=10,             # number of passes over the training data
    batch_size=64,         # samples per gradient step
    validation_split=0.1,  # fraction of the training data held out for validation
    verbose=1,
)

# The two inputs are passed in the same order the model declares them
# (user first, then item)
history = model.fit(x=[train_user, train_item], y=train_rating, **fit_options)

# Signal that fitting finished; the per-epoch metrics are kept in `history`
print("Training completed!")


Epoch 1/10
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 2.9915 - mae: 1.2620 - val_loss: 0.9273 - val_mae: 0.7718
Epoch 2/10
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 920us/step - loss: 0.8768 - mae: 0.7424 - val_loss: 0.8985 - val_mae: 0.7556
Epoch 3/10
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 891us/step - loss: 0.8518 - mae: 0.7285 - val_loss: 0.8975 - val_mae: 0.7579
Epoch 4/10
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 900us/step - loss: 0.8061 - mae: 0.7060 - val_loss: 0.8870 - val_mae: 0.7395
Epoch 5/10
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 904us/step - loss: 0.7725 - mae: 0.6901 - val_loss: 0.8713 - val_mae: 0.7369
Epoch 6/10
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 948us/step - loss: 0.7294 - mae: 0.6692 - val_loss: 0.8697 - val_mae: 0.7337
Epoch 7/10
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0

In [28]:
# Pull the held-out columns out as plain arrays
test_user, test_item, test_rating = (
    test[column].values for column in ('user', 'item', 'rating')
)

# Score the model on the held-out rows; the model was compiled with an MSE
# loss and one MAE metric, so evaluate yields those two values in that order
test_loss, test_mae = model.evaluate(
    x=[test_user, test_item],
    y=test_rating,
    verbose=1,
)

print(f'Test Loss (MSE): {test_loss}')
print(f'Test MAE: {test_mae}')


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 645us/step - loss: 0.9210 - mae: 0.7516
Test Loss (MSE): 0.9417839050292969
Test MAE: 0.7581946849822998
