In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import  MultiLabelBinarizer,LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, Model ,Sequential, metrics
import pickle

2025-11-14 15:27:42.181871: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-14 15:27:47.648472: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-14 15:27:57.119265: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Directory that holds movies.csv and ratings.csv. Defined once so the notebook
# can be pointed at a different copy of the data by editing a single line.
DATA_DIR = '/home/vihan-tandon/Desktop/Movie_Recommender/movie_dataset'

# movies.csv is loaded with every column it contains.
movies = pd.read_csv(f'{DATA_DIR}/movies.csv')
# From ratings.csv only the three columns used further down are loaded.
ratings = pd.read_csv(
    f'{DATA_DIR}/ratings.csv',
    usecols=['userId', 'movieId', 'rating'],
)

# Quick sanity check on how many rows/columns were read.
print(f"Movie shape{movies.shape}")
print(f"Ratings shape{ratings.shape}")

Movie shape(9742, 3)
Ratings shape(100836, 3)


In [3]:
# Turn the pipe-separated genre string into a list of genre names.
# The split is applied only while the column still contains raw strings, so
# re-running this cell does not call .str.split on already-split lists
# (which would turn every entry into NaN and break the binarizer below).
if movies['genres'].map(lambda g: isinstance(g, str)).any():
    movies['genres'] = movies['genres'].str.split('|')

# One binary indicator column per distinct genre name.
mlb = MultiLabelBinarizer()
genre_onehot = mlb.fit_transform(movies['genres'])

# Reuse the index of `movies` so the axis=1 concat aligns row-for-row by
# construction instead of relying on both frames having a default RangeIndex.
genre_df = pd.DataFrame(genre_onehot, columns=mlb.classes_, index=movies.index)

# Drop indicator columns left over from a previous run of this cell before
# appending, so a re-run does not produce duplicate genre columns.
movies = pd.concat(
    [movies.drop(columns=list(mlb.classes_), errors='ignore'), genre_df],
    axis=1,
)

In [4]:
# Join every rating with the matching movie row (including the one-hot genre
# columns) on the shared movieId key, using the default join type.
merged_data = ratings.merge(movies, on='movieId')

In [5]:
# Separate encoders for users and movies; each maps the raw ids seen in
# merged_data to consecutive integer codes that the embedding layers can index.
user_encoder, movie_encoder = LabelEncoder(), LabelEncoder()

# Fit each encoder on its id column and store the resulting codes alongside
# the original columns.
merged_data = merged_data.assign(
    user_idx=user_encoder.fit_transform(merged_data['userId']),
    movie_idx=movie_encoder.fit_transform(merged_data['movieId']),
)

In [6]:
# Hold out 20% of the rating rows; the fixed random_state keeps the split
# identical between runs.
train_data, test_data = train_test_split(merged_data, test_size=0.2, random_state=42)

# Names of the one-hot genre columns, in the order the binarizer produced them.
genre_columns = mlb.classes_


def _split_to_arrays(frame):
    """Return (user indices, movie indices, genre matrix, ratings) of `frame` as NumPy arrays."""
    return (
        frame['user_idx'].to_numpy(),
        frame['movie_idx'].to_numpy(),
        frame[genre_columns].to_numpy(),
        frame['rating'].to_numpy(),
    )


user_train, movie_train, genre_train, y_train = _split_to_arrays(train_data)
user_test, movie_test, genre_test, y_test = _split_to_arrays(test_data)

print(f"Train data shape: {user_train.shape}, {movie_train.shape}, {genre_train.shape}, {y_train.shape}")
print(f"Test data shape: {user_test.shape}, {movie_test.shape}, {genre_test.shape}, {y_test.shape}")

Train data shape: (80668,), (80668,), (80668, 20), (80668,)
Test data shape: (20168,), (20168,), (20168, 20), (20168,)


# Neural Network (Two-Tower Model)

In [7]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation and batch shuffling are repeatable when the
# notebook is run top to bottom. Same value as the train/test split seed.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Width of both tower outputs. The Dot layer below requires the user vector
# and the movie vector to have the same length, so the size is defined once.
EMBEDDING_DIM = 32

# Vocabulary sizes for the two embedding tables and width of the genre input.
num_users = merged_data['user_idx'].nunique()
num_movies = merged_data['movie_idx'].nunique()
num_genres = len(genre_columns)

# --- User tower: user index -> embedding vector `vu` ---
user_input = layers.Input(shape=(1,), name='user_input')
user_embedding = layers.Embedding(num_users, EMBEDDING_DIM)(user_input)
vu = layers.Flatten()(user_embedding)

# --- Movie tower: movie embedding combined with the one-hot genre vector ---
movie_input = layers.Input(shape=(1,), name='movie_input')
movie_embedding = layers.Embedding(num_movies, EMBEDDING_DIM)(movie_input)
m = layers.Flatten()(movie_embedding)

genre_input = layers.Input(shape=(num_genres,), name='genre_input')
movie_concat = layers.Concatenate()([m, genre_input])
# Project the concatenated movie features back to EMBEDDING_DIM so the result
# can be dotted with the user vector.
vm = layers.Dense(EMBEDDING_DIM, activation='relu')(movie_concat)

# The model output is the dot product of the two tower vectors; it is trained
# directly against the rating with mean squared error.
output = layers.Dot(axes=1)([vu, vm])

model = Model(inputs=[user_input, movie_input, genre_input], outputs=output)
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.summary()

In [8]:
# The id arrays are fed to the model unchanged as 1-D arrays of shape
# (num_samples,); these names are just the ones used by the fit call.
user_train_input, user_test_input = user_train, user_test
movie_train_input, movie_test_input = movie_train, movie_test

In [9]:
# Inputs are listed in the same order as the model's inputs:
# user index, movie index, genre vector.
train_inputs = [user_train_input, movie_train_input, genre_train]
holdout_inputs = [user_test_input, movie_test_input, genre_test]

# Note: the held-out test split doubles as the validation set here, so the
# val_* numbers reported per epoch are computed on the test rows.
history = model.fit(
    x=train_inputs,
    y=y_train,
    validation_data=(holdout_inputs, y_test),
    epochs=10,
    batch_size=128,
    verbose=1,
)

Epoch 1/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 3.4801 - mae: 1.3923 - val_loss: 0.9126 - val_mae: 0.7382
Epoch 2/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.7864 - mae: 0.6839 - val_loss: 0.8249 - val_mae: 0.6991
Epoch 3/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.7099 - mae: 0.6479 - val_loss: 0.8092 - val_mae: 0.6905
Epoch 4/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.6762 - mae: 0.6303 - val_loss: 0.7885 - val_mae: 0.6818
Epoch 5/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.6513 - mae: 0.6171 - val_loss: 0.7792 - val_mae: 0.6809
Epoch 6/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.6312 - mae: 0.6069 - val_loss: 0.7765 - val_mae: 0.6794
Epoch 7/10
[1m631/631[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - 

In [12]:
# Write the trained model to disk.
model.save('movie_recommender_model.keras')

# Pickle the two fitted id encoders and the genre column names, each to its
# own file, in the order listed.
pickled_objects = {
    'user_encoder.pkl': user_encoder,
    'movie_encoder.pkl': movie_encoder,
    'genre_columns.pkl': genre_columns,
}
for filename, obj in pickled_objects.items():
    with open(filename, 'wb') as handle:
        pickle.dump(obj, handle)

# Save the movies frame (including its one-hot genre columns) as a pickle too.
movies.to_pickle('movies_data.pkl')

In [19]:
# Encoded (not raw) indices of the user/movie pair to score.
user_idx = 10
movie_idx = 0

# Translate the encoded movie index back to the movieId used in `movies`.
movie_id = movie_encoder.inverse_transform([movie_idx])[0]

# Look the movie up once; its genre indicators and its title are both read
# from this selection.
movie_row = movies.loc[movies['movieId'] == movie_id]
genre_vector = movie_row[genre_columns].values

# Batch of one sample for each of the three model inputs.
user_array = np.array([[user_idx]])
movie_array = np.array([[movie_idx]])
genre_array = genre_vector

rating_pred = model.predict([user_array, movie_array, genre_array])

movie_title = movie_row['title'].values[0]
raw_user_id = user_encoder.inverse_transform([user_idx])[0]
print(f"Predicted rating: {rating_pred[0][0]:.2f} for  {movie_title} by user_id {raw_user_id}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
Predicted rating: 4.44 for  Toy Story (1995) by user_id 11
