## Build a recommendation system that recommends movies to a user based on their rating behaviour

### Loading essential datasets

In [1]:
import numpy as np
import pandas as pd
import os
# Work from the dataset directory so the relative CSV paths used below resolve.
# The location can be overridden with the ML25M_DIR environment variable; the
# original hard-coded path stays as the default so existing setups keep working.
os.chdir(os.environ.get('ML25M_DIR', '/home/admin1/PycharmProjects/Recommender System/ml-25m/'))

#### The movies file lists each movie with its genres

In [2]:
# Load the movie table (columns: movieId, title, genres) and preview the first rows.
movies = pd.read_csv(filepath_or_buffer='movies.csv')
movies.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
# (rows, columns) of the movie table.
movies.shape

(62423, 3)

#### The ratings file maps each user to the ratings they gave to different movies

In [4]:
# Load the ratings table (columns: userId, movieId, rating, timestamp) and preview it.
ratings_data = pd.read_csv(filepath_or_buffer='ratings.csv')
ratings_data.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [5]:
# Number of missing values in each column of the ratings table.
ratings_data.isna().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [6]:
# (rows, columns) of the full ratings table before any filtering.
ratings_data.shape

(25000095, 4)

In [7]:
# The timestamp is not fed to the model, so drop it. Reassigning (instead of
# dropping in place) and ignoring an already-missing column keeps this cell
# safe to re-run without a KeyError.
ratings_data = ratings_data.drop(columns='timestamp', errors='ignore')

### Taking a subset of the dataset

In [8]:
# Keep only ratings whose user id and movie id are both at most 10000.
# .copy() makes the result an independent frame, so later modifications do not
# operate on (or warn about) a slice of the full ratings table.
ratings_data = ratings_data.loc[
    (ratings_data['userId'] <= 10000) & (ratings_data['movieId'] <= 10000)
].copy()

#### Removing rarely watched movies

In [9]:
# Drop every movie that has fewer than 20 ratings in the current subset.
# The per-movie rating count is computed once with groupby/transform; this
# replaces a Python loop that ran once per rating row, rescanned the whole frame
# on each iteration, and dropped rows from the frame while iterating over it.
MIN_RATINGS_PER_MOVIE = 20
ratings_per_movie = ratings_data.groupby('movieId')['movieId'].transform('count')
ratings_data = ratings_data.loc[ratings_per_movie >= MIN_RATINGS_PER_MOVIE]

In [10]:
# Preview the filtered ratings (timestamp column already removed).
ratings_data.head()

Unnamed: 0,userId,movieId,rating
0,1,296,5.0
1,1,306,3.5
2,1,307,5.0
3,1,665,5.0
4,1,899,3.5


In [11]:
# (rows, columns) of the ratings table after subsetting and removing rarely rated movies.
ratings_data.shape

(1123057, 3)

In [33]:
# Persist the filtered ratings next to the source CSVs.
# NOTE(review): with the default settings the DataFrame index is written as an
# extra unnamed first column; confirm downstream readers expect that.
ratings_data.to_csv(path_or_buf='ratings_data.csv')

In [12]:
# from sklearn.preprocessing import StandardScaler
# sc_x = StandardScaler()
# ratings_data['rating'] = sc_x.fit_transform(ratings_data['rating'].values.reshape(-1,1))

#### Defining the largest user id and movie id (used to size the embedding tables)

In [13]:
# Largest user id in the filtered ratings; the user embedding table is sized from it.
total_users = ratings_data.userId.max()
total_users

10000

In [14]:
# Largest movie id in the filtered ratings; the movie embedding table is sized from it.
total_movies = ratings_data.movieId.max()
total_movies

9018

In [15]:
from tensorflow.keras.layers import Input, Embedding, Dense, Dot, Flatten
from tensorflow.keras import Model

In [16]:
# Each user id and each movie id is mapped to a 10-dimensional embedding; the
# predicted rating is the dot product of the two embedding vectors.
# NOTE(review): the output is an unbounded dot product, so predictions can fall
# outside the rating scale (values above 6 appear in the recommendations below).
user_input = Input(shape=[1])
# Raw ids are used directly as embedding indices, hence max id + 1 rows.
user_embedding = Embedding(total_users + 1, 10)(user_input)
user_vec = Flatten()(user_embedding)

movie_input = Input(shape=[1])
movie_embedding = Embedding(total_movies + 1, 10)(movie_input)
movie_vec = Flatten()(movie_embedding)

prod = Dot(name='Dot-Product', axes=1)([movie_vec, user_vec])

# Inputs are ordered [user, movie]; fit/predict must pass their arrays in that order.
model = Model([user_input, movie_input], prod)
# Adam optimizer with mean absolute error as the loss.
model.compile('adam', 'mean_absolute_error')

In [17]:
# Fit the model on (userId, movieId) -> rating for a fixed number of epochs.
epochs = 30
train_inputs = [ratings_data['userId'].values, ratings_data['movieId'].values]
train_targets = ratings_data['rating'].values
history = model.fit(train_inputs, train_targets, epochs=epochs)

Train on 1123057 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [34]:
# Save the trained model to an .h5 file in the current working directory.
model.save('MovieRecommendationModel.h5')

In [18]:
# Distinct movie ids remaining after filtering, in order of first appearance.
movie_data = np.array(pd.unique(ratings_data['movieId']))

### Predicting ratings for a given user id

In [19]:
# User to score; the id is repeated once per candidate movie so that the two
# model inputs have the same length.
uid = 2
user = np.full(shape=movie_data.shape[0], fill_value=uid)

In [20]:
# Predict a rating for every (uid, movie) pair; arrays follow the model's [user, movie] input order.
predictions = model.predict([user, movie_data])

In [21]:
# Flatten the one-column model output into a 1-D vector of predicted ratings.
# reshape(-1) is vectorised and, unlike taking a[0] from every row in a Python
# loop, still works if this cell is re-run after `predictions` is already 1-D.
predictions = np.asarray(predictions).reshape(-1)
predictions

array([4.5752234, 3.7409558, 3.3697357, ..., 2.7096612, 3.4875867,
       2.176269 ], dtype=float32)

In [22]:
# One predicted rating per candidate movie.
predictions.shape

(4267,)

In [23]:
# Distinct movie ids in order of first appearance, derived the same way as the
# movie array passed to the model, so positions line up with `predictions`.
mvids = pd.unique(ratings_data['movieId'])
mvids[:10]

array([ 296,  306,  307,  665,  899, 1088, 1175, 1217, 1237, 1250])

In [24]:
# Build the movieId -> title lookup once. The original lambda filtered the whole
# `movies` frame on every call, i.e. one full scan per movie id.
# drop_duplicates keeps the first row per id, matching the original
# "first matching row" result.
_title_by_movie_id = (
    movies.drop_duplicates(subset='movieId')
    .set_index('movieId')['title']
    .to_dict()
)


def get_title(mvid):
    """Return the title of the movie whose id is `mvid`.

    Raises:
        KeyError: if `mvid` is not present in `movies` (the previous lambda
            raised IndexError in that situation).
    """
    return _title_by_movie_id[mvid]


# Titles in the same order as `mvids`.
mvtitles = [get_title(mvid) for mvid in mvids]

In [25]:
# One row per candidate movie: the user id, the movie id and title, and the
# rating the model predicts for that user.
prediction_data = pd.DataFrame(
    {
        'userId': uid,
        'movieId': mvids,
        'title': mvtitles,
        'rating': predictions,
    }
)
prediction_data.head(10)

Unnamed: 0,userId,movieId,title,rating
0,2,296,Pulp Fiction (1994),4.575223
1,2,306,Three Colors: Red (Trois couleurs: Rouge) (1994),3.740956
2,2,307,Three Colors: Blue (Trois couleurs: Bleu) (1993),3.369736
3,2,665,Underground (1995),4.200428
4,2,899,Singin' in the Rain (1952),3.872983
5,2,1088,Dirty Dancing (1987),2.435432
6,2,1175,Delicatessen (1991),4.408549
7,2,1217,Ran (1985),4.634685
8,2,1237,"Seventh Seal, The (Sjunde inseglet, Det) (1957)",4.100123
9,2,1250,"Bridge on the River Kwai, The (1957)",4.241301


### Recommending movies based on ratings

In [26]:
# Rank all candidate movies by predicted rating and keep the 60 highest.
# NOTE(review): movies the user has already rated are not excluded, so some of
# them show up in the recommendations (e.g. movieId 5952 below).
ranked_predictions = prediction_data.sort_values(by='rating', ascending=False)
recommendations = ranked_predictions.head(60)
recommendations

Unnamed: 0,userId,movieId,title,rating
2802,2,2593,"Monster, The (Mostro, Il) (1994)",6.460388
1413,2,2166,Return to Paradise (1998),6.280026
210,2,7153,"Lord of the Rings: The Return of the King, The...",6.184223
30,2,5952,"Lord of the Rings: The Two Towers, The (2002)",6.160151
186,2,4993,"Lord of the Rings: The Fellowship of the Ring,...",5.98148
1370,2,1050,Looking for Richard (1996),5.865583
4009,2,1236,Trust (1990),5.861531
1525,2,2314,Beloved (1998),5.805889
3969,2,4350,Forgotten Silver (1996),5.700303
3338,2,3613,Things Change (1988),5.642447


### Checking the predicted ratings for movies the user rated highly

In [27]:
# Ids of the movies this user rated 5 or higher.
is_top_rated_by_user = (ratings_data['userId'] == uid) & (ratings_data['rating'] >= 5)
watched_movies = ratings_data.loc[is_top_rated_by_user, 'movieId']

### Actual rating

In [32]:
# The user's own ratings for the movies selected above.
by_this_user = ratings_data['userId'] == uid
in_top_rated = ratings_data['movieId'].isin(watched_movies)
ratings_data.loc[by_this_user & in_top_rated]

Unnamed: 0,userId,movieId,rating
72,2,110,5.0
76,2,260,5.0
79,2,318,5.0
80,2,333,5.0
85,2,457,5.0
89,2,527,5.0
101,2,1136,5.0
102,2,1196,5.0
103,2,1197,5.0
106,2,1210,5.0


### Predicted rating

In [29]:
# The model's predicted ratings for those same movies, for comparison with the actual ones.
prediction_data[prediction_data['movieId'].isin(watched_movies)]

Unnamed: 0,userId,movieId,title,rating
30,2,5952,"Lord of the Rings: The Two Towers, The (2002)",6.160151
34,2,6539,Pirates of the Caribbean: The Curse of the Bla...,5.026112
56,2,110,Braveheart (1995),4.503234
60,2,260,Star Wars: Episode IV - A New Hope (1977),5.20858
63,2,318,"Shawshank Redemption, The (1994)",5.087287
64,2,333,Tommy Boy (1995),2.854502
69,2,457,"Fugitive, The (1993)",4.285693
73,2,527,Schindler's List (1993),4.955453
85,2,1136,Monty Python and the Holy Grail (1975),5.021272
86,2,1196,Star Wars: Episode V - The Empire Strikes Back...,5.193318
