In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np

# Import our utility functions
import utils

## Prepare the Data

In [2]:
# Load the original ratings DataFrame through the project helper and preview
# its first rows (the preview shows user_id, movie_id and rating columns).
df = utils.get_data()
df.head()

Unnamed: 0,user_id,movie_id,rating
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [3]:
# Get the normalized ratings: the preview shows a wide table indexed by user_id
# with one column per movie_id.
# NOTE(review): values look like rating / 5 with 0.0 for unrated movies —
# confirm against utils.get_normalized_data.
normalized_ratings = utils.get_normalized_data()
normalized_ratings.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,2062,2063,2064,2065,2066,2067,2068,2069,2070,2071
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.4,0.8,0.7,0.6,0.8,0.7,0.7,0.6,0.5,0.8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.6,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Fetch the model parameters from the utilities module in one statement:
# weights first, then the hidden bias, then the visible bias.
W, hb, vb = utils.weights(), utils.hidden_bias(), utils.visible_bias()

In [5]:
# Take the row index of the normalized ratings (the user_id labels) so that
# each user's ratings can be looked up and sent back into the model.
users = normalized_ratings.index
users

Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
       ...
       1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1507, 1508],
      dtype='int64', name='user_id', length=1508)

In [6]:
# Set up an empty table with the three columns used to store the recommendations
score_columns = ["movie_id", "user_id", "RecommendationScore"]
recommendation_scores = pd.DataFrame(columns=score_columns)
recommendation_scores

Unnamed: 0,movie_id,user_id,RecommendationScore


In [7]:
# Get recommendations for every user.
# Each user's frame is collected in a list and concatenated once after the loop:
# calling pd.concat inside the loop re-copies every accumulated row on each
# iteration (quadratic in the number of users), and appending to the existing
# recommendation_scores would duplicate every row if this cell were re-run.
movie_ids = normalized_ratings.columns  # loop-invariant, so look it up once
user_frames = []
for user in users:
    # Get user's ratings
    user_ratings = normalized_ratings.loc[user]

    # Generate the recommendations
    rec = utils.generate_recommendation(user_ratings, W, vb, hb)

    # Construct user DataFrame (the scalar user id is broadcast to every movie row)
    new_recommendation = pd.DataFrame({"movie_id": movie_ids, "user_id": user})
    new_recommendation = new_recommendation.assign(RecommendationScore = rec[0].numpy())

    # Keep the per-user frame for the single concat below
    user_frames.append(new_recommendation)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# with the expected columns when there are no users.
if user_frames:
    recommendation_scores = pd.concat(user_frames)
else:
    recommendation_scores = pd.DataFrame(columns=["movie_id", "user_id", "RecommendationScore"])

recommendation_scores

Unnamed: 0,movie_id,user_id,RecommendationScore
0,1,1,0.129210
1,2,1,0.358627
2,3,1,0.044379
3,4,1,0.156728
4,5,1,0.354294
...,...,...,...
2066,2067,1508,0.000444
2067,2068,1508,0.000516
2068,2069,1508,0.000298
2069,2070,1508,0.000387


In [8]:
# Pair every predicted score with the rating the user actually gave.
# An inner join keeps only (movie_id, user_id) pairs present in both tables,
# so no NaN ratings reach the error calculation.
merged_df = pd.merge(
    recommendation_scores,
    df,
    how="inner",
    on=["movie_id", "user_id"],
)
merged_df

Unnamed: 0,movie_id,user_id,RecommendationScore,rating
0,1,1,0.129210,2.0
1,2,1,0.358627,4.0
2,3,1,0.044379,3.5
3,4,1,0.156728,3.0
4,5,1,0.354294,4.0
...,...,...,...,...
35489,669,1508,0.000787,1.0
35490,686,1508,0.001314,2.5
35491,693,1508,0.007873,3.5
35492,751,1508,0.000362,1.0


In [9]:
# Scale the rating column by 1/5 so it is comparable with RecommendationScore.
# The column is overwritten in place, so re-running this cell on its own
# divides the ratings again; re-run the merge cell first.
merged_df["rating"] = merged_df["rating"].div(5)
merged_df

Unnamed: 0,movie_id,user_id,RecommendationScore,rating
0,1,1,0.129210,0.4
1,2,1,0.358627,0.8
2,3,1,0.044379,0.7
3,4,1,0.156728,0.6
4,5,1,0.354294,0.8
...,...,...,...,...
35489,669,1508,0.000787,0.2
35490,686,1508,0.001314,0.5
35491,693,1508,0.007873,0.7
35492,751,1508,0.000362,0.2


In [10]:
# Calculate RMSE
from sklearn.metrics import mean_squared_error

In [11]:
# Root-mean-squared error between the normalized ratings and the model scores.
# The square root is taken explicitly instead of passing `squared=False`:
# scikit-learn deprecated that keyword in 1.4 and removed it in 1.6, so the
# keyword form raises TypeError on current releases. This form works on all versions.
rmse = np.sqrt(mean_squared_error(merged_df["rating"], merged_df["RecommendationScore"]))
print(rmse)

0.39297354632444953
