In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np

# Import our utility functions
import utils

## Prepare the Data

In [2]:
# Get the original DataFrame for new users.
# The preview below shows its user_id, movie_id and rating columns.
df = utils.get_new_data()
df.head()

Unnamed: 0,user_id,movie_id,rating
0,1601,244,2.5
1,1601,2,2.5
2,1601,255,3.0
3,1601,205,3.0
4,1601,84,4.0


In [3]:
# Get the normalized ratings: a wide table with one row per user and one
# column per movie_id (see the preview below).
# NOTE(review): values look scaled to [0, 1], with 0.0 presumably marking
# unrated movies — confirm in utils.get_normalized_new_data.
normalized_ratings = utils.get_normalized_new_data()
normalized_ratings.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,2062,2063,2064,2065,2066,2067,2068,2069,2070,2071
1601,0.5,0.5,0.3,0.4,0.2,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1602,0.8,0.8,0.0,1.0,0.5,1.0,0.9,0.8,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1603,0.4,0.4,0.4,0.0,0.0,0.9,1.0,0.9,0.2,0.1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1604,0.0,1.0,0.6,0.8,1.0,1.0,0.0,0.8,0.7,0.6,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1605,0.0,0.0,0.6,0.6,1.0,0.0,0.6,0.8,0.0,0.7,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# Get the weights and bias tensors that are later passed to
# utils.generate_recommendation.
# NOTE(review): presumably the parameters of an already-trained model
# (hidden/visible biases suggest an RBM) — confirm in utils.
W = utils.weights()
hb = utils.hidden_bias()
vb = utils.visible_bias()

In [5]:
# Get new users: the index of normalized_ratings holds the user ids
user_list = normalized_ratings.index

In [6]:
# Select a test user (the first user id in the index)
test_user = user_list[0]

# Get the ratings row for the test user: a Series indexed by movie_id
user_ratings = normalized_ratings.loc[test_user]
user_ratings

movie_id
1       0.5
2       0.5
3       0.3
4       0.4
5       0.2
       ... 
2067    0.0
2068    0.0
2069    0.0
2070    0.0
2071    0.0
Name: 1601, Length: 2071, dtype: float64

In [7]:
# Score the movies for the selected user with the loaded weights and biases
rec = utils.generate_recommendation(user_ratings, W, vb, hb)

# Build the recommendation table in one chain: one row per movie column of the
# ratings matrix, tagged with the test user's id, then the first row of the
# model output attached as the per-movie score
user_recommendation = pd.DataFrame(
    {"movie_id": normalized_ratings.columns, "user_id": test_user}
).assign(RecommendationScore=rec[0].numpy())

# Preview the 20 highest-scoring movies
user_recommendation.sort_values(by="RecommendationScore", ascending=False).head(20)

Unnamed: 0,movie_id,user_id,RecommendationScore
214,215,1601,0.852949
1,2,1601,0.819344
210,211,1601,0.814067
211,212,1601,0.811236
251,252,1601,0.710508
8,9,1601,0.703516
235,236,1601,0.695782
4,5,1601,0.672138
11,12,1601,0.667892
215,216,1601,0.660423


In [8]:
# Attach the user's known ratings to the recommendation scores.
# A left join keeps exactly the rows of user_recommendation (one per scored
# movie for test_user, in their existing order); movies without a matching
# (movie_id, user_id) row in df get NaN in "rating", which marks them as unrated.
# An outer join would additionally pull in every df row that has no match here
# (e.g. ratings belonging to other users) as rows with a NaN RecommendationScore.
merged_df = user_recommendation.merge(df, on=["movie_id", "user_id"], how="left")
merged_df.sort_values(["RecommendationScore"], ascending=False).head(20)

Unnamed: 0,movie_id,user_id,RecommendationScore,rating
214,215,1601,0.852949,3.0
1,2,1601,0.819344,2.5
210,211,1601,0.814067,
211,212,1601,0.811236,4.5
251,252,1601,0.710508,
8,9,1601,0.703516,
235,236,1601,0.695782,5.0
4,5,1601,0.672138,1.0
11,12,1601,0.667892,2.0
215,216,1601,0.660423,


In [9]:
# Pick the movies to recommend: rows of the merged DataFrame whose
# recommendation score is higher than 0.5 and that carry no rating
# (i.e. the user has not rated them yet)
scored_high = merged_df["RecommendationScore"] > 0.5
not_rated = merged_df["rating"].isna()

movies_to_recommend = merged_df.loc[scored_high & not_rated, "movie_id"].to_list()
movies_to_recommend

[9, 13, 211, 216, 219, 251, 252]