## Loading and Formatting data

In [2]:
import pandas as pd

# Location of the raw critic/title/rating CSV used throughout this notebook.
MOVIE_RATING_URL = 'https://raw.githubusercontent.com/sureshgorakala/RecommenderSystems_R/master/movie_rating.csv'

# Read the CSV into a long-format frame: one row per (critic, title) rating.
# The file's first, unnamed column shows up as 'Unnamed: 0' in the frame.
ratings = pd.read_csv(MOVIE_RATING_URL)

# Display the loaded frame.
ratings

Unnamed: 0,critic,title,rating
0,Jack Matthews,Lady in the Water,3.0
1,Jack Matthews,Snakes on a Plane,4.0
2,Jack Matthews,You Me and Dupree,3.5
3,Jack Matthews,Superman Returns,5.0
4,Jack Matthews,The Night Listener,3.0
5,Mick LaSalle,Lady in the Water,3.0
6,Mick LaSalle,Snakes on a Plane,4.0
7,Mick LaSalle,Just My Luck,2.0
8,Mick LaSalle,Superman Returns,3.0
9,Mick LaSalle,You Me and Dupree,2.0


In [3]:
# Reshape the long ratings table into a title x critic matrix:
# one row per title, one column per critic, the rating in each cell
# (NaN where a critic has not rated that title).
movie_ratings = ratings.pivot_table(values='rating', index='title', columns='critic')

movie_ratings

critic,Claudia Puig,Gene Seymour,Jack Matthews,Lisa Rose,Mick LaSalle,Toby
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Just My Luck,3.0,1.5,,3.0,2.0,
Lady in the Water,,3.0,3.0,2.5,3.0,
Snakes on a Plane,3.5,3.5,4.0,3.5,4.0,4.5
Superman Returns,4.0,5.0,5.0,3.5,3.0,4.0
The Night Listener,4.5,3.0,3.0,3.0,3.0,
You Me and Dupree,2.5,3.5,3.5,2.5,2.0,1.0


## Calculating similarity between users

In [4]:
# Critic-to-critic similarity: pairwise Pearson correlation between the
# critic columns (pandas excludes missing values pair by pair).
sim_users = movie_ratings.corr(method='pearson')

sim_users

critic,Claudia Puig,Gene Seymour,Jack Matthews,Lisa Rose,Mick LaSalle,Toby
critic,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Claudia Puig,1.0,0.31497,0.028571,0.566947,0.566947,0.893405
Gene Seymour,0.31497,1.0,0.963796,0.396059,0.411765,0.381246
Jack Matthews,0.028571,0.963796,1.0,0.747018,0.211289,0.662849
Lisa Rose,0.566947,0.396059,0.747018,1.0,0.594089,0.991241
Mick LaSalle,0.566947,0.411765,0.211289,0.594089,1.0,0.924473
Toby,0.893405,0.381246,0.662849,0.991241,0.924473,1.0


## Predicting the unknown ratings for users

### Take Toby as an example

In [10]:
# Titles Toby has not rated: rows where his column of the matrix is NaN.
toby_unrated = movie_ratings['Toby'].isnull()
titles_na_critic = movie_ratings.loc[toby_unrated].index.values

titles_na_critic

array(['Just My Luck', 'Lady in the Water', 'The Night Listener'],
      dtype=object)

In [11]:
# Keep only the ratings the other critics gave to the titles listed above.
is_unrated_title = ratings['title'].isin(titles_na_critic)
ratings_t = ratings.loc[is_unrated_title]

ratings_t

Unnamed: 0,critic,title,rating
0,Jack Matthews,Lady in the Water,3.0
4,Jack Matthews,The Night Listener,3.0
5,Mick LaSalle,Lady in the Water,3.0
7,Mick LaSalle,Just My Luck,2.0
10,Mick LaSalle,The Night Listener,3.0
12,Claudia Puig,Just My Luck,3.0
15,Claudia Puig,The Night Listener,4.5
16,Lisa Rose,Lady in the Water,2.5
18,Lisa Rose,Just My Luck,3.0
20,Lisa Rose,The Night Listener,3.0


In [24]:
# Attach each critic's similarity to Toby as a `similarity` column.
sim_toby = sim_users[['Toby']].rename(columns={'Toby': 'similarity'})

# This cell reassigns `ratings_t` from itself, so drop any `similarity`
# column left by a previous run first; otherwise re-running the cell would
# produce `similarity_x` / `similarity_y` and break the cells below.
ratings_t = (
    ratings_t
    .drop(columns=['similarity'], errors='ignore')
    .merge(sim_toby, left_on='critic', right_on='critic')
)

ratings_t

Unnamed: 0,critic,title,rating,similarity
0,Jack Matthews,Lady in the Water,3.0,0.662849
1,Jack Matthews,The Night Listener,3.0,0.662849
2,Mick LaSalle,Lady in the Water,3.0,0.924473
3,Mick LaSalle,Just My Luck,2.0,0.924473
4,Mick LaSalle,The Night Listener,3.0,0.924473
5,Claudia Puig,Just My Luck,3.0,0.893405
6,Claudia Puig,The Night Listener,4.5,0.893405
7,Lisa Rose,Lady in the Water,2.5,0.991241
8,Lisa Rose,Just My Luck,3.0,0.991241
9,Lisa Rose,The Night Listener,3.0,0.991241


In [25]:
# Weight every rating by that critic's similarity to Toby.
# Plain column arithmetic replaces the row-wise `apply`: same values,
# no Python-level loop, and it also works when the frame has no rows.
ratings_t['sim_rating'] = ratings_t['rating'] * ratings_t['similarity']

ratings_t

Unnamed: 0,critic,title,rating,similarity,sim_rating
0,Jack Matthews,Lady in the Water,3.0,0.662849,1.988547
1,Jack Matthews,The Night Listener,3.0,0.662849,1.988547
2,Mick LaSalle,Lady in the Water,3.0,0.924473,2.77342
3,Mick LaSalle,Just My Luck,2.0,0.924473,1.848947
4,Mick LaSalle,The Night Listener,3.0,0.924473,2.77342
5,Claudia Puig,Just My Luck,3.0,0.893405,2.680215
6,Claudia Puig,The Night Listener,4.5,0.893405,4.020323
7,Lisa Rose,Lady in the Water,2.5,0.991241,2.478102
8,Lisa Rose,Just My Luck,3.0,0.991241,2.973722
9,Lisa Rose,The Night Listener,3.0,0.991241,2.973722


In [29]:
# Predicted rating per title = similarity-weighted average of the other
# critics' ratings: sum(rating * similarity) / sum(similarity).
# Grouped sums replace `groupby.apply` with a lambda, which newer pandas
# warns about when the function is also handed the grouping column.
weighted_sums = ratings_t.groupby('title')[['sim_rating', 'similarity']].sum()
result = weighted_sums['sim_rating'] / weighted_sums['similarity']
result

title
Just My Luck          2.530981
Lady in the Water     2.832550
The Night Listener    3.347790
dtype: float64

In [33]:
# Toby's own average rating serves as the bar: the new titles whose
# predicted rating is greater than this value are the ones to recommend.
toby_rows = ratings['critic'] == 'Toby'
ratings.loc[toby_rows, 'rating'].mean()

3.1666666666666665

### generate_recommendations function

In [34]:
def generate_recommendations(user, ratings_long=None, rating_matrix=None, similarity=None):
    """Predict ratings for the titles `user` has not rated yet.

    Each prediction is the similarity-weighted average of the ratings the
    other critics gave that title:
    ``sum(rating * similarity) / sum(similarity)``.

    Parameters
    ----------
    user : str
        Critic to predict for; must be a column of `rating_matrix` and of
        `similarity`.
    ratings_long : pandas.DataFrame, optional
        Long-format ratings with 'critic', 'title' and 'rating' columns.
        Defaults to the notebook-level `ratings`.
    rating_matrix : pandas.DataFrame, optional
        Title x critic matrix with NaN where a critic has not rated a title.
        Defaults to the notebook-level `movie_ratings`.
    similarity : pandas.DataFrame, optional
        Critic x critic similarity matrix, indexed by critic name.
        Defaults to the notebook-level `sim_users`.

    Returns
    -------
    pandas.Series
        Predicted rating indexed by title. Empty when there is nothing to
        predict (the user has rated every title, or no usable neighbour
        ratings exist).

    Raises
    ------
    KeyError
        If `user` is not a column of `rating_matrix` or `similarity`.
    """
    # Fall back to the frames built earlier in the notebook so existing
    # one-argument calls keep working.
    if ratings_long is None:
        ratings_long = ratings
    if rating_matrix is None:
        rating_matrix = movie_ratings
    if similarity is None:
        similarity = sim_users

    # Titles the user has not rated.
    unrated_titles = rating_matrix.loc[rating_matrix[user].isnull()].index

    # Ratings the other critics gave to those titles.
    neighbor_ratings = ratings_long[ratings_long['title'].isin(unrated_titles)]

    # Look up each critic's similarity to `user`. Rows without a usable
    # similarity or rating are dropped so they cannot turn a whole title's
    # prediction into NaN.
    neighbor_ratings = neighbor_ratings.assign(
        similarity=neighbor_ratings['critic'].map(similarity[user])
    ).dropna(subset=['rating', 'similarity'])

    # Nothing to predict: return an empty result instead of raising.
    if neighbor_ratings.empty:
        return pd.Series([], index=pd.Index([], name='title'), dtype='float64')

    # Weight each rating by the critic's similarity to `user`.
    neighbor_ratings = neighbor_ratings.assign(
        sim_rating=neighbor_ratings['rating'] * neighbor_ratings['similarity']
    )

    # Per title: sum of weighted ratings divided by the sum of the weights.
    totals = neighbor_ratings.groupby('title')[['sim_rating', 'similarity']].sum()
    return totals['sim_rating'] / totals['similarity']

In [38]:
# Print the predicted ratings for every critic in the data set.
for user in ratings['critic'].unique():
    print('Recommendations for {}'.format(user))
    # Critics with no unrated title (or no column in the matrix) have nothing
    # to predict. Skip them explicitly instead of hiding every possible error
    # behind a bare `except`, so genuine failures still surface.
    if user not in movie_ratings.columns or not movie_ratings[user].isnull().any():
        continue
    # Use a dedicated name so the `result` computed for Toby above is kept.
    recommendations = generate_recommendations(user)
    print(recommendations)

Recommendations for Jack Matthews
title
Just My Luck    2.150559
dtype: float64
Recommendations for Mick LaSalle
Recommendations for Claudia Puig
title
Lady in the Water    2.808131
dtype: float64
Recommendations for Lisa Rose
Recommendations for Toby
title
Just My Luck          2.530981
Lady in the Water     2.832550
The Night Listener    3.347790
dtype: float64
Recommendations for Gene Seymour
