# Building A Simple Movie Recommender

## Load Data

In [7]:
import numpy as np
import pandas as pd

In [8]:
# Read the per-user ratings table from disk and preview its first rows.
ratings_file = 'ratings.csv'
data = pd.read_csv(ratings_file)
data.head(10)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
5,1,70,3.0,964982400
6,1,101,5.0,964980868
7,1,110,4.0,964982176
8,1,151,5.0,964984041
9,1,157,5.0,964984100


In [9]:
# Read the movie lookup table (movieId, title, genres) and preview it.
movies_file = "movies.csv"
movies_title_genre = pd.read_csv(movies_file)
movies_title_genre.head(10)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
5,6,Heat (1995),Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children
8,9,Sudden Death (1995),Action
9,10,GoldenEye (1995),Action|Adventure|Thriller


In [10]:
# Attach each rating's title and genres from the movie lookup table.
# Any title/genres columns left over from a previous run are dropped first
# (a no-op on the first run), so re-executing this cell does not stack
# `title_x`/`title_y` duplicates onto an already-merged frame and break the
# later `groupby('title')` cells.
data = data.drop(columns=['title', 'genres'], errors='ignore').merge(
    movies_title_genre,
    on='movieId',
    how='left',
    # Fail loudly if a movieId occurs twice in the lookup table; otherwise
    # the left join would silently duplicate rating rows.
    validate='many_to_one',
)
data.head(10)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
5,1,70,3.0,964982400,From Dusk Till Dawn (1996),Action|Comedy|Horror|Thriller
6,1,101,5.0,964980868,Bottle Rocket (1996),Adventure|Comedy|Crime|Romance
7,1,110,4.0,964982176,Braveheart (1995),Action|Drama|War
8,1,151,5.0,964984041,Rob Roy (1995),Action|Drama|Romance|War
9,1,157,5.0,964984100,Canadian Bacon (1995),Comedy|War


# Feature Engineering

## Average Rating

In [11]:
# Mean rating per movie title, kept as a one-column frame indexed by title.
mean_rating_by_title = data.groupby('title')['rating'].mean()
Average_ratings = mean_rating_by_title.to_frame()
Average_ratings.head(10)

Unnamed: 0_level_0,rating
title,Unnamed: 1_level_1
'71 (2014),4.0
'Hellboy': The Seeds of Creation (2004),4.0
'Round Midnight (1986),3.5
'Salem's Lot (2004),5.0
'Til There Was You (1997),4.0
'Tis the Season for Love (2015),1.5
"'burbs, The (1989)",3.176471
'night Mother (1986),3.0
(500) Days of Summer (2009),3.666667
*batteries not included (1987),3.285714


## Total Number of Ratings

In [12]:
# Number of ratings each title received; the assignment aligns on the
# title index that Average_ratings already uses.
rating_counts = data.groupby('title')['rating'].count()
Average_ratings['Total Ratings'] = rating_counts
Average_ratings.head(10)

Unnamed: 0_level_0,rating,Total Ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
'71 (2014),4.0,1
'Hellboy': The Seeds of Creation (2004),4.0,1
'Round Midnight (1986),3.5,2
'Salem's Lot (2004),5.0,1
'Til There Was You (1997),4.0,2
'Tis the Season for Love (2015),1.5,1
"'burbs, The (1989)",3.176471,17
'night Mother (1986),3.0,1
(500) Days of Summer (2009),3.666667,42
*batteries not included (1987),3.285714,7


## Calculating the Correlation

In [13]:
# Wide user x title matrix of ratings. Cells are NaN where a user has no
# rating for a title; repeated (userId, title) pairs are averaged, which is
# pivot_table's default aggregation made explicit here.
movie_user = data.pivot_table(
    values='rating',
    index='userId',
    columns='title',
    aggfunc='mean',
)

In [35]:
# Preview the first ten users' rows of the user x title matrix.
movie_user.iloc[:10]

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,4.0,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,1.0,,,
10,,,,,,,,,,,...,,,,,,,,,,


In [40]:
# Correlate every title's rating column with the ratings given to
# 'Toy Story (1995)'. Titles for which no correlation can be computed
# come back as NaN.
toy_story_ratings = movie_user['Toy Story (1995)']
correlations = movie_user.corrwith(toy_story_ratings)
correlations.head(10)

title
'71 (2014)                                      NaN
'Hellboy': The Seeds of Creation (2004)         NaN
'Round Midnight (1986)                          NaN
'Salem's Lot (2004)                             NaN
'Til There Was You (1997)                       NaN
'Tis the Season for Love (2015)                 NaN
'burbs, The (1989)                         0.240563
'night Mother (1986)                            NaN
(500) Days of Summer (2009)                0.353833
*batteries not included (1987)            -0.427425
dtype: float64

In [37]:
# Build the candidate table: one row per title holding its correlation with
# the reference movie and the number of ratings behind that title.
#
# `to_frame('Correlation')` names the column directly. The previous
# `pd.DataFrame(correlations, columns=['Correlation'])` only works while the
# Series is unnamed; a named Series would be reindexed to the requested
# column and yield all-NaN values.
# The steps are chained without `inplace=True`, so the cell always rebuilds
# `recommendation` from `correlations` and is safe to re-run.
recommendation = (
    correlations
    .to_frame('Correlation')
    .dropna()
    .join(Average_ratings['Total Ratings'])
)
recommendation.head()

Unnamed: 0_level_0,Correlation,Total Ratings
title,Unnamed: 1_level_1,Unnamed: 2_level_1
"'burbs, The (1989)",0.240563,17
(500) Days of Summer (2009),0.353833,42
*batteries not included (1987),-0.427425,7
10 Cent Pistol (2015),1.0,2
10 Cloverfield Lane (2016),-0.285732,14


# Building The Recommender

## Testing The Recommendation System

In [41]:
# Keep only titles with more than 100 ratings, then rank them from the
# highest to the lowest correlation.
has_enough_ratings = recommendation['Total Ratings'] > 100
recc = recommendation.loc[has_enough_ratings].sort_values(by='Correlation', ascending=False)

In [48]:
# Attach movieId and genres to the ranked recommendations.
#
# Titles are not guaranteed to be unique in the lookup table, and the
# statistics above were already aggregated per title. Keep one lookup row
# per title (the first) so the left join cannot emit the same
# recommendation more than once.
movie_info_by_title = movies_title_genre.drop_duplicates(subset='title')

# Drop movieId/genres left over from a previous run (a no-op on the first
# run) so re-executing this cell does not produce `movieId_x`/`movieId_y`.
recc = recc.drop(columns=['movieId', 'genres'], errors='ignore').merge(
    movie_info_by_title,
    on='title',
    how='left',
)
recc.head(10)

Unnamed: 0,title,Correlation,Total Ratings,movieId,genres
0,Toy Story (1995),1.0,215,1,Adventure|Animation|Children|Comedy|Fantasy
1,"Incredibles, The (2004)",0.643301,125,8961,Action|Adventure|Animation|Children|Comedy
2,Finding Nemo (2003),0.618701,141,6377,Adventure|Animation|Children|Comedy
3,Aladdin (1992),0.611892,183,588,Adventure|Animation|Children|Comedy|Musical
4,"Monsters, Inc. (2001)",0.490231,132,4886,Adventure|Animation|Children|Comedy|Fantasy
5,Mrs. Doubtfire (1993),0.446261,144,500,Comedy|Drama
6,"Amelie (Fabuleux destin d'Amélie Poulain, Le) ...",0.438237,120,4973,Comedy|Romance
7,American Pie (1999),0.420117,103,2706,Comedy|Romance
8,Die Hard: With a Vengeance (1995),0.410939,144,165,Action|Crime|Thriller
9,E.T. the Extra-Terrestrial (1982),0.409216,122,1097,Children|Drama|Sci-Fi
