# Movie Recommender Systems 

A basic recommendation system that suggests movies/items most similiar to movie choice.

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

## Get the Data

In [2]:
#importing movie metadata
meta= pd.read_csv("movies_metadata.csv")
meta= meta[['id', 'original_title', 'original_language']]
meta= meta.rename(columns={'id':'movieId'})
meta = meta[meta['original_language']== 'en'] #just want movies in English
meta.head()

  meta= pd.read_csv("movies_metadata.csv")


Unnamed: 0,movieId,original_title,original_language
0,862,Toy Story,en
1,8844,Jumanji,en
2,15602,Grumpier Old Men,en
3,31357,Waiting to Exhale,en
4,11862,Father of the Bride Part II,en


In [3]:
#importing movie ratings
ratings= pd.read_csv("ratings_small.csv")
ratings= ratings[['userId', 'movieId', 'rating']]

In [4]:
#convert data types before merging
meta.movieId =pd.to_numeric(meta.movieId, errors='coerce')
ratings.movieId = pd.to_numeric(ratings.movieId, errors= 'coerce')

We can merge them together:

In [5]:
#create a single dataset merging the previous 2
data= pd.merge(ratings, meta, on='movieId', how='inner')
data.head()

Unnamed: 0,userId,movieId,rating,original_title,original_language
0,1,1371,2.5,Rocky III,en
1,4,1371,4.0,Rocky III,en
2,7,1371,3.0,Rocky III,en
3,19,1371,4.0,Rocky III,en
4,21,1371,3.0,Rocky III,en


In [6]:
#movie matrix so that I can use the recommender function later
matrix= data.pivot_table(index='userId', columns='original_title', values='rating')
matrix.head()

original_title,!Women Art Revolution,'Gator Bait,'Twas the Night Before Christmas,10 Items or Less,10 Things I Hate About You,"10,000 BC",11'09''01 - September 11,12 + 1,12 Angry Men,1408,...,Young and Innocent,Zaat,Zabriskie Point,Zapped Again!,Zardoz,Zodiac,eXistenZ,xXx,¡Three Amigos!,Мой сводный брат Франкенштейн
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,3.5,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,3.5,,,,,,,,,


In [7]:
matrix.dtypes

original_title
!Women Art Revolution               float64
'Gator Bait                         float64
'Twas the Night Before Christmas    float64
10 Items or Less                    float64
10 Things I Hate About You          float64
                                     ...   
Zodiac                              float64
eXistenZ                            float64
xXx                                 float64
¡Three Amigos!                      float64
Мой сводный брат Франкенштейн       float64
Length: 2034, dtype: object

In [8]:
matrix = matrix.notnull()
matrix.head()

original_title,!Women Art Revolution,'Gator Bait,'Twas the Night Before Christmas,10 Items or Less,10 Things I Hate About You,"10,000 BC",11'09''01 - September 11,12 + 1,12 Angry Men,1408,...,Young and Innocent,Zaat,Zabriskie Point,Zapped Again!,Zardoz,Zodiac,eXistenZ,xXx,¡Three Amigos!,Мой сводный брат Франкенштейн
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False


In [9]:
frequent_itemsets = apriori(matrix, min_support=0.07, use_colnames=True)

In [10]:
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.131148,"(20,000 Leagues Under the Sea)"
1,0.129657,(2001: A Space Odyssey)
2,0.070045,(28 Weeks Later)
3,0.298063,(48 Hrs.)
4,0.298063,(5 Card Stud)
...,...,...
18510,0.070045,"(Terminator 3: Rise of the Machines, The Milli..."
18511,0.070045,"(To Kill a Mockingbird, The Million Dollar Hot..."
18512,0.070045,"(Terminator 3: Rise of the Machines, The Milli..."
18513,0.070045,"(Terminator 3: Rise of the Machines, To Kill a..."


In [12]:
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(48 Hrs.),"(20,000 Leagues Under the Sea)",0.298063,0.131148,0.076006,0.255,1.944375,0.036916,1.166245,0.691936
1,"(20,000 Leagues Under the Sea)",(48 Hrs.),0.131148,0.298063,0.076006,0.579545,1.944375,0.036916,1.669473,0.559009
2,"(20,000 Leagues Under the Sea)",(A Nightmare on Elm Street),0.131148,0.268256,0.081967,0.625,2.329861,0.046786,1.951316,0.656947
3,(A Nightmare on Elm Street),"(20,000 Leagues Under the Sea)",0.268256,0.131148,0.081967,0.305556,2.329861,0.046786,1.251148,0.780041
4,"(20,000 Leagues Under the Sea)",(Back to the Future Part II),0.131148,0.211624,0.077496,0.590909,2.792254,0.049742,1.92714,0.738752


In [13]:
rules[ (rules['lift'] >= 6) &
       (rules['confidence'] >= 0.8) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
86297,"(Live and Let Die, Psycho)","(Big Fish, Rain Man)",0.086438,0.132638,0.071535,0.827586,6.239442,0.06007,5.0307,0.919182
86785,"(Big Fish, Night on Earth)","(Rain Man, Psycho)",0.09389,0.131148,0.076006,0.809524,6.172619,0.063693,4.561475,0.924826
164137,"(Titanic, Big Fish, Night on Earth)","(A Nightmare on Elm Street, Psycho)",0.083458,0.131148,0.070045,0.839286,6.399554,0.059099,5.406193,0.920567
169448,"(Romeo + Juliet, Lost in Translation, A Nightm...","(Reservoir Dogs, The Conversation)",0.077496,0.149031,0.070045,0.903846,6.064808,0.058495,8.850075,0.905269
205927,"(Titanic, Big Fish, Night on Earth)","(Rain Man, Psycho)",0.083458,0.131148,0.071535,0.857143,6.535714,0.06059,6.081967,0.924119
206369,"(Romeo + Juliet, Rain Man, Psycho)","(Titanic, Big Fish)",0.077496,0.150522,0.070045,0.903846,6.00476,0.05838,8.834575,0.903482
207389,"(To Kill a Mockingbird, The Conversation, Psycho)","(Silent Hill, Big Fish)",0.086438,0.125186,0.070045,0.810345,6.473112,0.059224,4.612654,0.925515
236108,"(Syriana, Silent Hill, Titanic)","(To Kill a Mockingbird, Reservoir Dogs)",0.09538,0.138599,0.080477,0.84375,6.087702,0.067257,5.512966,0.923851
240678,"(48 Hrs., Reservoir Dogs, Back to the Future P...","(Titanic, Rain Man, A Nightmare on Elm Street)",0.086438,0.135618,0.073025,0.844828,6.229443,0.061303,5.570459,0.9189
250419,"(Big Fish, Reservoir Dogs, Back to the Future ...","(Titanic, Rain Man, A Nightmare on Elm Street)",0.080477,0.135618,0.070045,0.87037,6.417786,0.059131,6.668086,0.918066


In [12]:
rules.sum()

antecedent support    4.952688e+04
consequent support    4.952688e+04
support               2.211741e+04
confidence            1.514094e+05
lift                  9.217685e+05
leverage              1.460901e+04
conviction                     inf
dtype: float64

In [14]:
rules["antecedent_len"] = rules["antecedents"].apply(lambda x: len(x))
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
0,(48 Hrs.),"(20,000 Leagues Under the Sea)",0.298063,0.131148,0.076006,0.255000,1.944375,0.036916,1.166245,0.691936,1
1,"(20,000 Leagues Under the Sea)",(48 Hrs.),0.131148,0.298063,0.076006,0.579545,1.944375,0.036916,1.669473,0.559009,1
2,"(20,000 Leagues Under the Sea)",(A Nightmare on Elm Street),0.131148,0.268256,0.081967,0.625000,2.329861,0.046786,1.951316,0.656947,1
3,(A Nightmare on Elm Street),"(20,000 Leagues Under the Sea)",0.268256,0.131148,0.081967,0.305556,2.329861,0.046786,1.251148,0.780041,1
4,"(20,000 Leagues Under the Sea)",(Back to the Future Part II),0.131148,0.211624,0.077496,0.590909,2.792254,0.049742,1.927140,0.738752,1
...,...,...,...,...,...,...,...,...,...,...,...
282091,(The Million Dollar Hotel),"(To Kill a Mockingbird, Terminator 3: Rise of ...",0.463487,0.087928,0.074516,0.160772,1.828438,0.033762,1.086798,0.844500,1
282092,(The Conversation),"(To Kill a Mockingbird, Terminator 3: Rise of ...",0.292101,0.081967,0.074516,0.255102,3.112245,0.050573,1.232428,0.958737,1
282093,(The Hours),"(To Kill a Mockingbird, Terminator 3: Rise of ...",0.301043,0.083458,0.074516,0.247525,2.965877,0.049391,1.218037,0.948316,1
282094,(Silent Hill),"(To Kill a Mockingbird, Terminator 3: Rise of ...",0.320417,0.083458,0.074516,0.232558,2.786545,0.047774,1.194283,0.943421,1


In [15]:
rules[ (rules['antecedent_len'] >= 3) &
       (rules['confidence'] > 0.75) &
       (rules['lift'] > 1.2) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
36210,"(48 Hrs., 5 Card Stud, Terminator 3: Rise of t...",(License to Wed),0.081967,0.301043,0.071535,0.872727,2.899010,0.046859,5.491803,0.713542,3
36211,"(48 Hrs., 5 Card Stud, License to Wed)",(Terminator 3: Rise of the Machines),0.081967,0.482861,0.071535,0.872727,1.807407,0.031956,4.063232,0.486607,3
36212,"(48 Hrs., Terminator 3: Rise of the Machines, ...",(5 Card Stud),0.080477,0.298063,0.071535,0.888889,2.982222,0.047548,6.317437,0.722853,3
36224,"(48 Hrs., 5 Card Stud, License to Wed)",(The 39 Steps),0.081967,0.433681,0.070045,0.854545,1.970447,0.034497,3.893443,0.536474,3
36226,"(48 Hrs., The 39 Steps, License to Wed)",(5 Card Stud),0.076006,0.298063,0.070045,0.921569,3.091863,0.047390,8.949702,0.732224,3
...,...,...,...,...,...,...,...,...,...,...,...
281992,"(To Kill a Mockingbird, The Million Dollar Hot...","(Rain Man, Terminator 3: Rise of the Machines)",0.092399,0.226528,0.074516,0.806452,3.560059,0.053585,3.996274,0.792315,5
281993,"(To Kill a Mockingbird, The Million Dollar Hot...","(Silent Hill, Terminator 3: Rise of the Machines)",0.093890,0.225037,0.074516,0.793651,3.526753,0.053387,3.755589,0.790691,5
281994,"(To Kill a Mockingbird, The Million Dollar Hot...","(Terminator 3: Rise of the Machines, The Hours)",0.093890,0.230999,0.074516,0.793651,3.435740,0.052827,3.726700,0.782401,5
281995,"(To Kill a Mockingbird, The Million Dollar Hot...","(Terminator 3: Rise of the Machines, The Conve...",0.096870,0.229508,0.074516,0.769231,3.351648,0.052283,3.338798,0.776898,5


In [16]:
rules[rules['antecedents'] == {'Terminator 3: Rise of the Machines'}]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
16,(Terminator 3: Rise of the Machines),"(20,000 Leagues Under the Sea)",0.482861,0.131148,0.096870,0.200617,1.529707,0.033544,1.086904,0.669608,1
37,(Terminator 3: Rise of the Machines),(2001: A Space Odyssey),0.482861,0.129657,0.081967,0.169753,1.309245,0.019361,1.048294,0.456746,1
149,(Terminator 3: Rise of the Machines),(48 Hrs.),0.482861,0.298063,0.229508,0.475309,1.594660,0.085585,1.337810,0.721097,1
273,(Terminator 3: Rise of the Machines),(5 Card Stud),0.482861,0.298063,0.192250,0.398148,1.335787,0.048327,1.166296,0.486093,1
317,(Terminator 3: Rise of the Machines),(A Brief History of Time),0.482861,0.098361,0.076006,0.157407,1.600309,0.028511,1.070077,0.725377,1
...,...,...,...,...,...,...,...,...,...,...,...
281459,(Terminator 3: Rise of the Machines),"(The Million Dollar Hotel, Reservoir Dogs, The...",0.482861,0.078987,0.071535,0.148148,1.875611,0.033395,1.081190,0.902738,1
281585,(Terminator 3: Rise of the Machines),"(The Million Dollar Hotel, The Conversation, T...",0.482861,0.080477,0.070045,0.145062,1.802526,0.031186,1.075543,0.860936,1
281837,(Terminator 3: Rise of the Machines),"(The Million Dollar Hotel, The Conversation, T...",0.482861,0.077496,0.070045,0.145062,1.871854,0.032625,1.079030,0.900668,1
281963,(Terminator 3: Rise of the Machines),"(To Kill a Mockingbird, The Conversation, The ...",0.482861,0.083458,0.070045,0.145062,1.738150,0.029746,1.072057,0.821203,1


In [17]:
rules[rules['consequents'] == {'Terminator 3: Rise of the Machines'}]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len
17,"(20,000 Leagues Under the Sea)",(Terminator 3: Rise of the Machines),0.131148,0.482861,0.096870,0.738636,1.529707,0.033544,1.978617,0.398549,1
36,(2001: A Space Odyssey),(Terminator 3: Rise of the Machines),0.129657,0.482861,0.081967,0.632184,1.309245,0.019361,1.405971,0.271389,1
148,(48 Hrs.),(Terminator 3: Rise of the Machines),0.298063,0.482861,0.229508,0.770000,1.594660,0.085585,2.248429,0.531254,1
272,(5 Card Stud),(Terminator 3: Rise of the Machines),0.298063,0.482861,0.192250,0.645000,1.335787,0.048327,1.456728,0.358120,1
316,(A Brief History of Time),(Terminator 3: Rise of the Machines),0.098361,0.482861,0.076006,0.772727,1.600309,0.028511,2.275410,0.416043,1
...,...,...,...,...,...,...,...,...,...,...,...
281346,"(The Million Dollar Hotel, Reservoir Dogs, The...",(Terminator 3: Rise of the Machines),0.078987,0.482861,0.071535,0.905660,1.875611,0.033395,5.481669,0.506877,6
281472,"(The Million Dollar Hotel, The Conversation, T...",(Terminator 3: Rise of the Machines),0.080477,0.482861,0.070045,0.870370,1.802526,0.031186,3.989355,0.484189,6
281724,"(The Million Dollar Hotel, The Conversation, T...",(Terminator 3: Rise of the Machines),0.077496,0.482861,0.070045,0.903846,1.871854,0.032625,5.378241,0.504898,6
281850,"(To Kill a Mockingbird, The Conversation, The ...",(Terminator 3: Rise of the Machines),0.083458,0.482861,0.070045,0.839286,1.738150,0.029746,3.217751,0.463345,6
