# Movie Recommender Systems 

A basic recommendation system that suggests the movies/items most similar to a chosen movie.

## Import Libraries

In [38]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

## Get the Data

In [31]:
# Load the movie metadata, keep the id/title/language columns, and expose the
# id under the name `movieId` so it lines up with the ratings table.
meta = (
    pd.read_csv("movies_metadata.csv")[['id', 'original_title', 'original_language']]
    .rename(columns={'id': 'movieId'})
)
# Only English-language movies are kept.
meta = meta[meta['original_language'] == 'en']
meta.head()

Unnamed: 0,movieId,original_title,original_language
0,862,Toy Story,en
1,8844,Jumanji,en
2,15602,Grumpier Old Men,en
3,31357,Waiting to Exhale,en
4,11862,Father of the Bride Part II,en


In [32]:
# Load the user ratings, keeping only the user, movie and rating columns.
ratings = pd.read_csv("ratings_small.csv")[['userId', 'movieId', 'rating']]

In [33]:
# Convert the join key to a numeric type on both sides before merging.
# errors='coerce' turns unparseable IDs into NaN. Those rows are dropped here
# because pandas matches NaN keys against each other during a merge, which
# would pair up unrelated rows. Casting to int64 gives both keys one dtype.
meta['movieId'] = pd.to_numeric(meta['movieId'], errors='coerce')
meta = meta.dropna(subset=['movieId']).astype({'movieId': 'int64'})

ratings['movieId'] = pd.to_numeric(ratings['movieId'], errors='coerce')
ratings = ratings.dropna(subset=['movieId']).astype({'movieId': 'int64'})

We can merge them together:

In [34]:
# Attach title/language metadata to every rating. The inner join keeps only
# ratings whose movieId also appears in the metadata.
# NOTE(review): assumes both tables use the same ID space for movieId — TODO confirm
data = ratings.merge(meta, on='movieId', how='inner')
data.head()

Unnamed: 0,userId,movieId,rating,original_title,original_language
0,1,1371,2.5,Rocky III,en
1,4,1371,4.0,Rocky III,en
2,7,1371,3.0,Rocky III,en
3,19,1371,4.0,Rocky III,en
4,21,1371,3.0,Rocky III,en


In [35]:
# Build the user x title ratings matrix used by the recommender steps below.
# pivot_table's default aggregation averages duplicate user/title pairs.
matrix = data.pivot_table(
    values='rating',
    index='userId',
    columns='original_title',
)


original_title,!Women Art Revolution,'Gator Bait,'Twas the Night Before Christmas,10 Items or Less,10 Things I Hate About You,"10,000 BC",11'09''01 - September 11,12 + 1,12 Angry Men,1408,...,Young and Innocent,Zaat,Zabriskie Point,Zapped Again!,Zardoz,Zodiac,eXistenZ,xXx,¡Three Amigos!,Мой сводный брат Франкенштейн
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,3.5,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,3.5,,,,,,,,,


In [44]:
# Turn the ratings matrix into a boolean "user rated this title" matrix.
# The dtype guard keeps the cell idempotent: calling notnull() again on an
# already-boolean frame would mark every entry True and corrupt the supports.
if not (matrix.dtypes == bool).all():
    matrix = matrix.notnull()

In [45]:
# Mine sets of titles that appear together in at least 7% of the user rows.
# use_colnames=True reports the column names (titles) instead of column indices.
frequent_itemsets = apriori(
    matrix,
    min_support=0.07,
    use_colnames=True,
)

In [50]:
# Preview the first itemsets rather than rendering the whole table.
frequent_itemsets.head(10)

Unnamed: 0,support,itemsets
0,0.131148,"(20,000 Leagues Under the Sea)"
1,0.129657,(2001: A Space Odyssey)
2,0.070045,(28 Weeks Later)
3,0.298063,(48 Hrs.)
4,0.298063,(5 Card Stud)
5,0.098361,(A Brief History of Time)
6,0.152012,(A Clockwork Orange)
7,0.268256,(A Nightmare on Elm Street)
8,0.196721,(A River Runs Through It)
9,0.089419,(A Time to Kill)


In [46]:
# Derive association rules from the frequent itemsets, keeping rules whose
# lift is at least 1.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(48 Hrs.),"(20,000 Leagues Under the Sea)",0.298063,0.131148,0.076006,0.255,1.944375,0.036916,1.166245
1,"(20,000 Leagues Under the Sea)",(48 Hrs.),0.131148,0.298063,0.076006,0.579545,1.944375,0.036916,1.669473
2,"(20,000 Leagues Under the Sea)",(A Nightmare on Elm Street),0.131148,0.268256,0.081967,0.625,2.329861,0.046786,1.951316
3,(A Nightmare on Elm Street),"(20,000 Leagues Under the Sea)",0.268256,0.131148,0.081967,0.305556,2.329861,0.046786,1.251148
4,(Back to the Future Part II),"(20,000 Leagues Under the Sea)",0.211624,0.131148,0.077496,0.366197,2.792254,0.049742,1.370856


In [47]:
# Show only the strongest rules: lift of at least 6 and confidence of at least 0.8.
high_lift = rules['lift'] >= 6
high_confidence = rules['confidence'] >= 0.8
rules[high_lift & high_confidence]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
86294,"(Live and Let Die, Psycho)","(Big Fish, Rain Man)",0.086438,0.132638,0.071535,0.827586,6.239442,0.06007,5.0307
86782,"(Night on Earth, Big Fish)","(Rain Man, Psycho)",0.09389,0.131148,0.076006,0.809524,6.172619,0.063693,4.561475
164136,"(Night on Earth, Big Fish, Titanic)","(Psycho, A Nightmare on Elm Street)",0.083458,0.131148,0.070045,0.839286,6.399554,0.059099,5.406193
169447,"(Romeo + Juliet, Lost in Translation, A Nightm...","(The Conversation, Reservoir Dogs)",0.077496,0.149031,0.070045,0.903846,6.064808,0.058495,8.850075
205926,"(Night on Earth, Big Fish, Titanic)","(Rain Man, Psycho)",0.083458,0.131148,0.071535,0.857143,6.535714,0.06059,6.081967
206373,"(Romeo + Juliet, Rain Man, Psycho)","(Big Fish, Titanic)",0.077496,0.150522,0.070045,0.903846,6.00476,0.05838,8.834575
207392,"(The Conversation, To Kill a Mockingbird, Psycho)","(Big Fish, Silent Hill)",0.086438,0.125186,0.070045,0.810345,6.473112,0.059224,4.612654
236106,"(Silent Hill, Titanic, Syriana)","(To Kill a Mockingbird, Reservoir Dogs)",0.09538,0.138599,0.080477,0.84375,6.087702,0.067257,5.512966
240695,"(Back to the Future Part II, 48 Hrs., Reservoi...","(Titanic, Rain Man, A Nightmare on Elm Street)",0.086438,0.135618,0.073025,0.844828,6.229443,0.061303,5.570459
250419,"(Back to the Future Part II, Big Fish, Rain Man)","(Titanic, A Nightmare on Elm Street, Reservoir...",0.084948,0.131148,0.070045,0.824561,6.287281,0.058904,4.952459


In [49]:
# Column totals for the numeric rule metrics only. The antecedents/consequents
# columns hold itemsets, which cannot be summed and make a plain sum() raise
# on recent pandas versions.
rules.sum(numeric_only=True)

antecedent support    4.952688e+04
consequent support    4.952688e+04
support               2.211741e+04
confidence            1.514094e+05
lift                  9.217685e+05
leverage              1.460901e+04
conviction                     inf
dtype: float64