# MOVIE RECOMMENDER ENGINE

## Library

In [1]:
%matplotlib inline
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import seaborn as sns

from IPython.display import Image, HTML, display
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from surprise.model_selection import cross_validate
from surprise import Reader, SVD, Dataset


import warnings; warnings.simplefilter('ignore')

## Preprocess DataFrame

In [2]:
md = pd.read_csv('../the-movies-dataset/movies_metadata.csv')

In [3]:
md.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 24 columns):
adult                    45466 non-null object
belongs_to_collection    4494 non-null object
budget                   45466 non-null object
genres                   45466 non-null object
homepage                 7782 non-null object
id                       45466 non-null object
imdb_id                  45449 non-null object
original_language        45455 non-null object
original_title           45466 non-null object
overview                 44512 non-null object
popularity               45461 non-null object
poster_path              45080 non-null object
production_companies     45463 non-null object
production_countries     45463 non-null object
release_date             45379 non-null object
revenue                  45460 non-null float64
runtime                  45203 non-null float64
spoken_languages         45460 non-null object
status                   45379 non-null objec

In [4]:
md.head().transpose()[:20]

Unnamed: 0,0,1,2,3,4
adult,False,False,False,False,False
belongs_to_collection,"{'id': 10194, 'name': 'Toy Story Collection', ...",,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",,"{'id': 96871, 'name': 'Father of the Bride Col..."
budget,30000000,65000000,0,16000000,0
genres,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...","[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...","[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...","[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...","[{'id': 35, 'name': 'Comedy'}]"
homepage,http://toystory.disney.com/toy-story,,,,
id,862,8844,15602,31357,11862
imdb_id,tt0114709,tt0113497,tt0113228,tt0114885,tt0113041
original_language,en,en,en,en,en
original_title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II
overview,"Led by Woody, Andy's toys live happily in his ...",When siblings Judy and Peter discover an encha...,A family wedding reignites the ancient feud be...,"Cheated on, mistreated and stepped on, the wom...",Just when George Banks has recovered from his ...


### HTML imgage for views

In [5]:
base_poster_url = 'http://image.tmdb.org/t/p/w185/'
md['poster_path'] = "<img src='" + base_poster_url + md['poster_path'] + "' style='height:100px;'>"

In [6]:
md['release_date'][:20]

0     1995-10-30
1     1995-12-15
2     1995-12-22
3     1995-12-22
4     1995-02-10
5     1995-12-15
6     1995-12-15
7     1995-12-22
8     1995-12-22
9     1995-11-16
10    1995-11-17
11    1995-12-22
12    1995-12-22
13    1995-12-22
14    1995-12-22
15    1995-11-22
16    1995-12-13
17    1995-12-09
18    1995-11-10
19    1995-11-21
Name: release_date, dtype: object

### Split date into year values

In [7]:
# when split release_date into 3 element array just take the first element which mean the year 
md['year'] = pd.to_datetime(md['release_date'], errors = 'coerce').apply(lambda x: str(x).split('-')[0] if x!= np.nan else np.nan)

In [8]:
pd.DataFrame(data = md['genres']).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45466 entries, 0 to 45465
Data columns (total 1 columns):
genres    45466 non-null object
dtypes: object(1)
memory usage: 355.3+ KB


In [9]:
md.loc[0]['genres']

"[{'id': 16, 'name': 'Animation'}, {'id': 35, 'name': 'Comedy'}, {'id': 10751, 'name': 'Family'}]"

In [10]:
md.loc[md['genres'].notnull() == False]

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year


In [11]:
md['genres'] = md['genres'].apply(ast.literal_eval)

In [12]:
md['genres'] = md['genres'].apply(lambda x: [i['name'] for i in x] if isinstance(x, list) else [])

### Get List Genres

In [13]:
md['genres'][:20]

0            [Animation, Comedy, Family]
1           [Adventure, Fantasy, Family]
2                      [Romance, Comedy]
3               [Comedy, Drama, Romance]
4                               [Comedy]
5       [Action, Crime, Drama, Thriller]
6                      [Comedy, Romance]
7     [Action, Adventure, Drama, Family]
8          [Action, Adventure, Thriller]
9          [Adventure, Action, Thriller]
10              [Comedy, Drama, Romance]
11                      [Comedy, Horror]
12        [Family, Animation, Adventure]
13                      [History, Drama]
14                   [Action, Adventure]
15                        [Drama, Crime]
16                      [Drama, Romance]
17                       [Crime, Comedy]
18            [Crime, Comedy, Adventure]
19               [Action, Comedy, Crime]
Name: genres, dtype: object

In [14]:
s = md.apply(lambda x: pd.Series(x['genres']), axis = 1)

In [15]:
# Series in Pandas - series is one-dimensional labled array of holding any type.
# if data is an nArray : data = np.array(['a','b','c','d']) => s = pd.Series(data)
# => output : 
# 0   a
# 1   b
# 2   c
# 3   d
s[:20]

Unnamed: 0,0,1,2,3,4,5,6,7
0,Animation,Comedy,Family,,,,,
1,Adventure,Fantasy,Family,,,,,
2,Romance,Comedy,,,,,,
3,Comedy,Drama,Romance,,,,,
4,Comedy,,,,,,,
5,Action,Crime,Drama,Thriller,,,,
6,Comedy,Romance,,,,,,
7,Action,Adventure,Drama,Family,,,,
8,Action,Adventure,Thriller,,,,,
9,Adventure,Action,Thriller,,,,,


In [16]:
s = pd.DataFrame(data = s)

In [17]:
s = s.stack().reset_index(level=1, drop = True)

### List genres

In [18]:
s[:5]

0    Animation
0       Comedy
0       Family
1    Adventure
1      Fantasy
dtype: object

In [19]:
s.name = 'genre'

In [20]:
gen_md = md.drop('genres', axis = 1).join(s)

In [21]:
gen_md[:5]

Unnamed: 0,adult,belongs_to_collection,budget,homepage,id,imdb_id,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,genre
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Animation
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Comedy
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Family
1,False,,65000000,,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.0155,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,Adventure
1,False,,65000000,,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.0155,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,Fantasy


In [22]:
gen_md['genre'].value_counts().shape[0]

32

In [23]:
pop_gen = pd.DataFrame(gen_md['genre'].value_counts()).reset_index()

In [24]:
pop_gen.column = ['genre','movies']

In [25]:
pop_gen['index'][:5]

0       Drama
1      Comedy
2    Thriller
3     Romance
4      Action
Name: index, dtype: object

## Demographic Filtering - Weighted Rating

In [26]:
# IMDB Top Movies Chart Fomula : 
# WR = (v/(v+m).R) + (m/(v+m).C)
# v : số  lượng người bình chọn cho 1 bộ phim 
# m : số lượng bình chọn cần thiếu để được xét vào list - Một phim phải 
# có lượng vote > 95% các phim khác trong list
# R : rating  

In [27]:
vote_counts = md[md['vote_count'].notnull()]['vote_count']

In [28]:
vote_averages = md[md['vote_average'].notnull()]['vote_average']

In [29]:
# mean() function can be used to calculate 
# mean/average of a given list of numbers

C = vote_averages.mean()

In [30]:
# rating trung binh tren toan tap dataset
C

5.618207215134184

In [31]:
# Tính phân vị - tìm giá trị m sao cho 95% giá trị trong list bé hơn m
# 5% còn lại lớn hơn nó 
m = vote_counts.quantile(0.95)

In [32]:
m

434.0

In [33]:
qualified = md[(md['vote_count'] >= m) & (md['vote_count'].notnull()) 
            & (md['vote_average'].notnull())]

In [34]:
qualified = qualified[['poster_path','title','year','vote_count','vote_average','genres']]

In [35]:
qualified.shape

(2274, 6)

In [36]:
# sumary : một phim cần phải đạt trên 434 votes để  được xét 
#        : điểm rating trung bình là 5.2 
#        : có 2274 phim đạt yêu cầu 

In [37]:
def weighted_rating(x,m,C):
    v = x['vote_count']
    R = x['vote_average']
    
    return (v/(m+v) * R) + (m/(m+v) * C)

In [38]:
qualified['wr'] = qualified.apply(lambda x: weighted_rating(x,m,C), axis=1)

In [39]:
# ascending : tăng dần 

qualified = qualified.sort_values('wr', ascending = False)

In [40]:

HTML(qualified.head(10).to_html(escape = False))

Unnamed: 0,poster_path,title,year,vote_count,vote_average,genres,wr
314,,The Shawshank Redemption,1994,8358.0,8.5,"[Drama, Crime]",8.357746
834,,The Godfather,1972,6024.0,8.5,"[Drama, Crime]",8.306334
12481,,The Dark Knight,2008,12269.0,8.3,"[Drama, Action, Crime, Thriller]",8.208376
2843,,Fight Club,1999,9678.0,8.3,[Drama],8.184899
292,,Pulp Fiction,1994,8670.0,8.3,"[Thriller, Crime]",8.172155
351,,Forrest Gump,1994,8147.0,8.2,"[Comedy, Drama, Romance]",8.069421
522,,Schindler's List,1993,4436.0,8.3,"[Drama, History, War]",8.061007
23673,,Whiplash,2014,4376.0,8.3,[Drama],8.058025
5481,,Spirited Away,2001,3968.0,8.3,"[Fantasy, Adventure, Animation, Family]",8.035598
1154,,The Empire Strikes Back,1980,5998.0,8.2,"[Adventure, Action, Science Fiction]",8.025793


In [41]:
gen_md[:5]

Unnamed: 0,adult,belongs_to_collection,budget,homepage,id,imdb_id,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,genre
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Animation
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Comedy
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.9469,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,Family
1,False,,65000000,,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.0155,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,Adventure
1,False,,65000000,,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.0155,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,Fantasy


In [42]:
gen_md.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 93548 entries, 0 to 45465
Data columns (total 25 columns):
adult                    93548 non-null object
belongs_to_collection    11162 non-null object
budget                   93548 non-null object
homepage                 16556 non-null object
id                       93548 non-null object
imdb_id                  93513 non-null object
original_language        93531 non-null object
original_title           93548 non-null object
overview                 92218 non-null object
popularity               93531 non-null object
poster_path              93083 non-null object
production_companies     93539 non-null object
production_countries     93539 non-null object
release_date             93432 non-null object
revenue                  93527 non-null float64
runtime                  93194 non-null float64
spoken_languages         93527 non-null object
status                   93402 non-null object
tagline                  46805 non-null obj

### Filter By Genre

In [43]:
def build_chart(genre, percentile):
    df = gen_md[gen_md['genre'] == genre]
    vote_counts = df[df['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = df[df['vote_average'].notnull()]['vote_average'].astype('int')
    
    C = vote_averages.mean()
    m = vote_counts.quantile(percentile)
    
    qualified = df[(df['vote_count'] >= m) & (df['vote_count'].notnull()) & (df['vote_average'].notnull())]
    qualified = qualified[['poster_path','title','year','vote_count','vote_average','genre']]
    
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    
    qualified['wr'] = qualified.apply(lambda x: weighted_rating(x,m,C), axis=1)
    
    qualified = qualified.sort_values('wr', ascending= False)
    
    return qualified



In [44]:
list_genre =  pop_gen['index']
list_genre.shape

(32,)

In [45]:
HTML(build_chart(list_genre.loc[0],0.9).head(10).to_html(escape = False))

Unnamed: 0,poster_path,title,year,vote_count,vote_average,genre,wr
314,,The Shawshank Redemption,1994,8358,8.5,Drama,8.445674
834,,The Godfather,1972,6024,8.5,Drama,8.425154
10309,,Dilwale Dulhania Le Jayenge,1995,661,9.1,Drama,8.41927
12481,,The Dark Knight,2008,12269,8.3,Drama,8.265271
2843,,Fight Club,1999,9678,8.3,Drama,8.25612
522,,Schindler's List,1993,4436,8.3,Drama,8.206017
23673,,Whiplash,2014,4376,8.3,Drama,8.204772
2211,,Life Is Beautiful,1997,3643,8.3,Drama,8.186394
1178,,The Godfather: Part II,1974,3418,8.3,Drama,8.17924
1152,,One Flew Over the Cuckoo's Nest,1975,3001,8.3,Drama,8.163284


## Movie Description Based Recommender

In [46]:
links = pd.read_csv('../the-movies-dataset/links_small.csv')
links[:10]

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
5,6,113277,949.0
6,7,114319,11860.0
7,8,112302,45325.0
8,9,114576,9091.0
9,10,113189,710.0


In [47]:
links = links[links['tmdbId'].notnull()]['tmdbId'].astype('int')
links

0          862
1         8844
2        15602
3        31357
4        11862
         ...  
9120    402672
9121    315011
9122    391698
9123    137608
9124    410803
Name: tmdbId, Length: 9112, dtype: int64

In [48]:
md = md.drop([19730, 29503, 35587])
md['id'] = md['id'].astype('int')


In [49]:
smd = pd.read_csv('../the-movies-dataset/movies_metadata_merge_tmvdb.csv')
smd['description'] = smd['description'].fillna('')
graph = smd[['poster_path','title','year','vote_count','vote_average','genres']]

In [50]:
smd['description']

0       Led by Woody, Andy's toys live happily in his ...
1       When siblings Judy and Peter discover an encha...
2       A family wedding reignites the ancient feud be...
3       Cheated on, mistreated and stepped on, the wom...
4       Just when George Banks has recovered from his ...
                              ...                        
9094    From the mind behind Evangelion comes a hit la...
9095    The band stormed Europe in 1963, and, in 1964,...
9096    When Molly Hale's sadness of her father's disa...
9097    All your favorite Pokémon characters are back,...
9098    While holidaying in the French Alps, a Swedish...
Name: description, Length: 9099, dtype: object

In [51]:
def graph_result(result):
    list_result_dataframe = []
     
    for i in result:
         list_result_dataframe.append(graph.iloc[i]) 

    list_result_dataframe = pd.DataFrame(list_result_dataframe)                
    return HTML(list_result_dataframe.to_html(escape = False))

### Content-Based By Title And Description Using Sklearn

In [52]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1,2),min_df=0, stop_words='english')
tfidf_matrix = tf.fit_transform(smd['description'])

In [53]:
tfidf_matrix.shape
# có 9099 vector tương ướng với 9099 overview,
# mỗi vector có 268124 chiều 

(9099, 268124)

In [54]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
# Compute the cosine similarity matrix

In [55]:
cosine_sim[0]

array([1.        , 0.00680476, 0.        , ..., 0.        , 0.00344913,
       0.        ])

In [56]:
cosine_sim[1]

array([0.00680476, 1.        , 0.01531062, ..., 0.00357057, 0.00762326,
       0.        ])

In [57]:
smd = smd.reset_index()
titles = smd['title']
indices = pd.Series(smd.index, index=smd['title'])

In [58]:
indices

title
Toy Story                                                0
Jumanji                                                  1
Grumpier Old Men                                         2
Waiting to Exhale                                        3
Father of the Bride Part II                              4
                                                      ... 
Shin Godzilla                                         9094
The Beatles: Eight Days a Week - The Touring Years    9095
Pokémon: Spell of the Unknown                         9096
Pokémon 4Ever: Celebi - Voice of the Forest           9097
Force Majeure                                         9098
Length: 9099, dtype: int64

In [59]:
smd[smd['title'] == "The Avengers" ]

Unnamed: 0.1,level_0,Unnamed: 0,index,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,description,year
1708,1708,1708,2044,False,,60000000,"[{'id': 53, 'name': 'Thriller'}]",,9320,tt0118661,...,89.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Saving the World in Style.,The Avengers,False,4.4,205.0,"British Ministry agent John Steed, under direc...",1998
7869,7869,7869,17818,False,"{'id': 86311, 'name': 'The Avengers Collection...",220000000,"[{'id': 878, 'name': 'Science Fiction'}, {'id'...",http://marvel.com/avengers_movie/,24428,tt0848228,...,143.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Some assembly required.,The Avengers,False,7.4,12000.0,When an unexpected enemy emerges and threatens...,2012


In [60]:
indices['The Dark Knight']

idx = []
idx.append(indices['The Avengers'])
for i in idx:
    print(i)

The Avengers    1708
The Avengers    7869
dtype: int64


In [61]:
def get_recommendations(title,k):
    # Get the index of the movie that matches the title
    idx = []
    idx.append(indices[title])
    result = {}
    
    for i in idx:
                # Get the pairwsie similarity scores of all movies with that movie
                sim_scores = list(enumerate(cosine_sim[i]))

                # Sort the movies based on the similarity scores
                sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

                # Get the scores of the 10 most similar movies
                #(element number 0 is the movie it self)
                sim_scores = sim_scores[1:11]

                movie_indices = [k[0] for k in sim_scores]
                result[i,title] = movie_indices[:k]
                
    return result    

In [62]:
get_recommendations('The Dark Knight',10)

{(6900, 'The Dark Knight'): [7931,
  132,
  1113,
  8227,
  7565,
  524,
  7901,
  2579,
  2696,
  8165]}

In [63]:
result = get_recommendations('The Dark Knight',10)
html = ""

for i in result:
        print(i[0]," - ",i[1])
        display(graph_result(result[i]))
        print("-------------------------------")   

6900  -  The Dark Knight


Unnamed: 0,poster_path,title,year,vote_count,vote_average,genres
7931,,The Dark Knight Rises,2012,9263.0,7.6,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 18, 'name': 'Drama'}, {'id': 53, 'name': 'Thriller'}]"
132,,Batman Forever,1995,1529.0,5.2,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 14, 'name': 'Fantasy'}]"
1113,,Batman Returns,1992,1706.0,6.6,"[{'id': 28, 'name': 'Action'}, {'id': 14, 'name': 'Fantasy'}]"
8227,,"Batman: The Dark Knight Returns, Part 2",2013,426.0,7.9,"[{'id': 28, 'name': 'Action'}, {'id': 16, 'name': 'Animation'}]"
7565,,Batman: Under the Red Hood,2010,459.0,7.6,"[{'id': 28, 'name': 'Action'}, {'id': 16, 'name': 'Animation'}]"
524,,Batman,1989,2145.0,7.0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'name': 'Action'}]"
7901,,Batman: Year One,2011,255.0,7.1,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 16, 'name': 'Animation'}, {'id': 80, 'name': 'Crime'}, {'id': 878, 'name': 'Science Fiction'}]"
2579,,Batman: Mask of the Phantasm,1993,218.0,7.4,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 16, 'name': 'Animation'}, {'id': 10751, 'name': 'Family'}]"
2696,,JFK,1991,513.0,7.5,"[{'id': 18, 'name': 'Drama'}, {'id': 53, 'name': 'Thriller'}, {'id': 36, 'name': 'History'}]"
8165,,"Batman: The Dark Knight Returns, Part 1",2012,410.0,7.7,"[{'id': 28, 'name': 'Action'}, {'id': 16, 'name': 'Animation'}]"


-------------------------------


### Content-Based By Title And Description From Scatch

In [64]:
from ipynb.fs.full.Description_Title_Content_Base_Recommender import cosine_similarity_ver2
graph_result(list(cosine_similarity_ver2(11,"The Dark Knight")))






Unnamed: 0,poster_path,title,year,vote_count,vote_average,genres
7931,,The Dark Knight Rises,2012,9263.0,7.6,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 18, 'name': 'Drama'}, {'id': 53, 'name': 'Thriller'}]"
8165,,"Batman: The Dark Knight Returns, Part 1",2012,410.0,7.7,"[{'id': 28, 'name': 'Action'}, {'id': 16, 'name': 'Animation'}]"
132,,Batman Forever,1995,1529.0,5.2,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 14, 'name': 'Fantasy'}]"
8227,,"Batman: The Dark Knight Returns, Part 2",2013,426.0,7.9,"[{'id': 28, 'name': 'Action'}, {'id': 16, 'name': 'Animation'}]"
6144,,Batman Begins,2005,7511.0,7.5,"[{'id': 28, 'name': 'Action'}, {'id': 80, 'name': 'Crime'}, {'id': 18, 'name': 'Drama'}]"
2579,,Batman: Mask of the Phantasm,1993,218.0,7.4,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 16, 'name': 'Animation'}, {'id': 10751, 'name': 'Family'}]"
1113,,Batman Returns,1992,1706.0,6.6,"[{'id': 28, 'name': 'Action'}, {'id': 14, 'name': 'Fantasy'}]"
524,,Batman,1989,2145.0,7.0,"[{'id': 14, 'name': 'Fantasy'}, {'id': 28, 'name': 'Action'}]"
7901,,Batman: Year One,2011,255.0,7.1,"[{'id': 28, 'name': 'Action'}, {'id': 12, 'name': 'Adventure'}, {'id': 16, 'name': 'Animation'}, {'id': 80, 'name': 'Crime'}, {'id': 878, 'name': 'Science Fiction'}]"
7565,,Batman: Under the Red Hood,2010,459.0,7.6,"[{'id': 28, 'name': 'Action'}, {'id': 16, 'name': 'Animation'}]"


## Metadata Based Recommender

### Merging cast crew keyword data into main dataframe

In [65]:
smd =  pd.read_csv('../the-movies-dataset/movies_metadata_merge_crew_keywords.csv')
graph = smd[['poster_path','title','year','vote_count','vote_average','genres']]

### Using Sklearn Library

In [None]:
from ipynb.fs.full.Cast_Director_Gernes_Content_Base_Recommender import  get_recommendations
result = get_recommendations("The Dark Knight",11)
for i in result:
    list_movie_result = result[i] 
graph_result(list_movie_result)    

### From Scatch

In [None]:
from ipynb.fs.full.Cast_Director_Gernes_Content_Base_Recommender import  get_recommendations_ver2

In [None]:
result = get_recommendations_ver2("The Dark Knight",11)
for i in result:
    list_movie_result = result[i] 
graph_result(list_movie_result)    

## Collaborative Filtering 