# MOVIE RECOMMENDER ENGINE

## Library

In [1]:
%matplotlib inline
import ast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import seaborn as sns
import operator
import collections


from IPython.display import Image, HTML, display
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from surprise.model_selection import cross_validate
from surprise import Reader, SVD, Dataset


import warnings; warnings.simplefilter('ignore')

## LOAD DataFrame

In [2]:
md = pd.read_csv('../the-movies-dataset/movies_metadata_equal_ratings_ver1.csv')
train = pd.read_csv('../the-movies-dataset/ratings_train.csv')

In [3]:
N = len(md)

In [4]:
N

9025

In [5]:
md.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9025 entries, 0 to 9024
Data columns (total 26 columns):
adult                    9025 non-null bool
belongs_to_collection    1670 non-null object
budget                   9025 non-null int64
genres                   9025 non-null object
homepage                 1954 non-null object
id                       9025 non-null int64
imdb_id                  9025 non-null object
original_language        9025 non-null object
original_title           9025 non-null object
overview                 9013 non-null object
popularity               9025 non-null float64
poster_path              9022 non-null object
production_companies     9025 non-null object
production_countries     9025 non-null object
release_date             9025 non-null object
revenue                  9025 non-null float64
runtime                  9025 non-null float64
spoken_languages         9025 non-null object
status                   9023 non-null object
tagline             

In [6]:
md.head().transpose()[:20]

Unnamed: 0,0,1,2,3,4
adult,False,False,False,False,False
belongs_to_collection,"{'id': 10194, 'name': 'Toy Story Collection', ...",,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",,"{'id': 96871, 'name': 'Father of the Bride Col..."
budget,30000000,65000000,0,16000000,0
genres,"['Animation', 'Comedy', 'Family']","['Adventure', 'Fantasy', 'Family']","['Romance', 'Comedy']","['Comedy', 'Drama', 'Romance']",['Comedy']
homepage,http://toystory.disney.com/toy-story,,,,
id,862,8844,15602,31357,11862
imdb_id,tt0114709,tt0113497,tt0113228,tt0114885,tt0113041
original_language,en,en,en,en,en
original_title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II
overview,"Led by Woody, Andy's toys live happily in his ...",When siblings Judy and Peter discover an encha...,A family wedding reignites the ancient feud be...,"Cheated on, mistreated and stepped on, the wom...",Just when George Banks has recovered from his ...


In [7]:
md['genres'] = md['genres'].apply(ast.literal_eval)

In [8]:
base_poster_url = 'http://image.tmdb.org/t/p/w185'
md['poster_path'] = "<img src='" + base_poster_url + md['poster_path'] + "' style='height:100px;'>"


In [9]:
md['poster_path']

0       <img src='http://image.tmdb.org/t/p/w185/rhIRb...
1       <img src='http://image.tmdb.org/t/p/w185/vzmL6...
2       <img src='http://image.tmdb.org/t/p/w185/6ksm1...
3       <img src='http://image.tmdb.org/t/p/w185/16XOM...
4       <img src='http://image.tmdb.org/t/p/w185/e64sO...
                              ...                        
9020    <img src='http://image.tmdb.org/t/p/w185/jcP3H...
9021    <img src='http://image.tmdb.org/t/p/w185/yWp7P...
9022    <img src='http://image.tmdb.org/t/p/w185/q1lrN...
9023    <img src='http://image.tmdb.org/t/p/w185/q2XVe...
9024    <img src='http://image.tmdb.org/t/p/w185/uv7sy...
Name: poster_path, Length: 9025, dtype: object

## Demographic Filtering - Weighted Rating

In [10]:
# IMDB Top Movies Chart Fomula : 
# WR = (v/(v+m).R) + (m/(v+m).C)
# v : số  lượng người bình chọn cho 1 bộ phim 
# m : số lượng bình chọn cần thiếu để được xét vào list - Một phim phải 
# có lượng vote > 95% các phim khác trong list
# R : rating  

In [11]:
vote_counts = md[md['vote_count'].notnull()]['vote_count']

In [12]:
vote_averages = md[md['vote_average'].notnull()]['vote_average']

In [13]:
# mean() function can be used to calculate 
# mean/average of a given list of numbers

C = vote_averages.mean()

In [14]:
# rating trung binh tren toan tap dataset
C

6.362227146814405

In [15]:
# Tính phân vị - tìm giá trị m sao cho 95% giá trị trong list bé hơn m
# 5% còn lại lớn hơn nó 
m = vote_counts.quantile(0.95)

In [16]:
m

2085.599999999995

In [17]:
qualified = md[(md['vote_count'] >= m) & (md['vote_count'].notnull()) 
            & (md['vote_average'].notnull())]

In [18]:
qualified = qualified[['poster_path','title','year','vote_count','vote_average','genres']]

In [19]:
qualified.shape

(452, 6)

In [20]:
# sumary : một phim cần phải đạt trên 434 votes để  được xét 
#        : điểm rating trung bình là 5.2 
#        : có 2274 phim đạt yêu cầu 

In [21]:
def weighted_rating(x,m,C):
    v = x['vote_count']
    R = x['vote_average']
    
    return (v/(m+v) * R) + (m/(m+v) * C)

In [22]:
qualified['wr'] = md.apply(lambda x: weighted_rating(x,m,C), axis=1)

In [22]:
# ascending : tăng dần 

qualified = qualified.sort_values('wr', ascending = False)

In [23]:
HTML(qualified.to_html(escape = False))

Unnamed: 0,poster_path,title,year,vote_count,vote_average,genres,wr
284,,The Shawshank Redemption,1994,8358.0,8.5,"[Drama, Crime]",8.073084
6873,,The Dark Knight,2008,12269.0,8.3,"[Drama, Action, Crime, Thriller]",8.018458
2369,,Fight Club,1999,9678.0,8.3,[Drama],7.956447
692,,The Godfather,1972,6024.0,8.5,"[Drama, Crime]",7.950215
266,,Pulp Fiction,1994,8670.0,8.3,"[Thriller, Crime]",7.92425
7526,,Inception,2010,14075.0,8.1,"[Action, Thriller, Science Fiction, Mystery, Adventure]",7.875732
8472,,Interstellar,2014,11187.0,8.1,"[Adventure, Drama, Science Fiction]",7.826934
321,,Forrest Gump,1994,8147.0,8.2,"[Comedy, Drama, Romance]",7.825427
5009,,The Lord of the Rings: The Return of the King,2003,8226.0,8.1,"[Adventure, Fantasy, Action]",7.748522
949,,The Empire Strikes Back,1980,5998.0,8.2,"[Adventure, Action, Science Fiction]",7.725848


In [24]:
s = md.apply(lambda x: pd.Series(x['genres']),axis=1).stack().reset_index(level=1, drop=True)
s.name = 'genre'
gen_md = md.drop('genres', axis=1).join(s)

In [25]:
gen_md.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22321 entries, 0 to 9024
Data columns (total 26 columns):
adult                    22321 non-null bool
belongs_to_collection    4553 non-null object
budget                   22321 non-null int64
homepage                 4957 non-null object
id                       22321 non-null int64
imdb_id                  22321 non-null object
original_language        22321 non-null object
original_title           22321 non-null object
overview                 22298 non-null object
popularity               22321 non-null float64
poster_path              22314 non-null object
production_companies     22321 non-null object
production_countries     22321 non-null object
release_date             22321 non-null object
revenue                  22321 non-null float64
runtime                  22321 non-null float64
spoken_languages         22321 non-null object
status                   22317 non-null object
tagline                  17830 non-null object
ti

### Filter By Genre

In [26]:
def build_chart(genre, percentile):
    df = gen_md[gen_md['genre'] == genre]
    vote_counts = df[df['vote_count'].notnull()]['vote_count'].astype('int')
    vote_averages = df[df['vote_average'].notnull()]['vote_average'].astype('int')
    
    C = vote_averages.mean()
    m = vote_counts.quantile(percentile)
    
    qualified = df[(df['vote_count'] >= m) & (df['vote_count'].notnull()) & (df['vote_average'].notnull())]
    qualified = qualified[['poster_path','title','year','vote_count','vote_average','genre']]
    
    qualified['vote_count'] = qualified['vote_count'].astype('int')
    
    qualified['wr'] = qualified.apply(lambda x: weighted_rating(x,m,C), axis=1)
    
    qualified = qualified.sort_values('wr', ascending= False)
    
    return qualified



In [27]:
pop_gen = pd.DataFrame(gen_md['genre'].value_counts()).reset_index()
pop_gen.column = ['genre','movies']

In [28]:
list_genre =  pop_gen['index']
list_genre.shape

(20,)

In [29]:
HTML(build_chart(list_genre.loc[0],0.9).head(10).to_html(escape = False))

Unnamed: 0,poster_path,title,year,vote_count,vote_average,genre,wr
284,,The Shawshank Redemption,1994,8358,8.5,Drama,8.285582
692,,The Godfather,1972,6024,8.5,Drama,8.212499
6873,,The Dark Knight,2008,12269,8.3,Drama,8.162215
2369,,Fight Club,1999,9678,8.3,Drama,8.12822
321,,Forrest Gump,1994,8147,8.2,Drama,8.008085
8472,,Interstellar,2014,11187,8.1,Drama,7.963518
472,,Schindler's List,1993,4436,8.3,Drama,7.957018
8551,,Whiplash,2014,4376,8.3,Drama,7.95306
7935,,The Intouchables,2011,5410,8.2,Drama,7.923822
1835,,Life Is Beautiful,1997,3643,8.3,Drama,7.896127


## Movie Description Based Recommender

In [10]:
def graph_result(result):
    md_graph = md[['poster_path','title','description','year','vote_count','vote_average']].copy()
    list_result_dataframe = []
     
    for i in result:
         list_result_dataframe.append(md_graph.iloc[i]) 

    list_result_dataframe = pd.DataFrame(list_result_dataframe)                
    return HTML(list_result_dataframe.to_html(escape = False))

### Content-Based By Title And Description Using Sklearn

In [17]:
md.description = md['description'].fillna('')

In [18]:
tf = TfidfVectorizer(analyzer='word',ngram_range=(1, 2),min_df=0, stop_words='english',use_idf=True)
tfidf_matrix = tf.fit_transform(md['description']+' '+md['title'])

In [19]:
tfidf_matrix.shape
# có 9099 vector tương ướng với 9099 overview,
# mỗi vector có 268124 chiều 

(9025, 281313)

In [20]:
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)
# Compute the cosine similarity matrix

In [33]:
import scipy.sparse
scipy.sparse.save_npz('test_sparse.npz', cosine_sim)

AttributeError: 'numpy.ndarray' object has no attribute 'format'

In [21]:
cosine_sim.shape

(9025, 9025)

In [23]:
cosine_sim[0]

array([1.        , 0.00614994, 0.        , ..., 0.        , 0.        ,
       0.00598153])

In [24]:
cosine_sim[1]

array([0.00614994, 1.        , 0.01435999, ..., 0.        , 0.        ,
       0.00538877])

In [25]:
md = md.reset_index()
titles = md['title']
indices = pd.Series(md.index, index=md['title'])

In [26]:
indices

title
Toy Story                                                0
Jumanji                                                  1
Grumpier Old Men                                         2
Waiting to Exhale                                        3
Father of the Bride Part II                              4
                                                      ... 
Sharknado 4: The 4th Awakens                          9020
The Last Brickmaker in America                        9021
Rustom                                                9022
Mohenjo Daro                                          9023
The Beatles: Eight Days a Week - The Touring Years    9024
Length: 9025, dtype: int64

In [27]:
md[md['title'] == "The Avengers" ]

Unnamed: 0,index,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,description,year
1707,1707,False,,60000000,[Thriller],,9320,tt0118661,en,The Avengers,...,89.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Saving the World in Style.,The Avengers,False,4.4,205.0,"British Ministry agent John Steed, under direc...",1998
7839,7839,False,"{'id': 86311, 'name': 'The Avengers Collection...",220000000,"[Science Fiction, Action, Adventure]",http://marvel.com/avengers_movie/,24428,tt0848228,en,The Avengers,...,143.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Some assembly required.,The Avengers,False,7.4,12000.0,When an unexpected enemy emerges and threatens...,2012


In [28]:
def get_recommendations(title):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:31]
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

In [29]:
get_recommendations('The Godfather').head(10)

973      The Godfather: Part II
8354                 The Family
3508                       Made
1581    The Godfather: Part III
4191         Johnny Dangerously
5651                       Fury
2411             American Movie
29               Shanghai Triad
7730              Henry's Crime
3287          Jaws: The Revenge
Name: title, dtype: object

In [30]:
def get_recommendations(k,title):
    # Get the index of the movie that matches the title
    idx = []
    idx.append(indices[title])
    print(idx)
    result = {}
    
    for i in idx:
                # Get the pairwsie similarity scores of all movies with that movie
                sim_scores = list(enumerate(cosine_sim[i]))

                # Sort the movies based on the similarity scores
                sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
                
                # Get the scores of the 10 most similar movies
                #(element number 0 is the movie it self)
                sim_scores = sim_scores[1:11]

                movie_indices = [k[0] for k in sim_scores]
                result[i,title] = movie_indices[:k]
                
    return result    

In [31]:
get_recommendations(10,'The Dark Knight')

[6873]


{(6873, 'The Dark Knight'): [7901,
  132,
  524,
  8196,
  2578,
  1113,
  8134,
  7537,
  7871,
  6125]}

In [32]:
result = get_recommendations(10,'The Dark Knight')
html = ""

for i in result:
        print(i[0]," - ",i[1])
        display(graph_result(result[i]))
        print("-------------------------------")   

[6873]
6873  -  The Dark Knight


Unnamed: 0,poster_path,title,description,year,vote_count,vote_average
7901,,The Dark Knight Rises,"Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protect the late attorney's reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham's finest. The Dark Knight resurfaces to protect a city that has branded him an enemy.The Legend Ends",2012,9263.0,7.6
132,,Batman Forever,"The Dark Knight of Gotham City confronts a dastardly duo: Two-Face and the Riddler. Formerly District Attorney Harvey Dent, Two-Face believes Batman caused the courtroom accident which left him disfigured on one side. And Edward Nygma, computer-genius and former employee of millionaire Bruce Wayne, is out to get the philanthropist; as The Riddler. Former circus acrobat Dick Grayson, his family killed by Two-Face, becomes Wayne's ward and Batman's new partner Robin.Courage now, truth always...",1995,1529.0,5.2
524,,Batman,"The Dark Knight of Gotham City begins his war on crime with his first major enemy being the clownishly homicidal Joker, who has seized control of Gotham's underworld.Have you ever danced with the devil in the pale moonlight?",1989,2145.0,7.0
8196,,"Batman: The Dark Knight Returns, Part 2",Batman has stopped the reign of terror that The Mutants had cast upon his city. Now an old foe wants a reunion and the government wants The Man of Steel to put a stop to Batman.Justice Returns... Vengeance Returns... Redemption Comes to Gotham.,2013,426.0,7.9
2578,,Batman: Mask of the Phantasm,"An old flame of Bruce Wayne's strolls into town, re-heating up the romance between the two. At the same time, a mass murderer with an axe for one hand begins systematically eliminating Gotham's crime bosses. Due to the person's dark appearance, he is mistaken for Batman. Now on the run, Batman must solve the mystery and deal with the romance between him and Andrea Beaumont.The Dark Knight fights to save Gotham city from its deadliest enemy.",1993,218.0,7.4
1113,,Batman Returns,"Having defeated the Joker, Batman now faces the Penguin - a warped and deformed individual who is intent on being accepted into Gotham society. Crooked businessman Max Schreck is coerced into helping him become Mayor of Gotham and they both attempt to expose Batman in a different light. Selina Kyle, Max's secretary, is thrown from the top of a building and is transformed into Catwoman - a mysterious figure who has the same personality disorder as Batman. Batman must attempt to clear his name, all the time deciding just what must be done with the Catwoman.The Bat, the Cat, the Penguin.",1992,1706.0,6.6
8134,,"Batman: The Dark Knight Returns, Part 1","Batman has not been seen for ten years. A new breed of criminal ravages Gotham City, forcing 55-year-old Bruce Wayne back into the cape and cowl. But, does he still have what it takes to fight crime in a new era?Old heroes never die. They just get darker.",2012,410.0,7.7
7537,,Batman: Under the Red Hood,"Batman faces his ultimate challenge as the mysterious Red Hood takes Gotham City by firestorm. One part vigilante, one part criminal kingpin, Red Hood begins cleaning up Gotham with the efficiency of Batman, but without following the same ethical code.Dare to Look Beneath the Hood.",2010,459.0,7.6
7871,,Batman: Year One,"Two men come to Gotham City: Bruce Wayne after years abroad feeding his lifelong obsession for justice and Jim Gordon after being too honest a cop with the wrong people elsewhere. After learning painful lessons about the city's corruption on its streets and police department respectively, this pair learn how to fight back their own way. With that, Gotham's evildoers from top to bottom are terrorized by the mysterious Batman and the equally heroic Gordon is assigned to catch him by comrades who both hate and fear him themselves. In the ensuing manhunt, both find much in common as the seeds of an unexpected friendship are laid with additional friends and rivals helping to start the legend.A merciless crime turns a man into an outlaw.",2011,255.0,7.1
6125,,Batman Begins,"Driven by tragedy, billionaire Bruce Wayne dedicates his life to uncovering and defeating the corruption that plagues his home, Gotham City. Unable to work within the system, he instead creates a new identity, a symbol of fear for the criminal underworld - The Batman.Evil fears the knight.",2005,7511.0,7.5


-------------------------------


### Content-Based By Title And Description From Scatch

In [45]:
from ipynb.fs.full.Description_Title_Content_Base_Recommender import cosine_similarity_ver2
cosine_similarity_ver2(11,"The Dark Knight")






array([7901, 8134,  132, 8196, 6125, 2578, 1113,  524, 7871, 7537])

In [46]:
graph_result(list(cosine_similarity_ver2(11,"The Dark Knight")))




Unnamed: 0,poster_path,title,description,year,vote_count,vote_average
7901,,The Dark Knight Rises,"Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent's crimes to protect the late attorney's reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham's finest. The Dark Knight resurfaces to protect a city that has branded him an enemy.The Legend Ends",2012,9263.0,7.6
8134,,"Batman: The Dark Knight Returns, Part 1","Batman has not been seen for ten years. A new breed of criminal ravages Gotham City, forcing 55-year-old Bruce Wayne back into the cape and cowl. But, does he still have what it takes to fight crime in a new era?Old heroes never die. They just get darker.",2012,410.0,7.7
132,,Batman Forever,"The Dark Knight of Gotham City confronts a dastardly duo: Two-Face and the Riddler. Formerly District Attorney Harvey Dent, Two-Face believes Batman caused the courtroom accident which left him disfigured on one side. And Edward Nygma, computer-genius and former employee of millionaire Bruce Wayne, is out to get the philanthropist; as The Riddler. Former circus acrobat Dick Grayson, his family killed by Two-Face, becomes Wayne's ward and Batman's new partner Robin.Courage now, truth always...",1995,1529.0,5.2
8196,,"Batman: The Dark Knight Returns, Part 2",Batman has stopped the reign of terror that The Mutants had cast upon his city. Now an old foe wants a reunion and the government wants The Man of Steel to put a stop to Batman.Justice Returns... Vengeance Returns... Redemption Comes to Gotham.,2013,426.0,7.9
6125,,Batman Begins,"Driven by tragedy, billionaire Bruce Wayne dedicates his life to uncovering and defeating the corruption that plagues his home, Gotham City. Unable to work within the system, he instead creates a new identity, a symbol of fear for the criminal underworld - The Batman.Evil fears the knight.",2005,7511.0,7.5
2578,,Batman: Mask of the Phantasm,"An old flame of Bruce Wayne's strolls into town, re-heating up the romance between the two. At the same time, a mass murderer with an axe for one hand begins systematically eliminating Gotham's crime bosses. Due to the person's dark appearance, he is mistaken for Batman. Now on the run, Batman must solve the mystery and deal with the romance between him and Andrea Beaumont.The Dark Knight fights to save Gotham city from its deadliest enemy.",1993,218.0,7.4
1113,,Batman Returns,"Having defeated the Joker, Batman now faces the Penguin - a warped and deformed individual who is intent on being accepted into Gotham society. Crooked businessman Max Schreck is coerced into helping him become Mayor of Gotham and they both attempt to expose Batman in a different light. Selina Kyle, Max's secretary, is thrown from the top of a building and is transformed into Catwoman - a mysterious figure who has the same personality disorder as Batman. Batman must attempt to clear his name, all the time deciding just what must be done with the Catwoman.The Bat, the Cat, the Penguin.",1992,1706.0,6.6
524,,Batman,"The Dark Knight of Gotham City begins his war on crime with his first major enemy being the clownishly homicidal Joker, who has seized control of Gotham's underworld.Have you ever danced with the devil in the pale moonlight?",1989,2145.0,7.0
7871,,Batman: Year One,"Two men come to Gotham City: Bruce Wayne after years abroad feeding his lifelong obsession for justice and Jim Gordon after being too honest a cop with the wrong people elsewhere. After learning painful lessons about the city's corruption on its streets and police department respectively, this pair learn how to fight back their own way. With that, Gotham's evildoers from top to bottom are terrorized by the mysterious Batman and the equally heroic Gordon is assigned to catch him by comrades who both hate and fear him themselves. In the ensuing manhunt, both find much in common as the seeds of an unexpected friendship are laid with additional friends and rivals helping to start the legend.A merciless crime turns a man into an outlaw.",2011,255.0,7.1
7537,,Batman: Under the Red Hood,"Batman faces his ultimate challenge as the mysterious Red Hood takes Gotham City by firestorm. One part vigilante, one part criminal kingpin, Red Hood begins cleaning up Gotham with the efficiency of Batman, but without following the same ethical code.Dare to Look Beneath the Hood.",2010,459.0,7.6


### User Vector

In [10]:
Yhat = np.load('tf_idf_pred_rating.npy')

In [11]:
len(Yhat)

9025

In [12]:
ratings_train = train[['userId','id','rating','movieIndex']]
ratings_matrix = ratings_train.values

In [13]:
# this fuction return movie_index in meta_data_dataset and ratings of user  
def get_items_rated_by_user(rate_matrix, user_id):
    y = rate_matrix[:,0] # get all the value in user_id column
    print(y)
    # get list of user_id index in y which value equal input 
    # user_id
    ids = np.where(y == user_id)[0]
    # get list movie_id matching user_id 
    item_ids = rate_matrix[ids, 3]
    item_ids = [int(i) for i in item_ids]
    
    scores = rate_matrix[ids,2]
    return (item_ids, scores)

In [14]:
def get_list_movie_have_not_rated(rate_matrix, userId,movie_count):
    user_rated, score = get_items_rated_by_user(rate_matrix,userId)
    result = []
    for i in range(movie_count):
         if i not in user_rated: result.append(i)
    return (userId, result)            

In [15]:
def TF_IDF_Content_Based_Recommender(rate_matrix, userId,movie_count):
    userId, list_not_rate =  get_list_movie_have_not_rated(rate_matrix, userId,movie_count)
    result = {}
    for i in list_not_rate:
        result[i] = Yhat[i, userId]
    
    result = sorted(result.items(), key=operator.itemgetter(1),reverse=True)
    result = dict(result)
    return result

In [16]:
def TF_IDF_Content_Based_Recommender2(rate_matrix, userId,movie_count):
    userId, list_not_rate =  get_list_movie_have_not_rated(rate_matrix, userId,movie_count)
    result = {}
    for i in list_not_rate:
        result[i] = Yhat[i, userId]
    
    result = dict(result)
    return result

## Metadata Based Recommender

### Merging cast crew keyword data into main dataframe

In [17]:
smd =  pd.read_csv('../the-movies-dataset/movies_metadata_merge_crew_keywords.csv')
graph = smd[['poster_path','title','year','vote_count','vote_average','genres']]

### Using Sklearn Library

In [18]:
from ipynb.fs.full.Cast_Director_Gernes_Content_Base_Recommender import  get_recommendations
result = get_recommendations("The Dark Knight",11)
for i in result:
    list_movie_result = result[i] 
graph_result(list_movie_result)    

KeyboardInterrupt: 

### From Scatch

In [None]:
from ipynb.fs.full.Cast_Director_Gernes_Content_Base_Recommender import  get_recommendations_ver2

In [None]:
result = get_recommendations_ver2("The Dark Knight",11)
for i in result:
    list_movie_result = result[i] 
graph_result(list_movie_result)    

## Collaborative Filtering 

In [19]:
f = open('KNN_Mean_predict_rating_result.pckl', 'rb')
KNN_predict_rating_result = pickle.load(f)
f.close()

In [20]:
type(KNN_predict_rating_result)

dict

In [21]:
user0 = {}
for i in KNN_predict_rating_result:
    if i[0] == 0: user0[i[1]] = KNN_predict_rating_result[i] 
len(user0)   

9010

In [22]:
def collaborative_fillter_recommender(userId,predict_rating_result):
    result = {}
    for i in predict_rating_result:
        if i[0] == userId:
            result[i[1]] = predict_rating_result[i]
    result = sorted(result.items(), key=operator.itemgetter(1),reverse=True)
    result = dict(result)
    return result       

## Hybrid recommender ver 1

In [23]:
def Hibrid_Recommender(user_id):
    const_x = 3
    content_based_list = TF_IDF_Content_Based_Recommender(ratings_matrix,user_id,N)
    collaborative_based_list = collaborative_fillter_recommender(user_id,KNN_predict_rating_result)
    hybrid_movie_list = collaborative_based_list
    
    #for i in collaborative_based_list:
          #collaborative_based_list[i] = content_based_list[i]*0.4 + collaborative_based_list[i]*0.6      
            #print(content_based_list[i],collaborative_based_list[i]) 
    print(len(content_based_list),len(collaborative_based_list))
    
    result = sorted(hybrid_movie_list.items(), key=operator.itemgetter(1),reverse=True)
    result = dict(result)
    return result

In [24]:
content_based_list = TF_IDF_Content_Based_Recommender(ratings_matrix,0,N)
collaborative_based_list = collaborative_fillter_recommender(0,KNN_predict_rating_result)

[  0.   0.   0. ... 670. 670. 670.]


In [25]:
TF_IDF_Content_Based_Recommender2(ratings_matrix,0,N)

[  0.   0.   0. ... 670. 670. 670.]


{0: 2.418331634215756,
 1: 2.420680837319464,
 2: 2.4215696422827664,
 3: 2.4241181973381685,
 4: 2.421797526304559,
 5: 2.4321669349767925,
 6: 2.421830450323965,
 7: 2.4229540676386176,
 8: 2.427536708988026,
 9: 2.409453117788023,
 10: 2.4192513080059443,
 11: 2.519410224120131,
 12: 2.422810951430595,
 13: 2.4235431003379446,
 14: 2.4171652114399365,
 15: 2.4215741004774776,
 16: 2.4306164505891616,
 17: 2.421810153803982,
 18: 2.418055729896069,
 19: 2.4151163703778016,
 20: 2.424113141344164,
 21: 2.4201186751147024,
 22: 2.4200810497560528,
 23: 2.423375484610004,
 24: 2.4243058135492177,
 25: 2.4242273168457222,
 26: 2.4182664774249734,
 27: 2.422278026192373,
 28: 2.406855768880845,
 29: 2.4200388175282646,
 31: 2.4230478979753483,
 32: 2.420679001589983,
 33: 2.4215577087735505,
 34: 2.4269139723909308,
 35: 2.397423427978889,
 36: 2.4152693672841337,
 37: 2.4169555949039836,
 38: 2.4214540237857416,
 39: 2.4206792407851885,
 40: 2.4234246209910646,
 41: 2.4188762045151453,
 

In [85]:
list_movie = []
for i in content_based_list:
    list_movie.append(i)
len(list_movie)    

8962

### Content Based

In [81]:
list_movie = []
for i in content_based_list:
    list_movie.append(i)
graph_result(list_movie[0:10])  

Unnamed: 0,poster_path,title,description,year,vote_count,vote_average
2114,,Dracula,The legend of vampire Count Dracula begins here with this original 1931 Dracula film from Bela Lugosi.The story of the strangest passion the world has ever known!,1931,254.0,7.2
4238,,Horror of Dracula,"After Jonathan Harker attacks Dracula at his castle (apparently somewhere in Germany), the vampire travels to a nearby city, where he preys on the family of Harker's fiancée. The only one who may be able to protect them is Dr. van Helsing, Harker's friend and fellow-student of vampires, who is determined to destroy Dracula, whatever the cost.Who will his bride be tonight?",1958,124.0,7.2
1644,,Popeye,"Popeye is a super-strong, spinach-scarfing sailor man who's searching for his father. During a storm that wrecks his ship, Popeye washes ashore and winds up rooming at the Oyl household, where he meets Olive. Before he can win her heart, he must first contend with Olive's fiancé, Bluto.The sailor man with the spinach can!",1980,157.0,5.3
3225,,Dracula 2000,"In the millenium version of this classic Gothic horror we find Abraham Van Helsing (Plummer), who has tangled with Count Dracula (Butler) in the past, working as an English antiques dealer. Simon (Miller) is a vampire hunter in training under his apprenticeship.The Most Seductive Evil of All Time Has Now Been Unleashed in Ours.",2000,150.0,4.6
1070,,Blood for Dracula,"Deathly ill Count Dracula and his slimy underling, Anton, travel to Italy in search of a virgin's blood. They're welcomed at the crumbling estate of indebted Marchese Di Fiore, who's desperate to marry off his daughters to rich suitors. But there, instead of pure women, the count encounters incestuous lesbians with vile blood and Marxist manservant Mario, who's suspicious of the aristocratic Dracula.He couldn't live without a virgin's blood..... ...So a virgin had to die!",1974,22.0,5.8
4734,,Dracula,"Romanticized adaptation of Bram Stoker's 1897 classic. Set in 1913 England, the bloodsucking, but handsome, charming and seductive, Count Dracula seeks an immortal bride.Throughout history he has filled the hearts of men with terror, and the hearts of women with desire.",1979,49.0,6.4
4151,,Nosferatu the Vampyre,"Jonathan Harker is sent away to Count Dracula's castle to sell him a house in Virna, where he lives. But Count Dracula is a vampire, an undead ghoul living of men's blood. Inspired by a photograph of Lucy Harker, Dracula moves to Virna, bringing with him death and plague. An unusually contemplative version of Dracula, in which the vampire bears the cross of not being able to get old and die.",1979,146.0,7.5
8611,,Dracula Untold,"Vlad Tepes is a great hero, but when he learns the Sultan is preparing for battle and needs to form an army of 1,000 boys, including Vlad's son, he vows to find a way to protect his family. Vlad turns to dark forces in order to get the power to destroy his enemies and agrees to go from hero to monster as he's turned into the mythological vampire Dracula.Every bloodline has a beginning",2014,2439.0,6.2
5142,,Van Helsing,Famed monster slayer Gabriel Van Helsing is dispatched to Transylvania to assist the last of the Valerious bloodline in defeating Count Dracula. Anna Valerious reveals that Dracula has formed an unholy alliance with Dr. Frankenstein's monster and is hell-bent on exacting a centuries-old curse on her family.The One Name They All Fear.,2004,1674.0,6.0
11,,Dracula: Dead and Loving It,"When a lawyer shows up at the vampire's doorstep, he falls prey to his charms and joins him in his search for fresh blood. Enter Dr. van Helsing, who may be the only one able to vanquish the count.",1995,210.0,5.7


In [82]:
len(list_movie)

9011

### Collaborative

In [62]:
list_movie = []
for i in collaborative_based_list:
    list_movie.append(i)
graph_result(list_movie[0:10])  

Unnamed: 0,poster_path,title,description,year,vote_count,vote_average
269,,Picture Bride,"Riyo, an orphaned 17-year old, sails from Yokohama to Hawaii in 1918 to marry Matsuji, a man she has never met. Hoping to escape a troubled past and start anew, Riyo is bitterly disappointed upon her arrival: her husband is twice her age. The miserable girl finds solace with her new friend Kana, a young mother who helps Riyo accept her new life.",1995,5.0,7.4
277,,"Red Firecracker, Green Firecracker","A woman inherits her father's fireworks factory, as he had no son. The business does well and everything works in an orderly fashion until one day, an itinerant painter is hired to decorate the doors and vases at the factory. The woman, forbidden to marry and thereby involve outsiders in the factory ownership, finds herself drawn to the headstrong painter. When they fall in love, the situation throws her entire life into disarray",1994,2.0,7.0
354,,The Secret Adventures of Tom Thumb,A boy born the size of a small doll is kidnapped by a genetic lab and must find a way back to his father in this inventive adventure filmed using stop motion animation techniques.A nursery crime of epic proportions...,1993,8.0,7.1
666,,Death in the Garden,"Amid a revolution in a South American mining outpost, a band of fugitives - a roguish adventurer, a local hooker, a priest, an aging diamond miner and his deaf-mute daughter - are forced to flee for their lives into the jungle. Starving, exhausted and stripped of their old identities, they wander desperately lured by one deceptive promise of salvation after another.Torrid!...Tantalizing!...Tempestuous!...",1956,8.0,7.4
700,,Vive L'Amour,"The film focuses on three city folks who unknowingly share the same apartment: Mei, a real estate agent who uses it for her sexual affairs; Ah-jung, her current lover; and Hsiao-ang, who's stolen the key and uses the apartment as a retreat.",1994,16.0,7.3
893,,Palookaville,"Jerry and his two pals, Russ and Syd, are just looking for some easy money to help them break out of their nowhere lives in their nowhere town. Despite a bungled jewelry store heist which exposes their incompetence as criminals, a fateful event (and an old black-and-white film) convinces them that they can pull off an armored-truck robbery. While they are busy plotting their caper, their dysfunctional families spin out of control, all around them.One foot in the door. The other one in the gutter.",1995,7.0,5.4
1154,,Angel Baby,Two schizophrenics meet during therapy and fall in love. Unfortunately they are on a road to nowhere...An extraordinary story of love's transcendent power.,1995,3.0,7.7
1240,,Dream with the Fishes,"Terry is a suicidal voyeur who treats a dying addict to a final binge, but Terry will only do this if he promises to kill him.An oddball odyssey about voyeurism, LSD and nude bowling!",1997,10.0,7.7
1248,,Gabbeh,"An elderly couple go about their routine of cleaning their gabbeh (a intricately-designed rug), while bickering gently with each other. Magically, a young woman appears, helping the two clean the rug. This young woman belongs to the clan whose history is depicted in the design of the gabbeh, and the rug recounts the story of the courtship of the young woman by a stranger from the clan.",1996,6.0,7.5
1802,,Bandit Queen,"Born a lower-caste girl in rural India's patriarchal society, ""married"" at 11, repeatedly raped and brutalized, Phooland Devi finds freedom only as an avenging warrior, the eponymous Bandit Queen. Devi becomes a kind a bloody Robin Hood; this extraordinary biographical film offers both a vivid portrait of a driven woman and a savage critique of the society that made her.Revenge was her reply.",1994,11.0,7.1


### Hybrid 

In [63]:
result = Hibrid_Recommender(0)

9011 9010


In [36]:
list_movie = []
for i in result:
    list_movie.append(i)
graph_result(list_movie[0:10])  

NameError: name 'result' is not defined

In [34]:
9025 - 14

9011