# Moteur de recommandations

In [1]:
%matplotlib inline

from IPython.display import display, HTML
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors

from fuzzywuzzy import fuzz

# Main data: the raw movie table
X = pd.read_csv('data_raw.csv')

# Key descriptive columns, used to display and compare the recommendations
# (movie titles are also read from this frame when filtering sequels)
X_info = X[['movie_title', 'movie_imdb_link', 'imdb_score', 'title_year', 'duration', 
            'num_voted_users', 'genres', 'country', 'language', 'plot_keywords']]

# One-hot encodings, one CSV file per categorical feature.
# NOTE(review): the `_red` files are presumably "reduced" encodings in which
# rarely seen categories are grouped together -- confirm against the script
# that produced them.
genres = pd.read_csv('ohe_genres.csv')
lang = pd.read_csv('ohe_lang.csv')
lang_red = pd.read_csv('ohe_lang_red.csv')
countries = pd.read_csv('ohe_countries.csv')
countries_red = pd.read_csv('ohe_countries_red.csv')



In [2]:
# PCA on the language and country one-hot encodings (20 components each).
# random_state is pinned so that, whenever scikit-learn selects a randomized
# SVD solver, the projected features -- and therefore the neighbours computed
# from them -- are the same on every Restart & Run All.
pca1 = PCA(n_components=20, random_state=0)
lang_pca = pca1.fit_transform(lang)

pca2 = PCA(n_components=20, random_state=0)
countries_pca = pca2.fit_transform(countries)

## Fonctions utilitaires

In [3]:
def is_sequel(movie1, movie2, threshold=50):
    """Tell whether two movie titles are similar enough to be likely sequels.

    Two fuzzy-matching scores are computed with ``fuzz.ratio`` and
    ``fuzz.token_set_ratio``; the titles are considered sequels only when
    BOTH scores are strictly greater than ``threshold``.

    Parameters
    ----------
    movie1, movie2 : str
        The two titles to compare.
    threshold : int or float, optional
        Minimum score (exclusive) that both measures must exceed.
        Defaults to 50, the value that used to be hard-coded.

    Returns
    -------
    bool
        True when the titles look like sequels of each other.
    """
    # The second score is only computed when the first one passes.
    return bool(fuzz.ratio(movie1, movie2) > threshold
                and fuzz.token_set_ratio(movie1, movie2) > threshold)

In [4]:
def remove_sequels(id_movie, movie_list, n_results=6):
    """Build a recommendation list that avoids probable sequels.

    The result always starts with ``id_movie``. Candidates from ``movie_list``
    are then examined in order: a candidate whose title looks like a sequel
    (see ``is_sequel``) of any movie already kept is set aside, the others are
    kept. If the candidates run out before ``n_results`` movies were kept, the
    set-aside sequels are used, in order, to fill the remaining slots.

    Parameters
    ----------
    id_movie : int
        Positional index (into ``X_info``) of the query movie.
    movie_list : iterable of int
        Candidate positional indices, best candidate first. It is NOT
        modified by this function.
    n_results : int, optional
        Maximum length of the returned list, query movie included.
        Defaults to 6 (the query plus five recommendations).

    Returns
    -------
    list
        ``id_movie`` followed by at most ``n_results - 1`` distinct candidates.
    """
    final_list = [id_movie]
    sequels = []

    for movie in movie_list:
        if len(final_list) >= n_results:
            break
        # The query movie is normally its own nearest neighbour; skipping it
        # (and any repeated candidate) keeps it from being filed as a "sequel"
        # and re-added by the fallback below.
        if movie in final_list or movie in sequels:
            continue

        # Titles are compared without their last character.
        # NOTE(review): presumably the raw titles end with a stray trailing
        # character -- confirm against data_raw.csv.
        title = X_info.iloc[movie].movie_title[:-1]
        if any(is_sequel(title, X_info.iloc[kept].movie_title[:-1])
               for kept in final_list):
            sequels.append(movie)
        else:
            final_list.append(movie)

    # Not enough non-sequel candidates: complete the list with sequels.
    missing = n_results - len(final_list)
    if missing > 0:
        final_list.extend(sequels[:missing])

    return final_list

In [5]:
def make_temp_df(X, id_movie, include_genres=True):
    """
    Build a frame describing how similar every movie is to a reference movie.

    The returned frame keeps the four numeric columns and replaces the
    categorical ones by boolean similarity flags relative to the movie at
    position ``id_movie``:

    * ``language`` / ``country``: True when equal to the reference value
      (missing values are treated as the empty string);
    * ``kw_<i>``: True when the movie has the i-th plot keyword of the
      reference movie;
    * ``genre_<i>`` (only if ``include_genres``): True when the movie has the
      i-th genre of the reference movie.

    Keywords and genres are compared as whole '|'-separated tokens, so the
    genre "Music" does not match "Musical".

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain the columns 'imdb_score', 'title_year', 'duration',
        'num_voted_users', 'language', 'country', 'genres', 'plot_keywords'.
        It is not modified.
    id_movie : int
        Positional index of the reference movie in ``X``.
    include_genres : bool, optional
        Whether to add the ``genre_<i>`` columns. Defaults to True.

    Returns
    -------
    pandas.DataFrame
        One row per movie of ``X``, same index.
    """
    numeric_cols = ['imdb_score', 'title_year', 'duration', 'num_voted_users']
    text_cols = ['language', 'country', 'genres', 'plot_keywords']

    # Select first, then copy: only the needed columns are duplicated.
    X_temp = X[numeric_cols + text_cols].copy()

    # Only the text columns receive the '' placeholder; numeric columns keep
    # their dtype (and any NaN) instead of being turned into object columns.
    X_temp[text_cols] = X_temp[text_cols].fillna('')

    # Read every reference value before the columns are overwritten below.
    reference = X_temp.iloc[id_movie]
    ref_language = reference['language']
    ref_country = reference['country']
    ref_keywords = reference['plot_keywords'].split('|')
    ref_genres = reference['genres'].split('|')

    X_temp['language'] = X_temp['language'] == ref_language
    X_temp['country'] = X_temp['country'] == ref_country

    keyword_lists = X_temp['plot_keywords'].apply(lambda text: text.split('|'))
    for i, kw in enumerate(ref_keywords):
        X_temp[f'kw_{i}'] = keyword_lists.apply(lambda tokens: kw in tokens)

    if include_genres:
        # Exact token membership rather than a regex/substring search on the
        # raw 'A|B|C' string.
        genre_lists = X_temp['genres'].apply(lambda text: text.split('|'))
        for i, genre in enumerate(ref_genres):
            X_temp[f'genre_{i}'] = genre_lists.apply(lambda tokens: genre in tokens)

    return X_temp.drop(['plot_keywords', 'genres'], axis=1)

## Méthode n°1: k-NNs sur données numériques + one-hot encodings

In [6]:
# Numeric features shared by every candidate feature set
basic = X[['imdb_score', 'title_year', 'duration', 'num_voted_users']]

# PCA returns bare arrays. Their columns are given string names so that the
# concatenated frames never mix integer and string column labels, which recent
# scikit-learn releases refuse when fitting an estimator on a DataFrame.
lang_pca_df = pd.DataFrame(
    lang_pca, columns=[f'lang_pc_{i}' for i in range(lang_pca.shape[1])])
countries_pca_df = pd.DataFrame(
    countries_pca, columns=[f'country_pc_{i}' for i in range(countries_pca.shape[1])])

# Raw (unscaled) feature sets, keyed by the label shown in the results
feature_sets = {
    'Minimal': basic,
    'Genres': pd.concat([basic, genres], axis=1),
    'Languages (no-PCA)': pd.concat([basic, lang], axis=1),
    'Languages (PCA)': pd.concat([basic, lang_pca_df], axis=1),
    'Languages (reduced)': pd.concat([basic, lang_red], axis=1),
    'Countries (no-PCA)': pd.concat([basic, countries], axis=1),
    'Countries (PCA)': pd.concat([basic, countries_pca_df], axis=1),
    'Countries (reduced)': pd.concat([basic, countries_red], axis=1),
    'Genres + Languages + Countries': pd.concat([basic, genres, lang, countries], axis=1)
}

# Standardised feature matrices (numpy arrays), one per feature set
datasets = {
    name: StandardScaler().fit_transform(features)
    for name, features in feature_sets.items()
}

# One fitted k-NN index per feature set. 16 neighbours are requested per
# query -- presumably to leave enough candidates once the query movie itself
# and probable sequels have been filtered out (TODO confirm).
neighbors = {
    name: NearestNeighbors(n_neighbors=16).fit(scaled)
    for name, scaled in datasets.items()
}

In [7]:
def recommend(X, id_movie, subset=None):
    """Print and display the method-1 recommendations for one movie.

    For every entry of the module-level ``datasets`` dict (optionally
    restricted to the names listed in ``subset``), the neighbours of the movie
    are looked up in the matching fitted model of ``neighbors``, filtered
    with ``remove_sequels`` and the corresponding rows of ``X_info`` are
    displayed.

    Parameters
    ----------
    X : unused
        Kept for interface compatibility; rows are read from ``X_info``.
    id_movie : int
        Positional index of the query movie.
    subset : collection of str, optional
        Dataset names to show. ``None`` or an empty collection shows all.
    """
    selected = [name for name in datasets if not subset or name in subset]

    for name in selected:
        print(f"\nMéthode n°1 - Dataset: {name}:")
        query = datasets[name][id_movie].reshape(1, -1)
        found = neighbors[name].kneighbors(query, return_distance=False)
        candidates = list(found[0])
        display(X_info.iloc[remove_sequels(id_movie, candidates)])

### 1) Language: PCA/no-PCA/reduced ? 

In [8]:
# Compare the three language encodings on movies whose language is
# increasingly rare in the data.
language_variants = ['Languages (no-PCA)', 'Languages (PCA)', 'Languages (reduced)']
language_cases = [
    ("English movie", 100),
    ("Swedish movie", 4583),
    ("Dari (persian) movie", 2126),
]

for position, (label, movie_id) in enumerate(language_cases):
    if position > 0:
        # Separator line between two test cases
        print(100*"_")
    print(label)
    recommend(X, movie_id, subset=language_variants)

English movie

Méthode n°1 - Dataset: Languages (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
100,Harry Potter and the Order of the Phoenix,http://www.imdb.com/title/tt0373889/?ref_=fn_t...,7.5,2007.0,138.0,355137,Adventure|Family|Fantasy|Mystery,UK,English,battle|magic|school|teacher|wizard
2524,The Hurt Locker,http://www.imdb.com/title/tt0887912/?ref_=fn_t...,7.6,2008.0,131.0,332065,Drama|History|Thriller|War,USA,English,army|bomb|dangerous job|death|iraq
139,Mission: Impossible - Ghost Protocol,http://www.imdb.com/title/tt1229238/?ref_=fn_t...,7.4,2011.0,133.0,365104,Action|Adventure|Thriller,USA,English,dubai|kremlin|race against time|russian|terrorist
243,Live Free or Die Hard,http://www.imdb.com/title/tt0337978/?ref_=fn_t...,7.2,2007.0,129.0,336235,Action|Adventure|Thriller,USA,English,fbi|hacker|independence day|police|terrorist
1107,American Hustle,http://www.imdb.com/title/tt1800241/?ref_=fn_t...,7.3,2013.0,138.0,358416,Crime|Drama,USA,English,based on true story|con artist|drunk wife|fbi ...
190,X-Men 2,http://www.imdb.com/title/tt0290334/?ref_=fn_t...,7.5,2003.0,134.0,405973,Action|Adventure|Fantasy|Sci-Fi|Thriller,Canada,English,mutant|prison|professor|school|x men



Méthode n°1 - Dataset: Languages (PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
100,Harry Potter and the Order of the Phoenix,http://www.imdb.com/title/tt0373889/?ref_=fn_t...,7.5,2007.0,138.0,355137,Adventure|Family|Fantasy|Mystery,UK,English,battle|magic|school|teacher|wizard
2524,The Hurt Locker,http://www.imdb.com/title/tt0887912/?ref_=fn_t...,7.6,2008.0,131.0,332065,Drama|History|Thriller|War,USA,English,army|bomb|dangerous job|death|iraq
139,Mission: Impossible - Ghost Protocol,http://www.imdb.com/title/tt1229238/?ref_=fn_t...,7.4,2011.0,133.0,365104,Action|Adventure|Thriller,USA,English,dubai|kremlin|race against time|russian|terrorist
243,Live Free or Die Hard,http://www.imdb.com/title/tt0337978/?ref_=fn_t...,7.2,2007.0,129.0,336235,Action|Adventure|Thriller,USA,English,fbi|hacker|independence day|police|terrorist
1107,American Hustle,http://www.imdb.com/title/tt1800241/?ref_=fn_t...,7.3,2013.0,138.0,358416,Crime|Drama,USA,English,based on true story|con artist|drunk wife|fbi ...
190,X-Men 2,http://www.imdb.com/title/tt0290334/?ref_=fn_t...,7.5,2003.0,134.0,405973,Action|Adventure|Fantasy|Sci-Fi|Thriller,Canada,English,mutant|prison|professor|school|x men



Méthode n°1 - Dataset: Languages (reduced):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
100,Harry Potter and the Order of the Phoenix,http://www.imdb.com/title/tt0373889/?ref_=fn_t...,7.5,2007.0,138.0,355137,Adventure|Family|Fantasy|Mystery,UK,English,battle|magic|school|teacher|wizard
2524,The Hurt Locker,http://www.imdb.com/title/tt0887912/?ref_=fn_t...,7.6,2008.0,131.0,332065,Drama|History|Thriller|War,USA,English,army|bomb|dangerous job|death|iraq
139,Mission: Impossible - Ghost Protocol,http://www.imdb.com/title/tt1229238/?ref_=fn_t...,7.4,2011.0,133.0,365104,Action|Adventure|Thriller,USA,English,dubai|kremlin|race against time|russian|terrorist
243,Live Free or Die Hard,http://www.imdb.com/title/tt0337978/?ref_=fn_t...,7.2,2007.0,129.0,336235,Action|Adventure|Thriller,USA,English,fbi|hacker|independence day|police|terrorist
1107,American Hustle,http://www.imdb.com/title/tt1800241/?ref_=fn_t...,7.3,2013.0,138.0,358416,Crime|Drama,USA,English,based on true story|con artist|drunk wife|fbi ...
190,X-Men 2,http://www.imdb.com/title/tt0290334/?ref_=fn_t...,7.5,2003.0,134.0,405973,Action|Adventure|Fantasy|Sci-Fi|Thriller,Canada,English,mutant|prison|professor|school|x men


____________________________________________________________________________________________________
Swedish movie

Méthode n°1 - Dataset: Languages (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
4565,As It Is in Heaven,http://www.imdb.com/title/tt0382330/?ref_=fn_t...,7.6,2004.0,133.0,13543,Comedy|Drama|Music|Romance,Sweden,Swedish,amazing grace the hymn|choir|heart attack|home...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
2414,Arn: The Knight Templar,http://www.imdb.com/title/tt0837106/?ref_=fn_t...,6.6,2007.0,270.0,18041,Action|Adventure|Drama|Romance|War,Sweden,Swedish,first part|holy land|knight templar|monastery|...
1997,Sorcerer,http://www.imdb.com/title/tt0076740/?ref_=fn_t...,7.7,1977.0,92.0,9513,Adventure|Drama|Thriller,USA,English,fire|jungle|money|oil drilling|truck
4722,Night of the Living Dead,http://www.imdb.com/title/tt0063350/?ref_=fn_t...,8.0,1968.0,96.0,87978,Drama|Horror|Mystery,USA,English,cemetery|farmhouse|radiation|running out of ga...



Méthode n°1 - Dataset: Languages (PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
4565,As It Is in Heaven,http://www.imdb.com/title/tt0382330/?ref_=fn_t...,7.6,2004.0,133.0,13543,Comedy|Drama|Music|Romance,Sweden,Swedish,amazing grace the hymn|choir|heart attack|home...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
2414,Arn: The Knight Templar,http://www.imdb.com/title/tt0837106/?ref_=fn_t...,6.6,2007.0,270.0,18041,Action|Adventure|Drama|Romance|War,Sweden,Swedish,first part|holy land|knight templar|monastery|...
1753,The Passion of the Christ,http://www.imdb.com/title/tt0335345/?ref_=fn_t...,7.1,2004.0,120.0,179235,Drama,USA,Aramaic,anti semitism|cult film|grindhouse|suffering|t...
4598,Dogtooth,http://www.imdb.com/title/tt1379182/?ref_=fn_t...,7.3,2009.0,94.0,44864,Drama|Thriller,Greece,Greek,fellatio|isolated house|overprotective father|...



Méthode n°1 - Dataset: Languages (reduced):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
4565,As It Is in Heaven,http://www.imdb.com/title/tt0382330/?ref_=fn_t...,7.6,2004.0,133.0,13543,Comedy|Drama|Music|Romance,Sweden,Swedish,amazing grace the hymn|choir|heart attack|home...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
2414,Arn: The Knight Templar,http://www.imdb.com/title/tt0837106/?ref_=fn_t...,6.6,2007.0,270.0,18041,Action|Adventure|Drama|Romance|War,Sweden,Swedish,first part|holy land|knight templar|monastery|...
1997,Sorcerer,http://www.imdb.com/title/tt0076740/?ref_=fn_t...,7.7,1977.0,92.0,9513,Adventure|Drama|Thriller,USA,English,fire|jungle|money|oil drilling|truck
4722,Night of the Living Dead,http://www.imdb.com/title/tt0063350/?ref_=fn_t...,8.0,1968.0,96.0,87978,Drama|Horror|Mystery,USA,English,cemetery|farmhouse|radiation|running out of ga...


____________________________________________________________________________________________________
Dari (persian) movie

Méthode n°1 - Dataset: Languages (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2126,The Kite Runner,http://www.imdb.com/title/tt0419887/?ref_=fn_t...,7.6,2007.0,128.0,68119,Drama,USA,Dari,afghanistan|based on novel|boy|friend|kite
4511,Osama,http://www.imdb.com/title/tt0368913/?ref_=fn_t...,7.4,2003.0,83.0,7559,Drama,Afghanistan,Dari,boy|girl|hair|school|taliban
2496,The Great Debaters,http://www.imdb.com/title/tt0427309/?ref_=fn_t...,7.6,2007.0,126.0,47626,Biography|Drama,USA,English,college|debate|student|texas|wiley college
2117,Reign Over Me,http://www.imdb.com/title/tt0490204/?ref_=fn_t...,7.5,2007.0,124.0,83786,Drama,USA,English,college|depression|grief|mental breakdown|post...
2188,"Synecdoche, New York",http://www.imdb.com/title/tt0383028/?ref_=fn_t...,7.5,2008.0,124.0,55842,Comedy|Drama|Romance,USA,English,actress|director|play|theatre director|warehouse
3168,The Wind That Shakes the Barley,http://www.imdb.com/title/tt0460989/?ref_=fn_t...,7.5,2006.0,127.0,36846,Drama|War,Ireland,English,civil war|colonialism|hatred|independence|rura...



Méthode n°1 - Dataset: Languages (PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2126,The Kite Runner,http://www.imdb.com/title/tt0419887/?ref_=fn_t...,7.6,2007.0,128.0,68119,Drama,USA,Dari,afghanistan|based on novel|boy|friend|kite
4416,"4 Months, 3 Weeks and 2 Days",http://www.imdb.com/title/tt1032846/?ref_=fn_t...,7.9,2007.0,113.0,44763,Drama,Romania,Romanian,female rear nudity|friend|hotel|rainy night|towel
4298,Faith Like Potatoes,http://www.imdb.com/title/tt0850667/?ref_=fn_t...,6.9,2006.0,116.0,2050,Drama,South Africa,Zulu,food|food in title|potato
4300,The Raid: Redemption,http://www.imdb.com/title/tt1899353/?ref_=fn_t...,7.6,2011.0,102.0,148221,Action|Crime|Thriller,Indonesia,Indonesian,apartment|drug lord|gangster|police|swat team
3728,Samsara,http://www.imdb.com/title/tt0770802/?ref_=fn_t...,8.5,2011.0,102.0,22457,Documentary|Music,USA,,hall of mirrors|mont saint michel france|palac...
3891,Tsotsi,http://www.imdb.com/title/tt0468565/?ref_=fn_t...,7.3,2005.0,94.0,25063,Crime|Drama,UK,Zulu,baby|bathing a baby|criminal|slum|vomiting



Méthode n°1 - Dataset: Languages (reduced):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2126,The Kite Runner,http://www.imdb.com/title/tt0419887/?ref_=fn_t...,7.6,2007.0,128.0,68119,Drama,USA,Dari,afghanistan|based on novel|boy|friend|kite
2312,Mongol: The Rise of Genghis Khan,http://www.imdb.com/title/tt0416044/?ref_=fn_t...,7.3,2007.0,126.0,37635,Adventure|Biography|Drama|History|War,Russia,Mongolian,epic|genghis khan|one word title|slave|starving
4243,Journey from the Fall,http://www.imdb.com/title/tt0433398/?ref_=fn_t...,7.4,2006.0,135.0,775,Drama,USA,Vietnamese,1970s|1980s|nonlinear timeline|rescue|vietnam war
2837,Fateless,http://www.imdb.com/title/tt0367082/?ref_=fn_t...,7.1,2005.0,134.0,5603,Drama|Romance|War,Hungary,Hungarian,bus|death|gay slur|hatred|jewish
4416,"4 Months, 3 Weeks and 2 Days",http://www.imdb.com/title/tt1032846/?ref_=fn_t...,7.9,2007.0,113.0,44763,Drama,Romania,Romanian,female rear nudity|friend|hotel|rainy night|towel
3621,I Served the King of England,http://www.imdb.com/title/tt0284363/?ref_=fn_t...,7.4,2006.0,113.0,6183,Comedy|Drama|Romance|War,Czech Republic,Czech,hotel|legs|lingerie|waiter|wealth


Sur le film en anglais, les 3 datasets donnent les mêmes résultats.

Sur le film suédois, les 3 datasets proposent tous les films en suédois (3), et c'est pour les 2 propositions restantes que le dataset 'PCA' va différer des 2 autres. En effet il va se restreindre à des films de langues moins représentées que l'anglais, alors que les autres vont piocher dans tous les films (puisque les films suédois ont été 'épuisés').

Sur le film en dari (dialecte perse), les 3 datasets sont différents. Le 'no-PCA' va proposer l'autre film en dari, puis peu importe pour le reste (anglais en l'occurrence). Le 'PCA' va proposer uniquement des films de langues très peu représentées (roumain, zulu, indonésien), et se restreindre à ce type de langues. Le 'reduced' va proposer uniquement des films dans la catégorie 'other', ici ce sera mongol, vietnamien, hongrois, roumain et tchèque. Dans les deux cas, ils ne proposeront pas le film en dari. 

##### Conclusion: Pour les langues les plus représentées, comme l'anglais, les 3 sont équivalents. Pour les langues moins représentées, le fait d'utiliser les datasets 'PCA' ou 'reduced' va restreindre les possibilités à d'autres langues peu représentées, ce n'est pas forcément ce qu'on veut. Le 'no-PCA' va proposer systématiquement les films de la même langue. Ce n'est pas forcément ce qu'on veut non plus, mais c'est un peu mieux. 

### 2) Pays: PCA/no-PCA/reduced ? 

In [9]:
# Compare the three country encodings on movies whose country is
# increasingly rare in the data.
country_variants = ['Countries (no-PCA)', 'Countries (PCA)', 'Countries (reduced)']
country_cases = [
    ("USA movie", 0),
    ("New Zealand movie", 17),
    ("Thailand movie", 2581),
]

for position, (label, movie_id) in enumerate(country_cases):
    if position > 0:
        # Separator line between two test cases
        print(100*"_")
    print(label)
    recommend(X, movie_id, subset=country_variants)

USA movie

Méthode n°1 - Dataset: Countries (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
84,Interstellar,http://www.imdb.com/title/tt0816692/?ref_=fn_t...,8.6,2014.0,169.0,928227,Adventure|Drama|Sci-Fi,USA,English,black hole|father daughter relationship|saving...
265,Django Unchained,http://www.imdb.com/title/tt1853728/?ref_=fn_t...,8.5,2012.0,165.0,955174,Drama|Western,USA,English,dynamite|historically inaccurate|ku klux klan|...
597,Saving Private Ryan,http://www.imdb.com/title/tt0120815/?ref_=fn_t...,8.6,1998.0,169.0,881236,Action|Drama|War,USA,English,army|invasion|killed in action|normandy|soldier
539,Inglourious Basterds,http://www.imdb.com/title/tt0361748/?ref_=fn_t...,8.3,2009.0,153.0,885175,Adventure|Drama|War,USA,English,france|german|nazis|revenge|scalping



Méthode n°1 - Dataset: Countries (PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
84,Interstellar,http://www.imdb.com/title/tt0816692/?ref_=fn_t...,8.6,2014.0,169.0,928227,Adventure|Drama|Sci-Fi,USA,English,black hole|father daughter relationship|saving...
265,Django Unchained,http://www.imdb.com/title/tt1853728/?ref_=fn_t...,8.5,2012.0,165.0,955174,Drama|Western,USA,English,dynamite|historically inaccurate|ku klux klan|...
597,Saving Private Ryan,http://www.imdb.com/title/tt0120815/?ref_=fn_t...,8.6,1998.0,169.0,881236,Action|Drama|War,USA,English,army|invasion|killed in action|normandy|soldier
539,Inglourious Basterds,http://www.imdb.com/title/tt0361748/?ref_=fn_t...,8.3,2009.0,153.0,885175,Adventure|Drama|War,USA,English,france|german|nazis|revenge|scalping



Méthode n°1 - Dataset: Countries (reduced):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
84,Interstellar,http://www.imdb.com/title/tt0816692/?ref_=fn_t...,8.6,2014.0,169.0,928227,Adventure|Drama|Sci-Fi,USA,English,black hole|father daughter relationship|saving...
265,Django Unchained,http://www.imdb.com/title/tt1853728/?ref_=fn_t...,8.5,2012.0,165.0,955174,Drama|Western,USA,English,dynamite|historically inaccurate|ku klux klan|...
597,Saving Private Ryan,http://www.imdb.com/title/tt0120815/?ref_=fn_t...,8.6,1998.0,169.0,881236,Action|Drama|War,USA,English,army|invasion|killed in action|normandy|soldier
539,Inglourious Basterds,http://www.imdb.com/title/tt0361748/?ref_=fn_t...,8.3,2009.0,153.0,885175,Adventure|Drama|War,USA,English,france|german|nazis|revenge|scalping


____________________________________________________________________________________________________
New Zealand movie

Méthode n°1 - Dataset: Countries (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
17,The Hobbit: The Battle of the Five Armies,http://www.imdb.com/title/tt2310332/?ref_=fn_t...,7.5,2014.0,164.0,354228,Adventure|Fantasy,New Zealand,English,army|elf|hobbit|middle earth|orc
4470,King Kong,http://www.imdb.com/title/tt0360717/?ref_=fn_t...,7.2,2005.0,201.0,316027,Action|Adventure|Drama|Romance,New Zealand,English,animal name in title|ape abducts a woman|goril...
1872,The World's Fastest Indian,http://www.imdb.com/title/tt0412080/?ref_=fn_t...,7.9,2005.0,127.0,44198,Biography|Drama|Sport,New Zealand,English,mortgage|motorcycle|speed|trailer|utah
3284,The Piano,http://www.imdb.com/title/tt0107822/?ref_=fn_t...,7.6,1993.0,121.0,63931,Drama|Music|Romance,New Zealand,English,adultery|daughter|male rear nudity|new zealand...
4219,Housebound,http://www.imdb.com/title/tt3504048/?ref_=fn_t...,6.8,2014.0,107.0,23323,Comedy|Horror|Mystery,New Zealand,English,bloody violence|exploding head|haunted house|p...
3579,Heavenly Creatures,http://www.imdb.com/title/tt0110005/?ref_=fn_t...,7.4,1994.0,108.0,50197,Biography|Crime|Drama|Romance|Thriller,New Zealand,English,fantasy life|friend|friendship|love|teenage crush



Méthode n°1 - Dataset: Countries (PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
17,The Hobbit: The Battle of the Five Armies,http://www.imdb.com/title/tt2310332/?ref_=fn_t...,7.5,2014.0,164.0,354228,Adventure|Fantasy,New Zealand,English,army|elf|hobbit|middle earth|orc
4470,King Kong,http://www.imdb.com/title/tt0360717/?ref_=fn_t...,7.2,2005.0,201.0,316027,Action|Adventure|Drama|Romance,New Zealand,English,animal name in title|ape abducts a woman|goril...
1872,The World's Fastest Indian,http://www.imdb.com/title/tt0412080/?ref_=fn_t...,7.9,2005.0,127.0,44198,Biography|Drama|Sport,New Zealand,English,mortgage|motorcycle|speed|trailer|utah
3284,The Piano,http://www.imdb.com/title/tt0107822/?ref_=fn_t...,7.6,1993.0,121.0,63931,Drama|Music|Romance,New Zealand,English,adultery|daughter|male rear nudity|new zealand...
4219,Housebound,http://www.imdb.com/title/tt3504048/?ref_=fn_t...,6.8,2014.0,107.0,23323,Comedy|Horror|Mystery,New Zealand,English,bloody violence|exploding head|haunted house|p...
3579,Heavenly Creatures,http://www.imdb.com/title/tt0110005/?ref_=fn_t...,7.4,1994.0,108.0,50197,Biography|Crime|Drama|Romance|Thriller,New Zealand,English,fantasy life|friend|friendship|love|teenage crush



Méthode n°1 - Dataset: Countries (reduced):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
17,The Hobbit: The Battle of the Five Armies,http://www.imdb.com/title/tt2310332/?ref_=fn_t...,7.5,2014.0,164.0,354228,Adventure|Fantasy,New Zealand,English,army|elf|hobbit|middle earth|orc
3291,The Great Gatsby,http://www.imdb.com/title/tt1343092/?ref_=fn_t...,7.3,2013.0,143.0,362933,Drama|Romance,Australia,English,ingratitude|mansion|party|title appears in wri...
4470,King Kong,http://www.imdb.com/title/tt0360717/?ref_=fn_t...,7.2,2005.0,201.0,316027,Action|Adventure|Drama|Romance,New Zealand,English,animal name in title|ape abducts a woman|goril...
110,The Matrix Revolutions,http://www.imdb.com/title/tt0242653/?ref_=fn_t...,6.7,2003.0,129.0,364948,Action|Sci-Fi,Australia,English,battle|epic|fight|future|machine
176,Australia,http://www.imdb.com/title/tt0455824/?ref_=fn_t...,6.6,2008.0,165.0,102338,Adventure|Drama|Romance|War,Australia,English,australia|cattle|darwin|drover|japanese
114,Mad Max: Fury Road,http://www.imdb.com/title/tt1392190/?ref_=fn_t...,8.1,2015.0,120.0,552503,Action|Adventure|Sci-Fi|Thriller,Australia,English,australia|desert|escape|on the run|post apocal...


____________________________________________________________________________________________________
Thailand movie

Méthode n°1 - Dataset: Countries (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2581,Ong-bak 2,http://www.imdb.com/title/tt0785035/?ref_=fn_t...,6.2,2008.0,110.0,24570,Action,Thailand,Thai,cult film|elephant|jungle|martial arts|stylize...
3465,The Protector,http://www.imdb.com/title/tt0427954/?ref_=fn_t...,7.1,2005.0,111.0,30210,Action|Crime|Drama|Thriller,Thailand,Thai,die hard scenario|elephant|long take|police|sy...
3149,Skin Trade,http://www.imdb.com/title/tt1641841/?ref_=fn_t...,5.7,2014.0,96.0,5228,Action|Crime|Thriller,Thailand,English,bangkok thailand|detective|human trafficking|r...
4089,The Lost Medallion: The Adventures of Billy St...,http://www.imdb.com/title/tt1390539/?ref_=fn_t...,4.8,2013.0,97.0,1278,Adventure|Family|Fantasy,Thailand,English,cave|cobra|island|medallion|waterfall
3131,The Legend of Suriyothai,http://www.imdb.com/title/tt0290879/?ref_=fn_t...,6.6,2001.0,300.0,1666,Action|Adventure|Drama|History|War,Thailand,Thai,16th century|burmese|invasion|queen|thailand
1959,The Game Plan,http://www.imdb.com/title/tt0492956/?ref_=fn_t...,6.2,2007.0,110.0,44021,Comedy|Family|Sport,USA,English,family relationships|football movie|publicist|...



Méthode n°1 - Dataset: Countries (PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2581,Ong-bak 2,http://www.imdb.com/title/tt0785035/?ref_=fn_t...,6.2,2008.0,110.0,24570,Action,Thailand,Thai,cult film|elephant|jungle|martial arts|stylize...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
3465,The Protector,http://www.imdb.com/title/tt0427954/?ref_=fn_t...,7.1,2005.0,111.0,30210,Action|Crime|Drama|Thriller,Thailand,Thai,die hard scenario|elephant|long take|police|sy...
3461,Winter in Wartime,http://www.imdb.com/title/tt0795441/?ref_=fn_t...,7.1,2008.0,103.0,9230,Drama|History|War,Netherlands,Dutch,horse|nazi|occupation|resistance movement|uncl...
3149,Skin Trade,http://www.imdb.com/title/tt1641841/?ref_=fn_t...,5.7,2014.0,96.0,5228,Action|Crime|Thriller,Thailand,English,bangkok thailand|detective|human trafficking|r...
4231,The Holy Girl,http://www.imdb.com/title/tt0300270/?ref_=fn_t...,6.7,2004.0,106.0,2720,Drama,Argentina,Spanish,16 year old|doctor|hotel|tinnitus|title direct...



Méthode n°1 - Dataset: Countries (reduced):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2581,Ong-bak 2,http://www.imdb.com/title/tt0785035/?ref_=fn_t...,6.2,2008.0,110.0,24570,Action,Thailand,Thai,cult film|elephant|jungle|martial arts|stylize...
2578,The Assassin,http://www.imdb.com/title/tt3508840/?ref_=fn_t...,6.4,2015.0,105.0,9427,Action|Drama,Taiwan,Mandarin,black magic|china|love|refusal to kill|wuxia
3465,The Protector,http://www.imdb.com/title/tt0427954/?ref_=fn_t...,7.1,2005.0,111.0,30210,Action|Crime|Drama|Thriller,Thailand,Thai,die hard scenario|elephant|long take|police|sy...
2573,Listening,http://www.imdb.com/title/tt3153582/?ref_=fn_t...,5.6,2014.0,100.0,1933,Drama|Sci-Fi|Thriller,Cambodia,English,divorce papers|fainting|mind control|playgroun...
3149,Skin Trade,http://www.imdb.com/title/tt1641841/?ref_=fn_t...,5.7,2014.0,96.0,5228,Action|Crime|Thriller,Thailand,English,bangkok thailand|detective|human trafficking|r...
4805,Cavite,http://www.imdb.com/title/tt0428303/?ref_=fn_t...,6.3,2005.0,80.0,589,Thriller,Philippines,English,jihad|mindanao|philippines|security guard|squa...


###### Conclusion: Comme pour la langue, le dataset 'PCA' aura tendance à regrouper des pays très différents comme similaires, ce qui n'est pas vraiment souhaitable. La version 'reduced' par contre semble plus intéressante, car elle regroupe les pays les moins représentés par région géographique, pour en faire des groupes plus représentatifs. Cependant, le découpage reste très subjectif et pas forcément adapté. On préfèrera utiliser la version 'no-PCA' pour le moment.

### 3) Genres vs Pays vs Langue ?

In [10]:
# Compare genres, countries and languages (alone and combined) on four
# contrasting movies.
compared_datasets = ['Minimal', 'Genres',
                     'Countries (no-PCA)', 'Languages (no-PCA)',
                     'Genres + Languages + Countries']
comparison_cases = [
    ("TEST 1: BLOCKBUSTER", 0),
    ("TEST 2: GENRE PEU COURANT: NEWS", 2132),
    ("TEST 3: LANGUE PEU COURANTE: SUEDOIS", 4583),
    ("TEST 4: PAYS PEU COURANT: RUSSIE", 1297),
]

for banner, movie_id in comparison_cases:
    # Banner framed by two lines of asterisks
    print(100*"*")
    print(banner)
    print(100*"*")
    recommend(X, movie_id, subset=compared_datasets)

****************************************************************************************************
TEST 1: BLOCKBUSTER
****************************************************************************************************

Méthode n°1 - Dataset: Minimal:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
84,Interstellar,http://www.imdb.com/title/tt0816692/?ref_=fn_t...,8.6,2014.0,169.0,928227,Adventure|Drama|Sci-Fi,USA,English,black hole|father daughter relationship|saving...
265,Django Unchained,http://www.imdb.com/title/tt1853728/?ref_=fn_t...,8.5,2012.0,165.0,955174,Drama|Western,USA,English,dynamite|historically inaccurate|ku klux klan|...
597,Saving Private Ryan,http://www.imdb.com/title/tt0120815/?ref_=fn_t...,8.6,1998.0,169.0,881236,Action|Drama|War,USA,English,army|invasion|killed in action|normandy|soldier
539,Inglourious Basterds,http://www.imdb.com/title/tt0361748/?ref_=fn_t...,8.3,2009.0,153.0,885175,Adventure|Drama|War,USA,English,france|german|nazis|revenge|scalping



Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
13,Man of Steel,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,7.2,2013.0,143.0,548573,Action|Adventure|Fantasy|Sci-Fi,USA,English,based on comic book|british actor playing amer...
213,Star Wars: Episode III - Revenge of the Sith,http://www.imdb.com/title/tt0121766/?ref_=fn_t...,7.6,2005.0,140.0,520104,Action|Adventure|Fantasy|Sci-Fi,USA,English,elongated cry of no|friends become enemies|kic...
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
185,Pirates of the Caribbean: The Curse of the Bla...,http://www.imdb.com/title/tt0325980/?ref_=fn_t...,8.1,2003.0,143.0,809474,Action|Adventure|Fantasy,USA,English,caribbean|curse|governor|pirate|undead
39,X-Men: Days of Future Past,http://www.imdb.com/title/tt1877832/?ref_=fn_t...,8.0,2014.0,149.0,514125,Action|Adventure|Fantasy|Sci-Fi|Thriller,USA,English,dystopia|super strength|supernatural power|tim...



Méthode n°1 - Dataset: Languages (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
84,Interstellar,http://www.imdb.com/title/tt0816692/?ref_=fn_t...,8.6,2014.0,169.0,928227,Adventure|Drama|Sci-Fi,USA,English,black hole|father daughter relationship|saving...
265,Django Unchained,http://www.imdb.com/title/tt1853728/?ref_=fn_t...,8.5,2012.0,165.0,955174,Drama|Western,USA,English,dynamite|historically inaccurate|ku klux klan|...
597,Saving Private Ryan,http://www.imdb.com/title/tt0120815/?ref_=fn_t...,8.6,1998.0,169.0,881236,Action|Drama|War,USA,English,army|invasion|killed in action|normandy|soldier
539,Inglourious Basterds,http://www.imdb.com/title/tt0361748/?ref_=fn_t...,8.3,2009.0,153.0,885175,Adventure|Drama|War,USA,English,france|german|nazis|revenge|scalping



Méthode n°1 - Dataset: Countries (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
84,Interstellar,http://www.imdb.com/title/tt0816692/?ref_=fn_t...,8.6,2014.0,169.0,928227,Adventure|Drama|Sci-Fi,USA,English,black hole|father daughter relationship|saving...
265,Django Unchained,http://www.imdb.com/title/tt1853728/?ref_=fn_t...,8.5,2012.0,165.0,955174,Drama|Western,USA,English,dynamite|historically inaccurate|ku klux klan|...
597,Saving Private Ryan,http://www.imdb.com/title/tt0120815/?ref_=fn_t...,8.6,1998.0,169.0,881236,Action|Drama|War,USA,English,army|invasion|killed in action|normandy|soldier
539,Inglourious Basterds,http://www.imdb.com/title/tt0361748/?ref_=fn_t...,8.3,2009.0,153.0,885175,Adventure|Drama|War,USA,English,france|german|nazis|revenge|scalping



Méthode n°1 - Dataset: Genres + Languages + Countries:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
13,Man of Steel,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,7.2,2013.0,143.0,548573,Action|Adventure|Fantasy|Sci-Fi,USA,English,based on comic book|british actor playing amer...
213,Star Wars: Episode III - Revenge of the Sith,http://www.imdb.com/title/tt0121766/?ref_=fn_t...,7.6,2005.0,140.0,520104,Action|Adventure|Fantasy|Sci-Fi,USA,English,elongated cry of no|friends become enemies|kic...
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
185,Pirates of the Caribbean: The Curse of the Bla...,http://www.imdb.com/title/tt0325980/?ref_=fn_t...,8.1,2003.0,143.0,809474,Action|Adventure|Fantasy,USA,English,caribbean|curse|governor|pirate|undead
39,X-Men: Days of Future Past,http://www.imdb.com/title/tt1877832/?ref_=fn_t...,8.0,2014.0,149.0,514125,Action|Adventure|Fantasy|Sci-Fi|Thriller,USA,English,dystopia|super strength|supernatural power|tim...


****************************************************************************************************
TEST 2: GENRE PEU COURANT: NEWS
****************************************************************************************************

Méthode n°1 - Dataset: Minimal:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
3423,City Island,http://www.imdb.com/title/tt1174730/?ref_=fn_t...,7.4,2009.0,104.0,27301,Comedy|Drama,USA,English,anger|family reunion|father son reunion|huggin...
3383,The Messenger,http://www.imdb.com/title/tt0790712/?ref_=fn_t...,7.2,2009.0,105.0,29608,Drama|Romance|War,USA,English,captain|casualty notification|casualty notific...
1099,Micmacs,http://www.imdb.com/title/tt1149361/?ref_=fn_t...,7.2,2009.0,105.0,24657,Action|Comedy|Crime,France,French,bullet|contortionist|gag humor|human cannonbal...
1396,The Young Victoria,http://www.imdb.com/title/tt0962736/?ref_=fn_t...,7.3,2009.0,100.0,44931,Biography|Drama|History|Romance,UK,English,19th century|courtship|female protagonist|prin...
4477,Starsuckers,http://www.imdb.com/title/tt1510934/?ref_=fn_t...,7.4,2009.0,103.0,474,Documentary,UK,English,celebrity|consumerism|fashion|media manipulation



Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
4413,Food Chains,http://www.imdb.com/title/tt2141739/?ref_=fn_t...,7.1,2014.0,83.0,265,Documentary|News,USA,English,two word title
4129,The Square,http://www.imdb.com/title/tt2486682/?ref_=fn_t...,8.1,2013.0,108.0,6678,Documentary|Drama|History|News,Egypt,Arabic,
4687,The Trials of Darryl Hunt,http://www.imdb.com/title/tt0446055/?ref_=fn_t...,7.7,2006.0,106.0,771,Crime|Documentary,USA,English,false accusation|murder|north carolina|trial|w...
4059,Inside Job,http://www.imdb.com/title/tt1645089/?ref_=fn_t...,8.3,2010.0,105.0,55382,Crime|Documentary,USA,English,florida|iceland|interview|new york city new yo...
3846,Bowling for Columbine,http://www.imdb.com/title/tt0310793/?ref_=fn_t...,8.0,2002.0,120.0,123090,Crime|Documentary|Drama,Germany,English,bank|columbine|columbine high school killings|...



Méthode n°1 - Dataset: Languages (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
3423,City Island,http://www.imdb.com/title/tt1174730/?ref_=fn_t...,7.4,2009.0,104.0,27301,Comedy|Drama,USA,English,anger|family reunion|father son reunion|huggin...
3383,The Messenger,http://www.imdb.com/title/tt0790712/?ref_=fn_t...,7.2,2009.0,105.0,29608,Drama|Romance|War,USA,English,captain|casualty notification|casualty notific...
1396,The Young Victoria,http://www.imdb.com/title/tt0962736/?ref_=fn_t...,7.3,2009.0,100.0,44931,Biography|Drama|History|Romance,UK,English,19th century|courtship|female protagonist|prin...
4477,Starsuckers,http://www.imdb.com/title/tt1510934/?ref_=fn_t...,7.4,2009.0,103.0,474,Documentary,UK,English,celebrity|consumerism|fashion|media manipulation
2870,The Secret Life of Bees,http://www.imdb.com/title/tt0416212/?ref_=fn_t...,7.3,2008.0,110.0,19440,Drama,USA,English,beekeeping|insect in title|racism|sister|south...



Méthode n°1 - Dataset: Countries (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
3423,City Island,http://www.imdb.com/title/tt1174730/?ref_=fn_t...,7.4,2009.0,104.0,27301,Comedy|Drama,USA,English,anger|family reunion|father son reunion|huggin...
3383,The Messenger,http://www.imdb.com/title/tt0790712/?ref_=fn_t...,7.2,2009.0,105.0,29608,Drama|Romance|War,USA,English,captain|casualty notification|casualty notific...
2870,The Secret Life of Bees,http://www.imdb.com/title/tt0416212/?ref_=fn_t...,7.3,2008.0,110.0,19440,Drama,USA,English,beekeeping|insect in title|racism|sister|south...
687,This Is It,http://www.imdb.com/title/tt1477715/?ref_=fn_t...,7.3,2009.0,111.0,33158,Documentary|Music,USA,English,2000s|box office hit|comeback|rehearsal|year 2009
3337,Hesher,http://www.imdb.com/title/tt1403177/?ref_=fn_t...,7.1,2010.0,106.0,43965,Drama,USA,English,bully|car|car accident|fire|vomiting



Méthode n°1 - Dataset: Genres + Languages + Countries:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
4413,Food Chains,http://www.imdb.com/title/tt2141739/?ref_=fn_t...,7.1,2014.0,83.0,265,Documentary|News,USA,English,two word title
4687,The Trials of Darryl Hunt,http://www.imdb.com/title/tt0446055/?ref_=fn_t...,7.7,2006.0,106.0,771,Crime|Documentary,USA,English,false accusation|murder|north carolina|trial|w...
4059,Inside Job,http://www.imdb.com/title/tt1645089/?ref_=fn_t...,8.3,2010.0,105.0,55382,Crime|Documentary,USA,English,florida|iceland|interview|new york city new yo...
3987,Fuel,http://www.imdb.com/title/tt1294164/?ref_=fn_t...,7.6,2008.0,112.0,578,Documentary,USA,English,alternative energy|energy|fuel|green|oil
4325,An Inconvenient Truth,http://www.imdb.com/title/tt0497116/?ref_=fn_t...,7.5,2006.0,96.0,67654,Documentary,USA,English,climate|earth|global warming|science|truth


****************************************************************************************************
TEST 3: LANGUE PEU COURANTE: SUEDOIS
****************************************************************************************************

Méthode n°1 - Dataset: Minimal:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
3886,Days of Heaven,http://www.imdb.com/title/tt0077405/?ref_=fn_t...,8.0,1978.0,94.0,37594,Drama|Romance,USA,English,farm|farm worker|grasshopper|harvest|wealth
1997,Sorcerer,http://www.imdb.com/title/tt0076740/?ref_=fn_t...,7.7,1977.0,92.0,9513,Adventure|Drama|Thriller,USA,English,fire|jungle|money|oil drilling|truck
4722,Night of the Living Dead,http://www.imdb.com/title/tt0063350/?ref_=fn_t...,8.0,1968.0,96.0,87978,Drama|Horror|Mystery,USA,English,cemetery|farmhouse|radiation|running out of ga...
3955,Blazing Saddles,http://www.imdb.com/title/tt0071230/?ref_=fn_t...,7.8,1974.0,93.0,95294,Comedy|Western,USA,English,railroad|reference to douglas fairbanks|refere...
4611,Nothing But a Man,http://www.imdb.com/title/tt0058414/?ref_=fn_t...,8.1,1964.0,95.0,891,Drama|Romance,USA,English,1960s|father|railroad|town|worker



Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
4458,The Conformist,http://www.imdb.com/title/tt0065571/?ref_=fn_t...,8.1,1970.0,106.0,17813,Drama,Italy,Italian,fascist|gun|italy|nudity|sexuality
4723,Une Femme Mariée,http://www.imdb.com/title/tt0058701/?ref_=fn_t...,7.4,1964.0,94.0,1962,Drama,France,French,acting|actor|face slap|fashion|magazine
4391,The Party's Over,http://www.imdb.com/title/tt0060816/?ref_=fn_t...,7.3,1965.0,94.0,340,Drama,UK,English,beatnik|businessman|necrophilia|party|partying
3778,Midnight Cowboy,http://www.imdb.com/title/tt0064665/?ref_=fn_t...,7.9,1969.0,113.0,76616,Drama,USA,English,gay|homosexual|homosexuality|hustler|male rear...
3595,Out of the Blue,http://www.imdb.com/title/tt0081291/?ref_=fn_t...,7.2,1980.0,94.0,1599,Drama,Canada,English,explosion|hitchhiking|punk|trauma|truck driver



Méthode n°1 - Dataset: Languages (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
4565,As It Is in Heaven,http://www.imdb.com/title/tt0382330/?ref_=fn_t...,7.6,2004.0,133.0,13543,Comedy|Drama|Music|Romance,Sweden,Swedish,amazing grace the hymn|choir|heart attack|home...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
2414,Arn: The Knight Templar,http://www.imdb.com/title/tt0837106/?ref_=fn_t...,6.6,2007.0,270.0,18041,Action|Adventure|Drama|Romance|War,Sweden,Swedish,first part|holy land|knight templar|monastery|...
1997,Sorcerer,http://www.imdb.com/title/tt0076740/?ref_=fn_t...,7.7,1977.0,92.0,9513,Adventure|Drama|Thriller,USA,English,fire|jungle|money|oil drilling|truck
4722,Night of the Living Dead,http://www.imdb.com/title/tt0063350/?ref_=fn_t...,8.0,1968.0,96.0,87978,Drama|Horror|Mystery,USA,English,cemetery|farmhouse|radiation|running out of ga...



Méthode n°1 - Dataset: Countries (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
3665,Lilya 4-Ever,http://www.imdb.com/title/tt0300140/?ref_=fn_t...,7.9,2002.0,109.0,35464,Crime|Drama,Sweden,Russian,abandoned by mother|poverty|sex trade|teenage ...
4565,As It Is in Heaven,http://www.imdb.com/title/tt0382330/?ref_=fn_t...,7.6,2004.0,133.0,13543,Comedy|Drama|Music|Romance,Sweden,Swedish,amazing grace the hymn|choir|heart attack|home...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
2414,Arn: The Knight Templar,http://www.imdb.com/title/tt0837106/?ref_=fn_t...,6.6,2007.0,270.0,18041,Action|Adventure|Drama|Romance|War,Sweden,Swedish,first part|holy land|knight templar|monastery|...
1997,Sorcerer,http://www.imdb.com/title/tt0076740/?ref_=fn_t...,7.7,1977.0,92.0,9513,Adventure|Drama|Thriller,USA,English,fire|jungle|money|oil drilling|truck



Méthode n°1 - Dataset: Genres + Languages + Countries:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
4565,As It Is in Heaven,http://www.imdb.com/title/tt0382330/?ref_=fn_t...,7.6,2004.0,133.0,13543,Comedy|Drama|Music|Romance,Sweden,Swedish,amazing grace the hymn|choir|heart attack|home...
2414,Arn: The Knight Templar,http://www.imdb.com/title/tt0837106/?ref_=fn_t...,6.6,2007.0,270.0,18041,Action|Adventure|Drama|Romance|War,Sweden,Swedish,first part|holy land|knight templar|monastery|...
3665,Lilya 4-Ever,http://www.imdb.com/title/tt0300140/?ref_=fn_t...,7.9,2002.0,109.0,35464,Crime|Drama,Sweden,Russian,abandoned by mother|poverty|sex trade|teenage ...
3778,Midnight Cowboy,http://www.imdb.com/title/tt0064665/?ref_=fn_t...,7.9,1969.0,113.0,76616,Drama,USA,English,gay|homosexual|homosexuality|hustler|male rear...


****************************************************************************************************
TEST 4: PAYS PEU COURANT: RUSSIE
****************************************************************************************************

Méthode n°1 - Dataset: Minimal:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
1690,"Welcome Home, Roscoe Jenkins",http://www.imdb.com/title/tt0494652/?ref_=fn_t...,5.3,2008.0,114.0,10791,Comedy|Drama|Romance,USA,English,african american protagonist|comma in title|fo...
4152,Sublime,http://www.imdb.com/title/tt0822858/?ref_=fn_t...,5.3,2007.0,113.0,6267,Horror|Thriller,USA,English,birthday|birthday party|colonoscopy|hospital|s...
2325,I Come with the Rain,http://www.imdb.com/title/tt1024744/?ref_=fn_t...,5.5,2009.0,114.0,2541,Thriller,France,English,gangster|hong kong|missing person|missing son|...
219,Asterix at the Olympic Games,http://www.imdb.com/title/tt0463872/?ref_=fn_t...,5.1,2008.0,116.0,20567,Adventure|Comedy|Family|Fantasy,France,French,1st century b.c.|lightsaber|local blockbuster|...
2208,The Tempest,http://www.imdb.com/title/tt1274300/?ref_=fn_t...,5.4,2010.0,110.0,6147,Comedy|Drama|Fantasy|Romance,USA,English,banishment|sorcery|spirit|staff|storm at sea



Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
451,Timeline,http://www.imdb.com/title/tt0300556/?ref_=fn_t...,5.6,2003.0,116.0,53057,Action|Adventure|Sci-Fi,USA,English,castle|france|professor|student|time machine
80,Independence Day: Resurgence,http://www.imdb.com/title/tt1628841/?ref_=fn_t...,5.5,2016.0,120.0,58137,Action|Adventure|Sci-Fi,USA,English,alien|battle|defense|independence day|mothership
53,Jupiter Ascending,http://www.imdb.com/title/tt1617661/?ref_=fn_t...,5.4,2015.0,127.0,139593,Action|Adventure|Sci-Fi,USA,English,box office flop|critically bashed|planet earth...
434,The Time Machine,http://www.imdb.com/title/tt0268695/?ref_=fn_t...,5.9,2002.0,96.0,103787,Action|Adventure|Sci-Fi,USA,English,hunter|scientist|time machine|time travel|time...
177,After Earth,http://www.imdb.com/title/tt1815862/?ref_=fn_t...,4.9,2013.0,100.0,158720,Action|Adventure|Sci-Fi,USA,English,box office flop|father son team|fear|race agai...



Méthode n°1 - Dataset: Languages (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
1765,Viy,http://www.imdb.com/title/tt1224378/?ref_=fn_t...,5.4,2014.0,107.0,3170,Adventure|Fantasy|Mystery|Thriller,Russia,Russian,cartographer|creature|foreign language adaptat...
3682,Night Watch,http://www.imdb.com/title/tt0403358/?ref_=fn_t...,6.5,2004.0,104.0,47097,Fantasy|Thriller,Russia,Russian,battle|bridge|mother son relationship|supernat...
1889,Space Dogs,http://www.imdb.com/title/tt1272051/?ref_=fn_t...,5.2,2010.0,85.0,891,Adventure|Animation|Comedy|Family,Russia,Russian,dog|flea|meteor|space|space dog
3349,Snow Queen,http://www.imdb.com/title/tt2243621/?ref_=fn_t...,5.3,2012.0,80.0,1159,Adventure|Animation|Family,Russia,Russian,3d
1625,A Warrior's Tail,http://www.imdb.com/title/tt4075322/?ref_=fn_t...,4.1,2015.0,85.0,393,Adventure|Animation|Fantasy,Russia,Russian,battle|creature|hyena|monkey|village



Méthode n°1 - Dataset: Countries (no-PCA):


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
1765,Viy,http://www.imdb.com/title/tt1224378/?ref_=fn_t...,5.4,2014.0,107.0,3170,Adventure|Fantasy|Mystery|Thriller,Russia,Russian,cartographer|creature|foreign language adaptat...
2171,Machete Kills,http://www.imdb.com/title/tt2002718/?ref_=fn_t...,5.6,2013.0,107.0,59248,Action|Comedy|Crime|Thriller,Russia,English,arms dealer|battle|bomb|machete|missile
3682,Night Watch,http://www.imdb.com/title/tt0403358/?ref_=fn_t...,6.5,2004.0,104.0,47097,Fantasy|Thriller,Russia,Russian,battle|bridge|mother son relationship|supernat...
1889,Space Dogs,http://www.imdb.com/title/tt1272051/?ref_=fn_t...,5.2,2010.0,85.0,891,Adventure|Animation|Comedy|Family,Russia,Russian,dog|flea|meteor|space|space dog
3349,Snow Queen,http://www.imdb.com/title/tt2243621/?ref_=fn_t...,5.3,2012.0,80.0,1159,Adventure|Animation|Family,Russia,Russian,3d



Méthode n°1 - Dataset: Genres + Languages + Countries:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
3753,The Geographer Drank His Globe Away,http://www.imdb.com/title/tt3155604/?ref_=fn_t...,7.5,2013.0,120.0,2488,Adventure|Drama,Russia,Russian,rafting|russia|school|student|teacher
3343,Hard to Be a God,http://www.imdb.com/title/tt2328813/?ref_=fn_t...,6.7,2013.0,177.0,2518,Drama|Sci-Fi,Russia,Russian,fictional war|hanging|male full frontal nudity...
3682,Night Watch,http://www.imdb.com/title/tt0403358/?ref_=fn_t...,6.5,2004.0,104.0,47097,Fantasy|Thriller,Russia,Russian,battle|bridge|mother son relationship|supernat...
1765,Viy,http://www.imdb.com/title/tt1224378/?ref_=fn_t...,5.4,2014.0,107.0,3170,Adventure|Fantasy|Mystery|Thriller,Russia,Russian,cartographer|creature|foreign language adaptat...
2580,The Return,http://www.imdb.com/title/tt0376968/?ref_=fn_t...,8.0,2003.0,99.0,31589,Drama|Mystery|Thriller,Russia,Russian,boy|photograph|return|russian|wilderness


Pour le blockbuster, tous les datasets retournent des films similaires en termes de popularité, d'époque et de durée. Cependant, les datasets qui incluent le genre donnent des résultats un peu plus pertinents.

Pour le genre peu courant (News), là aussi les datasets les plus intéressants sont ceux qui incluent le genre (avec une petite préférence pour le dataset qui inclut tout sur ce film). Les autres datasets par contre retournent tout et n'importe quoi, car cette fois la popularité/date/durée sont dans les valeurs moyennes. 

Pour la langue peu courante (suédois), tous les résultats sont décevants. Si la langue est dans le dataset, l'algorithme va forcément sortir les films de la même langue en priorité, et ce n'est pas forcément pertinent. Seul le dataset 'Genres' fait un peu mieux, puisqu'il propose au moins des films de la même catégorie. Mais sans l'utilisation des mots-clés par exemple, ce n'est quand même pas terrible.

Même constat pour le pays peu courant (Russie) : seul le dataset 'Genres' fait un peu mieux que les autres.

###### Conclusion: Les résultats les plus intéressants sont donnés par le dataset "Genres". En effet, si l'on inclut le pays ou la langue, le nombre de colonnes issues du one-hot encoding pour la langue et/ou le pays va être important et trop influencer le résultat. À tel point que pour des pays/langues un peu moins représentés, l'algorithme va proposer uniquement des films du même pays/langue sans qu'il y ait forcément de rapport. Le genre propose lui des résultats intéressants, mais qui pourraient être améliorés avec les mots-clés par exemple. Il faudrait aussi trouver une manière de garder la langue/pays tout en diminuant le poids qu'ils ont dans l'algorithme.

## Méthode n°2: Création d'un dataframe de "similitudes" puis k-NN dessus

###### Principe: On se base sur un dataframe construit à la volée par rapport au film recherché (make_temp_df). Ce dataframe va indiquer si les films ont les mêmes mots-clés, les mêmes genres, etc. Ensuite on standardise, et on recherche les k plus proches voisins.

In [24]:
def recommend2(X, id_movie, n_neighbors=16):
    """Display recommendations for one movie via k-NN on a per-movie similarity frame.

    A temporary frame is built for the query movie with ``make_temp_df``,
    standardised column by column, and searched for the nearest rows.
    The candidate rows are then passed through ``remove_sequels`` and two
    tables are displayed: the standardised similarity features of the kept
    rows, and the matching rows of ``X_info``.

    Parameters
    ----------
    X : the main movie data, forwarded unchanged to ``make_temp_df``.
    id_movie : int
        Positional row index of the query movie.
    n_neighbors : int, default 16
        Number of nearest rows requested before sequel filtering. It is
        capped at the number of rows in the temporary frame.

    Returns
    -------
    None. The function only prints a header and displays two tables.
    """
    X_temp = make_temp_df(X, id_movie)

    # Zero-mean / unit-variance scaling of every column before measuring distances.
    X_temp_std = StandardScaler().fit_transform(X_temp)

    # Asking for more neighbours than there are rows raises in scikit-learn,
    # so cap the request at the number of available rows.
    k = min(n_neighbors, X_temp_std.shape[0])
    neighbors = NearestNeighbors(n_neighbors=k).fit(X_temp_std)

    print("\nMéthode n°2:")
    knn = neighbors.kneighbors(X_temp_std[id_movie].reshape(1, -1), return_distance=False)

    # remove_sequels puts the query movie first and filters look-alike titles.
    movie_list = remove_sequels(id_movie, list(knn[0]))
    display(pd.DataFrame(X_temp_std, columns=X_temp.columns).iloc[movie_list])
    display(X_info.iloc[movie_list])

In [25]:
# Run method 2 on the four reference movies, in the same order as before.
for movie_row in (0, 2132, 4583, 1297):
    recommend2(X, movie_row)


Méthode n°2:


Unnamed: 0,imdb_score,title_year,duration,num_voted_users,language,country,kw_0,kw_1,kw_2,kw_3,kw_4,genre_0,genre_1,genre_2,genre_3
0,1.330645,0.526411,3.101105,5.756094,0.265083,0.567993,40.041645,10.788431,28.304888,69.368581,69.368581,1.84376,2.117021,2.731062,2.706841
126,-1.981109,0.606718,-0.224209,0.252222,0.265083,0.567993,40.041645,-0.092692,-0.03533,-0.014416,-0.014416,1.84376,2.117021,2.731062,-0.369434
4791,-3.055191,0.687025,1.549292,-0.600173,0.265083,0.567993,40.041645,-0.092692,-0.03533,-0.014416,-0.014416,-0.54237,-0.472362,-0.366158,-0.369434
551,-0.548999,0.687025,0.352179,0.510496,0.265083,0.567993,-0.024974,-0.092692,28.304888,-0.014416,-0.014416,1.84376,-0.472362,-0.366158,2.706841
705,-0.190972,0.927947,-0.135534,0.109355,0.265083,0.567993,-0.024974,-0.092692,28.304888,-0.014416,-0.014416,1.84376,-0.472362,-0.366158,-0.369434
1087,-0.101465,-0.03574,0.307841,-0.385313,0.265083,0.567993,-0.024974,-0.092692,28.304888,-0.014416,-0.014416,-0.54237,-0.472362,-0.366158,-0.369434


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
126,The Last Airbender,http://www.imdb.com/title/tt0938283/?ref_=fn_t...,4.2,2010.0,103.0,118951,Action|Adventure|Family|Fantasy,USA,English,avatar|fire|kingdom|tribe|water
4791,The Ridges,http://www.imdb.com/title/tt1781935/?ref_=fn_t...,3.0,2011.0,143.0,125,Drama|Horror|Thriller,USA,English,avatar|college|death|tron|university
551,Battle Los Angeles,http://www.imdb.com/title/tt1217613/?ref_=fn_t...,5.8,2011.0,116.0,154955,Action|Sci-Fi,USA,English,alien|extraterrestrial|invasion|marine|mission
705,Jack Ryan: Shadow Recruit,http://www.imdb.com/title/tt1205537/?ref_=fn_t...,6.2,2014.0,105.0,99035,Action|Drama|Thriller,USA,English,covert analyst|marine|russian|spy|stock market
1087,High Crimes,http://www.imdb.com/title/tt0257756/?ref_=fn_t...,6.3,2002.0,115.0,30077,Crime|Drama|Mystery|Thriller,USA,English,defense lawyer|lawyer|marine|murder|villager



Méthode n°2:


Unnamed: 0,imdb_score,title_year,duration,num_voted_users,language,country,kw_0,kw_1,kw_2,kw_3,kw_4,genre_0,genre_1,genre_2
2132,0.883111,0.526411,-0.135534,-0.349016,0.265083,0.567993,49.045897,49.045897,69.368581,69.368581,69.368581,2.179518,6.280556,40.041645
2389,0.883111,-1.24035,0.795554,0.25365,0.265083,0.567993,-0.020389,49.045897,-0.014416,-0.014416,-0.014416,2.179518,-0.159222,-0.024974
1792,1.688672,-1.561579,1.504955,3.254258,0.265083,0.567993,49.045897,-0.020389,-0.014416,-0.014416,-0.014416,2.179518,-0.159222,-0.024974
4413,0.61459,0.927947,-1.110959,-0.599169,0.265083,0.567993,-0.020389,-0.020389,-0.014416,-0.014416,-0.014416,-0.458817,6.280556,40.041645
4129,1.509659,0.84764,-0.002521,-0.553166,-3.772401,-1.760585,-0.020389,-0.020389,-0.014416,-0.014416,-0.014416,-0.458817,6.280556,40.041645
4687,1.151631,0.285489,-0.091196,-0.595539,0.265083,0.567993,-0.020389,-0.020389,-0.014416,-0.014416,-0.014416,2.179518,6.280556,-0.024974


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
2389,Wall Street,http://www.imdb.com/title/tt0094291/?ref_=fn_t...,7.4,1987.0,126.0,119150,Crime|Drama,USA,English,1980s|argument|critique of capitalism|f word|f...
1792,Scarface,http://www.imdb.com/title/tt0086250/?ref_=fn_t...,8.3,1983.0,142.0,537442,Crime|Drama,USA,English,assassination attempt|capitalism|cocaine|cuban...
4413,Food Chains,http://www.imdb.com/title/tt2141739/?ref_=fn_t...,7.1,2014.0,83.0,265,Documentary|News,USA,English,two word title
4129,The Square,http://www.imdb.com/title/tt2486682/?ref_=fn_t...,8.1,2013.0,108.0,6678,Documentary|Drama|History|News,Egypt,Arabic,
4687,The Trials of Darryl Hunt,http://www.imdb.com/title/tt0446055/?ref_=fn_t...,7.7,2006.0,106.0,771,Crime|Documentary,USA,English,false accusation|murder|north carolina|trial|w...



Méthode n°2:


Unnamed: 0,imdb_score,title_year,duration,num_voted_users,language,country,kw_0,kw_1,kw_2,kw_3,kw_4,genre_0
4583,1.599166,-2.44496,-0.756259,-0.457859,34.673477,31.009676,23.103631,4.946272,21.915748,69.368581,49.045897,0.977604
1797,1.509659,0.365796,2.214355,2.074565,-0.02884,-0.032248,-0.043283,-0.202172,-0.045629,-0.014416,49.045897,0.977604
4565,1.062124,0.124874,1.105917,-0.50392,34.673477,31.009676,-0.043283,-0.202172,-0.045629,-0.014416,-0.020389,0.977604
3675,0.256563,0.606718,0.706879,-0.504637,34.673477,31.009676,-0.043283,-0.202172,-0.045629,-0.014416,-0.020389,0.977604
2414,0.167056,0.365796,7.180157,-0.471653,34.673477,31.009676,-0.043283,-0.202172,-0.045629,-0.014416,-0.020389,0.977604
3665,1.330645,-0.03574,0.041816,-0.34667,-0.02884,31.009676,-0.043283,-0.202172,-0.045629,-0.014416,-0.020389,0.977604


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
1797,There Will Be Blood,http://www.imdb.com/title/tt0469494/?ref_=fn_t...,8.1,2007.0,158.0,372990,Drama,USA,English,alcoholic|alcoholism|money|oil|turn of the cen...
4565,As It Is in Heaven,http://www.imdb.com/title/tt0382330/?ref_=fn_t...,7.6,2004.0,133.0,13543,Comedy|Drama|Music|Romance,Sweden,Swedish,amazing grace the hymn|choir|heart attack|home...
3675,Easy Money,http://www.imdb.com/title/tt1291652/?ref_=fn_t...,6.7,2010.0,124.0,13443,Action|Crime|Drama|Thriller,Sweden,Swedish,double life|drugs|financial crisis|first part|...
2414,Arn: The Knight Templar,http://www.imdb.com/title/tt0837106/?ref_=fn_t...,6.6,2007.0,270.0,18041,Action|Adventure|Drama|Romance|War,Sweden,Swedish,first part|holy land|knight templar|monastery|...
3665,Lilya 4-Ever,http://www.imdb.com/title/tt0300140/?ref_=fn_t...,7.9,2002.0,109.0,35464,Crime|Drama,Sweden,Russian,abandoned by mother|poverty|sex trade|teenage ...



Méthode n°2:


Unnamed: 0,imdb_score,title_year,duration,num_voted_users,language,country,kw_0,kw_1,kw_2,kw_3,kw_4,genre_0,genre_1,genre_2
1297,-0.996533,0.526411,0.307841,-0.5696,20.89367,20.89367,69.368581,20.002083,5.410216,31.009676,26.202508,1.84376,2.117021,2.706841
1197,-1.533574,-0.116048,-0.179871,-0.508848,-0.047861,-0.047861,-0.014416,-0.049995,-0.184835,31.009676,-0.038164,1.84376,2.117021,-0.369434
2136,-0.638506,0.526411,-0.889272,-0.412867,-0.047861,-0.047861,-0.014416,-0.049995,-0.184835,31.009676,-0.038164,1.84376,-0.472362,-0.369434
3010,0.793604,0.285489,0.529529,0.0061,-0.047861,-0.047861,-0.014416,-0.049995,-0.184835,31.009676,-0.038164,-0.54237,2.117021,-0.369434
1066,0.972618,-0.517584,1.726642,0.507139,-0.047861,-0.047861,-0.014416,-0.049995,-0.184835,31.009676,-0.038164,-0.54237,-0.472362,-0.369434
1765,-0.907026,0.927947,-0.046859,-0.57833,20.89367,20.89367,-0.014416,-0.049995,-0.184835,-0.032248,-0.038164,-0.54237,2.117021,-0.369434


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
1197,The Musketeer,http://www.imdb.com/title/tt0246544/?ref_=fn_t...,4.7,2001.0,104.0,12856,Action|Adventure|Romance,Germany,English,guard|king|murder|musketeer|swordsman
2136,Armored,http://www.imdb.com/title/tt0913354/?ref_=fn_t...,5.7,2009.0,88.0,26236,Action|Crime|Thriller,USA,English,armored truck|blood splatter|die hard scenario...
3010,Rescue Dawn,http://www.imdb.com/title/tt0462504/?ref_=fn_t...,7.3,2006.0,120.0,84641,Adventure|Biography|Drama|War,USA,English,bombing|guard|jungle|laos|prisoner
1066,Sleepers,http://www.imdb.com/title/tt0117665/?ref_=fn_t...,7.5,1996.0,147.0,154487,Crime|Drama|Thriller,USA,English,boy|guard|kicked in the crotch|prank|sexual abuse
1765,Viy,http://www.imdb.com/title/tt1224378/?ref_=fn_t...,5.4,2014.0,107.0,3170,Adventure|Fantasy|Mystery|Thriller,Russia,Russian,cartographer|creature|foreign language adaptat...


On se rend compte que les films ayant des mots-clés en commun avec le film recherché se retrouvent systématiquement dans les résultats. S'il n'y en a pas, ce sont plutôt les films de même langue/pays qui sont privilégiés (pour des pays/langues moins représentés).

###### Conclusion: La prise en compte des mots-clés est un succès mais le poids donné à ceux-ci dans le calcul des distances est très élevé - cela s'explique par le fait qu'un mot-clé en particulier va être très peu présent dans la base de données, donc après standardisation les films qui possèdent ce mot-clé seront très loin de la moyenne en termes d'écarts-types. Du coup les autres features comme le genre, le pays ou la langue passent après, alors qu'elles sont au moins aussi importantes.

## Méthode n°3: k-NN sur genres/mots-clés/pays/langue + k-NN sur année/durée/votes + pondération sur note

###### Principe: On construit le dataframe à la volée, comme pour la méthode n°2, puis on fait un premier k-NN plus large sur les genres/mots-clés/pays/langues sans faire de standardisation (poids égal à chaque information). Ensuite on fait un 2e k-NN sur l'année/durée/votes pour garder des films plutôt similaires dans les films retenus. Enfin, on va pondérer les distances de chaque film avec un coefficient qui dépendra de la note IMDB (score entre 0 et 10), ce qui permettra de favoriser un peu les films avec une meilleure note. 

In [15]:
def recommend3(X, id_movie, nb_neighbors=31):
    """Display recommendations for one movie with a two-stage k-NN (method n°3).

    Stage 1 selects ``nb_neighbors`` candidates using every column of the
    temporary frame from position 4 onwards, without scaling. Stage 2
    re-ranks those candidates by distance on the standardized
    ``title_year`` / ``duration`` / ``num_voted_users`` columns. Each
    stage-2 distance is then divided by the candidate's ``imdb_score`` and
    multiplied by 10, so better-rated movies move up the ranking.

    Parameters
    ----------
    X : pandas.DataFrame
        Main dataset, forwarded unchanged to ``make_temp_df``.
    id_movie : int
        Positional row index of the reference movie.
    nb_neighbors : int, default 31
        Number of neighbours requested from each k-NN stage.

    Returns
    -------
    None
        The result is printed and displayed from the global ``X_info``.
        Nothing is returned on purpose: the notebook echoes every
        expression value, so a return value would clutter the output.
    """
    X_temp = make_temp_df(X, id_movie)

    # NOTE(review): assumes make_temp_df puts imdb_score, title_year, duration
    # and num_voted_users in the first four columns and uses a default
    # RangeIndex, so that positions and labels coincide -- confirm.
    similarity = X_temp.iloc[:, 4:].values

    # First k-NN on genres+keywords+country+language, deliberately unscaled.
    # Fitting and querying with the same ndarray avoids scikit-learn's
    # "fitted with feature names" warning.
    neighbors1 = NearestNeighbors(n_neighbors=nb_neighbors).fit(similarity)
    candidates = neighbors1.kneighbors(similarity[[id_movie]],
                                       return_distance=False)[0]

    # Scaling on numerical features (except imdb_score). The scaled array is
    # indexed by position directly instead of being concatenated back into a
    # wide frame that was only ever read for these three columns.
    numeric_std = StandardScaler().fit_transform(
        X_temp[['title_year', 'duration', 'num_voted_users']])

    # Second k-NN, restricted to the stage-1 candidates.
    neighbors2 = NearestNeighbors(n_neighbors=nb_neighbors).fit(numeric_std[candidates])
    distances, positions = neighbors2.kneighbors(numeric_std[[id_movie]])

    # `positions` index into `candidates`; map them back to dataset rows and
    # weight each distance by the movie's rating (lower weighted value = better).
    imdb_scores = X_temp['imdb_score'].values
    weighted = [
        (candidates[pos], dist / imdb_scores[candidates[pos]] * 10)
        for pos, dist in zip(positions.ravel(), distances.ravel())
    ]
    weighted.sort(key=lambda pair: pair[1])

    # Remove any probable sequels
    movie_list = remove_sequels(id_movie, [movie for movie, _ in weighted])

    print("\nMéthode n°3:")
    display(X_info.iloc[movie_list])

In [16]:
# Run method n°3 on four reference movies (row -> title, per the displayed output):
# 0 = Avatar, 2132 = Capitalism: A Love Story,
# 4583 = Cries & Whispers, 1297 = Obitaemyy ostrov.
recommend3(X, 0)
recommend3(X, 2132)
recommend3(X, 4583)
recommend3(X, 1297)


Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
2853,Star Wars: Episode IV - A New Hope,http://www.imdb.com/title/tt0076759/?ref_=fn_t...,8.7,1977.0,125.0,911097,Action|Adventure|Fantasy|Sci-Fi,USA,English,death star|empire|galactic war|princess|rebellion
13,Man of Steel,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,7.2,2013.0,143.0,548573,Action|Adventure|Fantasy|Sci-Fi,USA,English,based on comic book|british actor playing amer...
143,Star Trek,http://www.imdb.com/title/tt0796366/?ref_=fn_t...,8.0,2009.0,127.0,504419,Action|Adventure|Sci-Fi,USA,English,box office hit|future|lifted by the throat|sta...
109,X-Men Origins: Wolverine,http://www.imdb.com/title/tt0458525/?ref_=fn_t...,6.7,2009.0,119.0,361924,Action|Adventure|Fantasy|Sci-Fi|Thriller,USA,English,army|civil war|claw fight|commando|wolverine t...
389,Hellboy II: The Golden Army,http://www.imdb.com/title/tt0411477/?ref_=fn_t...,7.0,2008.0,120.0,208422,Action|Adventure|Fantasy|Horror|Sci-Fi,USA,English,creature|elf|prince|rebellion|superhero



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
4059,Inside Job,http://www.imdb.com/title/tt1645089/?ref_=fn_t...,8.3,2010.0,105.0,55382,Crime|Documentary,USA,English,florida|iceland|interview|new york city new yo...
1416,Black Water Transit,http://www.imdb.com/title/tt0490087/?ref_=fn_t...,7.2,2009.0,108.056863,219,Crime|Drama,USA,English,based on novel
4687,The Trials of Darryl Hunt,http://www.imdb.com/title/tt0446055/?ref_=fn_t...,7.7,2006.0,106.0,771,Crime|Documentary,USA,English,false accusation|murder|north carolina|trial|w...
2333,Madea Goes to Jail,http://www.imdb.com/title/tt1142800/?ref_=fn_t...,4.1,2009.0,103.0,9544,Comedy|Crime|Drama,USA,English,adaptation directed by original author|cross d...
2218,The Frozen Ground,http://www.imdb.com/title/tt2005374/?ref_=fn_t...,6.4,2013.0,105.0,43879,Crime|Drama|Mystery|Thriller,USA,English,anchorage alaska|based on true story|pole danc...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
4169,From Here to Eternity,http://www.imdb.com/title/tt0045793/?ref_=fn_t...,7.8,1953.0,118.0,33987,Drama|Romance|War,USA,English,boxing|captain|hawaii|love|sergeant
4008,Show Boat,http://www.imdb.com/title/tt0044030/?ref_=fn_t...,7.0,1951.0,108.0,3538,Drama|Family|Musical|Romance,USA,English,gambling|interracial marriage|love|miscegenati...
4782,In the Company of Men,http://www.imdb.com/title/tt0119361/?ref_=fn_t...,7.3,1997.0,97.0,11550,Comedy|Drama,Canada,English,business trip|love|misogynist|office|secretary
4755,The Brothers McMullen,http://www.imdb.com/title/tt0112585/?ref_=fn_t...,6.6,1995.0,98.0,6375,Comedy|Drama|Romance,USA,English,abusive father|critically acclaimed|loss of fa...
4353,Dream with the Fishes,http://www.imdb.com/title/tt0119019/?ref_=fn_t...,7.0,1997.0,97.0,1524,Comedy|Drama,USA,English,dying|to do list|twenty something|underwear|vo...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
3753,The Geographer Drank His Globe Away,http://www.imdb.com/title/tt3155604/?ref_=fn_t...,7.5,2013.0,120.0,2488,Adventure|Drama,Russia,Russian,rafting|russia|school|student|teacher
4466,Destiny,http://www.imdb.com/title/tt2983582/?ref_=fn_t...,8.1,2014.0,108.056863,3089,Action|Adventure|Fantasy|Sci-Fi,USA,English,first person shooter
48,Star Trek Beyond,http://www.imdb.com/title/tt2660888/?ref_=fn_t...,7.5,2016.0,122.0,53607,Action|Adventure|Sci-Fi|Thriller,USA,English,hatred|sequel|space opera|star trek|third part
1765,Viy,http://www.imdb.com/title/tt1224378/?ref_=fn_t...,5.4,2014.0,107.0,3170,Adventure|Fantasy|Mystery|Thriller,Russia,Russian,cartographer|creature|foreign language adaptat...
3352,Stargate: The Ark of Truth,http://www.imdb.com/title/tt0942903/?ref_=fn_t...,7.4,2008.0,97.0,15862,Action|Adventure|Drama|Fantasy|Sci-Fi,USA,English,2000s|evil god|space opera|stargate|wormhole


Par rapport à la méthode n°2, les résultats sont meilleurs pour le blockbuster (Avatar) et le film russe (Obitaemyy ostrov), pour lesquels la combinaison de genres permet de proposer des films similaires. Ils sont moins bons pour le documentaire (Capitalism: A Love Story) car la méthode se focalise sur des films avec le genre 'Crime', les deux autres genres 'Documentary' et 'News' étant trop peu représentés. Et ils sont équivalents pour le film suédois (je ne suis pas sûr qu'on puisse trouver des films semblables dans le dataset d'ailleurs).

###### Conclusion: La méthode a l'air de fonctionner mieux, mais un problème que l'on peut rencontrer est le suivant: par exemple si le film recherché a les genres 1 et 2, des films avec les genres 1, 3, 4, 5, 6 peuvent se retrouver dans la sélection parce qu'ils ont un genre en commun. Pour éviter ce phénomène, on pourrait revenir au one-hot encoding des genres.

## Méthode n°4: comme méthode n°3 mais avec du one-hot encoding sur genres

###### Principe: On reprend la méthode n°3, sauf que cette fois le genre n'est pas représenté par des colonnes de similitudes par rapport au film recherché, mais par le one-hot encoding (utilisé dans la méthode n°1 par exemple). Cela va mécaniquement donner plus de poids au genre (24 features sur une trentaine) mais ce n'est pas très grave car c'est sûrement l'information la plus importante pour trouver des films similaires.

In [17]:
def recommend4(X, id_movie, nb_neighbors=31):
    """Display recommendations for one movie with a two-stage k-NN (method n°4).

    Same pipeline as method n°3, except that genres come from the global
    one-hot encoded ``genres`` frame, appended to the temporary frame built
    without genre columns. Stage 1 selects ``nb_neighbors`` candidates using
    every column from position 4 onwards, without scaling. Stage 2 re-ranks
    them on the standardized ``title_year`` / ``duration`` /
    ``num_voted_users`` columns. Each stage-2 distance is divided by the
    candidate's ``imdb_score`` and multiplied by 10, so better-rated movies
    move up the ranking.

    Parameters
    ----------
    X : pandas.DataFrame
        Main dataset, forwarded unchanged to ``make_temp_df``.
    id_movie : int
        Positional row index of the reference movie.
    nb_neighbors : int, default 31
        Number of neighbours requested from each k-NN stage.

    Returns
    -------
    None
        The result is printed and displayed from the global ``X_info``.
        Nothing is returned on purpose: the notebook echoes every
        expression value, so a return value would clutter the output.
    """
    X_temp = pd.concat([make_temp_df(X, id_movie, include_genres=False),
                        genres], axis=1)

    # NOTE(review): assumes the first four columns are imdb_score, title_year,
    # duration and num_voted_users, and that both concatenated frames share a
    # default RangeIndex so positions and labels coincide -- confirm.
    similarity = X_temp.iloc[:, 4:].values

    # First k-NN on genres+keywords+country+language, deliberately unscaled.
    # Fitting and querying with the same ndarray avoids scikit-learn's
    # "fitted with feature names" warning.
    neighbors1 = NearestNeighbors(n_neighbors=nb_neighbors,
                                  algorithm='brute').fit(similarity)
    candidates = neighbors1.kneighbors(similarity[[id_movie]],
                                       return_distance=False)[0]

    # Scaling on numerical features (except imdb_score). The scaled array is
    # indexed by position directly instead of being concatenated back into a
    # wide frame that was only ever read for these three columns.
    numeric_std = StandardScaler().fit_transform(
        X_temp[['title_year', 'duration', 'num_voted_users']])

    # Second k-NN, restricted to the stage-1 candidates.
    neighbors2 = NearestNeighbors(n_neighbors=nb_neighbors,
                                  algorithm='brute').fit(numeric_std[candidates])
    distances, positions = neighbors2.kneighbors(numeric_std[[id_movie]])

    # `positions` index into `candidates`; map them back to dataset rows and
    # weight each distance by the movie's rating (lower weighted value = better).
    imdb_scores = X_temp['imdb_score'].values
    weighted = [
        (candidates[pos], dist / imdb_scores[candidates[pos]] * 10)
        for pos, dist in zip(positions.ravel(), distances.ravel())
    ]
    weighted.sort(key=lambda pair: pair[1])

    # Remove any probable sequels
    movie_list = remove_sequels(id_movie, [movie for movie, _ in weighted])

    print("\nMéthode n°4:")
    display(X_info.iloc[movie_list])

In [18]:
# Run method n°4 on four reference movies (row -> title, per the displayed output):
# 0 = Avatar, 2132 = Capitalism: A Love Story,
# 4583 = Cries & Whispers, 1297 = Obitaemyy ostrov.
recommend4(X, 0)
recommend4(X, 2132)
recommend4(X, 4583)
recommend4(X, 1297)


Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
185,Pirates of the Caribbean: The Curse of the Bla...,http://www.imdb.com/title/tt0325980/?ref_=fn_t...,8.1,2003.0,143.0,809474,Action|Adventure|Fantasy,USA,English,caribbean|curse|governor|pirate|undead
83,Guardians of the Galaxy,http://www.imdb.com/title/tt2015381/?ref_=fn_t...,8.1,2014.0,121.0,682155,Action|Adventure|Sci-Fi,USA,English,bounty hunter|outer space|raccoon|talking anim...
1927,Star Wars: Episode V - The Empire Strikes Back,http://www.imdb.com/title/tt0080684/?ref_=fn_t...,8.8,1980.0,127.0,837759,Action|Adventure|Fantasy|Sci-Fi,USA,English,duel|famous twist|rebel|rescue|snowy landscape
13,Man of Steel,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,7.2,2013.0,143.0,548573,Action|Adventure|Fantasy|Sci-Fi,USA,English,based on comic book|british actor playing amer...
2343,Aliens,http://www.imdb.com/title/tt0090605/?ref_=fn_t...,8.4,1986.0,154.0,488537,Action|Adventure|Sci-Fi,USA,English,alien|human versus alien|monster|rescue missio...



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2132,Capitalism: A Love Story,http://www.imdb.com/title/tt1232207/?ref_=fn_t...,7.4,2009.0,105.0,35137,Crime|Documentary|News,USA,English,capitalism|critique of capitalism|investment b...
4059,Inside Job,http://www.imdb.com/title/tt1645089/?ref_=fn_t...,8.3,2010.0,105.0,55382,Crime|Documentary,USA,English,florida|iceland|interview|new york city new yo...
4687,The Trials of Darryl Hunt,http://www.imdb.com/title/tt0446055/?ref_=fn_t...,7.7,2006.0,106.0,771,Crime|Documentary,USA,English,false accusation|murder|north carolina|trial|w...
4119,Slacker Uprising,http://www.imdb.com/title/tt0850669/?ref_=fn_t...,5.3,2007.0,102.0,2242,Documentary,USA,English,character name in title|election campaign|pres...
4331,"Food, Inc.",http://www.imdb.com/title/tt1286537/?ref_=fn_t...,7.9,2008.0,94.0,42389,Documentary,USA,English,farming|flesh eating|food|food industry|gluttony
4325,An Inconvenient Truth,http://www.imdb.com/title/tt0497116/?ref_=fn_t...,7.5,2006.0,96.0,67654,Documentary,USA,English,climate|earth|global warming|science|truth



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4583,Cries & Whispers,http://www.imdb.com/title/tt0069467/?ref_=fn_t...,8.2,1972.0,91.0,19964,Drama,Sweden,Swedish,dying|love|mansion|selfishness|turn of the cen...
3778,Midnight Cowboy,http://www.imdb.com/title/tt0064665/?ref_=fn_t...,7.9,1969.0,113.0,76616,Drama,USA,English,gay|homosexual|homosexuality|hustler|male rear...
3979,Jesus' Son,http://www.imdb.com/title/tt0186253/?ref_=fn_t...,7.0,1999.0,107.0,6221,Drama,USA,English,hospital|love|mennonite|shower|song
2425,Philadelphia,http://www.imdb.com/title/tt0107818/?ref_=fn_t...,7.7,1993.0,125.0,178731,Drama,USA,English,aids|city name in title|gay|homophobia|lawyer
4473,Naturally Native,http://www.imdb.com/title/tt0133117/?ref_=fn_t...,6.5,1998.0,107.0,103,Drama,USA,English,cosmetics|female protagonist|native american|n...
2417,White Oleander,http://www.imdb.com/title/tt0283139/?ref_=fn_t...,7.2,2002.0,109.0,25549,Drama,USA,English,california|foster home|love|oleander|prison



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1297,Obitaemyy ostrov,http://www.imdb.com/title/tt0972558/?ref_=fn_t...,5.3,2009.0,115.0,4387,Action|Adventure|Sci-Fi,Russia,Russian,22nd century|first part|friend|guard|space opera
48,Star Trek Beyond,http://www.imdb.com/title/tt2660888/?ref_=fn_t...,7.5,2016.0,122.0,53607,Action|Adventure|Sci-Fi|Thriller,USA,English,hatred|sequel|space opera|star trek|third part
80,Independence Day: Resurgence,http://www.imdb.com/title/tt1628841/?ref_=fn_t...,5.5,2016.0,120.0,58137,Action|Adventure|Sci-Fi,USA,English,alien|battle|defense|independence day|mothership
53,Jupiter Ascending,http://www.imdb.com/title/tt1617661/?ref_=fn_t...,5.4,2015.0,127.0,139593,Action|Adventure|Sci-Fi,USA,English,box office flop|critically bashed|planet earth...
95,Terminator Genisys,http://www.imdb.com/title/tt1340138/?ref_=fn_t...,6.6,2015.0,126.0,188457,Action|Adventure|Sci-Fi,USA,English,alternate timeline|cyborg|future|robot|time ma...
55,X-Men: Apocalypse,http://www.imdb.com/title/tt3385516/?ref_=fn_t...,7.3,2016.0,144.0,148379,Action|Adventure|Sci-Fi,USA,English,mutant|superhero|superhero team|x men|year 1983


Par rapport à la méthode précédente, les résultats sont équivalents pour le blockbuster, le film suédois et le film russe; mais ils se sont bien améliorés pour le documentaire, qui renvoie cette fois d'autres documentaires du même style.

###### Conclusion: Les résultats sont plutôt bons et satisfaisants avec cette méthode. Pour améliorer encore le résultat, une solution pourrait être de faire une analyse et une transformation plus poussées des mots-clés (ils ne sont pas toujours pertinents ici).

## 5. Comparaison des résultats

In [89]:
def compare(X, id_movie):
    """Run the four recommendation methods in order for the same movie.

    Method n°1 is called on the 'Genres' subset only; methods n°2 to n°4
    take no extra argument. Each method prints/displays its own result,
    so nothing is returned here.
    """
    recommend(X, id_movie, subset=['Genres'])
    for method in (recommend2, recommend3, recommend4):
        method(X, id_movie)

#### Film n°1: Avatar

In [211]:
# Side-by-side output of the four methods for row 0 (Avatar).
compare(X, 0)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
13,Man of Steel,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,7.2,2013.0,143.0,548573,Action|Adventure|Fantasy|Sci-Fi,USA,English,based on comic book|british actor playing amer...
213,Star Wars: Episode III - Revenge of the Sith,http://www.imdb.com/title/tt0121766/?ref_=fn_t...,7.6,2005.0,140.0,520104,Action|Adventure|Fantasy|Sci-Fi,USA,English,elongated cry of no|friends become enemies|kic...
738,The Avengers,http://www.imdb.com/title/tt0848228/?ref_=fn_t...,8.1,2012.0,173.0,995415,Action|Adventure|Sci-Fi,USA,English,alien invasion|assassin|battle|iron man|soldier
185,Pirates of the Caribbean: The Curse of the Bla...,http://www.imdb.com/title/tt0325980/?ref_=fn_t...,8.1,2003.0,143.0,809474,Action|Adventure|Fantasy,USA,English,caribbean|curse|governor|pirate|undead
39,X-Men: Days of Future Past,http://www.imdb.com/title/tt1877832/?ref_=fn_t...,8.0,2014.0,149.0,514125,Action|Adventure|Fantasy|Sci-Fi|Thriller,USA,English,dystopia|super strength|supernatural power|tim...



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
126,The Last Airbender,http://www.imdb.com/title/tt0938283/?ref_=fn_t...,4.2,2010.0,103.0,118951,Action|Adventure|Family|Fantasy,USA,English,avatar|fire|kingdom|tribe|water
4791,The Ridges,http://www.imdb.com/title/tt1781935/?ref_=fn_t...,3.0,2011.0,143.0,125,Drama|Horror|Thriller,USA,English,avatar|college|death|tron|university
551,Battle Los Angeles,http://www.imdb.com/title/tt1217613/?ref_=fn_t...,5.8,2011.0,116.0,154955,Action|Sci-Fi,USA,English,alien|extraterrestrial|invasion|marine|mission
705,Jack Ryan: Shadow Recruit,http://www.imdb.com/title/tt1205537/?ref_=fn_t...,6.2,2014.0,105.0,99035,Action|Drama|Thriller,USA,English,covert analyst|marine|russian|spy|stock market
1087,High Crimes,http://www.imdb.com/title/tt0257756/?ref_=fn_t...,6.3,2002.0,115.0,30077,Crime|Drama|Mystery|Thriller,USA,English,defense lawyer|lawyer|marine|murder|villager



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
2853,Star Wars: Episode IV - A New Hope,http://www.imdb.com/title/tt0076759/?ref_=fn_t...,8.7,1977.0,125.0,911097,Action|Adventure|Fantasy|Sci-Fi,USA,English,death star|empire|galactic war|princess|rebellion
13,Man of Steel,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,7.2,2013.0,143.0,548573,Action|Adventure|Fantasy|Sci-Fi,USA,English,based on comic book|british actor playing amer...
143,Star Trek,http://www.imdb.com/title/tt0796366/?ref_=fn_t...,8.0,2009.0,127.0,504419,Action|Adventure|Sci-Fi,USA,English,box office hit|future|lifted by the throat|sta...
109,X-Men Origins: Wolverine,http://www.imdb.com/title/tt0458525/?ref_=fn_t...,6.7,2009.0,119.0,361924,Action|Adventure|Fantasy|Sci-Fi|Thriller,USA,English,army|civil war|claw fight|commando|wolverine t...
389,Hellboy II: The Golden Army,http://www.imdb.com/title/tt0411477/?ref_=fn_t...,7.0,2008.0,120.0,208422,Action|Adventure|Fantasy|Horror|Sci-Fi,USA,English,creature|elf|prince|rebellion|superhero



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
0,Avatar,http://www.imdb.com/title/tt0499549/?ref_=fn_t...,7.9,2009.0,178.0,886204,Action|Adventure|Fantasy|Sci-Fi,USA,English,avatar|future|marine|native|paraplegic
185,Pirates of the Caribbean: The Curse of the Bla...,http://www.imdb.com/title/tt0325980/?ref_=fn_t...,8.1,2003.0,143.0,809474,Action|Adventure|Fantasy,USA,English,caribbean|curse|governor|pirate|undead
83,Guardians of the Galaxy,http://www.imdb.com/title/tt2015381/?ref_=fn_t...,8.1,2014.0,121.0,682155,Action|Adventure|Sci-Fi,USA,English,bounty hunter|outer space|raccoon|talking anim...
1927,Star Wars: Episode V - The Empire Strikes Back,http://www.imdb.com/title/tt0080684/?ref_=fn_t...,8.8,1980.0,127.0,837759,Action|Adventure|Fantasy|Sci-Fi,USA,English,duel|famous twist|rebel|rescue|snowy landscape
13,Man of Steel,http://www.imdb.com/title/tt0770828/?ref_=fn_t...,7.2,2013.0,143.0,548573,Action|Adventure|Fantasy|Sci-Fi,USA,English,based on comic book|british actor playing amer...
2343,Aliens,http://www.imdb.com/title/tt0090605/?ref_=fn_t...,8.4,1986.0,154.0,488537,Action|Adventure|Sci-Fi,USA,English,alien|human versus alien|monster|rescue missio...


#### Film n°2: Spectre

In [101]:
# Side-by-side output of the four methods for row 2 (Spectre).
compare(X, 2)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2,Spectre,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,6.8,2015.0,148.0,275868,Action|Adventure|Thriller,UK,English,bomb|espionage|sequel|spy|terrorist
194,The Bourne Legacy,http://www.imdb.com/title/tt1194173/?ref_=fn_t...,6.7,2012.0,135.0,229823,Action|Adventure|Thriller,USA,English,assassin|cia|drone|pill|wolf
121,Mission: Impossible - Rogue Nation,http://www.imdb.com/title/tt2381249/?ref_=fn_t...,7.4,2015.0,131.0,232187,Action|Adventure|Thriller,China,English,capture|computer hacker|mission|rogue agent|spy
243,Live Free or Die Hard,http://www.imdb.com/title/tt0337978/?ref_=fn_t...,7.2,2007.0,129.0,336235,Action|Adventure|Thriller,USA,English,fbi|hacker|independence day|police|terrorist
283,The Expendables 3,http://www.imdb.com/title/tt2333784/?ref_=fn_t...,6.1,2014.0,131.0,127258,Action|Adventure|Thriller,USA,English,battle|fight|mission|pg 13 sequel to r rated f...
134,Die Another Day,http://www.imdb.com/title/tt0246460/?ref_=fn_t...,6.1,2002.0,133.0,169914,Action|Adventure|Thriller,UK,English,catfight|clinic|colonel|diamond|patricide



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2,Spectre,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,6.8,2015.0,148.0,275868,Action|Adventure|Thriller,UK,English,bomb|espionage|sequel|spy|terrorist
2777,Casino Royale,http://www.imdb.com/title/tt0381061/?ref_=fn_t...,8.0,2006.0,144.0,470501,Action|Adventure|Thriller,UK,English,casino|espionage|free running|james bond|terro...
260,True Lies,http://www.imdb.com/title/tt0111503/?ref_=fn_t...,7.2,1994.0,141.0,190439,Action|Comedy|Thriller,USA,English,espionage|secret agent|secret mission|spy|spy ...
565,I Spy,http://www.imdb.com/title/tt0297181/?ref_=fn_t...,5.4,2002.0,97.0,41663,Action|Adventure|Comedy|Thriller,USA,English,boxer|espionage|spy|stealth|top secret
3229,Restless,http://www.imdb.com/title/tt2241676/?ref_=fn_t...,7.2,2012.0,180.0,2098,Drama|Romance,UK,English,espionage|motel|mother son relationship|murder...
879,Central Intelligence,http://www.imdb.com/title/tt1489889/?ref_=fn_t...,6.6,2016.0,107.0,33354,Action|Comedy|Crime,USA,English,accountant|blooper|espionage|facebook|high sch...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2,Spectre,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,6.8,2015.0,148.0,275868,Action|Adventure|Thriller,UK,English,bomb|espionage|sequel|spy|terrorist
591,Body of Lies,http://www.imdb.com/title/tt0758774/?ref_=fn_t...,7.1,2008.0,128.0,174248,Action|Drama|Thriller,USA,English,cia|jordan|middle east|spy|terrorist
2777,Casino Royale,http://www.imdb.com/title/tt0381061/?ref_=fn_t...,8.0,2006.0,144.0,470501,Action|Adventure|Thriller,UK,English,casino|espionage|free running|james bond|terro...
155,The World Is Not Enough,http://www.imdb.com/title/tt0143145/?ref_=fn_t...,6.4,1999.0,128.0,157519,Action|Adventure|Thriller,UK,English,british|oil|scientist|terrorist|tycoon
1152,The Count of Monte Cristo,http://www.imdb.com/title/tt0245844/?ref_=fn_t...,7.7,2002.0,131.0,104991,Action|Adventure|Drama|Romance|Thriller,UK,English,count|escape|island|revenge|sailor
327,Die Hard with a Vengeance,http://www.imdb.com/title/tt0112864/?ref_=fn_t...,7.6,1995.0,128.0,299258,Action|Adventure|Thriller,USA,English,bomb|detective|new york city|police|terrorist



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
2,Spectre,http://www.imdb.com/title/tt2379713/?ref_=fn_t...,6.8,2015.0,148.0,275868,Action|Adventure|Thriller,UK,English,bomb|espionage|sequel|spy|terrorist
139,Mission: Impossible - Ghost Protocol,http://www.imdb.com/title/tt1229238/?ref_=fn_t...,7.4,2011.0,133.0,365104,Action|Adventure|Thriller,USA,English,dubai|kremlin|race against time|russian|terrorist
243,Live Free or Die Hard,http://www.imdb.com/title/tt0337978/?ref_=fn_t...,7.2,2007.0,129.0,336235,Action|Adventure|Thriller,USA,English,fbi|hacker|independence day|police|terrorist
134,Die Another Day,http://www.imdb.com/title/tt0246460/?ref_=fn_t...,6.1,2002.0,133.0,169914,Action|Adventure|Thriller,UK,English,catfight|clinic|colonel|diamond|patricide
2777,Casino Royale,http://www.imdb.com/title/tt0381061/?ref_=fn_t...,8.0,2006.0,144.0,470501,Action|Adventure|Thriller,UK,English,casino|espionage|free running|james bond|terro...
155,The World Is Not Enough,http://www.imdb.com/title/tt0143145/?ref_=fn_t...,6.4,1999.0,128.0,157519,Action|Adventure|Thriller,UK,English,british|oil|scientist|terrorist|tycoon


#### Film n°3: Toy Story 3

In [212]:
# Side-by-side output of the four methods for row 35 (Toy Story 3).
compare(X, 35)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
35,Toy Story 3,http://www.imdb.com/title/tt0435761/?ref_=fn_t...,8.3,2010.0,103.0,544884,Adventure|Animation|Comedy|Family|Fantasy,USA,English,college|day care|escape|teddy bear|toy
215,"Monsters, Inc.",http://www.imdb.com/title/tt0198781/?ref_=fn_t...,8.1,2001.0,92.0,585659,Adventure|Animation|Comedy|Family|Fantasy,USA,English,friend|little girl|monster|rival|scream
864,Shrek,http://www.imdb.com/title/tt0126029/?ref_=fn_t...,7.9,2001.0,90.0,467113,Adventure|Animation|Comedy|Family|Fantasy,USA,English,donkey|fairy tale|ogre|princess|swamp
81,How to Train Your Dragon,http://www.imdb.com/title/tt0892769/?ref_=fn_t...,8.2,2010.0,98.0,485430,Adventure|Animation|Family|Fantasy,USA,English,dragon|island|training|viking|village
47,Brave,http://www.imdb.com/title/tt1217209/?ref_=fn_t...,7.2,2012.0,93.0,273556,Adventure|Animation|Comedy|Family|Fantasy,USA,English,archery|coming of age|female warrior|princess|...
68,Inside Out,http://www.imdb.com/title/tt2096673/?ref_=fn_t...,8.3,2015.0,95.0,345198,Adventure|Animation|Comedy|Drama|Family|Fantasy,USA,English,anger|joy|memory|running away|sadness



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
35,Toy Story 3,http://www.imdb.com/title/tt0435761/?ref_=fn_t...,8.3,2010.0,103.0,544884,Adventure|Animation|Comedy|Family|Fantasy,USA,English,college|day care|escape|teddy bear|toy
868,Ted,http://www.imdb.com/title/tt1637725/?ref_=fn_t...,7.0,2012.0,112.0,471644,Comedy|Fantasy,USA,English,2010s|car accident|sex scene|teddy bear|testicle
2257,Big,http://www.imdb.com/title/tt0094737/?ref_=fn_t...,7.3,1988.0,130.0,153468,Comedy|Drama|Family|Fantasy|Romance,USA,English,friend|job|new york city|toy|wish
1150,Small Soldiers,http://www.imdb.com/title/tt0122718/?ref_=fn_t...,6.1,1998.0,108.0,77415,Action|Adventure|Comedy|Family|Sci-Fi,USA,English,action figure|battle|computer|military|toy
606,The Santa Clause 2,http://www.imdb.com/title/tt0304669/?ref_=fn_t...,5.5,2002.0,104.0,34561,Comedy|Family|Fantasy,USA,English,christmas|magic|santa claus|son|toy
695,Jingle All the Way,http://www.imdb.com/title/tt0116705/?ref_=fn_t...,5.4,1996.0,94.0,68406,Comedy|Family,USA,English,action figure|christmas|hero|karate|toy



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
35,Toy Story 3,http://www.imdb.com/title/tt0435761/?ref_=fn_t...,8.3,2010.0,103.0,544884,Adventure|Animation|Comedy|Family|Fantasy,USA,English,college|day care|escape|teddy bear|toy
864,Shrek,http://www.imdb.com/title/tt0126029/?ref_=fn_t...,7.9,2001.0,90.0,467113,Adventure|Animation|Comedy|Family|Fantasy,USA,English,donkey|fairy tale|ogre|princess|swamp
5,Tangled,http://www.imdb.com/title/tt0398286/?ref_=fn_t...,7.8,2010.0,100.0,294810,Adventure|Animation|Comedy|Family|Fantasy|Musi...,USA,English,17th century|based on fairy tale|disney|flower...
47,Brave,http://www.imdb.com/title/tt1217209/?ref_=fn_t...,7.2,2012.0,93.0,273556,Adventure|Animation|Comedy|Family|Fantasy,USA,English,archery|coming of age|female warrior|princess|...
29,Monsters University,http://www.imdb.com/title/tt1453405/?ref_=fn_t...,7.3,2013.0,104.0,235025,Adventure|Animation|Comedy|Family|Fantasy,USA,English,cheating|fraternity|monster|singing in a car|u...
477,Madagascar,http://www.imdb.com/title/tt0351283/?ref_=fn_t...,6.9,2005.0,86.0,266636,Adventure|Animation|Comedy|Family,USA,English,escape|jungle|lemur|madagascar|zoo



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
35,Toy Story 3,http://www.imdb.com/title/tt0435761/?ref_=fn_t...,8.3,2010.0,103.0,544884,Adventure|Animation|Comedy|Family|Fantasy,USA,English,college|day care|escape|teddy bear|toy
57,Up,http://www.imdb.com/title/tt1049413/?ref_=fn_t...,8.3,2009.0,96.0,665575,Adventure|Animation|Comedy|Family,USA,English,balloon|house|promise|retirement|skeleton
215,"Monsters, Inc.",http://www.imdb.com/title/tt0198781/?ref_=fn_t...,8.1,2001.0,92.0,585659,Adventure|Animation|Comedy|Family|Fantasy,USA,English,friend|little girl|monster|rival|scream
864,Shrek,http://www.imdb.com/title/tt0126029/?ref_=fn_t...,7.9,2001.0,90.0,467113,Adventure|Animation|Comedy|Family|Fantasy,USA,English,donkey|fairy tale|ogre|princess|swamp
68,Inside Out,http://www.imdb.com/title/tt2096673/?ref_=fn_t...,8.3,2015.0,95.0,345198,Adventure|Animation|Comedy|Drama|Family|Fantasy,USA,English,anger|joy|memory|running away|sadness
47,Brave,http://www.imdb.com/title/tt1217209/?ref_=fn_t...,7.2,2012.0,93.0,273556,Adventure|Animation|Comedy|Family|Fantasy,USA,English,archery|coming of age|female warrior|princess|...


#### Film n°4: Waterworld

In [121]:
# Compare the four recommendation methods for film 66 (Waterworld).
# `compare` is defined outside this cell; per the output below, each method's
# table lists the reference film first, followed by five recommended films.
compare(X, 66)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
66,Waterworld,http://www.imdb.com/title/tt0114898/?ref_=fn_t...,6.1,1995.0,176.0,144337,Action|Adventure|Sci-Fi|Thriller,USA,English,future|sail|sea|smoker|water
135,Armageddon,http://www.imdb.com/title/tt0120591/?ref_=fn_t...,6.6,1998.0,153.0,322395,Action|Adventure|Sci-Fi|Thriller,USA,English,asteroid|astronaut|bomb|meteorite|outer space
383,The Core,http://www.imdb.com/title/tt0298814/?ref_=fn_t...,5.4,2003.0,135.0,77029,Action|Adventure|Sci-Fi|Thriller,USA,English,core|earth|natural disaster|pacemaker|scientist
24,Battleship,http://www.imdb.com/title/tt1440129/?ref_=fn_t...,5.9,2012.0,131.0,202382,Action|Adventure|Sci-Fi|Thriller,USA,English,box office flop|hawaii|naval|oahu hawaii|ship
1040,Dune,http://www.imdb.com/title/tt0087182/?ref_=fn_t...,6.6,1984.0,177.0,97087,Action|Adventure|Sci-Fi,USA,English,desert planet|dune|giant worm|space travel|spice
9,Superman Returns,http://www.imdb.com/title/tt0348150/?ref_=fn_t...,6.1,2006.0,169.0,240396,Action|Adventure|Sci-Fi,USA,English,crystal|epic|lex luthor|lois lane|return to earth



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
66,Waterworld,http://www.imdb.com/title/tt0114898/?ref_=fn_t...,6.1,1995.0,176.0,144337,Action|Adventure|Sci-Fi|Thriller,USA,English,future|sail|sea|smoker|water
3574,Sea Rex 3D: Journey to a Prehistoric World,http://www.imdb.com/title/tt1529567/?ref_=fn_t...,6.9,2010.0,41.0,296,Documentary,UK,English,3d|imax|plesiosaur|sea|water
199,The Perfect Storm,http://www.imdb.com/title/tt0177971/?ref_=fn_t...,6.4,2000.0,130.0,133076,Action|Adventure|Drama|Thriller,USA,English,death|fish|fishing|sea|storm
3165,All Is Lost,http://www.imdb.com/title/tt2017038/?ref_=fn_t...,6.9,2013.0,106.0,59545,Action|Adventure|Drama,USA,English,boat|container|sea|shipping container|storm
4666,"20,000 Leagues Under the Sea",http://www.imdb.com/title/tt0046672/?ref_=fn_t...,7.2,1954.0,127.0,22124,Adventure|Drama|Family|Fantasy|Sci-Fi,USA,English,captain|expedition|sea|submarine|whale
941,The Life Aquatic with Steve Zissou,http://www.imdb.com/title/tt0362270/?ref_=fn_t...,7.3,2004.0,119.0,139535,Adventure|Comedy|Drama,USA,English,expedition|oceanographer|sea|shark|team



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
66,Waterworld,http://www.imdb.com/title/tt0114898/?ref_=fn_t...,6.1,1995.0,176.0,144337,Action|Adventure|Sci-Fi|Thriller,USA,English,future|sail|sea|smoker|water
383,The Core,http://www.imdb.com/title/tt0298814/?ref_=fn_t...,5.4,2003.0,135.0,77029,Action|Adventure|Sci-Fi|Thriller,USA,English,core|earth|natural disaster|pacemaker|scientist
429,Lost in Space,http://www.imdb.com/title/tt0120738/?ref_=fn_t...,5.1,1998.0,130.0,58402,Action|Adventure|Family|Sci-Fi|Thriller,USA,English,based on tv series|remake|robot|scientist|spac...
24,Battleship,http://www.imdb.com/title/tt1440129/?ref_=fn_t...,5.9,2012.0,131.0,202382,Action|Adventure|Sci-Fi|Thriller,USA,English,box office flop|hawaii|naval|oahu hawaii|ship
1261,Star Trek: Generations,http://www.imdb.com/title/tt0111280/?ref_=fn_t...,6.6,1994.0,118.0,60504,Action|Adventure|Mystery|Sci-Fi|Thriller,USA,English,23rd century|bare chested male bondage|enterpr...
1252,Serenity,http://www.imdb.com/title/tt0379786/?ref_=fn_t...,8.0,2005.0,119.0,242599,Action|Adventure|Sci-Fi|Thriller,USA,English,alliance|cannibalism|future|operative|planet



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
66,Waterworld,http://www.imdb.com/title/tt0114898/?ref_=fn_t...,6.1,1995.0,176.0,144337,Action|Adventure|Sci-Fi|Thriller,USA,English,future|sail|sea|smoker|water
135,Armageddon,http://www.imdb.com/title/tt0120591/?ref_=fn_t...,6.6,1998.0,153.0,322395,Action|Adventure|Sci-Fi|Thriller,USA,English,asteroid|astronaut|bomb|meteorite|outer space
383,The Core,http://www.imdb.com/title/tt0298814/?ref_=fn_t...,5.4,2003.0,135.0,77029,Action|Adventure|Sci-Fi|Thriller,USA,English,core|earth|natural disaster|pacemaker|scientist
24,Battleship,http://www.imdb.com/title/tt1440129/?ref_=fn_t...,5.9,2012.0,131.0,202382,Action|Adventure|Sci-Fi|Thriller,USA,English,box office flop|hawaii|naval|oahu hawaii|ship
1252,Serenity,http://www.imdb.com/title/tt0379786/?ref_=fn_t...,8.0,2005.0,119.0,242599,Action|Adventure|Sci-Fi|Thriller,USA,English,alliance|cannibalism|future|operative|planet
723,Star Trek: Nemesis,http://www.imdb.com/title/tt0253754/?ref_=fn_t...,6.4,2002.0,116.0,58450,Action|Adventure|Sci-Fi|Thriller,USA,English,earth|federation|romulan|slave|space


#### Film n°5: Destination finale 2

In [213]:
# Compare the four recommendation methods for film 1733 (Final Destination 2).
# `compare` is defined outside this cell; per the output below, each method's
# table lists the reference film first, followed by five recommended films.
compare(X, 1733)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1733,Final Destination 2,http://www.imdb.com/title/tt0309593/?ref_=fn_t...,6.2,2003.0,90.0,120786,Horror|Thriller,USA,English,car accident|death|freak accident|hospital|pre...
2973,Wrong Turn,http://www.imdb.com/title/tt0295700/?ref_=fn_t...,6.1,2003.0,84.0,87494,Horror|Thriller,USA,English,forest|mountain|road|stupid victim|west virginia
1548,Drag Me to Hell,http://www.imdb.com/title/tt1127180/?ref_=fn_t...,6.6,2009.0,99.0,158354,Horror|Thriller,USA,English,evil|fight|hell|psychic|psychologist
4097,Eden Lake,http://www.imdb.com/title/tt1020530/?ref_=fn_t...,6.8,2008.0,91.0,57811,Horror|Thriller,UK,English,camping|forest|lake|stabbed with glass|vomiting
2236,The Crazies,http://www.imdb.com/title/tt0455407/?ref_=fn_t...,6.5,2010.0,101.0,93272,Horror|Thriller,USA,English,deputy|iowa|rural setting|sheriff|survival
1200,The Reaping,http://www.imdb.com/title/tt0444682/?ref_=fn_t...,5.7,2007.0,99.0,37412,Horror|Thriller,USA,English,biblical plague|louisiana|missionary|plague|sm...



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1733,Final Destination 2,http://www.imdb.com/title/tt0309593/?ref_=fn_t...,6.2,2003.0,90.0,120786,Horror|Thriller,USA,English,car accident|death|freak accident|hospital|pre...
3263,Extract,http://www.imdb.com/title/tt1225822/?ref_=fn_t...,6.2,2009.0,92.0,37530,Comedy|Crime|Romance,USA,English,freak accident|gigolo|hit in the crotch|nosy n...
161,Turbo,http://www.imdb.com/title/tt1860353/?ref_=fn_t...,6.5,2013.0,96.0,62424,Adventure|Animation|Comedy|Family|Sport,USA,English,accident|freak accident|race|snail|underdog
3340,The Heart of Me,http://www.imdb.com/title/tt0301390/?ref_=fn_t...,6.7,2002.0,96.0,1966,Drama|Romance,UK,English,1930s|car accident|hospital|marital rape|painter
1980,The Glass House,http://www.imdb.com/title/tt0221218/?ref_=fn_t...,5.8,2001.0,106.0,25462,Crime|Drama|Mystery|Thriller,USA,English,car|car accident|death|friend|glass
3188,Shutter,http://www.imdb.com/title/tt0482599/?ref_=fn_t...,5.2,2008.0,90.0,26609,Horror|Mystery|Thriller,USA,English,car accident|japan|newlywed|photograph|photogr...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1733,Final Destination 2,http://www.imdb.com/title/tt0309593/?ref_=fn_t...,6.2,2003.0,90.0,120786,Horror|Thriller,USA,English,car accident|death|freak accident|hospital|pre...
2615,Urban Legend,http://www.imdb.com/title/tt0146336/?ref_=fn_t...,5.5,1998.0,99.0,47814,Horror|Mystery|Thriller,USA,English,campus|death|friend|professor|urban legend
1092,Red Riding Hood,http://www.imdb.com/title/tt1486185/?ref_=fn_t...,5.4,2011.0,100.0,91151,Fantasy|Horror|Mystery|Thriller,USA,English,death|forest|love|werewolf|werewolf bite
3188,Shutter,http://www.imdb.com/title/tt0482599/?ref_=fn_t...,5.2,2008.0,90.0,26609,Horror|Mystery|Thriller,USA,English,car accident|japan|newlywed|photograph|photogr...
1435,The Watcher,http://www.imdb.com/title/tt0204626/?ref_=fn_t...,5.3,2000.0,97.0,22220,Crime|Horror|Mystery|Thriller,USA,English,fbi|murder|scene of the crime|serial killer|th...
2610,My Bloody Valentine,http://www.imdb.com/title/tt1179891/?ref_=fn_t...,5.5,2009.0,101.0,45603,Horror|Thriller,USA,English,coma|death|miner|valentine|valentine's day



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1733,Final Destination 2,http://www.imdb.com/title/tt0309593/?ref_=fn_t...,6.2,2003.0,90.0,120786,Horror|Thriller,USA,English,car accident|death|freak accident|hospital|pre...
2973,Wrong Turn,http://www.imdb.com/title/tt0295700/?ref_=fn_t...,6.1,2003.0,84.0,87494,Horror|Thriller,USA,English,forest|mountain|road|stupid victim|west virginia
2236,The Crazies,http://www.imdb.com/title/tt0455407/?ref_=fn_t...,6.5,2010.0,101.0,93272,Horror|Thriller,USA,English,deputy|iowa|rural setting|sheriff|survival
3946,Diary of the Dead,http://www.imdb.com/title/tt0848557/?ref_=fn_t...,5.7,2007.0,95.0,40800,Horror,USA,English,barn|film student|hospital|mummy|video camera
2475,When a Stranger Calls,http://www.imdb.com/title/tt0455857/?ref_=fn_t...,5.0,2006.0,87.0,34711,Horror|Thriller,USA,English,babysitting|cell phone|high school|nightmare|t...
1200,The Reaping,http://www.imdb.com/title/tt0444682/?ref_=fn_t...,5.7,2007.0,99.0,37412,Horror|Thriller,USA,English,biblical plague|louisiana|missionary|plague|sm...


#### Film n°6: Cloud Atlas

In [214]:
# Compare the four recommendation methods for film 278 (Cloud Atlas).
# `compare` is defined outside this cell; per the output below, each method's
# table lists the reference film first, followed by five recommended films.
compare(X, 278)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
278,Cloud Atlas,http://www.imdb.com/title/tt1371111/?ref_=fn_t...,7.5,2012.0,172.0,284825,Drama|Sci-Fi,Germany,English,composer|future|letter|nonlinear timeline|nurs...
3343,Hard to Be a God,http://www.imdb.com/title/tt2328813/?ref_=fn_t...,6.7,2013.0,177.0,2518,Drama|Sci-Fi,Russia,Russian,fictional war|hanging|male full frontal nudity...
3096,Melancholia,http://www.imdb.com/title/tt1527186/?ref_=fn_t...,7.1,2011.0,130.0,128729,Drama|Sci-Fi,Denmark,English,art director|breasts|depression|outdoor sex|ri...
2544,Brazil,http://www.imdb.com/title/tt0088846/?ref_=fn_t...,8.0,1985.0,142.0,152306,Drama|Sci-Fi,UK,English,black comedy|bureaucracy|dream|terrorist|wrong...
334,A.I. Artificial Intelligence,http://www.imdb.com/title/tt0212720/?ref_=fn_t...,7.1,2001.0,146.0,238747,Adventure|Drama|Sci-Fi,USA,English,affection|boy|fairy|future|robot
251,The Martian,http://www.imdb.com/title/tt3659388/?ref_=fn_t...,8.1,2015.0,151.0,472488,Adventure|Drama|Sci-Fi,USA,English,astronaut|international cooperation|left for d...



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
278,Cloud Atlas,http://www.imdb.com/title/tt1371111/?ref_=fn_t...,7.5,2012.0,172.0,284825,Drama|Sci-Fi,Germany,English,composer|future|letter|nonlinear timeline|nurs...
1509,The Notebook,http://www.imdb.com/title/tt0332280/?ref_=fn_t...,7.9,2004.0,123.0,396396,Drama|Romance,USA,English,class differences|letter|nursing home|second c...
2270,Amadeus,http://www.imdb.com/title/tt0086879/?ref_=fn_t...,8.3,1984.0,180.0,270790,Biography|Drama|History|Music,USA,English,1800s|classical composer|composer|first person...
366,The Holiday,http://www.imdb.com/title/tt0457939/?ref_=fn_t...,6.9,2006.0,138.0,182757,Comedy|Romance,USA,English,book|composer|house|love|self esteem
2538,De-Lovely,http://www.imdb.com/title/tt0352277/?ref_=fn_t...,6.6,2004.0,125.0,9649,Biography|Drama|Music|Musical,USA,English,cole porter|composer|love|party|tears
1943,Marvin's Room,http://www.imdb.com/title/tt0116999/?ref_=fn_t...,6.7,1996.0,98.0,20163,Drama,USA,English,house|leukemia|mental institution|nursing home...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
278,Cloud Atlas,http://www.imdb.com/title/tt1371111/?ref_=fn_t...,7.5,2012.0,172.0,284825,Drama|Sci-Fi,Germany,English,composer|future|letter|nonlinear timeline|nurs...
334,A.I. Artificial Intelligence,http://www.imdb.com/title/tt0212720/?ref_=fn_t...,7.1,2001.0,146.0,238747,Adventure|Drama|Sci-Fi,USA,English,affection|boy|fairy|future|robot
1934,Her,http://www.imdb.com/title/tt1798709/?ref_=fn_t...,8.0,2013.0,126.0,355126,Drama|Romance|Sci-Fi,USA,English,divorce|female frontal nudity|future|lonelines...
66,Waterworld,http://www.imdb.com/title/tt0114898/?ref_=fn_t...,6.1,1995.0,176.0,144337,Action|Adventure|Sci-Fi|Thriller,USA,English,future|sail|sea|smoker|water
1509,The Notebook,http://www.imdb.com/title/tt0332280/?ref_=fn_t...,7.9,2004.0,123.0,396396,Drama|Romance,USA,English,class differences|letter|nursing home|second c...
1518,Looper,http://www.imdb.com/title/tt1276104/?ref_=fn_t...,7.4,2012.0,119.0,428916,Action|Crime|Drama|Sci-Fi|Thriller,USA,English,chase|future|surprise ending|time loop|time tr...



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
278,Cloud Atlas,http://www.imdb.com/title/tt1371111/?ref_=fn_t...,7.5,2012.0,172.0,284825,Drama|Sci-Fi,Germany,English,composer|future|letter|nonlinear timeline|nurs...
251,The Martian,http://www.imdb.com/title/tt3659388/?ref_=fn_t...,8.1,2015.0,151.0,472488,Adventure|Drama|Sci-Fi,USA,English,astronaut|international cooperation|left for d...
334,A.I. Artificial Intelligence,http://www.imdb.com/title/tt0212720/?ref_=fn_t...,7.1,2001.0,146.0,238747,Adventure|Drama|Sci-Fi,USA,English,affection|boy|fairy|future|robot
1934,Her,http://www.imdb.com/title/tt1798709/?ref_=fn_t...,8.0,2013.0,126.0,355126,Drama|Romance|Sci-Fi,USA,English,divorce|female frontal nudity|future|lonelines...
1509,The Notebook,http://www.imdb.com/title/tt0332280/?ref_=fn_t...,7.9,2004.0,123.0,396396,Drama|Romance,USA,English,class differences|letter|nursing home|second c...
2127,21 Grams,http://www.imdb.com/title/tt0315733/?ref_=fn_t...,7.7,2003.0,124.0,189683,Drama,USA,English,accident|faith|love|mathematician|nonlinear ti...


#### Film n°7: Amélie Poulain

In [215]:
# Compare the four recommendation methods for film 1218 (Amélie).
# `compare` is defined outside this cell; per the output below, each method's
# table lists the reference film first, followed by five recommended films.
compare(X, 1218)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1218,Amélie,http://www.imdb.com/title/tt0211915/?ref_=fn_t...,8.4,2001.0,122.0,534262,Comedy|Romance,France,French,cheerfulness|female protagonist|garden gnome|m...
1723,The 40-Year-Old Virgin,http://www.imdb.com/title/tt0405422/?ref_=fn_t...,7.1,2005.0,133.0,313797,Comedy|Romance,USA,English,40 year old|car accident|cheating on girlfrien...
1688,Knocked Up,http://www.imdb.com/title/tt0478311/?ref_=fn_t...,7.0,2007.0,133.0,298590,Comedy|Romance,USA,English,one night stand|pregnancy|slacker|unplanned pr...
2010,Silver Linings Playbook,http://www.imdb.com/title/tt1045658/?ref_=fn_t...,7.8,2012.0,122.0,533607,Comedy|Drama|Romance,USA,English,adulterous wife|bipolar disorder|obsessive com...
1956,There's Something About Mary,http://www.imdb.com/title/tt0129387/?ref_=fn_t...,7.1,1998.0,107.0,247289,Comedy|Romance,USA,English,accident|curtain call|dream girl|high school|prom
2276,Birdman or (The Unexpected Virtue of Ignorance),http://www.imdb.com/title/tt2562232/?ref_=fn_t...,7.8,2014.0,119.0,395087,Comedy|Drama|Romance,USA,English,actor|artist|broadway play|replacement|stage



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1218,Amélie,http://www.imdb.com/title/tt0211915/?ref_=fn_t...,8.4,2001.0,122.0,534262,Comedy|Romance,France,French,cheerfulness|female protagonist|garden gnome|m...
2049,Edward Scissorhands,http://www.imdb.com/title/tt0099487/?ref_=fn_t...,7.9,1990.0,105.0,357581,Fantasy|Romance,USA,English,creation|inventor|magical realism|scissors|suburb
2456,"Crouching Tiger, Hidden Dragon",http://www.imdb.com/title/tt0190332/?ref_=fn_t...,7.9,2000.0,120.0,217740,Action|Drama|Romance,Taiwan,Mandarin,bare midriff|china|magical realism|martial art...
2696,Being John Malkovich,http://www.imdb.com/title/tt0120601/?ref_=fn_t...,7.8,1999.0,112.0,254404,Comedy|Drama|Fantasy,USA,English,body swap|magical realism|portal|puppeteer|sur...
1423,Ponyo,http://www.imdb.com/title/tt0876563/?ref_=fn_t...,7.7,2008.0,101.0,85589,Adventure|Animation|Family|Fantasy,Japan,Japanese,children adventure|magical realism|sea goddess...
4183,Waitress,http://www.imdb.com/title/tt0473308/?ref_=fn_t...,7.1,2007.0,108.0,37714,Comedy|Drama|Romance,USA,English,domestic violence|pregnancy|unhappy marriage|u...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1218,Amélie,http://www.imdb.com/title/tt0211915/?ref_=fn_t...,8.4,2001.0,122.0,534262,Comedy|Romance,France,French,cheerfulness|female protagonist|garden gnome|m...
3086,Amour,http://www.imdb.com/title/tt1602620/?ref_=fn_t...,7.9,2012.0,127.0,70382,Drama|Romance,France,French,aging|daughter|old couple|old love|stroke
2712,"Paris, je t'aime",http://www.imdb.com/title/tt0401711/?ref_=fn_t...,7.3,2006.0,120.0,63084,Comedy|Drama|Romance,France,French,baby|divorce|oscar wilde|pigalle|tourist
1993,Joyeux Noel,http://www.imdb.com/title/tt0424205/?ref_=fn_t...,7.8,2005.0,116.0,21394,Drama|History|Music|Romance|War,France,French,christmas|no man's land|soldier|trench|world w...
3563,The Barbarian Invasions,http://www.imdb.com/title/tt0338135/?ref_=fn_t...,7.7,2003.0,112.0,24921,Comedy|Crime|Drama|Mystery|Romance,Canada,French,cancer|dying|friend|money|socialist
3458,L'auberge espagnole,http://www.imdb.com/title/tt0283900/?ref_=fn_t...,7.3,2002.0,111.0,34383,Comedy|Drama,France,French,apartment|erasmus|language|student|trip



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
1218,Amélie,http://www.imdb.com/title/tt0211915/?ref_=fn_t...,8.4,2001.0,122.0,534262,Comedy|Romance,France,French,cheerfulness|female protagonist|garden gnome|m...
1956,There's Something About Mary,http://www.imdb.com/title/tt0129387/?ref_=fn_t...,7.1,1998.0,107.0,247289,Comedy|Romance,USA,English,accident|curtain call|dream girl|high school|prom
782,Hitch,http://www.imdb.com/title/tt0386588/?ref_=fn_t...,6.6,2005.0,118.0,244840,Comedy|Romance,USA,English,advice|friendship|gossip|newspaper|quitting a job
2400,The Artist,http://www.imdb.com/title/tt1655442/?ref_=fn_t...,8.0,2011.0,100.0,190030,Comedy|Drama|Romance,France,English,1920s|jack russell terrier|modern silent movie...
2712,"Paris, je t'aime",http://www.imdb.com/title/tt0401711/?ref_=fn_t...,7.3,2006.0,120.0,63084,Comedy|Drama|Romance,France,French,baby|divorce|oscar wilde|pigalle|tourist
2660,Clueless,http://www.imdb.com/title/tt0112697/?ref_=fn_t...,6.8,1995.0,97.0,123390,Comedy|Romance,USA,English,female protagonist|high school|makeover|matchm...


#### Film n°8: Bruce Tout-Puissant

In [216]:
# Compare the four recommendation methods for film 392 (Bruce Almighty).
# `compare` is defined outside this cell; per the output below, each method's
# table lists the reference film first, followed by five recommended films.
compare(X, 392)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
392,Bruce Almighty,http://www.imdb.com/title/tt0315327/?ref_=fn_t...,6.7,2003.0,101.0,296904,Comedy|Drama,USA,English,answer to prayer|breast expansion|pleading wit...
1287,Burn After Reading,http://www.imdb.com/title/tt0887883/?ref_=fn_t...,7.0,2008.0,96.0,248468,Comedy|Drama,USA,English,cia|divorce|embassy|gym|memoir
611,Due Date,http://www.imdb.com/title/tt1231583/?ref_=fn_t...,6.6,2010.0,95.0,272789,Comedy|Drama,USA,English,actor|aspiring actor|birth|misadventure|suitcase
1654,The Royal Tenenbaums,http://www.imdb.com/title/tt0265666/?ref_=fn_t...,7.6,2001.0,110.0,209133,Comedy|Drama,USA,English,brother brother relationship|family relationsh...
1490,Big Daddy,http://www.imdb.com/title/tt0142342/?ref_=fn_t...,6.4,1999.0,93.0,161858,Comedy|Drama,USA,English,homosexual|law|law school|responsibility|vomiting
3180,Little Miss Sunshine,http://www.imdb.com/title/tt0449059/?ref_=fn_t...,7.9,2006.0,101.0,355810,Comedy|Drama,USA,English,gay|graduate student|sister sister relationshi...



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
392,Bruce Almighty,http://www.imdb.com/title/tt0315327/?ref_=fn_t...,6.7,2003.0,101.0,296904,Comedy|Drama,USA,English,answer to prayer|breast expansion|pleading wit...
2388,The Illusionist,http://www.imdb.com/title/tt0443543/?ref_=fn_t...,7.6,2006.0,110.0,295375,Drama|Mystery|Romance|Thriller,USA,English,duchess|illusionist|magician|prince|supernatur...
768,The Devil's Advocate,http://www.imdb.com/title/tt0118971/?ref_=fn_t...,7.5,1997.0,136.0,259519,Drama|Mystery|Thriller,USA,English,deal with the devil|lawyer|money|reality|super...
1842,The Box,http://www.imdb.com/title/tt0362478/?ref_=fn_t...,5.6,2009.0,115.0,76303,Drama|Fantasy|Mystery|Thriller,USA,English,alien|library|moral dilemma|nosebleed|supernat...
1574,My Super Ex-Girlfriend,http://www.imdb.com/title/tt0465624/?ref_=fn_t...,5.1,2006.0,95.0,53884,Comedy|Romance|Sci-Fi,USA,English,architect|boyfriend girlfriend relationship|su...
1364,Queen of the Damned,http://www.imdb.com/title/tt0238546/?ref_=fn_t...,5.2,2002.0,101.0,43991,Drama|Fantasy|Horror,USA,English,fatal attraction|interspecies romance|queen|su...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
392,Bruce Almighty,http://www.imdb.com/title/tt0315327/?ref_=fn_t...,6.7,2003.0,101.0,296904,Comedy|Drama,USA,English,answer to prayer|breast expansion|pleading wit...
386,Click,http://www.imdb.com/title/tt0389860/?ref_=fn_t...,6.4,2006.0,107.0,246492,Comedy|Drama|Fantasy|Romance,USA,English,architect|frozen time|obese man|remote control...
611,Due Date,http://www.imdb.com/title/tt1231583/?ref_=fn_t...,6.6,2010.0,95.0,272789,Comedy|Drama,USA,English,actor|aspiring actor|birth|misadventure|suitcase
321,The Secret Life of Walter Mitty,http://www.imdb.com/title/tt0359950/?ref_=fn_t...,7.3,2013.0,114.0,236421,Adventure|Comedy|Drama|Fantasy|Romance,USA,English,daydream|life magazine|magazine|photographer|s...
489,The Terminal,http://www.imdb.com/title/tt0362227/?ref_=fn_t...,7.3,2004.0,128.0,303864,Comedy|Drama,USA,English,airport|construction site|fish out of water|fl...
399,Lilo & Stitch,http://www.imdb.com/title/tt0275847/?ref_=fn_t...,7.2,2002.0,85.0,117212,Adventure|Animation|Comedy|Drama|Family|Fantas...,USA,English,alien|escape|fugitive|hawaii|social worker



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
392,Bruce Almighty,http://www.imdb.com/title/tt0315327/?ref_=fn_t...,6.7,2003.0,101.0,296904,Comedy|Drama,USA,English,answer to prayer|breast expansion|pleading wit...
3180,Little Miss Sunshine,http://www.imdb.com/title/tt0449059/?ref_=fn_t...,7.9,2006.0,101.0,355810,Comedy|Drama,USA,English,gay|graduate student|sister sister relationshi...
1654,The Royal Tenenbaums,http://www.imdb.com/title/tt0265666/?ref_=fn_t...,7.6,2001.0,110.0,209133,Comedy|Drama,USA,English,brother brother relationship|family relationsh...
3254,Thank You for Smoking,http://www.imdb.com/title/tt0427944/?ref_=fn_t...,7.6,2005.0,92.0,191998,Comedy|Drama,USA,English,cigarette smoking|lobbyist|political satire|re...
2394,Dead Poets Society,http://www.imdb.com/title/tt0097165/?ref_=fn_t...,8.0,1989.0,128.0,277451,Comedy|Drama,USA,English,education|english teacher|poet|professor|student
3781,Friday,http://www.imdb.com/title/tt0113118/?ref_=fn_t...,7.3,1995.0,97.0,74626,Comedy|Drama,USA,English,bully|drug dealer|first part|neighbor|neighbor...


#### Film n°9: A Beautiful Mind

In [217]:
# Compare the four recommendation methods for film 461 (A Beautiful Mind).
# `compare` is defined outside this cell; per the output below, each method's
# table lists the reference film first, followed by five recommended films.
compare(X, 461)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
461,A Beautiful Mind,http://www.imdb.com/title/tt0268978/?ref_=fn_t...,8.2,2001.0,135.0,610568,Biography|Drama,USA,English,conspiracy|cryptography|mathematician|mental i...
1119,The Social Network,http://www.imdb.com/title/tt1285016/?ref_=fn_t...,7.7,2010.0,120.0,479453,Biography|Drama,USA,English,competitiveness|creator|entrepreneur|facebook|...
791,The Pursuit of Happyness,http://www.imdb.com/title/tt0454921/?ref_=fn_t...,8.0,2006.0,117.0,338383,Biography|Drama,USA,English,bus|intern|internship|salesman|stockbroker
3518,Dallas Buyers Club,http://www.imdb.com/title/tt0790636/?ref_=fn_t...,8.0,2013.0,117.0,326494,Biography|Drama,USA,English,drugs|fda|gay community|hiv|homophobia
848,Catch Me If You Can,http://www.imdb.com/title/tt0264464/?ref_=fn_t...,8.0,2002.0,141.0,525801,Biography|Crime|Drama,USA,English,attorney|cat and mouse|fbi|pan am|pilot
1789,Goodfellas,http://www.imdb.com/title/tt0099685/?ref_=fn_t...,8.7,1990.0,146.0,728685,Biography|Crime|Drama,USA,English,betrayal|gangster|mafia|organized crime|robbery



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
461,A Beautiful Mind,http://www.imdb.com/title/tt0268978/?ref_=fn_t...,8.2,2001.0,135.0,610568,Biography|Drama,USA,English,conspiracy|cryptography|mathematician|mental i...
2463,The Imitation Game,http://www.imdb.com/title/tt2084970/?ref_=fn_t...,8.1,2014.0,114.0,467613,Biography|Drama|Thriller|War,UK,English,crossword puzzle|cryptography|enigma code|gay ...
2127,21 Grams,http://www.imdb.com/title/tt0315733/?ref_=fn_t...,7.7,2003.0,124.0,189683,Drama,USA,English,accident|faith|love|mathematician|nonlinear ti...
4744,Pi,http://www.imdb.com/title/tt0138704/?ref_=fn_t...,7.5,1998.0,84.0,142619,Drama|Mystery|Thriller,USA,English,mathematician|nature|numbers|pattern|using a m...
2720,The Perks of Being a Wallflower,http://www.imdb.com/title/tt1659337/?ref_=fn_t...,8.0,2012.0,102.0,351274,Drama|Romance,USA,English,coming of age|depression|gay|high school|menta...
3327,We Need to Talk About Kevin,http://www.imdb.com/title/tt1242460/?ref_=fn_t...,7.5,2011.0,112.0,95529,Drama|Thriller,UK,English,archery|defiance|mental illness|psychopath|toi...



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
461,A Beautiful Mind,http://www.imdb.com/title/tt0268978/?ref_=fn_t...,8.2,2001.0,135.0,610568,Biography|Drama,USA,English,conspiracy|cryptography|mathematician|mental i...
848,Catch Me If You Can,http://www.imdb.com/title/tt0264464/?ref_=fn_t...,8.0,2002.0,141.0,525801,Biography|Crime|Drama,USA,English,attorney|cat and mouse|fbi|pan am|pilot
1119,The Social Network,http://www.imdb.com/title/tt1285016/?ref_=fn_t...,7.7,2010.0,120.0,479453,Biography|Drama,USA,English,competitiveness|creator|entrepreneur|facebook|...
524,Braveheart,http://www.imdb.com/title/tt0112573/?ref_=fn_t...,8.4,1995.0,178.0,736638,Biography|Drama|History|War,USA,English,14th century|legend|revolt|scotland|tyranny
791,The Pursuit of Happyness,http://www.imdb.com/title/tt0454921/?ref_=fn_t...,8.0,2006.0,117.0,338383,Biography|Drama,USA,English,bus|intern|internship|salesman|stockbroker
785,Captain Phillips,http://www.imdb.com/title/tt1535109/?ref_=fn_t...,7.9,2013.0,134.0,323353,Biography|Drama|Thriller,USA,English,hijacking|hostage|leader|ship|somalia



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
461,A Beautiful Mind,http://www.imdb.com/title/tt0268978/?ref_=fn_t...,8.2,2001.0,135.0,610568,Biography|Drama,USA,English,conspiracy|cryptography|mathematician|mental i...
1119,The Social Network,http://www.imdb.com/title/tt1285016/?ref_=fn_t...,7.7,2010.0,120.0,479453,Biography|Drama,USA,English,competitiveness|creator|entrepreneur|facebook|...
791,The Pursuit of Happyness,http://www.imdb.com/title/tt0454921/?ref_=fn_t...,8.0,2006.0,117.0,338383,Biography|Drama,USA,English,bus|intern|internship|salesman|stockbroker
3518,Dallas Buyers Club,http://www.imdb.com/title/tt0790636/?ref_=fn_t...,8.0,2013.0,117.0,326494,Biography|Drama,USA,English,drugs|fda|gay community|hiv|homophobia
259,American Gangster,http://www.imdb.com/title/tt0765429/?ref_=fn_t...,7.8,2007.0,176.0,324671,Biography|Crime|Drama,USA,English,death|heroin|popcorn|smuggling|vietnam
232,The Aviator,http://www.imdb.com/title/tt0338751/?ref_=fn_t...,7.5,2004.0,170.0,264318,Biography|Drama,USA,English,1920s|aviation|fight|spruce goose|test flight


#### Film n°10: Les temps modernes

In [218]:
# Compare the recommendation methods for film 4208 (Modern Times).
# `compare` is defined outside this cell; per the output below, each method's
# table lists the reference film first, followed by the recommended films.
# NOTE(review): presumably produces the same four-method layout as the earlier
# cells -- confirm against the full cell output.
compare(X, 4208)


Méthode n°1 - Dataset: Genres:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4208,Modern Times,http://www.imdb.com/title/tt0027977/?ref_=fn_t...,8.6,1936.0,87.0,143086,Comedy|Drama|Family,USA,English,actor director writer|invention|machine|music ...
4260,How Green Was My Valley,http://www.imdb.com/title/tt0033729/?ref_=fn_t...,7.8,1941.0,118.0,15840,Drama|Family,USA,English,coal mine|girl|school|village|wales
4185,Mr. Smith Goes to Washington,http://www.imdb.com/title/tt0031679/?ref_=fn_t...,8.2,1939.0,120.0,77392,Comedy|Drama,USA,English,camp|corruption|governor|senate|u.s. senate
1027,"Yours, Mine and Ours",http://www.imdb.com/title/tt0063829/?ref_=fn_t...,7.2,1968.0,111.0,5888,Comedy|Family,USA,English,nurse|parent|sibling|widow|widower
4275,The Blue Bird,http://www.imdb.com/title/tt0032264/?ref_=fn_t...,6.5,1940.0,83.0,1047,Drama|Family|Fantasy,USA,English,1800s|bird|blue bird of happiness|foreign lang...
3826,A Christmas Story,http://www.imdb.com/title/tt0085334/?ref_=fn_t...,8.1,1983.0,94.0,104908,Comedy|Family,USA,English,bb gun|boy|bully|christmas|tongue stuck on a pole



Méthode n°2:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4208,Modern Times,http://www.imdb.com/title/tt0027977/?ref_=fn_t...,8.6,1936.0,87.0,143086,Comedy|Drama|Family,USA,English,actor director writer|invention|machine|music ...
3631,The Texas Chainsaw Massacre 2,http://www.imdb.com/title/tt0092076/?ref_=fn_t...,5.5,1986.0,101.0,19234,Comedy|Horror,USA,English,chainsaw|chainsaw murder|human monster|music s...
4804,Primer,http://www.imdb.com/title/tt0390384/?ref_=fn_t...,7.0,2004.0,77.0,72639,Drama|Sci-Fi|Thriller,USA,English,changing the future|independent film|invention...
3336,Chairman of the Board,http://www.imdb.com/title/tt0118836/?ref_=fn_t...,2.3,1998.0,95.0,5143,Comedy,USA,English,box office flop|company|invention|roommate|surfer
1566,9,http://www.imdb.com/title/tt0472033/?ref_=fn_t...,7.1,2009.0,79.0,111117,Action|Adventure|Animation|Drama|Mystery|Sci-F...,USA,English,hope|key|machine|number|rag doll
82,Terminator 3: Rise of the Machines,http://www.imdb.com/title/tt0181852/?ref_=fn_t...,6.4,2003.0,109.0,305340,Action|Sci-Fi,USA,English,drifter|exploding truck|future|machine|skynet



Méthode n°3:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4208,Modern Times,http://www.imdb.com/title/tt0027977/?ref_=fn_t...,8.6,1936.0,87.0,143086,Comedy|Drama|Family,USA,English,actor director writer|invention|machine|music ...
3250,Driving Miss Daisy,http://www.imdb.com/title/tt0097239/?ref_=fn_t...,7.4,1989.0,99.0,72324,Comedy|Drama|Family,USA,English,1950s|african american|jewish|old age|widow
2776,The Muppet Christmas Carol,http://www.imdb.com/title/tt0104940/?ref_=fn_t...,7.7,1992.0,89.0,33850,Comedy|Drama|Family|Fantasy|Musical,USA,English,christmas|christmas eve|miser|muppet|scrooge
2386,My Girl,http://www.imdb.com/title/tt0102492/?ref_=fn_t...,6.8,1991.0,102.0,55895,Comedy|Drama|Family|Romance,USA,English,best friend|funeral|girl|overalls|summer
1754,Mrs. Doubtfire,http://www.imdb.com/title/tt0107614/?ref_=fn_t...,6.9,1993.0,125.0,181380,Comedy|Drama|Family|Romance,USA,English,actor|based on novel|fancy restaurant|male act...
2453,3 Men and a Baby,http://www.imdb.com/title/tt0094137/?ref_=fn_t...,5.9,1987.0,102.0,36918,Comedy|Drama|Family,USA,English,1980s|baby|heroin|package|questioned by police



Méthode n°4:


Unnamed: 0,movie_title,movie_imdb_link,imdb_score,title_year,duration,num_voted_users,genres,country,language,plot_keywords
4208,Modern Times,http://www.imdb.com/title/tt0027977/?ref_=fn_t...,8.6,1936.0,87.0,143086,Comedy|Drama|Family,USA,English,actor director writer|invention|machine|music ...
4260,How Green Was My Valley,http://www.imdb.com/title/tt0033729/?ref_=fn_t...,7.8,1941.0,118.0,15840,Drama|Family,USA,English,coal mine|girl|school|village|wales
3250,Driving Miss Daisy,http://www.imdb.com/title/tt0097239/?ref_=fn_t...,7.4,1989.0,99.0,72324,Comedy|Drama|Family,USA,English,1950s|african american|jewish|old age|widow
4242,Trees Lounge,http://www.imdb.com/title/tt0117958/?ref_=fn_t...,7.2,1996.0,95.0,11369,Comedy|Drama,USA,English,alcoholic drink|bar|drink|drinking|drunk
2634,Crooklyn,http://www.imdb.com/title/tt0109504/?ref_=fn_t...,6.9,1994.0,115.0,6011,Comedy|Drama,USA,English,argument|birthday|bully|coming of age|semi aut...
2453,3 Men and a Baby,http://www.imdb.com/title/tt0094137/?ref_=fn_t...,5.9,1987.0,102.0,36918,Comedy|Drama|Family,USA,English,1980s|baby|heroin|package|questioned by police


#### Classement (des meilleures recommandations aux moins bonnes) par film:
Film 1: 4 > 1 > 3 > 2
<br>
Film 2: 2 > 4 > 1 > 3
<br>
Film 3: 1 = 3 = 4 > 2 
<br>
Film 4: 3 > 4 > 1 > 2
<br>
Film 5: 4 > 1 > 3 > 2
<br>
Film 6: 3 > 4 > 1 > 2
<br>
Film 7: 3 > 4 > 1 > 2
<br>
Film 8: 3 > 4 > 1 > 2
<br>
Film 9: 1 > 4 > 3 > 2
<br>
Film 10: 4 > 1 > 3 > 2

### Conclusion: La méthode 2 peut ne donner aucun résultat satisfaisant sur certains films. Les méthodes 1, 3 et 4, quant à elles, donnent toujours au moins une suggestion valable. Parmi ces trois-là, la méthode 4 est la plus régulière dans les tests effectués (elle est toujours classée première ou deuxième). C'est celle qui sera implémentée dans l'API.