In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Netflix catalogue from a CSV file located in the working directory.
netflix = pd.read_csv('netflix_titles.csv')
# Non-null count per column; any column below the row total contains missing values.
netflix.count()

show_id         7787
type            7787
title           7787
director        5398
cast            7069
country         7280
date_added      7777
release_year    7787
rating          7780
duration        7787
listed_in       7787
description     7787
dtype: int64

In [3]:
# Replace every missing value with an empty string so the text columns can be
# concatenated later without NaN handling. fillna returns a new frame, so
# `netflix` itself keeps its original missing values.
netflix_filledna = netflix.fillna("")
netflix_filledna.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...


### Select features for similarity comparison

In [60]:
# Columns kept for reference/lookup (title -> row) alongside the model inputs.
model_data = netflix_filledna[['title','director','cast','country','listed_in','description']]
# Text columns that are combined into the per-title "soup" used for similarity.
features = ['director','listed_in','description']
# Take an explicit copy: the following cells overwrite and add columns on
# model_feature, and writing into a frame derived by selection from
# netflix_filledna raises SettingWithCopyWarning in pandas.
model_feature = netflix_filledna[features].copy()

### Clean data - convert text to lowercase and remove all spaces

In [61]:
# Normalise every feature column: remove all spaces, then lowercase, so that a
# multi-word value such as a director's full name becomes a single token.
# The frame is rebuilt with assign() rather than assigned into, which avoids
# SettingWithCopyWarning on a frame that was derived by column selection, and
# the vectorised .str methods replace the per-element lambda.
# NOTE(review): this also fuses the words of `description` together, so the
# vectoriser presumably cannot match individual description words - confirm
# that this is intended.
model_feature = model_feature.assign(
    **{f: model_feature[f].str.replace(" ", "", regex=False).str.lower() for f in features}
)

### Create a bag of words for all rows

In [62]:
def create_soup(x):
    """Join the director, genre list and description of one row with single spaces.

    `x` is indexed by column name (e.g. a DataFrame row). The `cast` and
    `country` columns are deliberately not part of the result.
    """
    soup = x['director']
    for column in ('listed_in', 'description'):
        soup = soup + ' ' + x[column]
    return soup

# Build one combined text string per row; axis=1 hands each row to create_soup.
model_feature['soup'] = model_feature.apply(create_soup, axis=1)

In [14]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [63]:
# Bag-of-words encoder over the soup text; English stop words are dropped.
count = CountVectorizer(stop_words='english')
# Learn the vocabulary and encode every soup string as a row of term counts.
count_matrix = count.fit_transform(model_feature['soup'])


In [64]:
# print(count.get_feature_names())  # NOTE(review): recent scikit-learn releases name this get_feature_names_out()
# toarray() converts the whole count matrix to a dense array just for display;
# NumPy abbreviates the printout with "...".
print(count_matrix.toarray())

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [65]:
# Pairwise cosine similarity of every row against every row; with a single
# argument the matrix is compared with itself.
cosine_sim = cosine_similarity(count_matrix)

In [66]:
# Reverse lookup: title -> row label of model_data. get_recommendations uses it
# to find a title's row in cosine_sim.
# NOTE(review): presumably the default 0..n-1 index, so labels equal row positions - confirm.
indices = pd.Series(model_data.index, index=model_data['title'])

In [67]:
def get_recommendations(title, cosine_sim, top_n=10):
    """Return the rows of ``netflix`` that are most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in the module-level ``indices`` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row ``i`` holds the scores of row ``i`` of
        ``netflix`` against every row.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``netflix``, most similar first. The queried
        title itself is never part of the result.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    """
    idx = indices[title]
    if isinstance(idx, pd.Series):
        # A duplicated title makes the lookup return several rows; use the first.
        idx = idx.iloc[0]

    # similarity of the queried row against every row
    scores = np.asarray(cosine_sim[idx], dtype=float).ravel()

    # Descending order. The stable sort keeps catalogue order among equal
    # scores, exactly like sorted(..., reverse=True) on (index, score) pairs.
    ranked = np.argsort(-scores, kind='stable')

    # Remove the queried row by position instead of assuming it sorted first:
    # a row with an identical score (or an all-zero query row) can precede it.
    ranked = ranked[ranked != idx]

    return netflix.iloc[ranked[:top_n]]


In [68]:
# Show the catalogue entry that is used as the query title below.
netflix.loc[netflix['title'].eq('Platoon')]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
4879,s4880,Movie,Platoon,Oliver Stone,"Tom Berenger, Willem Dafoe, Charlie Sheen, For...","United States, United Kingdom","November 1, 2020",1986,R,120 min,"Action & Adventure, Classic Movies, Dramas",A soldier grows disillusioned as he's caught b...


In [70]:
# Titles most similar to "Platoon" according to the cosine-similarity matrix.
get_recommendations(title='Platoon', cosine_sim=cosine_sim)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
153,s154,Movie,A Bridge Too Far,Richard Attenborough,"Dirk Bogarde, James Caan, Michael Caine, Sean ...","United States, United Kingdom","July 1, 2020",1977,PG,176 min,"Action & Adventure, Classic Movies, Dramas",This wartime drama details a pivotal day in 19...
5351,s5352,Movie,Saladin,Youssef Chahine,"Ahmad Mazhar, Salah Zo El Faqqar, Nadia Lotfi,...",Egypt,"June 18, 2020",1963,TV-MA,194 min,"Action & Adventure, Classic Movies, Dramas",The Sultan of Egypt and Syria launches a campa...
5907,s5908,Movie,Superfly,Gordon Parks,"Ron O'Neal, Carl Lee, Sheila Frazier, Julius H...",United States,"November 1, 2019",1972,R,91 min,"Action & Adventure, Classic Movies, Dramas","To get away from the thug life, cocaine dealer..."
7667,s7668,Movie,World Trade Center,Oliver Stone,"Nicolas Cage, Michael Peña, Maggie Gyllenhaal,...",United States,"November 20, 2019",2006,PG-13,129 min,"Action & Adventure, Dramas","Working under treacherous conditions, an army ..."
2274,s2275,Movie,Free State of Jones,Gary Ross,"Matthew McConaughey, Gugu Mbatha-Raw, Mahersha...",United States,"October 1, 2020",2016,R,139 min,"Action & Adventure, Dramas",A Mississippi farmer turns outlaw as he leads ...
2969,s2970,Movie,In Search of Fellini,Taron Lexton,"Ksenia Solo, Mary Lynn Rajskub, Maria Bello, E...",United States,"March 19, 2018",2017,R,103 min,"Action & Adventure, Dramas",A naive and sheltered young woman embarks on a...
1041,s1042,Movie,Bonnie and Clyde,Arthur Penn,"Warren Beatty, Faye Dunaway, Michael J. Pollar...",United States,"January 1, 2021",1967,R,111 min,"Action & Adventure, Classic Movies, Dramas","Bonnie Parker and Clyde Barrow are young, in l..."
5244,s5245,Movie,Rocky,John G. Avildsen,"Sylvester Stallone, Talia Shire, Burt Young, C...",United States,"August 1, 2019",1976,PG,120 min,"Action & Adventure, Classic Movies, Dramas",Sylvester Stallone shot to fame as Rocky Balbo...
127,s128,Movie,6 Underground,Michael Bay,"Ryan Reynolds, Mélanie Laurent, Corey Hawkins,...",United States,"December 13, 2019",2019,R,129 min,"Action & Adventure, Dramas","After faking his death, a tech billionaire rec..."
280,s281,Movie,Abdo Mota,,Mohamed Ramadan,Egypt,"April 19, 2019",2012,TV-MA,100 min,"Action & Adventure, Dramas, International Movies","Following the mysterious death of his parents,..."


In [58]:
# All catalogue entries whose director field is exactly "Quentin Tarantino".
netflix.loc[netflix['director'].eq('Quentin Tarantino')]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
1787,s1788,Movie,Django Unchained,Quentin Tarantino,"Jamie Foxx, Christoph Waltz, Leonardo DiCaprio...",United States,"April 25, 2020",2012,R,165 min,"Action & Adventure, Dramas","Accompanied by a German bounty hunter, a freed..."
2997,s2998,Movie,Inglourious Basterds,Quentin Tarantino,"Brad Pitt, Mélanie Laurent, Christoph Waltz, E...","Germany, United States","July 22, 2019",2009,R,153 min,Action & Adventure,A Jewish cinema owner in occupied Paris is for...
3103,s3104,Movie,Jackie Brown,Quentin Tarantino,"Pam Grier, Samuel L. Jackson, Robert Forster, ...",United States,"August 1, 2019",1997,R,154 min,"Dramas, Thrillers",When an aging flight attendant's caught smuggl...
3375,s3376,Movie,Kill Bill: Vol. 1,Quentin Tarantino,"Uma Thurman, Lucy Liu, Vivica A. Fox, Daryl Ha...","United States, Japan","January 1, 2020",2003,R,111 min,Action & Adventure,"An assassin is shot by her ruthless employer, ..."
3376,s3377,Movie,Kill Bill: Vol. 2,Quentin Tarantino,"Uma Thurman, David Carradine, Michael Madsen, ...",United States,"January 1, 2020",2004,R,137 min,Action & Adventure,The Bride has three left on her rampage list: ...
5002,s5003,Movie,Pulp Fiction,Quentin Tarantino,"John Travolta, Samuel L. Jackson, Uma Thurman,...",United States,"January 1, 2019",1994,R,154 min,"Classic Movies, Cult Movies, Dramas",This stylized crime caper weaves together stor...
6434,s6435,Movie,The Hateful Eight,Quentin Tarantino,"Samuel L. Jackson, Kurt Russell, Jennifer Jaso...",United States,"October 25, 2017",2015,R,168 min,"Action & Adventure, Thrillers","Years after the Civil War, a bounty hunter and..."
6435,s6436,TV Show,The Hateful Eight: Extended Version,Quentin Tarantino,"Samuel L. Jackson, Kurt Russell, Jennifer Jaso...",,"April 25, 2019",2015,R,1 Season,TV Shows,Trapped at a stagecoach stop as a storm rages ...


In [57]:
# Number of titles per director value, most frequent first.
netflix['director'].value_counts()

Raúl Campos, Jan Suter           18
Marcus Raboy                     16
Jay Karas                        14
Cathy Garcia-Molina              13
Martin Scorsese                  12
Jay Chapman                      12
Youssef Chahine                  12
Steven Spielberg                 10
David Dhawan                      9
Johnnie To                        8
Quentin Tarantino                 8
Shannon Hartman                   8
Robert Rodriguez                  8
Kunle Afolayan                    8
Hakan Algül                       8
Ryan Polito                       8
Lance Bangs                       8
McG                               7
Omoni Oboli                       7
Fernando Ayllón                   7
Don Michael Paul                  7
Yılmaz Erdoğan                    7
S.S. Rajamouli                    7
Justin G. Dyck                    7
Ozan Açıktan                      7
Troy Miller                       6
Steven Soderbergh                 6
Ron Howard                  