In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
%matplotlib inline

In [30]:
df=pd.read_csv("netflix_titles.csv")

In [31]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [32]:
df['director'] = df['director'].fillna('NoDataAvailable')
df['country'] = df['country'].fillna(df['country'].mode()[0])
df['cast'] = df['cast'].fillna('NoDataAvailable')
df['date_added'] = df['date_added'].fillna(df['date_added'].mode()[0])
df['rating'] = df['rating'].fillna(df['rating'].mode()[0])

In [33]:
df["year"]=pd.DatetimeIndex(df["date_added"]).year
df["month"]=pd.DatetimeIndex(df["date_added"]).month
df.drop(columns=['date_added'], inplace=True)
df

Unnamed: 0,show_id,type,title,director,cast,country,release_year,rating,duration,listed_in,description,year,month
0,s1,TV Show,3%,NoDataAvailable,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020,8
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,2016,12
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",2018,12
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",2017,11
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...,2020,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...,2020,10
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...,2019,3
7784,s7785,Movie,Zulu Man in Japan,NoDataAvailable,Nasty C,United States,2019,TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast...",2020,9
7785,s7786,TV Show,Zumbo's Just Desserts,NoDataAvailable,"Adriano Zumbo, Rachel Khoo",Australia,2019,TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...,2020,10


In [34]:
ratings_ages = {
    'TV-PG': 'Older Kids',
    'TV-MA': 'Adults',
    'TV-Y7-FV': 'Older Kids',
    'TV-Y7': 'Older Kids',
    'TV-14': 'Teens',
    'R': 'Adults',
    'TV-Y': 'Kids',
    'NR': 'Adults',
    'PG-13': 'Teens',
    'TV-G': 'Kids',
    'PG': 'Older Kids',
    'G': 'Kids',
    'UR': 'Adults',
    'NC-17': 'Adults'
}
df["ratings_ages"]=df["rating"].replace(ratings_ages)

In [35]:
df.drop(columns=['rating'])

Unnamed: 0,show_id,type,title,director,cast,country,release_year,duration,listed_in,description,year,month,ratings_ages
0,s1,TV Show,3%,NoDataAvailable,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,2020,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020,8,Adults
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,2016,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,2016,12,Adults
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,2011,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",2018,12,Adults
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,2009,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",2017,11,Teens
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,2008,123 min,Dramas,A brilliant group of students become card-coun...,2020,1,Teens
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...",2005,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...,2020,10,Adults
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,2015,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...,2019,3,Teens
7784,s7785,Movie,Zulu Man in Japan,NoDataAvailable,Nasty C,United States,2019,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast...",2020,9,Adults
7785,s7786,TV Show,Zumbo's Just Desserts,NoDataAvailable,"Adriano Zumbo, Rachel Khoo",Australia,2019,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...,2020,10,Older Kids


In [61]:
features = ['type', 'director', 'country', 'ratings_ages', 'country']

In [76]:
def combine_features(row):
	try:
		return row['type'] +" "+row['director']+" "+row["country"]+" "+row["ratings_ages"]+" "+row["country"]
	except:
		print( "Error:", row)

df["combined_features"] = df.apply(combine_features,axis=1)
df['index']= df.index
df

Unnamed: 0,show_id,type,title,director,cast,country,release_year,rating,duration,listed_in,description,year,month,ratings_ages,combined_features,index
0,s1,TV Show,3%,NoDataAvailable,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,2020,8,Adults,TV Show NoDataAvailable Brazil Adults Brazil,0
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,2016,12,Adults,Movie Jorge Michel Grau Mexico Adults Mexico,1
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",2018,12,Adults,Movie Gilbert Chan Singapore Adults Singapore,2
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi...",2017,11,Teens,Movie Shane Acker United States Teens United S...,3
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...,2020,1,Teens,Movie Robert Luketic United States Teens Unite...,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Movie,Zozo,Josef Fares,"Imad Creidi, Antoinette Turk, Elias Gergi, Car...","Sweden, Czech Republic, United Kingdom, Denmar...",2005,TV-MA,99 min,"Dramas, International Movies",When Lebanon's Civil War deprives Zozo of his ...,2020,10,Adults,"Movie Josef Fares Sweden, Czech Republic, Unit...",7782
7783,s7784,Movie,Zubaan,Mozez Singh,"Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanan...",India,2015,TV-14,111 min,"Dramas, International Movies, Music & Musicals",A scrappy but poor boy worms his way into a ty...,2019,3,Teens,Movie Mozez Singh India Teens India,7783
7784,s7785,Movie,Zulu Man in Japan,NoDataAvailable,Nasty C,United States,2019,TV-MA,44 min,"Documentaries, International Movies, Music & M...","In this documentary, South African rapper Nast...",2020,9,Adults,Movie NoDataAvailable United States Adults Uni...,7784
7785,s7786,TV Show,Zumbo's Just Desserts,NoDataAvailable,"Adriano Zumbo, Rachel Khoo",Australia,2019,TV-PG,1 Season,"International TV Shows, Reality TV",Dessert wizard Adriano Zumbo looks for the nex...,2020,10,Older Kids,TV Show NoDataAvailable Australia Older Kids A...,7785


In [70]:
cv = CountVectorizer()

count_matrix = cv.fit_transform(df["combined_features"])

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [88]:
def get_index_from_title(title):
	return df[df.title == title]["index"].values[0]

def get_title_from_index(index):
	return df[df.index == index]["title"].values[0]

In [102]:
cosine_sim = cosine_similarity(count_matrix) 
movie_user_likes = "7:19"
movie_index = get_index_from_title(movie_user_likes)
similar_movies =  list(enumerate(cosine_sim[movie_index]))
sorted_similar_movies = sorted(similar_movies,key=lambda x:x[1],reverse=True)

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
i=0
for element in sorted_similar_movies:
		print(get_title_from_index(element[0]))
		i=i+1
		if i>50:
			break

In [110]:
title_df = df[['title', 'index']]
title_df.to_csv('title.csv', index = None)

In [10]:
titles_db = pd.read_csv('title.csv')
for title  in titles_db.title:
    print(f'"{title}",')

"3%",
"7:19",
"23:59",
"9",
"21",
"46",
"122",
"187",
"706",
"1920",
"1922",
"1983",
"1994",
"2,215",
"3022",
"Oct-01",
"Feb-09",
"22-Jul",
"15-Aug",
"'89",
"​​Kuch Bheege Alfaaz",
"​Goli Soda 2",
"​Maj Rati ​​Keteki",
"​Mayurakshi",
"​SAINT SEIYA: Knights of the Zodiac",
"(T)ERROR",
"(Un)Well",
"#Alive",
"#AnneFrank - Parallel Stories",
"#blackAF",
"#cats_the_mewvie",
"#FriendButMarried",
"#FriendButMarried 2",
"#realityhigh",
"#Roxy",
"#Rucker50",
"#Selfie",
"#Selfie 69",
"แผนร้ายนายเจ้าเล่ห์",
"¡Ay, mi madre!",
"Çarsi Pazar",
"Ég man þig",
"Çok Filim Hareketler Bunlar",
"Òlòtūré",
"Æon Flux",
"Şubat",
"1 Chance 2 Dance",
"1 Mile to You",
"10 Days in Sun City",
"10 jours en or",
"10,000 B.C.",
"100 Days My Prince",
"100 Days Of Solitude",
"100 Humans",
"100 Meters",
"100 Things to do Before High School",
"100 Years: One Woman's Fight for Justice",
"100% Halal",
"100% Hotter",
"1000 Rupee Note",
"12 ROUND GUN",
"12 Years Promise",
"13 Cameras",
"13 Reasons Why",
"13 Reasons Why: Beyon

"Destiny",
"Detention",
"Detention Letter",
"Detour",
"Deuces",
"Dev.D",
"Deviant Love",
"Devil's Bride",
"Devil's Gate",
"Devilman Crybaby",
"Devlok with Devdutt Pattanaik",
"Devrai",
"Dexter",
"Dhamaal",
"Dhan Dhana Dhan Goal",
"Dhanak",
"Dharam Sankat Mein",
"Dharmakshetra",
"Dhh",
"Dhia Sofea",
"Dhobi Ghat (Mumbai Diaries)",
"Dhoondte Reh Jaoge",
"Di Renjie zhi Sidatianwang",
"Diablero",
"Diagnosis",
"Diamond City",
"Diamond Lover",
"Diamonds in the Sky",
"Diana: 7 Days That Shook the World",
"Diana: In Her Own Words",
"Diary of a Chambermaid",
"Dick Johnson Is Dead",
"Die Another Day",
"Die Ontwaking",
"Dieter Nuhr: Nuhr in Berlin",
"Digs & Discoveries: All Track's Lead to Rome",
"Digs & Discoveries: Mines of Mystery",
"Dil",
"Dil Chahta Hai",
"Dil Dhadakne Do",
"Dil Hai Tumhaara",
"Dil Se",
"Dil Vil Pyaar Vyaar",
"Dilan 1990",
"Dilan 1991",
"Dilwale",
"Dinner for Five",
"Dinner for Schmucks",
"Dino Girl Gauko",
"Dino Hunt",
"Dinosaur King",
"Dinotrux",
"Dinotrux Supercharged",
"D

"La Niña",
"LA Originals",
"La Piloto",
"La Révolution",
"La Reina del Sur",
"La Robe De Mariee Des Cieux",
"La Rosa de Guadalupe",
"La Viuda Negra",
"Laal Rang",
"Laatu",
"Laddaland",
"Ladies First",
"Ladies Up",
"Lady Bird",
"Lady Bloodfight",
"Lady Driver",
"Lady Dynamite",
"Lady in the Water",
"Lady J",
"Lady-Like",
"Lady, la vendedora de rosas",
"Laerte-se",
"Lagaan",
"Lagos Real Fake Life",
"Lakeeran",
"Lakshya",
"Lal Patthar",
"Lalbaug Parel: Zali Mumbai Sonyachi",
"Land Girls",
"Lang Tong",
"Lara and the Beat",
"Larceny",
"Larry Charles' Dangerous World of Comedy",
"Larry the Cable Guy: Remain Seated",
"Larva",
"Larva Island",
"Las muñecas de la mafia",
"Last",
"Last Breath",
"Last Chance U",
"Last Ferry",
"Last Flight to Abuja",
"LAST HOPE",
"Last Knights",
"Last Night",
"Last Tango in Halifax",
"Late Life: The Chien-Ming Wang Story",
"Latte and the Magic Waterstone",
"Lavell Crawford: Can a Brother Get Some Love?",
"Lavender",
"Lawless",
"Laws of Attraction",
"Layer Cake",
"L

"Sex Education",
"Sex, Explained",
"Sexo, Pudor y Lagrimas",
"Sextuplets",
"Sexy Central",
"SGT. Will Gardner",
"Shéhérazade",
"Shabd",
"Shadow",
"Shadow of Truth",
"Shahanpan Dega Deva",
"Shaitan",
"Shaka Zulu",
"Shakti: The Power",
"Shameless (U.S.)",
"Shanghai",
"Shanghai Fortress",
"Shararat",
"Shark Busters",
"Shark Night",
"Shattered Memories",
"Shaun the Sheep",
"Shaun the Sheep: Adventures from Mossy Bottom",
"Shaun the Sheep: The Farmer’s Llamas",
"Shaurya: It Takes Courage to Make Right... Right",
"Shawn Mendes: In Wonder",
"Shawn Mendes: Live in Concert",
"She",
"She Did That",
"She Made Me a Criminal",
"She-Ra and the Princesses of Power",
"She's Dating the Gangster",
"She's Gotta Have It",
"She's Out of My League",
"She's the One",
"Shelby American",
"Shelter",
"Shepherds and Butchers",
"Sherlock",
"Sherlock Holmes",
"Shikari",
"Shimla Mirchi",
"Shimmer Lake",
"Shine On with Reese",
"Shine Your Eyes",
"Shirkers",
"Shiva",
"Shonar Pahar",
"Shooter",
"Shootout at Lokhandwala

"Trailer Park Boys: The Animated Series",
"Trailer Park Boys: The Movie",
"Trailer Park Boys: Xmas Special",
"Train of the Dead",
"Train to Busan",
"Trainspotting",
"Traitor",
"Traitors",
"Tramps",
"Transcendence",
"Transfers",
"Transformer",
"Transformers Prime",
"Transformers Rescue Bots Academy",
"Transformers: Cyberverse",
"Transformers: Rescue Bots",
"Transformers: Robots in Disguise",
"Transformers: War For Cybertron Trilogy",
"Transformers: War for Cybertron: Earthrise",
"Trash",
"Trash Fire",
"Trash Truck",
"Travel Mates 2",
"Travelers",
"Travis Scott: Look Mom I Can Fly",
"Tread",
"Treasures from the Wreck of the Unbelievable",
"Tree House Tales",
"Tree Man",
"Treehouse Detectives",
"Tremors",
"Tremors 2: Aftershocks",
"Tremors 3: Back to Perfection",
"Tremors 4: The Legend Begins",
"Tremors 5:  Bloodline",
"Tremors 6: A Cold Day in Hell",
"Tremors: Shrieker Island",
"Trespass Against Us",
"Trevor Noah: Afraid of the Dark",
"Trevor Noah: Son of Patricia",
"Triad Princess",
"Tr