In [42]:
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objects as graphObjects
from plotly.offline import init_notebook_mode, iplot
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from rake_nltk import Rake

In [43]:
data=pd.read_csv("D:/Projects/Netflix Recommendation/inputs/netflix_titles.csv") #Read CSV 
data= data[['title','director','cast','listed_in','description']]  #Extract Needed data from the CSV
data.head() #Display new Dataset

Unnamed: 0,title,director,cast,listed_in,description
0,Norm of the North: King Sized Adventure,"Richard Finn, Tim Maltby","Alan Marriott, Andrew Toth, Brian Dobson, Cole...","Children & Family Movies, Comedies",Before planning an awesome wedding for his gra...
1,Jandino: Whatever it Takes,,Jandino Asporaat,Stand-Up Comedy,Jandino Asporaat riffs on the challenges of ra...
2,Transformers Prime,,"Peter Cullen, Sumalee Montano, Frank Welker, J...",Kids' TV,"With the help of three human allies, the Autob..."
3,Transformers: Robots in Disguise,,"Will Friedle, Darren Criss, Constance Zimmer, ...",Kids' TV,When a prison ship crash unleashes hundreds of...
4,#realityhigh,Fernando Lebrija,"Nesta Cooper, Kate Walsh, John Michael Higgins...",Comedies,When nerdy high schooler Dani finally attracts...


In [44]:
data.columns  #GEt Data Columns

Index(['title', 'director', 'cast', 'listed_in', 'description'], dtype='object')

In [45]:
data.dtypes

title          object
director       object
cast           object
listed_in      object
description    object
dtype: object

In [46]:
data.shape  #Get Number of Datas in the Data set

(6234, 5)

In [47]:
# Pre processing data - Fill Nul Director,Cast Rows
data['director']=data['director'].fillna(' ')
data['cast']=data['cast'].fillna(' ')

# Set direstor and cast data type to String
data['director']=data['director'].astype('str')
data['cast']=data['cast'].astype('str')

In [48]:
data['WordBulk']= ''  #Add new Column to data set

#Iterate threw the dataset
for index,row in data.iterrows():  
      
    category = ''.join(row['listed_in'].split(',')).lower()  #Get Category of the each  
    cast = ' '.join(row['cast'].replace(' ','').split(',')).lower()  #Get actors in each
    director = ''.join(row['director'].replace(' ','').split(',')).lower() #Get director of Each
    

    plot = row['description']  #add desription to a dataframe
    extracter = Rake()  
    extracter.extract_keywords_from_text(plot)  #extract key words in the descriptions of each
    words_in_description = extracter.get_word_degrees() #get those extracted key words
    
    description = ' '.join(list(words_in_description.keys())) #Key words in the description
    row['WordBulk'] = category + ' ' + ' ' + director + ' ' + cast + ' '+description  #And identified key words from each sections of each item(Movie or Tv Series)

recomendation_data = data[['title','WordBulk']] #Set title and word bulk to a new data frame
recomendation_data.head()  #Display data frame

Unnamed: 0,title,WordBulk
0,Norm of the North: King Sized Adventure,children & family movies comedies richardfinn...
1,Jandino: Whatever it Takes,stand-up comedy jandinoasporaat raising kids...
2,Transformers Prime,kids' tv petercullen sumaleemontano frankwel...
3,Transformers: Robots in Disguise,kids' tv willfriedle darrencriss constancezi...
4,#realityhigh,comedies fernandolebrija nestacooper katewals...


In [49]:
model = CountVectorizer()   #Count Vectorizer model
count_matrix = model.fit_transform(recomendation_data['WordBulk']) #Fit data to the model  
similarity_matrix = cosine_similarity(count_matrix,count_matrix)  #Train the model(This matrix shows one items similarity to others- Squred Metrix)
#Matrix=m(i,j) i=columns j=rows when i=j  m(i,j)=1

print(similarity_matrix) #Display similarity matrix

[[1.         0.         0.         ... 0.0942809  0.03086067 0.03390318]
 [0.         1.         0.04472136 ... 0.         0.         0.        ]
 [0.         0.04472136 1.         ... 0.         0.10141851 0.07427814]
 ...
 [0.0942809  0.         0.         ... 1.         0.         0.        ]
 [0.03086067 0.         0.10141851 ... 0.         1.         0.21971769]
 [0.03390318 0.         0.07427814 ... 0.         0.21971769 1.        ]]


In [50]:
titles = pd.Series(recomendation_data['title']) #Get titles list

#Function for view recomended items for given movie or TV series 
def recommendationEngine(name):
    recomend_list=[] #recomendation list 
    index = titles[titles == name].index[0] #get index of the given name 
    sort_index = pd.Series(similarity_matrix[index]).sort_values(ascending = False)  #get indexes of the simillar items for the given name from the matrix
    similarItems = sort_index.iloc[1:6]  #Get First 5 recomended items
    for i in similarItems.index:
        recomend_list.append(titles[i])   #Add recomended items to the List
    for movie in recomend_list:
        print(movie)      #Display recomeded list

In [51]:
recommendationEngine('The Runner')

The Rehearsal
A Billion Colour Story
Manto
Princess Cyd
Sunday's Illness


In [52]:
recommendationEngine('6 Underground')

Abdo Mota
Highway
Furie
Veerappan
Naan Sigappu Manithan


In [53]:
recommendationEngine('Monster Island')

Zipi & Zape y la Isla del Capitan
In This Corner of the World
Raising the Bar
Santa Girl
Chitty Chitty Bang Bang
