In [35]:
import pandas as pd
from IPython.display import display
import numpy as np

In [36]:
pd.set_option("display.max_rows", None)

In [37]:
df = None

In [38]:

minVotes = None
def readFile(path):
    global df
    df = pd.read_csv(path)
    
def cleanDataset():
    global df
    df = df.drop(['year'], axis=1)
    df = df.drop(['certificate'], axis=1)
    df = df.drop(['duration'], axis=1)
    df = df.drop(['description'], axis=1)
    df = df.drop(['stars'], axis=1)
    df = df.drop_duplicates(subset=['title'])
    
    df['votes']= df['votes'].str.replace(',','').astype(float)
    df = df.fillna(0)
    

def filterGenre(genre):
    global df
    df = df[df['genre'].str.contains(genre, case=False).fillna(False)]

    

def threshVotes(thrs):
    global df
    global minVotes 
    minVotes  = np.quantile(df['votes'] , thrs)
    df =df.drop(df[df.votes<minVotes].index)

    
def weightedAvgScore():
    global df
    weightedAvg= []
    mean = df['rating'].mean()
    df = df.reset_index(drop=True)
    
    for i in range(0 , len(df['rating'])):
        res = ( df['votes'][i]/(df['votes'][i]+ minVotes)*df['rating'][i]) + (minVotes/(df['votes'][i]+minVotes))*mean
        weightedAvg.append(res)
    df["weightedAvg"]= weightedAvg
    
def sortNscores(n):
    sort = df.sort_values('weightedAvg', ascending=False).head(n)
    display(sort)
     


def runRecommenderSystem(genre, votesThrs, numberOfReturnedData):
    
    readFile("C:\\Users\\ASUS\\Desktop\\Python\\Project-Netflix Recommender Systems Introduction\\NetflixDatasetMovies.csv")
    cleanDataset()
    filterGenre(genre)
    threshVotes(votesThrs)
    weightedAvgScore()
    sortNscores(numberOfReturnedData)
    

In [39]:
runRecommenderSystem("Drama", 0.8, 20)

Unnamed: 0,title,genre,rating,votes,weightedAvg
4,Breaking Bad,"Crime, Drama, Thriller",9.5,1831340.0,9.475954
53,Sherlock,"Crime, Drama, Mystery",9.1,913816.0,9.060775
33,The Lord of the Rings: The Return of the King,"Action, Adventure, Drama",9.0,1819157.0,8.98116
167,Death Note,"Animation, Crime, Drama",9.0,316300.0,8.896898
2,Better Call Saul,"Crime, Drama",8.9,501384.0,8.837303
36,Fargo,"Crime, Drama, Thriller",8.9,369918.0,8.816149
9,The Lord of the Rings: The Fellowship of the Ring,"Action, Adventure, Drama",8.8,1844055.0,8.783529
41,The Lord of the Rings: The Two Towers,"Action, Adventure, Drama",8.8,1642708.0,8.781534
498,Leyla and Mecnun,"Adventure, Comedy, Drama",9.1,93632.0,8.77698
23,Black Mirror,"Drama, Mystery, Sci-Fi",8.8,535782.0,8.744737


In [40]:
class netflixTopRecommenderSystem:
    def __init__(self, datasetPath, genre, votesThrs, topN ):
    
        self._datasetPath = datasetPath
        self._genre = genre
        self._votesThrs = votesThrs
        self._topN = topN
        self.df = None
        self._minVotes = None
        
        self.colDrop = ['year', 'certificate', 'duration', 'description', 'stars']
        self.colTitle= {"votes": "votes", "genre": "genre", "title":"title", "rating": "rating"}
        
 

    def readFile(self):   
        self.df = pd.read_csv(self._datasetPath)
        

    def cleanDataset(self):
         
        for item in self.colDrop:
            self.df = self.df.drop([item], axis=1)
            
 
        
        self.df = self.df.drop_duplicates(subset=[self.colTitle["title"]])
        
        if type(self.df["votes"][0]) == str:
            self.df[self.colTitle["votes"]]= self.df[self.colTitle["votes"]].str.replace(',','').astype(float)
            
        self.df = self.df.fillna(0)
        
    def filterGenre(self):
       
        self.df = self.df[self.df[self.colTitle["genre"]].str.contains(self._genre, case=False).fillna(False)]

    def threshVotes(self):

        self._minVotes  = np.quantile(self.df[self.colTitle["votes"]] , self._votesThrs)
        self.df =self.df.drop(self.df [ self.df.votes < self._minVotes].index)
        
    def weightedAvgScore(self):
 
        weightedAvg= []
        mean = self.df[self.colTitle["rating"]].mean()
        self.df = self.df.reset_index(drop=True)

        for i in range(0 , len(self.df[self.colTitle["rating"]])):
            res = ( self.df[self.colTitle["votes"]][i]/(self.df[self.colTitle["votes"]][i]+ self._minVotes)* self.df[self.colTitle["rating"]][i]) + (self._minVotes/(self.df[self.colTitle["rating"]][i]+self._minVotes))*mean
            weightedAvg.append(res)
        self.df["weightedAvg"]= weightedAvg
        
    def sortNscores(self):
        sort = self.df.sort_values('weightedAvg', ascending=False).head(self._topN)
        display(sort)
    
    def run(self):
        self.readFile()
        self.cleanDataset()
        self.filterGenre()
        self.threshVotes()
        self.weightedAvgScore()
        self.sortNscores()
        

In [34]:
path = "C:\\Users\\ASUS\\Desktop\\Python\\Project-Netflix Recommender Systems Introduction\\NetflixDatasetMovies.csv"
best5Comedy = netflixTopRecommenderSystem(path, "Comedy", 0.8, 20)
best5Comedy.colDrop = ['year', 'certificate', 'duration', 'description', 'stars']
best5Comedy.colTitle= {"votes": "votes", "genre": "genre", "title":"title", "rating": "rating"}
best5Comedy.run()


Unnamed: 0,title,genre,rating,votes,weightedAvg
1,Rick and Morty,"Animation, Adventure, Comedy",9.2,502160.0,15.936105
3,Friends,"Comedy, Romance",8.9,979424.0,15.71961
8,Seinfeld,Comedy,8.9,314089.0,15.54797
27,South Park,"Animation, Comedy",8.7,366394.0,15.388565
29,Arrested Development,Comedy,8.7,302834.0,15.344859
5,Modern Family,"Comedy, Drama, Romance",8.5,423963.0,15.221511
7,Suits,"Comedy, Drama",8.5,405863.0,15.213583
25,BoJack Horseman,"Animation, Comedy, Drama",8.8,152649.0,15.199459
275,Leyla and Mecnun,"Adventure, Comedy, Drama",9.1,93632.0,15.183357
4,Shameless,"Comedy, Drama",8.6,239541.0,15.182946
