In [None]:
#these modules will be used in this project
!pip install scikit-surprise
!pip3 install fuzzywuzzy

You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.[0m


In [None]:
#importing the important libraries
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
#we are using the fuzzywuzzy to search for movie name in dataframe
from fuzzywuzzy import process
from sklearn.metrics.pairwise import cosine_similarity

from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer

''' Surprise is a Python scikit for collaborative-filtering recommender systems.
It contains various built-in algorithms that we can use to train recommender systems, make predictions and evaluate their performance.
KNNBasic is a nearest-neighbours algorithm; it is the one we use in this project to make recommendations.
'''
from surprise import Dataset,Reader
from surprise import KNNBasic
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise import KNNBasic

In [None]:
# Read the raw data file into a dataframe
df = pd.read_csv("data.txt")

# Dataframe processing

# Keep only the rows that have no missing values
df = df.dropna()

# Drop the first column of the file.
# drop() is used instead of df.pop() because pop() returns the removed column,
# which would be echoed as this cell's output.
# NOTE(review): the first column is presumably an index saved with the data - confirm.
df = df.drop(columns=df.columns[0])




In [None]:
# cleaned_df holds only the three columns needed to train the KNN model,
# re-indexed from 0 so that row labels and row positions coincide.
# NOTE(review): `my_array` is not defined in any cell shown in this notebook; it
# must already exist in the kernel when this cell runs - confirm where it is built.
cleaned_df = pd.DataFrame(
    my_array,
    columns=['user_id', 'program_desc', 'rating'],
).reset_index(drop=True)


In [None]:
def content_based_filtering(movie_name,df,top_n=6):
    """Print the programmes whose text features are most similar to a searched title.

    This is the content-based recommender; search_movies calls it when the KNN
    recommendation raises an error.

    The function
      1. removes every row whose 'overview' value occurs more than once,
      2. joins the genre, overview, class and original-name columns into one
         text per row and turns the texts into TF-IDF vectors,
      3. fuzzy-matches movie_name against the 'overview' column,
      4. ranks all rows by cosine similarity to the matched row and prints the
         'overview' of the best top_n rows. The matched row itself normally
         ranks first, because nothing is more similar to it than itself.

    Parameters
    ----------
    movie_name : str
        Search text typed by the user.
    df : pandas.DataFrame
        Must contain the columns 'overview', 'program_class', 'program_genre'
        and 'original_name'. It is not modified.
    top_n : int, default 6
        Maximum number of titles to print.

    Returns
    -------
    None
        Results are printed. 'Movie not Found' is printed when there is
        nothing to match against.
    """
    # drop_duplicates returns a new frame, so the caller's dataframe is left untouched.
    # After reset_index the row labels equal the row positions, which lets the
    # key returned by the fuzzy matcher be used as a position further down.
    catalogue = df.drop_duplicates(subset=["overview"], keep=False).reset_index(drop=True)

    # selecting important features and making sure they can be concatenated
    selected_features = ['program_class','overview','program_genre','original_name']
    for feature in selected_features:
        catalogue[feature] = catalogue[feature].fillna('')

    # combining all features into a single text per row
    combined_features = (
        catalogue['program_genre'] + ' ' + catalogue['overview'] + ' '
        + catalogue['program_class'] + ' ' + catalogue['original_name']
    )

    # extractOne gives (matched text, score, key); for a Series the key is the index label.
    # It gives None when there are no candidates, e.g. when the frame is empty.
    best_match = process.extractOne(movie_name, catalogue['overview'])
    if best_match is None:
        print('Movie not Found')
        return
    index_of_the_movie = best_match[2]

    # converting the combined features into numeric values
    feature_vectors = TfidfVectorizer().fit_transform(combined_features)

    # Only the similarities of the matched row are needed, so compare that single
    # row with all rows instead of building the full n x n similarity matrix.
    similarity_row = cosine_similarity(feature_vectors[[index_of_the_movie]], feature_vectors)[0]

    # most similar first; sorted() is stable, so ties keep their row order
    ranked_rows = sorted(enumerate(similarity_row), key=lambda pair: pair[1], reverse=True)

    print("Recommended movies for {}".format(movie_name))
    for rank, (row_position, _score) in enumerate(ranked_rows[:top_n]):
        print(rank,' ',catalogue['overview'].iloc[row_position])

In [None]:
# Steps for preparing the data used to train the KNNBasic algorithm

# Reader tells Surprise the rating scale; predictions are kept inside this range
reader = Reader(rating_scale=(1,5))

# load_from_df builds a Surprise dataset from the dataframe (columns given in
# user, item, rating order) using the reader defined above
data = Dataset.load_from_df(cleaned_df[['user_id','program_desc','rating']],reader)

# Hold out 30% of the ratings for testing. random_state is fixed so that the
# split - and therefore the fitted model and the reported RMSE/MAE - is the
# same every time the notebook is re-run.
trainset,testset = train_test_split(data,test_size=0.30,random_state=42)

In [None]:
def evaluate_performance(knn,testset):
    """Score a fitted model on the test set and report its RMSE and MAE.

    knn     : fitted estimator exposing a test(testset) method
    testset : held-out ratings passed to knn.test

    The two accuracy helpers are called only for their reporting effect; their
    return values are discarded and the function itself returns None.
    """
    test_predictions = knn.test(testset)
    # same order as before: RMSE first, then MAE
    for report_metric in (accuracy.rmse, accuracy.mae):
        report_metric(test_predictions)

In [None]:
# KNNBasic receives its similarity settings as a dictionary: the name of the
# similarity measure and whether neighbours are computed between users or items
sim_options = dict(
    name='cosine',
    user_based=True,   # compare users by their ratings to make recommendations
    min_support=1,
)

# initializing the estimator with 5 neighbours
knn = KNNBasic(k=5, sim_options=sim_options)

# training the model (kept as the last expression, so the fitted estimator is shown as the cell output)
knn.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7f164a1cb390>

In [None]:
def knn_recommendation(movie_name,n,knn,trainset):
    """Print up to n titles rated by the users most similar to a user who rated the searched title.

    Reads the module-level dataframe `cleaned_df`
    (columns 'user_id', 'program_desc', 'rating').

    Steps
      1. fuzzy-match movie_name against the titles in cleaned_df and take the
         user of the first row carrying the best-matching title,
      2. ask the fitted model for that user's n nearest neighbours,
      3. collect every (title, rating) of those neighbours, order them by
         rating (highest first), remove repeated titles and print the first n.

    Parameters
    ----------
    movie_name : str
        Search text typed by the user.
    n : int
        Number of neighbours to look up and maximum number of titles to print.
    knn : fitted estimator exposing get_neighbors(inner_id, k)
    trainset : trainset exposing to_inner_uid / to_raw_uid

    Raises
    ------
    LookupError
        When there is no title to match against or the neighbours have no
        ratings. It is raised before anything is printed, so a caller can fall
        back to another recommender without a half-printed list.
    Any error raised by trainset.to_inner_uid (for example when the matched user
    is not part of the trainset) is propagated unchanged.
    """
    # Match against each distinct title once. drop_duplicates keeps the first
    # occurrence of every title together with its index label, so the row that
    # is selected is the same one a match over all rating rows would select.
    titles = cleaned_df['program_desc'].drop_duplicates()

    # extractOne gives (matched text, score, key); for a Series the key is the
    # index LABEL, so the row is looked up by label below, not by position.
    best_match = process.extractOne(movie_name, titles)
    if best_match is None:
        raise LookupError("no programme titles available to match against")
    query_label = best_match[2]

    # user who rated the matched title
    uid = cleaned_df.at[query_label, 'user_id']

    # Surprise works with inner ids: the raw id used when the trainset was built
    # has to be converted to its inner id before querying the model.
    test_subject_inner_id = trainset.to_inner_uid(uid)
    neighbour_inner_ids = knn.get_neighbors(test_subject_inner_id, n)

    # back to raw ids so the neighbours can be found in the dataframe
    neighbour_raw_ids = [trainset.to_raw_uid(inner_id) for inner_id in neighbour_inner_ids]

    # (title, rating) of everything the neighbours rated, in neighbour order
    rated_titles = []
    for neighbour_id in neighbour_raw_ids:
        neighbour_rows = cleaned_df.loc[cleaned_df['user_id'] == neighbour_id, ['program_desc', 'rating']]
        rated_titles.extend(neighbour_rows.itertuples(index=False, name=None))

    # highest rating first; the sort is stable, so ties keep neighbour order
    rated_titles.sort(key=lambda pair: pair[1], reverse=True)

    # remove repeated titles while keeping the order (first occurrence wins)
    recommended_movies = list(dict.fromkeys(title for title, _rating in rated_titles))

    if not recommended_movies:
        raise LookupError("no ratings found for the neighbours of the matched user")

    # printing at most the n top rated titles; slicing cannot run past the end
    # of the list when fewer than n titles are available
    print("Recommendations for {}".format(movie_name))
    for title in recommended_movies[:n]:
        print(title)
        


    

In [None]:
def search_movies(movie_name,knn,testset,trainset,df,n):
    """Print recommendations for movie_name, preferring the KNN recommender.

    knn_recommendation is tried first; if it raises any ordinary error,
    content_based_filtering is run on df instead.

    movie_name : search text typed by the user
    knn        : fitted KNN estimator, passed to knn_recommendation
    testset    : not used here; kept so existing calls keep working
    trainset   : trainset passed to knn_recommendation
    df         : dataframe passed to content_based_filtering
    n          : number of recommendations requested from knn_recommendation
    """
    try:
        knn_recommendation(movie_name,n,knn,trainset)
    except Exception:
        # Exception rather than a bare except: the fallback is still taken for
        # every recommender error, but KeyboardInterrupt and SystemExit are no
        # longer swallowed, so the cell can be interrupted.
        content_based_filtering(movie_name,df)

In [None]:
# Ask for a title, then request 10 recommendations for it
movie_name = input("Search Movie: ")
search_movies(movie_name, knn, testset, trainset, df, n=10)

Recommendations for Horro
Inside (Horror-MOVIE)
The Man with the Iron Heart (Action-MOVIE)
Game of Aces (Action-MOVIE)
A Very British Gangster (Documentary-MOVIE)
Haqq Mayyet,Season 1,Episode 10 (Drama-SERIES/EPISODES)
Philomena (Drama-MOVIE)
Going in tyle (Comedy-MOVIE)
Spider-Man (Action-MOVIE)
Surf's Up : WaveMania (Animation-MOVIE)
Storks (Animation-MOVIE)


In [None]:
# Score the fitted KNN model on the held-out test set; the call prints its RMSE and MAE
evaluate_performance(knn,testset)

RMSE: 1.6745
MAE:  1.5473


In [None]:
# Run the content-based recommender directly on the same search term (bypassing the KNN path)
content_based_filtering(movie_name,df)

Recommended movies for Horro
0   Transfiguration (Horror-MOVIE)
1   American Horror tory,Season 4,Episode 7 (Horror-SERIES/EPISODES)
2   American Horror tory,Season 6,Episode 1 (Horror-SERIES/EPISODES)
3   Havenhurst (Horror-MOVIE)
4   Cloverfield (Horror-MOVIE)
5   Quarantine (Horror-MOVIE)


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=b8ae239b-d5a4-407a-84d7-0b22a7159a1d' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>