# Importing the Required Libraries

In [None]:
import pandas as pd					# For standard Operations
import numpy as np          # For standard Operations
from sklearn.feature_extraction.text import CountVectorizer               # Converting the words into Vectors for mathematical Formulation
from sklearn.metrics.pairwise import cosine_similarity                    # Similarity Algorithm

### Let us define some helper functions, which we will use to get the index of a movie from its title and the title of a movie from its index

In [None]:
###### Defining the Helper functions, so that we can use them when needed #######
def get_title_from_index(index, frame=None):
    """Return the title of the movie stored under row label ``index``.

    Parameters
    ----------
    index : hashable
        Row label to look up in the frame's index.
    frame : pandas.DataFrame, optional
        Table with a ``"title"`` column. When omitted, the notebook-level
        ``df`` is used, which keeps existing one-argument calls working.

    Returns
    -------
    The ``"title"`` value of the first row whose label equals ``index``.

    Raises
    ------
    IndexError
        If no row carries that label. The exception type is the same one the
        previous implementation raised, but the message now names the index.
    """
    if frame is None:
        frame = df  # fall back to the global movie table
    matches = frame.loc[frame.index == index, "title"]
    if matches.empty:
        raise IndexError("No movie found at index {!r}".format(index))
    return matches.values[0]
def get_index_from_title(title, frame=None):
    """Return the index of the first movie whose title equals ``title``.

    Parameters
    ----------
    title : str
        Exact title to search for (the comparison is case-sensitive).
    frame : pandas.DataFrame, optional
        Table with a ``"title"`` column. When omitted, the notebook-level
        ``df`` is used, which keeps existing one-argument calls working.

    Returns
    -------
    The value of the ``"index"`` column for the first matching row when that
    column exists (the original behaviour); otherwise the row label of the
    first matching row.

    Raises
    ------
    IndexError
        If no row has that title. The exception type is the same one the
        previous implementation raised, but the message now names the title.
    """
    if frame is None:
        frame = df  # fall back to the global movie table
    matches = frame[frame["title"] == title]
    if matches.empty:
        raise IndexError("No movie titled {!r} found".format(title))
    if "index" in matches.columns:
        return matches["index"].values[0]
    # No explicit "index" column in the data: use the row label instead.
    return matches.index[0]
##################################################

## The steps we will follow in this project are:
1. Reading the CSV file
2. Selecting the features on which the movie recommendations should be based.
3. Creating a new column in the DataFrame which combines the selected features
4. Using the CountVectorizer to turn the combined text into word-count vectors
5. Applying the Cosine Similarity algorithm to measure how similar the movies are.
6. Getting the index of the movie which the user has recently liked
7. Recommending the top 10 most similar movies

In [None]:
## Step 1: Read CSV File
# Load the movie table; replace the placeholder string with the location of the dataset.
df = pd.read_csv(filepath_or_buffer='Your File path')
# List the available columns so the feature names used later can be checked.
print(df.columns)

In [None]:
## Step 2: Select Features
# Names of the columns whose text is combined to describe each movie.
features = [
    "keywords",
    "cast",
    "genres",
    "director",
]

In [None]:
#Step 3: Create a column in DF which combines all selected features
# Replace missing values in the selected feature columns with empty strings.
# The result is assigned back to the frame instead of calling
# fillna(inplace=True) on df[i]: that chained in-place call works on a
# temporary object, emits a FutureWarning on pandas 2.x and leaves df
# untouched once Copy-on-Write is active (the default from pandas 3.0).
df[features] = df[features].fillna('')
def combine_features(row, columns=('keywords', 'cast', 'genres', 'director')):
    """Join the feature values of one movie into a single space-separated string.

    Parameters
    ----------
    row : mapping
        Anything indexable by column name, e.g. a DataFrame row passed by
        ``df.apply(combine_features, axis=1)`` or a plain dict.
    columns : sequence of str, optional
        Column names to combine, in order. Defaults to the four columns the
        original implementation concatenated.

    Returns
    -------
    str
        The values joined with single spaces. ``None`` and NaN become empty
        strings and other non-string values are converted with ``str``, so the
        function no longer returns ``None`` for such rows.

    Raises
    ------
    KeyError
        If ``row`` lacks one of ``columns``. This used to be swallowed by a
        bare ``except`` that printed the row and returned ``None``.
    """
    parts = []
    for column in columns:
        value = row[column]
        # NaN is the only float that is not equal to itself.
        is_missing = value is None or (isinstance(value, float) and value != value)
        parts.append('' if is_missing else str(value))
    return ' '.join(parts)
# Build one text document per movie by running combine_features on every row.
df['combined_features'] = df.apply(func=combine_features, axis='columns')
# Preview the first five combined strings.
print(df['combined_features'].head(n=5))

### To learn more about the CountVectorizer, you can refer to the scikit-learn documentation <a href = 'https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.CountVectorizer.html'>here</a>

In [None]:
# Step 4: Create count matrix from this new combined column
# Vectorizer with default settings.
cv = CountVectorizer()
# Learn the vocabulary and produce the document-term count matrix in one call.
count_matrix = cv.fit_transform(raw_documents=df['combined_features'])

### To learn more about the Cosine Similarity algorithm, you can refer to the scikit-learn documentation <a href = 'https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.cosine_similarity.html'>here</a>

In [None]:
# Step 5: Compute the Cosine Similarity based on the count_matrix
# Title of the movie the recommendations are based on.
movie_user_likes = "Batman v Superman: Dawn of Justice"
# Pairwise similarity between every pair of rows of the count matrix.
cosine_sim = cosine_similarity(X=count_matrix)

In [None]:
# Step 6: Get index of this movie from its title
movie_index = get_index_from_title(movie_user_likes)
# Pair every position in the liked movie's similarity row with its score.
similar_movies = [
    (position, score)
    for position, score in enumerate(cosine_sim[movie_index])
]
# Copy the pairs, then order the copy from highest to lowest score.
sorted_similar_movies = list(similar_movies)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)

In [None]:
# Step 7: Print (and, where available, speak) the most similar movies in descending order of similarity score
TOP_N = 10  # number of recommendations to present

# Text-to-speech is optional: the win32com module may be missing (for example
# when not running on Windows), in which case the titles are only printed.
# The voice object is created once here rather than once per movie.
try:
    import win32com.client as winc1
    speak = winc1.Dispatch("SAPI.SpVoice")
except Exception as speech_error:
    speak = None
    print("Text-to-speech unavailable, printing titles only:", speech_error)

shown = 0
for movie_id, _score in sorted_similar_movies:
    if movie_id == movie_index:
        # The liked movie appears in its own similarity row (normally ranked
        # first); it is not a recommendation, so it is skipped and not counted.
        continue
    title = get_title_from_index(movie_id)
    if speak is not None:
        speak.Speak(title)
    print(title)
    shown += 1
    if shown >= TOP_N:
        break