<a href="https://colab.research.google.com/github/yousufmunna143/movie_recommendation_system/blob/main/movie_recommendation_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing the dependencies

In [177]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Data Collection and Pre-Processing

In [178]:
# Read the movie metadata from the CSV file into a pandas DataFrame

movies_data = pd.read_csv(filepath_or_buffer="/content/movies.csv")

In [179]:
# Preview the first 5 rows of the DataFrame

movies_data.head(n=5)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [180]:
# Size of the DataFrame as a (rows, columns) tuple

movies_data.shape

(4803, 24)

In [181]:
# Text columns used to describe each movie for the recommender

selected_features = [
    "genres",
    "keywords",
    "tagline",
    "cast",
    "director",
]

In [182]:
# Replace missing values in the selected text columns with empty strings
# (all selected columns are filled in one vectorized assignment)

movies_data[selected_features] = movies_data[selected_features].fillna("")

In [183]:
# Join the selected text columns into one space-separated string per movie,
# in the order they are listed in selected_features

combined_features = movies_data[selected_features[0]]
for column_name in selected_features[1:]:
  combined_features = combined_features + ' ' + movies_data[column_name]

In [None]:
# Display the combined text column that is vectorized in the following cells
combined_features

In [185]:
# Converting text data into feature vectors:
# similarity search is easier on numeric data than on raw text.
# Create an instance of the TfidfVectorizer class (default settings).

vectorizer = TfidfVectorizer()

In [186]:
# Fit the vectorizer on the combined text and transform that same text
# into numeric feature vectors in a single fit_transform call

feature_vectors = vectorizer.fit_transform(combined_features)

In [None]:
# Show the feature vectors returned by fit_transform
print(feature_vectors)

# Cosine Similarity

In [188]:
# Pairwise cosine similarity between the rows of feature_vectors;
# the result is a square matrix with one row and one column per movie
similarity = cosine_similarity(feature_vectors)

In [189]:
# Report the matrix dimensions first, then the matrix itself (one item per line)
print(similarity.shape, similarity, sep="\n")

(4803, 4803)
[[1.         0.07219487 0.037733   ... 0.         0.         0.        ]
 [0.07219487 1.         0.03281499 ... 0.03575545 0.         0.        ]
 [0.037733   0.03281499 1.         ... 0.         0.05389661 0.        ]
 ...
 [0.         0.03575545 0.         ... 1.         0.         0.02651502]
 [0.         0.         0.05389661 ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.02651502 0.         1.        ]]


In [None]:
# Ask the user for a movie title; the text is matched approximately
# against the dataset titles in the cells below

movie_name = input('Enter your favourite movie: ')

In [None]:
# Collect every movie title from the dataset into a plain Python list

list_of_all_titles = movies_data['title'].tolist()

# Show the count and a short sample instead of dumping every title into the cell output
print(len(list_of_all_titles), "titles, e.g.", list_of_all_titles[:10])

In [None]:
# Use difflib to pick the dataset titles that most resemble the user's input.
# n and cutoff are spelled out at their default values (up to 3 matches, ratio >= 0.6).

find_close_matches = difflib.get_close_matches(
    word=movie_name,
    possibilities=list_of_all_titles,
    n=3,
    cutoff=0.6,
)
print(find_close_matches)

In [None]:
# difflib returns an empty list when no title is similar enough, so fail with a
# clear message instead of an opaque IndexError on [0]
if not find_close_matches:
  raise ValueError("No movie title resembles {!r}; please try a different spelling.".format(movie_name))

# The matches are ordered best-first, so the first entry is the closest title
close_match = find_close_matches[0]
print(close_match)

In [None]:
# Find the row position of close_match in the dataset.
# The similarity matrix was computed from the rows of movies_data in order, so it has to
# be indexed by row position; the stored 'index' column is only valid while its values
# happen to equal those positions. list.index returns the first matching position.

index_of_movie = movies_data['title'].tolist().index(close_match)
print(index_of_movie)

In [None]:
# Pair every movie's row position with its similarity to the chosen movie

similarity_score = list(enumerate(similarity[index_of_movie]))

# There is one (position, score) pair per movie, so show only a short sample
print(similarity_score[:10])

In [None]:
# Rank the (position, score) pairs from most to least similar

sorted_similar_movies = sorted(similarity_score, key=lambda pair: pair[1], reverse=True)

# Show only the top of the ranking rather than the full list
print(sorted_similar_movies[:10])

In [None]:
# Print the titles of the most similar movies

print("Movies you might like:")

TOP_N = 15  # number of recommendations to show

# Slice the ranking first so only TOP_N rows are ever looked up (the full list has one
# entry per movie). The first element of each pair is a row position produced by
# enumerate(), so the title is fetched by position with iloc rather than by index label.
for position, _score in sorted_similar_movies[:TOP_N]:
  print("-> " + movies_data['title'].iloc[position])


In [205]:
def recommend_movies(movie_name, movies_data, similarity, top_n=15):
  """Return the titles of the movies most similar to ``movie_name``.

  Parameters:
    movie_name: free-text title typed by the user; it is matched
      approximately against the dataset titles with difflib.
    movies_data: DataFrame with a 'title' column.
    similarity: square matrix whose row/column i corresponds to the i-th row
      of ``movies_data``.
    top_n: maximum number of titles to return (default 15).

  Returns:
    A list of at most ``top_n`` titles, most similar first. The matched movie
    itself is included (it has the highest similarity to itself).

  Raises:
    ValueError: if no dataset title is close enough to ``movie_name``.
  """
  list_of_all_titles = movies_data['title'].tolist()
  find_close_matches = difflib.get_close_matches(movie_name, list_of_all_titles)
  if not find_close_matches:
    # Without this guard the [0] below fails with an opaque IndexError
    raise ValueError("No movie title resembles {!r}; please try a different spelling.".format(movie_name))
  close_match = find_close_matches[0]

  # The similarity matrix is indexed by row position, so locate the title by position
  # (first occurrence) instead of trusting a stored 'index' column or the index labels.
  index_of_movie = list_of_all_titles.index(close_match)

  similarity_score = list(enumerate(similarity[index_of_movie]))
  sorted_similar_movies = sorted(similarity_score, key=lambda pair: pair[1], reverse=True)

  # Only the first top_n entries are needed; avoid a lookup for every movie in the dataset
  return [list_of_all_titles[position] for position, _score in sorted_similar_movies[:top_n]]


# In a notebook kernel __name__ is "__main__", so this runs as usual; the guard only
# prevents the prompt from firing if the code is ever imported as a module.
if __name__ == "__main__":
  movie_name = input('Enter your favourite movie: ')
  recommended_titles = recommend_movies(movie_name, movies_data, similarity)
  print("Movies you might like:")
  for recommended_title in recommended_titles:
    print("-> " + recommended_title)


Enter your favourite movie: bat man
Movies you might like:
-> Batman
-> Batman Returns
-> Batman & Robin
-> The Dark Knight Rises
-> Batman Begins
-> The Dark Knight
-> A History of Violence
-> Superman
-> Beetlejuice
-> Bedazzled
-> Mars Attacks!
-> The Sentinel
-> Planet of the Apes
-> Man of Steel
-> Suicide Squad
