In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the movies dataset from CSV into a DataFrame.
# NOTE(review): '/content/...' looks like a Google Colab path -- adjust when running elsewhere.
movies_data = pd.read_csv('/content/movies.csv')

In [None]:
# Preview the first rows to check that the columns loaded as expected.
movies_data.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [None]:
# Preview the last rows of the dataset.
movies_data.tail()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
4798,4798,220000,Action Crime Thriller,,9367,united states\u2013mexico barrier legs arms pa...,es,El Mariachi,El Mariachi just wants to play his guitar and ...,14.269792,...,81.0,"[{""iso_639_1"": ""es"", ""name"": ""Espa\u00f1ol""}]",Released,"He didn't come looking for trouble, but troubl...",El Mariachi,6.6,238,Carlos Gallardo Jaime de Hoyos Peter Marquardt...,"[{'name': 'Robert Rodriguez', 'gender': 0, 'de...",Robert Rodriguez
4799,4799,9000,Comedy Romance,,72766,,en,Newlyweds,A newlywed couple's honeymoon is upended by th...,0.642552,...,85.0,[],Released,A newlywed couple's honeymoon is upended by th...,Newlyweds,5.9,5,Edward Burns Kerry Bish\u00e9 Marsha Dietlein ...,"[{'name': 'Edward Burns', 'gender': 2, 'depart...",Edward Burns
4800,4800,0,Comedy Drama Romance TV Movie,http://www.hallmarkchannel.com/signedsealeddel...,231617,date love at first sight narration investigati...,en,"Signed, Sealed, Delivered","""Signed, Sealed, Delivered"" introduces a dedic...",1.444476,...,120.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,"Signed, Sealed, Delivered",7.0,6,Eric Mabius Kristin Booth Crystal Lowe Geoff G...,"[{'name': 'Carla Hetland', 'gender': 0, 'depar...",Scott Smith
4801,4801,0,,http://shanghaicalling.com/,126186,,en,Shanghai Calling,When ambitious New York attorney Sam is sent t...,0.857008,...,98.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A New Yorker in Shanghai,Shanghai Calling,5.7,7,Daniel Henney Eliza Coupe Bill Paxton Alan Ruc...,"[{'name': 'Daniel Hsia', 'gender': 2, 'departm...",Daniel Hsia
4802,4802,0,Documentary,,25975,obsession camcorder crush dream girl,en,My Date with Drew,Ever since the second grade when he first saw ...,1.929883,...,90.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,My Date with Drew,6.3,16,Drew Barrymore Brian Herzlinger Corey Feldman ...,"[{'name': 'Clark Peterson', 'gender': 2, 'depa...",Brian Herzlinger


In [None]:
# (number of rows, number of columns) of the loaded dataset.
movies_data.shape

(4803, 24)

In [None]:
# Text columns whose contents are combined to describe each movie for the recommender.

selected_features = [
  'genres',
  'keywords',
  'tagline',
  'cast',
  'director',
]
print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [None]:
# Replace missing values in the selected text columns with empty strings,
# so that concatenating the columns later never produces NaN.

movies_data[selected_features] = movies_data[selected_features].fillna('')

In [None]:
# Combine all the selected features into a single text string per movie.
# Every column is separated by a space -- including cast and director -- so the
# last cast token and the director's first name stay separate words for the
# TF-IDF vectorizer instead of being fused into one token.

combined_features = (
  movies_data['genres'] + ' '
  + movies_data['keywords'] + ' '
  + movies_data['tagline'] + ' '
  + movies_data['cast'] + ' '
  + movies_data['director']
)

In [None]:
# Show the combined text built for each movie.
print(combined_features)


0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


In [None]:
# Vectorizer used to convert the combined text into TF-IDF feature vectors (default settings).

vectorizer = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text and transform it into one feature vector per movie.
feature_vectors = vectorizer.fit_transform(combined_features)

In [None]:
# Inspect the resulting feature matrix.
print(feature_vectors)

Cosine similarity

In [None]:
# Pairwise cosine similarity between the feature vectors of all movies.
similarity = cosine_similarity(feature_vectors)

In [None]:
# Inspect the similarity matrix.
print(similarity)

[[1.         0.07047992 0.01423497 ... 0.         0.         0.        ]
 [0.07047992 1.         0.03271772 ... 0.03557523 0.         0.        ]
 [0.01423497 0.03271772 1.         ... 0.         0.02997687 0.        ]
 ...
 [0.         0.03557523 0.         ... 1.         0.         0.02941525]
 [0.         0.         0.02997687 ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.02941525 0.         1.        ]]


In [None]:
# Dimensions of the similarity matrix.
print(similarity.shape)

(4803, 4803)


In [None]:
# Ask the user for the title of a movie they like.

movie_name = input("Enter your favourite movie name: ")

Enter your favourite movie name: Iron Man


In [None]:
# Collect every movie title in the dataset into a plain Python list.

list_of_all_titles = list(movies_data['title'])
print(list_of_all_titles)

['Avatar', "Pirates of the Caribbean: At World's End", 'Spectre', 'The Dark Knight Rises', 'John Carter', 'Spider-Man 3', 'Tangled', 'Avengers: Age of Ultron', 'Harry Potter and the Half-Blood Prince', 'Batman v Superman: Dawn of Justice', 'Superman Returns', 'Quantum of Solace', "Pirates of the Caribbean: Dead Man's Chest", 'The Lone Ranger', 'Man of Steel', 'The Chronicles of Narnia: Prince Caspian', 'The Avengers', 'Pirates of the Caribbean: On Stranger Tides', 'Men in Black 3', 'The Hobbit: The Battle of the Five Armies', 'The Amazing Spider-Man', 'Robin Hood', 'The Hobbit: The Desolation of Smaug', 'The Golden Compass', 'King Kong', 'Titanic', 'Captain America: Civil War', 'Battleship', 'Jurassic World', 'Skyfall', 'Spider-Man 2', 'Iron Man 3', 'Alice in Wonderland', 'X-Men: The Last Stand', 'Monsters University', 'Transformers: Revenge of the Fallen', 'Transformers: Age of Extinction', 'Oz: The Great and Powerful', 'The Amazing Spider-Man 2', 'TRON: Legacy', 'Cars 2', 'Green Lant

In [None]:
# Fuzzy-match the user's input against the known titles.
# n=3 and cutoff=0.6 are difflib's documented defaults, spelled out here for clarity.

find_close_match = difflib.get_close_matches(
  movie_name,
  list_of_all_titles,
  n=3,
  cutoff=0.6,
)
print(find_close_match)

['Iron Man', 'Iron Man 3', 'Iron Man 2']


In [None]:
# Use the first (best-ranked) candidate as the reference movie.
# NOTE: raises IndexError when find_close_match is empty, i.e. no title was similar enough.
close_match = find_close_match[0]
print(close_match)

Iron Man


In [None]:
# Look up the value of the dataset's 'index' column for the matched title
# (the first matching row is used if several movies share the title).

title_matches = movies_data['title'] == close_match
index_of_the_movie = movies_data.loc[title_matches, 'index'].to_numpy()[0]
print(index_of_the_movie)

68


In [None]:
# Pair every movie's row position with its similarity to the chosen movie.

similarity_score = [
  (position, score)
  for position, score in enumerate(similarity[index_of_the_movie])
]
print(similarity_score)

[(0, 0.03229579644860707), (1, 0.05393195039447943), (2, 0.013495256913644418), (3, 0.006290137712561313), (4, 0.03181599223363476), (5, 0.013492855232607511), (6, 0.0807002229078615), (7, 0.23948092153801254), (8, 0.007641303920257967), (9, 0.07528320175535286), (10, 0.07440697853473807), (11, 0.011816684330342166), (12, 0.013493141962799469), (13, 0.012195430222074342), (14, 0.09532110167793534), (15, 0.007191559327707054), (16, 0.22696359695614315), (17, 0.012738836763260195), (18, 0.04033642067761474), (19, 0.07763620585984604), (20, 0.07702266637203238), (21, 0.011076120703301915), (22, 0.00678715371866881), (23, 0.006402612273657429), (24, 0.012262924064256244), (25, 0.0), (26, 0.2174886369018547), (27, 0.029945938769572685), (28, 0.06190026789378866), (29, 0.013589845716434311), (30, 0.0781463629334929), (31, 0.27155401360493714), (32, 0.028052126365086392), (33, 0.129125151923067), (34, 0.0), (35, 0.03417250378858466), (36, 0.031252233451102195), (37, 0.007885689399278513), (38

In [None]:
# Number of (position, score) pairs collected for the chosen movie.
len(similarity_score)

4803

In [None]:
# Rank the (position, score) pairs from most to least similar.

sorted_similar_movies = sorted(
  similarity_score,
  key=lambda pair: pair[1],
  reverse=True,
)
print(sorted_similar_movies)

[(68, 1.0000000000000002), (79, 0.3485398866616459), (31, 0.27155401360493714), (7, 0.23948092153801254), (16, 0.22696359695614315), (26, 0.2174886369018547), (85, 0.20384498126302353), (182, 0.1921149578641447), (511, 0.16526444635810036), (64, 0.15008682959582387), (203, 0.14651794298050327), (174, 0.14326369370437603), (4401, 0.14305673130472468), (46, 0.13991742457175796), (101, 0.13929773066794662), (169, 0.13514452526336213), (788, 0.13122776344970674), (94, 0.1310560532476988), (126, 0.1309077781595354), (33, 0.129125151923067), (3623, 0.11773832986774721), (2442, 0.11576168742539061), (353, 0.1130183623626873), (131, 0.10937368345102094), (38, 0.10906527471967552), (1740, 0.10886177866608816), (122, 0.10626607781771327), (1451, 0.10557107626215012), (242, 0.10419083368445188), (618, 0.1026173289166027), (1210, 0.09932679833929622), (954, 0.09917190541875921), (2390, 0.098307771574909), (3166, 0.09732062356161367), (3385, 0.09710030708125605), (2235, 0.09697177344765096), (1406,

In [None]:
# Print the titles of the most similar movies, best match first.
# The chosen movie normally ranks first because it is compared with itself.

print('Movies suggested for you : \n')

# Only the first 29 ranked entries are ever printed, so slice them up front
# instead of running a title lookup for every movie in the dataset.
for i, movie in enumerate(sorted_similar_movies[:29], start=1):
  index = movie[0]
  # The ranked entries carry row positions (from enumerate), so fetch by position.
  title_from_index = movies_data['title'].iloc[index]
  print(i, '.',title_from_index)

Movies suggested for you : 

1 . Iron Man
2 . Iron Man 2
3 . Iron Man 3
4 . Avengers: Age of Ultron
5 . The Avengers
6 . Captain America: Civil War
7 . Captain America: The Winter Soldier
8 . Ant-Man
9 . X-Men
10 . X-Men: Apocalypse
11 . X2
12 . The Incredible Hulk
13 . The Helix... Loaded
14 . X-Men: Days of Future Past
15 . X-Men: First Class
16 . Captain America: The First Avenger
17 . Deadpool
18 . Guardians of the Galaxy
19 . Thor: The Dark World
20 . X-Men: The Last Stand
21 . Made
22 . Southland Tales
23 . Tropic Thunder
24 . G-Force
25 . The Amazing Spider-Man 2
26 . Kick-Ass 2
27 . X-Men Origins: Wolverine
28 . Zoom
29 . Fantastic Four


Movie Recommendation System

In [None]:
# End-to-end recommender: ask for a movie, fuzzy-match it against the known
# titles, then list the most similar movies.

movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['title'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

if not find_close_match:
  # get_close_matches returns an empty list when nothing is similar enough;
  # indexing [0] on it would raise IndexError, so report the miss instead.
  print('No close match found for', repr(movie_name))
else:
  close_match = find_close_match[0]

  index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]

  similarity_score = list(enumerate(similarity[index_of_the_movie]))

  sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

  print('Movies suggested for you : \n')

  # Only the first 29 ranked entries are printed, so slice them up front
  # instead of running a title lookup for every movie in the dataset.
  for i, movie in enumerate(sorted_similar_movies[:29], start=1):
    index = movie[0]
    # The ranked entries carry row positions (from enumerate), so fetch by position.
    title_from_index = movies_data['title'].iloc[index]
    print(i, '.',title_from_index)

 Enter your favourite movie name : Vampire diaries
Movies suggested for you : 

1 . Vampires
2 . BloodRayne
3 . Contact
4 . Dudley Do-Right
5 . Priest
6 . Ghosts of Mississippi
7 . Me and Orson Welles
8 . The Girl with the Dragon Tattoo
9 . The Shadow
10 . High Anxiety
11 . xXx
12 . The Lord of the Rings: The Two Towers
13 . The Best Offer
14 . Wal-Mart: The High Cost of Low Price
15 . The Relic
16 . Salvador
17 . How to Be Single
18 . The Homesman
19 . Stranded
20 . Brooklyn Rules
21 . Dreamcatcher
22 . Wanted
23 . The Lord of the Rings: The Return of the King
24 . Lifeforce
25 . The Wiz
26 . Vampire in Brooklyn
27 . The Hunted
28 . Deterrence
29 . Underworld: Awakening


Saving the model

In [None]:
import pickle

In [None]:
# NOTE(review): this name is not referenced by the save/load cells that follow,
# which hard-code their own file names.
filename = 'Movie_Recommendation_model.sav'

In [None]:
# Persist the fitted vectorizer and the similarity matrix to separate pickle files.

with open('vectorizer.sav', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

with open('similarity.sav', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)

In [None]:
# Restore the vectorizer and the similarity matrix from their pickle files.
# Only unpickle files you trust: pickle.load can execute arbitrary code.

with open('vectorizer.sav', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

with open('similarity.sav', 'rb') as similarity_file:
    similarity = pickle.load(similarity_file)