In [36]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import difflib
from sklearn.metrics.pairwise import cosine_similarity

In [37]:
df = pd.read_csv('Bmovie.csv')

In [38]:
df.head()

Unnamed: 0,imdbId,title,releaseYear,releaseDate,genre,writers,actors,directors,sequel,hitFlop
0,tt0118578,Albela,2001,20 Apr 2001,Romance,Honey Irani (screenplay) | Honey Irani (story)...,Govinda | Aishwarya Rai Bachchan | Jackie Shro...,Deepak Sareen,0.0,2
1,tt0169102,Lagaan: Once Upon a Time in India,2001,08 May 2002,Adventure | Drama | Musical,Ashutosh Gowariker (story) | Ashutosh Gowarike...,Aamir Khan | Gracy Singh | Rachel Shelley | Pa...,Ashutosh Gowariker,0.0,6
2,tt0187279,Meri Biwi Ka Jawab Nahin,2004,02 Jul 2004,Action | Comedy,,Akshay Kumar | Sridevi | Gulshan Grover | Laxm...,Pankaj Parashar | S.M. Iqbal,0.0,1
3,tt0222024,Hum Tumhare Hain Sanam,2002,24 May 2002,Drama | Romance,K.S. Adiyaman | Arun Kumar (assistant dialogue...,Shah Rukh Khan | Madhuri Dixit | Salman Khan |...,K.S. Adiyaman,0.0,4
4,tt0227194,One 2 Ka 4,2001,30 Mar 2001,Action | Comedy | Drama,Sanjay Chhel | Raaj Kumar Dahima (screenplay) ...,Shah Rukh Khan | Juhi Chawla | Jackie Shroff |...,Shashilal K. Nair,0.0,1


In [39]:
combined_features = (
    df['genre'].astype(str) + ' ' + 
    df['actors'].astype(str) + ' ' + 
    df['directors'].astype(str)
 
)

In [40]:
combined_features

0       Romance Govinda | Aishwarya Rai Bachchan | Jac...
1       Adventure | Drama | Musical Aamir Khan | Gracy...
2       Action | Comedy Akshay Kumar | Sridevi | Gulsh...
3       Drama | Romance Shah Rukh Khan | Madhuri Dixit...
4       Action | Comedy | Drama Shah Rukh Khan | Juhi ...
                              ...                        
1279    nan Kaivalya Chheda | Rajsekhar Aningi | Avida...
1280    Horror Manish Choudhary | Suzanna Mukherjee | ...
1281    nan Adil Hussain | Ekavali Khanna | Kulbhushan...
1282    Comedy Vir Das | Sandeepa Dhar | Rahul Handa |...
1283    Thriller Shradha Das | Mohan Kapoor | Mannara ...
Length: 1284, dtype: object

In [41]:
df.isnull().sum()

imdbId           0
title            0
releaseYear      0
releaseDate     53
genre            2
writers        119
actors           3
directors        4
sequel           3
hitFlop          0
dtype: int64

In [42]:
for col in df:
   df[col] = df[col].fillna('')

In [43]:
df.isnull().sum()

imdbId         0
title          0
releaseYear    0
releaseDate    0
genre          0
writers        0
actors         0
directors      0
sequel         0
hitFlop        0
dtype: int64

In [44]:
tfv = TfidfVectorizer()

In [45]:
feature_converted = tfv.fit_transform(combined_features)

In [46]:
cs = cosine_similarity(feature_converted)

In [47]:
movie_name= input('Enter the name of movie:')

Enter the name of movie: Iron man


In [48]:
movie_list = df['title'].tolist()

In [49]:
print(movie_list)



In [50]:
find_close_match = difflib.get_close_matches(movie_name,movie_list)

In [51]:
print(find_close_match)

['Insan', 'Drona']


In [52]:
close_match = find_close_match[0]
print(close_match)

Insan


In [53]:
index_of_movie = df[df.title ==close_match].index[0]
print(index_of_movie)

318


In [54]:
similarity_score = list(enumerate(cs[index_of_movie]))

In [55]:
print(similarity_score)

[(0, np.float64(0.0)), (1, np.float64(0.007631826779093936)), (2, np.float64(0.10518703298032127)), (3, np.float64(0.009336955363330948)), (4, np.float64(0.008552600064062447)), (5, np.float64(0.0)), (6, np.float64(0.058964524548351605)), (7, np.float64(0.009209079045951477)), (8, np.float64(0.1862176434166563)), (9, np.float64(0.18896260078774735)), (10, np.float64(0.06734764950463812)), (11, np.float64(0.006611067710091102)), (12, np.float64(0.03901616726550191)), (13, np.float64(0.007930319388271828)), (14, np.float64(0.008164902459521036)), (15, np.float64(0.044884669266628666)), (16, np.float64(0.02829944547261094)), (17, np.float64(0.06795176059951279)), (18, np.float64(0.0)), (19, np.float64(0.04321240273485722)), (20, np.float64(0.008227220796457982)), (21, np.float64(0.11696898887474932)), (22, np.float64(0.008927362927467678)), (23, np.float64(0.04848108014865775)), (24, np.float64(0.008063726189141581)), (25, np.float64(0.008682842352777517)), (26, np.float64(0.1578853905186

In [56]:
sorted_similar_movies = sorted(similarity_score,key =lambda x:x[1],reverse=True)

In [57]:
print(sorted_similar_movies)

[(318, np.float64(1.0)), (340, np.float64(0.3656198530197881)), (787, np.float64(0.3626360107142978)), (339, np.float64(0.35559786972258645)), (96, np.float64(0.3493632080198197)), (145, np.float64(0.33328986643263003)), (1073, np.float64(0.3253773986087221)), (462, np.float64(0.3242911323448187)), (610, np.float64(0.3177094434374182)), (144, np.float64(0.2702450974504358)), (558, np.float64(0.26646649114781384)), (860, np.float64(0.2437752090397069)), (349, np.float64(0.2373646427750949)), (389, np.float64(0.2264438329997301)), (1070, np.float64(0.21688997919518513)), (856, np.float64(0.21385617687248237)), (1232, np.float64(0.2135482755156838)), (248, np.float64(0.21210025065616636)), (878, np.float64(0.21118058014397037)), (176, np.float64(0.2099954193187062)), (253, np.float64(0.20848163643449086)), (582, np.float64(0.20344791276954072)), (351, np.float64(0.20120055761713648)), (1153, np.float64(0.2002095825573632)), (181, np.float64(0.19713691049356857)), (153, np.float64(0.196524

In [58]:
print('Movies Suggested for you:')
i = 1
for movies in sorted_similar_movies:
    index = movies[0]
    title_from_index = df[df.index ==index]['title'].values[0]
    if(i<30):
        print(i, '.',title_from_index)
        i+=1
    







Movies Suggested for you:
1 . Insan
2 . Tango Charlie
3 . Golmaal 3
4 . Main Aisa Hi Hoon
5 . Kyaa Dil Ne Kahaa
6 . LOC: Kargil
7 . Chakravyuh
8 . Golmaal: Fun Unlimited
9 . One Two Three
10 . Khakee
11 . U Me Aur Hum
12 . Thank You
13 . Dosti: Friends Forever
14 . Pyare Mohan
15 . Satyagraha
16 . Tezz
17 . Mahabharat
18 . Ab Tumhare Hawale Watan Saathiyo
19 . Phas Gaye Re Obama
20 . Bewafaa
21 . Action Jackson
22 . Yuvvraaj
23 . Shaadi No. 1
24 . Jolly LLB
25 . Qayamat: City Under Threat
26 . Kucch To Hai
27 . Dhoom
28 . Omkara
29 . Ankahee


In [64]:
movie_name= input('Enter the name of movie:')
movie_list = df['title'].tolist()
find_close_match = difflib.get_close_matches(movie_name,movie_list)
close_match = find_close_match[0]
index_of_movie = df[df.title ==close_match].index[0]
similarity_score = list(enumerate(cs[index_of_movie]))
sorted_similar_movies = sorted(similarity_score,key =lambda x:x[1],reverse=True)

print('Movies Suggested for you:')
i = 1
for movies in sorted_similar_movies:
    index = movies[0]
    title_from_index = df[df.index ==index]['title'].values[0]
    if(i<30):
        print(i, '.',title_from_index)
        i+=1
    


Enter the name of movie: Golmaal


Movies Suggested for you:
1 . Golmaal 3
2 . Golmaal: Fun Unlimited
3 . All the Best: Fun Begins
4 . Singham Returns
5 . Zameen
6 . Sunday
7 . Kyaa Super Kool Hain Hum
8 . Bol Bachchan
9 . Insan
10 . Satyagraha
11 . Mujhe Kucch Kehna Hai
12 . Omkara
13 . Singham
14 . Action Jackson
15 . Qayamat: City Under Threat
16 . Golmaal Returns
17 . Hulchul
18 . Jeena Sirf Merre Liye
19 . Yuvvraaj
20 . U Me Aur Hum
21 . Tezz
22 . Tashan
23 . Enemmy
24 . Cash
25 . Tango Charlie
26 . Blackmail
27 . Kaanchi
28 . LOC: Kargil
29 . Bewafaa
