In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [4]:
# Load the movie catalogue from "movies.csv" (path is relative to the
# notebook's working directory) into a DataFrame.
# NOTE(review): the preview below shows an "Unnamed: 0" column, which looks
# like a row index that was saved into the CSV; consider `index_col=0` if it
# is not needed -- confirm nothing else relies on that column first.
movie_data = pd.read_csv("movies.csv")

In [5]:
# Preview the first rows to sanity-check the columns that were loaded
# (id, title, genre, original_language, overview, popularity, ...).
movie_data.head()

Unnamed: 0,id,title,genre,original_language,overview,popularity,release_date,vote_average,vote_count
0,278,The Shawshank Redemption,"Drama,Crime",en,Framed in the 1940s for the double murder of h...,94.075,1994-09-23,8.7,21862
1,19404,Dilwale Dulhania Le Jayenge,"Comedy,Drama,Romance",hi,"Raj is a rich, carefree, happy-go-lucky second...",25.408,1995-10-19,8.7,3731
2,238,The Godfather,"Drama,Crime",en,"Spanning the years 1945 to 1955, a chronicle o...",90.585,1972-03-14,8.7,16280
3,424,Schindler's List,"Drama,History,War",en,The true story of how businessman Oskar Schind...,44.761,1993-12-15,8.6,12959
4,240,The Godfather: Part II,"Drama,Crime",en,In the continuing saga of the Corleone crime f...,57.749,1974-12-20,8.6,9811


In [6]:
def recommend_movie(movie_name, top_n=5):
    """Print the titles of the movies whose text is most similar to ``movie_name``.

    Each movie is described by its ``genre`` and ``overview`` text joined into
    one string. Those strings are turned into bag-of-words count vectors and
    ranked by cosine similarity against the requested movie.

    Parameters
    ----------
    movie_name : str
        Title to look up. It must match a value in ``movie_data['title']``
        exactly; if several rows share the title, the first one is used.
    top_n : int, optional
        How many recommendations to print (default 5).

    Returns
    -------
    None
        Results are printed, one title per line. If the title is unknown, a
        notice is printed instead. Any exception raised while computing the
        recommendations is reported with ``print`` rather than propagated.

    Notes
    -----
    Reads the module-level ``movie_data`` DataFrame and does not modify it.
    """
    try:
        top_n = max(int(top_n), 0)
        titles = movie_data['title']

        # Check if the movie exists before doing any expensive work.
        # flatnonzero yields *positions*, which is what the vector matrix and
        # .iloc need -- index labels would be wrong for a non-default index.
        matches = np.flatnonzero((titles == movie_name).to_numpy())
        if matches.size == 0:
            print("The movie is not in the database.")
            return
        movie_pos = int(matches[0])

        # Combine 'genre' and 'overview' into one text per movie. Missing
        # values become empty strings; otherwise NaN would propagate through
        # the concatenation and be vectorized as the literal token "nan".
        # This is a local Series, so the shared movie_data frame is untouched.
        tags = (
            movie_data['genre'].fillna("").astype(str)
            + " "
            + movie_data['overview'].fillna("").astype(str)
        )

        # Vectorize the tags; keep the matrix sparse to avoid materializing a
        # dense (n_movies x vocabulary) array.
        vectorizer = CountVectorizer(max_features=10000)
        tag_vectors = vectorizer.fit_transform(tags.values.astype("U"))

        # Only the requested movie's row of the similarity matrix is needed.
        # The slice keeps the query 2-D as cosine_similarity expects.
        similarity_row = cosine_similarity(
            tag_vectors[movie_pos:movie_pos + 1], tag_vectors
        ).ravel()

        # Highest similarity first; the stable sort keeps ties in catalogue order.
        ranked = np.argsort(-similarity_row, kind="stable")
        # Drop the query movie explicitly instead of assuming it sorts first
        # (another movie with identical tags could tie with it at 1.0).
        ranked = ranked[ranked != movie_pos][:top_n]

        # Print the titles of the most similar movies
        for pos in ranked:
            print(titles.iloc[pos])

    except Exception as e:
        # Best-effort reporting kept from the original notebook behaviour.
        print(f"An error occurred: {e}")


In [7]:
# Example query: the title to look up (must match a catalogue title exactly).
movie_name = "Batman"

In [8]:
# Print the recommended titles for the movie stored in `movie_name`.
recommend_movie(movie_name)

Batman Returns
Escape Plan 2: Hades
Angel of Evil
Batman: Death in the Family
Desperado
