In [14]:
import pandas as pd

# Load the uploaded dataset
file_path = 'Top_Movies.csv'
df = pd.read_csv(file_path)

# Check the first few rows and columns
print("Columns in dataset:", df.columns.tolist())
df.head()


Columns in dataset: ['Movie Name', 'Rating', 'Runtime', 'Genre', 'Metascore', 'Plot', 'Directors', 'Stars', 'Votes', 'Gross', 'Link']


Unnamed: 0,Movie Name,Rating,Runtime,Genre,Metascore,Plot,Directors,Stars,Votes,Gross,Link
0,The Godfather,9.2,175 min,"Crime, Drama",100.0,"Don Vito Corleone, head of a mafia family, dec...",Francis Ford Coppola,"Marlon Brando, Al Pacino, James Caan, Diane Ke...",1914405,134966411,https://www.imdb.com/title/tt0068646/
1,The Shawshank Redemption,9.3,142 min,Drama,82.0,"Over the course of several years, two convicts...",Frank Darabont,"Tim Robbins, Morgan Freeman, Bob Gunton, Willi...",2751997,28341469,https://www.imdb.com/title/tt0111161/
2,Shichinin no samurai,8.6,207 min,"Action, Drama",98.0,Farmers from a village exploited by bandits hi...,Akira Kurosawa,"Toshirô Mifune, Takashi Shimura, Keiko Tsushim...",353392,269061,https://www.imdb.com/title/tt0047478/
3,Cidade de Deus,8.6,130 min,"Crime, Drama",79.0,"In the slums of Rio, two kids' paths diverge a...",Fernando Meirelles,"Kátia Lund, Alexandre Rodrigues, Leandro Firmi...",772169,7563397,https://www.imdb.com/title/tt0317248/
4,The Godfather Part II,9.0,202 min,"Crime, Drama",90.0,The early life and career of Vito Corleone in ...,Francis Ford Coppola,"Al Pacino, Robert De Niro, Robert Duvall, Dian...",1303664,57300000,https://www.imdb.com/title/tt0071562/


In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Define the recommendation function using original column names
def recommend_movies(query, top_n=5):
    # Use the 'Plot' column directly
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(df['Plot'].fillna(''))  # Handle missing values
    query_vector = vectorizer.transform([query])
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()
    top_indices = np.argsort(similarities)[::-1][:top_n]

    # Use 'Movie Name' and 'Plot' columns
    recommendations = df.iloc[top_indices][['Movie Name', 'Plot']]
    scores = similarities[top_indices]

    return recommendations, scores


In [16]:
# Get user input
query = input("🎥 Describe the kind of movie you're looking for: ")

# Get top 5 recommendations
recommendations, scores = recommend_movies(query, top_n=5)

# Display the top recommendations
print("\n🔥 Top 5 Movie Recommendations:\n")
for idx, (title, description) in enumerate(zip(recommendations['Movie Name'], recommendations['Plot'])):
    print(f"{idx+1}. {title} (Score: {scores[idx]:.4f})")
    print(f"   {description}\n")


🎥 Describe the kind of movie you're looking for: I love action movies with superheroes and space battles.

🔥 Top 5 Movie Recommendations:

1. The Incredibles (Score: 0.2682)
   While trying to lead a quiet suburban life, a family of undercover superheroes are forced into action to save the world.

2. Gravity (Score: 0.1467)
   Two astronauts work together to survive after an accident leaves them stranded in space.

3. Gravity (Score: 0.1467)
   Two astronauts work together to survive after an accident leaves them stranded in space.

4. Spider-Man 2 (Score: 0.1359)
   Peter Parker is beset with troubles in his failing personal life as he battles a brilliant scientist named Doctor Otto Octavius.

5. Clerks (Score: 0.1299)
   A day in the lives of two convenience clerks named Dante and Randal as they annoy customers, discuss movies, and play hockey on the store roof.

