In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import get_close_matches  # ✅ Fuzzy matching

# Load the movie metadata table. Malformed rows are skipped and "id" is kept
# as text so identifiers are not coerced to numbers.
# NOTE(review): the encoding is kept as originally written; confirm it matches
# how the CSV was actually saved.
file_path = "movies_metadata.csv"
df = pd.read_csv(
    file_path,
    encoding="ISO-8859-1",
    on_bad_lines="skip",
    dtype={"id": str},
    engine="python",
)

# A row without a title can neither be searched for nor recommended, and a
# missing (NaN) genre string would crash both the set-building step and the
# TF-IDF fit below. Drop the former, blank the latter, then reset the index so
# that index labels coincide with row positions in the similarity matrix.
df = df.dropna(subset=["title"]).reset_index(drop=True)
df["genres"] = df["genres"].fillna("").astype(str)

# 🔹 Clean movie titles and genres for better matching
df["title_cleaned"] = df["title"].astype(str).str.strip().str.lower()
# Each row gets the set of lower-cased, whitespace-separated tokens of its
# genre string.
# NOTE(review): this assumes "genres" is a plain space-separated list; if the
# column holds serialized structures, the tokens will include punctuation and
# keys rather than genre names. Verify against the data.
df["genres_cleaned"] = df["genres"].str.lower().str.split().apply(set)

# 🔹 Create TF-IDF matrix for genres (one row per movie)
tfidf = TfidfVectorizer()
genre_matrix = tfidf.fit_transform(df["genres"])

# 🔹 Compute cosine similarity between every pair of movies; row/column k
# corresponds to the k-th row of df.
cosine_sim = cosine_similarity(genre_matrix, genre_matrix)

# 🔹 Recommendation Function with Fuzzy Matching
def recommend_movies(movie_name, df, cosine_sim, *, top_n=10,
                     min_shared_genres=2, cutoff=0.7):
    """Return titles of movies similar to the one best matching ``movie_name``.

    The query is matched fuzzily against ``df["title_cleaned"]``. Candidates
    are then visited in order of decreasing similarity to the matched movie
    and kept when they share enough genre tokens with it.

    Args:
        movie_name: Title typed by the user; surrounding whitespace and case
            are ignored.
        df: DataFrame with the columns ``title``, ``title_cleaned`` and
            ``genres_cleaned`` (a set of genre tokens per row).
        cosine_sim: Square similarity matrix whose row/column ``k`` refers to
            the ``k``-th row of ``df`` (by position, not by index label).
        top_n: Maximum number of recommendations to return.
        min_shared_genres: Minimum number of genre tokens a candidate must
            share with the matched movie.
        cutoff: Minimum ``difflib`` similarity ratio for the title match.

    Returns:
        A list of at most ``top_n`` titles, or an empty list when no title is
        close enough to ``movie_name`` (a message is printed in that case).
    """
    query = movie_name.strip().lower()

    cleaned_titles = df["title_cleaned"]
    # difflib only works on strings; a missing title (NaN) would otherwise
    # raise TypeError inside get_close_matches.
    candidates = [t for t in cleaned_titles if isinstance(t, str)]

    # ✅ Use fuzzy matching to find the closest title
    close_matches = get_close_matches(query, candidates, n=1, cutoff=cutoff)

    if not close_matches:
        print(f"❌ '{movie_name}' not found in the dataset. Please check the spelling or try another movie.")
        return []

    # Work with row *positions* from here on: the similarity matrix is
    # positional, so using index labels would break (or silently pick the
    # wrong rows) whenever df does not carry a default 0..n-1 index.
    is_match = (cleaned_titles == close_matches[0]).to_numpy()
    position = int(np.flatnonzero(is_match)[0])

    titles = df["title"].to_numpy()
    genre_sets = df["genres_cleaned"].to_numpy()
    movie_genres = genre_sets[position]

    # 🎯 Rank all movies by similarity, highest first. A stable sort keeps
    # equally similar movies in their original dataset order.
    scores = np.asarray(cosine_sim[position]).ravel()
    ranked_positions = np.argsort(-scores, kind="stable")

    recommendations = []
    for candidate in ranked_positions:
        if len(recommendations) >= top_n:
            break  # Enough recommendations collected
        if candidate == position:
            continue  # Skip the input movie itself

        # 📌 Require a minimum overlap in genre tokens
        shared = movie_genres.intersection(genre_sets[candidate])
        if len(shared) >= min_shared_genres:
            recommendations.append(titles[candidate])

    print(f"\n✅ Found: {titles[position]} (You searched: '{movie_name}')")  # ✅ Shows the matched title

    return recommendations

# 🔹 Example usage: ask for a title and show what the recommender suggests.
movie_input = input("Enter a movie name: ")
recommended_movies = recommend_movies(movie_input, df, cosine_sim)

# An empty list means nothing was found (the function has already reported
# that), so only print the heading and the bullet list when there are results.
if recommended_movies:
    print(
        "\n🎬 Recommended Movies:",
        *(f"🔹 {movie}" for movie in recommended_movies),
        sep="\n",
    )


Enter a movie name: Little Women

✅ Found: Little Women (You searched: 'Little Women')

🎬 Recommended Movies:
🔹 Sense and Sensibility
🔹 Leaving Las Vegas
🔹 Persuasion
🔹 Restoration
🔹 How To Make An American Quilt
🔹 When Night Is Falling
🔹 Bed of Roses
🔹 Once Upon a Time... When We Were Colored
🔹 Angels and Insects
🔹 The Bridges of Madison County
