<a href="https://colab.research.google.com/github/yukti468/task/blob/main/MovieRecommendationSystem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import pandas as pd
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import difflib


In [None]:
# Unpack every member of the dataset archive into /content/ so the CSV files
# can be read by the following cells.
with zipfile.ZipFile("/content/archive.zip", mode='r') as zip_ref:
    zip_ref.extractall(path="/content/")

In [None]:
# Read the two extracted CSV files: `movies` from the movies file first,
# then `credits` from the credits file.
movies, credits = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/tmdb_5000_movies.csv",
        "/content/tmdb_5000_credits.csv",
    )
)


In [None]:
# Attach the credits columns to each movie by matching movies.id with
# credits.movie_id (inner join, the pandas default). Both frames carry a
# `title` column, so the result exposes them as `title_x` and `title_y`.
movies = pd.merge(
    movies,
    credits,
    how='inner',
    left_on='id',
    right_on='movie_id',
)

In [None]:
# Show the column names available after the merge.
print(list(movies.columns))


['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language', 'original_title', 'overview', 'popularity', 'production_companies', 'production_countries', 'release_date', 'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title_x', 'vote_average', 'vote_count', 'movie_id', 'title_y', 'cast', 'crew']


In [None]:
def get_names(text, key='name', top=3):
    """Collapse the leading entries of a stringified list of dicts into one string.

    `text` is parsed with `ast.literal_eval`. For each of the first `top`
    entries, the value stored under `key` is taken and its spaces are removed
    (so a multi-word value stays a single whitespace-delimited token); the
    results are then joined with single spaces.

    Args:
        text: String containing a Python-literal list of dicts.
        key: Dictionary key to read from each entry.
        top: Maximum number of leading entries to keep.

    Returns:
        The space-joined values, or '' when `text` cannot be parsed or an
        entry does not have the expected shape (non-string input such as NaN,
        malformed text, missing key, non-string value).
    """
    try:
        entries = ast.literal_eval(text)
        return ' '.join(entry[key].replace(" ", "") for entry in entries[:top])
    except (ValueError, SyntaxError, TypeError, LookupError, AttributeError):
        # Best-effort parsing: a bad cell yields an empty feature instead of
        # aborting the whole column. Only parse/shape errors are caught, so
        # KeyboardInterrupt, SystemExit and genuine failures still propagate.
        return ''

def get_director(text):
    """Return the name of the first crew entry whose job is 'Director'.

    `text` is parsed with `ast.literal_eval` as a list of dicts. Spaces are
    removed from the name, matching what `get_names` does for cast, genres
    and keywords, so a director is one whitespace-delimited token rather
    than separate first-name and last-name words.

    Args:
        text: String containing a Python-literal list of crew dicts, each
            with at least 'name' and 'job' keys.

    Returns:
        The director's name without spaces, or '' when no entry has the job
        'Director' or `text` cannot be parsed into the expected shape.
    """
    try:
        crew = ast.literal_eval(text)
        return next(
            (
                member['name'].replace(" ", "")
                for member in crew
                if member['job'] == 'Director'
            ),
            '',
        )
    except (ValueError, SyntaxError, TypeError, LookupError, AttributeError):
        # Same best-effort policy as get_names: a bad row gives an empty feature.
        return ''


# NOTE: these assignments overwrite the raw stringified columns with plain
# text, so this cell must run exactly once after the merge cell. Running it
# again feeds already-cleaned text back into the parsers, which cannot parse
# it and return ''.
movies['cast'] = movies['cast'].apply(get_names, key='name', top=3)
movies['crew'] = movies['crew'].apply(get_director)
movies['genres'] = movies['genres'].apply(get_names)
movies['keywords'] = movies['keywords'].apply(get_names)

In [None]:
# Build one text field per movie by joining genres, keywords, tagline, cast
# and crew with single spaces. A missing tagline is replaced by an empty
# string so it does not turn the whole row into NaN.
movies['combined_features'] = movies['genres'].str.cat(
    [
        movies['keywords'],
        movies['tagline'].fillna(''),
        movies['cast'],
        movies['crew'],
    ],
    sep=' ',
)

In [None]:
# Turn the combined text of every movie into a TF-IDF weighted term matrix,
# ignoring scikit-learn's built-in English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(raw_documents=movies['combined_features'])

In [None]:
# Pairwise cosine similarity between the TF-IDF rows: entry [i][j] compares
# movie i with movie j.
cosine_sim = cosine_similarity(tfidf_matrix, dense_output=True)

# Titles in row order, used for fuzzy lookup of the user's input.
movie_titles = list(movies['title_x'])


In [None]:
def recommend_movie(input_movie, top_n=5):
    """Print the movies most similar to the title closest to `input_movie`.

    The input is fuzzy-matched against the notebook-level `movie_titles`
    list with `difflib.get_close_matches`; the best match's row of the
    notebook-level `cosine_sim` matrix is then ranked from most to least
    similar.

    Args:
        input_movie: Free-text movie title typed by the user.
        top_n: Maximum number of recommendations to print (default 5).
            Values below zero are treated as zero.

    Returns:
        None. Results, or a "No movie found" message, are printed.
    """
    close_match = difflib.get_close_matches(input_movie, movie_titles, n=1)
    if not close_match:
        print(f"No movie found for '{input_movie}'.")
        return
    matched_title = close_match[0]

    # Row position of the first movie with this title. `cosine_sim` is
    # indexed by position, so the position is taken from the same list the
    # title was matched against rather than from a DataFrame index label.
    position = movie_titles.index(matched_title)

    # Drop the queried movie explicitly instead of assuming it sorts first:
    # with tied scores the old "skip element 0" could discard a genuine
    # match and recommend the queried movie itself.
    candidates = [
        (idx, score)
        for idx, score in enumerate(cosine_sim[position])
        if idx != position
    ]
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)

    print(f"\n🎬 Because you watched '{matched_title}', you might like:\n")
    for rank, (idx, _) in enumerate(ranked[:max(top_n, 0)], 1):
        print(f"{rank}. {movie_titles[idx]}")

In [None]:
# Ask for a title interactively and print recommendations for it.
movie_input = input("Enter a movie you like: ")
recommend_movie(input_movie=movie_input)

Enter a movie you like: 500 days of summer

🎬 Because you watched '(500) Days of Summer', you might like:

1. Don Jon
2. The Good Girl
3. Premium Rush
4. The Amazing Spider-Man 2
5. The Juror
