# 🎬 Movie Recommender System using Content-Based Filtering

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## 📥 Load and Explore the Dataset

In [None]:
# Load the pre-cleaned movie table. Later cells read its 'title' column (for
# lookups) and its 'tags' column (as the text that gets vectorized).
movies = pd.read_csv("movies_cleaned.csv")
# Preview the first rows to sanity-check the columns that were loaded.
movies.head()

## 📊 Exploratory Data Analysis (EDA)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import ast
# Load the raw TMDB export and derive a release year for every movie.
# Unparseable dates are coerced to NaT, so their year ends up missing.
raw_movies = pd.read_csv("tmdb_5000_movies.csv")
release_dates = pd.to_datetime(raw_movies['release_date'], errors='coerce')
raw_movies['release_date'] = release_dates
raw_movies['year'] = release_dates.dt.year

# Top genres: each 'genres' cell holds a stringified list of dicts, so parse
# every non-missing cell and collect the 'name' of each genre entry.
parsed_genres = raw_movies['genres'].dropna().apply(ast.literal_eval)
genres_list = []
for genre_entries in parsed_genres:
    for genre in genre_entries:
        genres_list.append(genre['name'])

genre_counts = pd.Series(genres_list).value_counts()
top_genres = genre_counts.head(10)

# Horizontal bar chart: movie count on the x-axis, genre name on the y-axis.
ax = sns.barplot(x=top_genres.values, y=top_genres.index)
ax.set_title('Top 10 Genres')
ax.set_xlabel('Number of Movies')
plt.show()

In [None]:
# Movies released per year: count the rows for each release year, then put
# the years in chronological order so the line reads left to right.
year_tally = raw_movies['year'].value_counts()
yearly_counts = year_tally.sort_index()

ax = sns.lineplot(x=yearly_counts.index, y=yearly_counts.values)
ax.set_title('Number of Movies Released per Year')
ax.set_xlabel('Year')
ax.set_ylabel('Number of Movies')
ax.grid(True)
plt.show()

## 🔍 Vectorization using CountVectorizer

In [None]:
# Bag-of-words model over the 'tags' text: keep the 3000 most frequent terms
# and drop common English stop words.
cv = CountVectorizer(max_features=3000, stop_words='english')
# An empty 'tags' field comes back from read_csv as NaN, and CountVectorizer
# raises on NaN documents. Treat missing tags as an empty document so the
# movie still gets a (all-zero) row and row positions stay aligned with
# `movies`.
vectors = cv.fit_transform(movies['tags'].fillna(''))

## 🧠 Recommendation Function

In [None]:
def recommend(movie):
    """Return the titles of up to five movies most similar to ``movie``.

    Similarity is the cosine similarity between the query movie's row of the
    module-level ``vectors`` matrix and every other row. Title matching is
    case-insensitive; when several rows share the title, the first one is
    used as the query.

    Args:
        movie: Title of the movie to find recommendations for.

    Returns:
        A list of up to five titles ordered from most to least similar, or
        ``["Movie not found."]`` when no row of ``movies`` has that title.
    """
    query = movie.lower()

    # Lowercase the title column once and reuse it for both the membership
    # check and the lookup.
    is_match = (movies['title'].str.lower() == query).to_numpy()
    if not is_match.any():
        return ["Movie not found."]

    # Work with the row *position* (not the index label): both `vectors` and
    # `movies.iloc` are positional, so this stays correct even if `movies`
    # does not carry a default 0..n-1 index.
    query_pos = int(is_match.argmax())  # argmax of a bool array = first True

    sim = cosine_similarity(vectors[query_pos], vectors).flatten()

    # Rank all rows by similarity, highest first. `sorted` is stable, so ties
    # keep their original row order.
    ranked = sorted(range(len(sim)), key=lambda pos: sim[pos], reverse=True)

    # Drop the query movie explicitly instead of assuming it sorts first:
    # another row with identical tags ties at the top and could otherwise
    # push the query itself into the results.
    top_positions = [pos for pos in ranked if pos != query_pos][:5]

    return [movies['title'].iloc[pos] for pos in top_positions]

## 🎯 Test the System

In [None]:
# Smoke test: look up "Avatar" (matching is case-insensitive) and display the
# titles returned, or ["Movie not found."] if the title is absent.
recommend("Avatar")

## ⚠️ Limitations & Future Improvements
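- This is a **content-based system** only. It compares movies by their metadata and doesn’t consider user preferences (ratings, watch history), so every user gets the same recommendations for a given title.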
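- It doesn’t handle cold-start problems for new movies without metadata: a movie whose tags are empty or very sparse has nothing to be compared on, so it cannot be matched meaningfully.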
- A future upgrade can include collaborative filtering or hybrid approaches using user ratings.
- Popularity and ratings can be used to sort recommendations for better user satisfaction.