 🎬 Movie Recommendation System

This project demonstrates how to build **movie recommendation engi** using c dataset of 5,000 movies.  
We implement two approaches:  
1. **Content-based Filting** – recommends movies similar to a given movie based on metadata (genres, director, year, etc.). 
2. **Collaborative FilteringSVD)** – recommends movies to users based on rating patt.ta
movies.head()


In [1]:

# MOVIE RECOMMENDATION ENGINE

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD




df = pd.read_csv("movies_dataset_5000.csv")


# CONTENT-BASED FILTERING

df['soup'] = df['genre'] + " " + df['director'] + " " + df['title']

# We use TF-IDF + Cosine Similarity on metadata (genres, director, year) to find movies similar to a given one.
# TF-IDF vectorization
tfidf = TfidfVectorizer(stop_words="english")
tfidf_matrix = tfidf.fit_transform(df['soup'])

# Cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Map title -> index
title_to_index = pd.Series(df.index, index=df['title']).drop_duplicates()

def recommend_content(title, top_n=10):
    """Recommend similar movies to a given title"""
    if title not in title_to_index:
        print(f"Movie '{title}' not found in dataset.")
        return pd.DataFrame()
    
    idx = title_to_index[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:top_n+1]  # exclude the movie itself
    movie_indices = [i for i, _ in sim_scores]
    
    return df.iloc[movie_indices][['movie_id','title','genre','director','year']]


# COLLABORATIVE FILTERING (SVD)
# We create a user–movie rating matrix and apply Truncated SVD (matrix factorization).

# Build user-item matrix
user_item = df.pivot_table(index="user_id", columns="movie_id", values="user_rating", aggfunc="mean").fillna(0)
R = user_item.values
user_means = np.true_divide(R.sum(1), (R!=0).sum(1), where=(R!=0).sum(1)!=0)
global_mean = R[R!=0].mean() if np.any(R!=0) else 3.0
user_means = np.where(np.isnan(user_means), global_mean, user_means)
R_centered = R - user_means.reshape(-1,1)

# SVD
svd = TruncatedSVD(n_components=50, random_state=42)
R_transformed = svd.fit_transform(R_centered)
VT = svd.components_
R_approx_centered = np.dot(R_transformed, VT)
R_pred = R_approx_centered + user_means.reshape(-1,1)

# Predicted ratings DataFrame
R_pred_df = pd.DataFrame(R_pred, index=user_item.index, columns=user_item.columns)

def recommend_collaborative(user_id, top_n=10):
    """Recommend top movies for a user using collaborative filtering"""
    if user_id not in R_pred_df.index:
        print(f"User {user_id} not found.")
        return pd.DataFrame()
    
    preds = R_pred_df.loc[user_id]
    user_rated = user_item.loc[user_id]
    unrated_mask = user_rated == 0
    
    candidates = preds[unrated_mask]
    top_movies = candidates.sort_values(ascending=False).head(top_n)
    
    results = pd.DataFrame({
        'movie_id': top_movies.index,
        'predicted_rating': top_movies.values
    }).reset_index(drop=True)
    
    return results.merge(df.drop_duplicates('movie_id')[['movie_id','title','genre','director','year']],
                         on='movie_id', how='left')


# Movies similar to "Movie 25"
print("\nContent-based recommendations for 'Movie 25':")
print(recommend_content("Movie 25", top_n=10))

#Recommendations for User 50
print("\nCollaborative recommendations for User 50:")
print(recommend_collaborative(50, top_n=10))



Content-based recommendations for 'Movie 25':
      movie_id       title    genre      director  year
458        459   Movie 459   Sci-Fi  Director 315  1995
1160      1161  Movie 1161   Sci-Fi  Director 315  2009
818        819   Movie 819   Sci-Fi   Director 25  1981
268        269   Movie 269  Romance  Director 315  2005
314        315   Movie 315   Horror   Director 83  2015
150        151   Movie 151   Horror   Director 25  1986
469        470   Movie 470   Action   Director 25  2005
2191      2192  Movie 2192   Horror  Director 315  1992
3411      3412  Movie 3412   Horror  Director 315  2020
4953      4954  Movie 4954   Horror  Director 315  1987

Collaborative recommendations for User 50:
   movie_id  predicted_rating       title     genre      director  year
0      3383          0.130231  Movie 3383    Sci-Fi  Director 223  1987
1      3806          0.124472  Movie 3806     Drama   Director 56  1996
2      2038          0.118713  Movie 2038  Thriller  Director 197  1987
3    

In [10]:
# Movies similar to 'Movie 2398'
print("\nContent-based recommendations for 'Movie 2398':")
print(recommend_content("Movie 2398", top_n=10))

#Recommendations for User 999
print("\nCollaborative recommendations for User 999:")
print(recommend_collaborative(999, top_n=10))



Content-based recommendations for 'Movie 2398':
      movie_id       title     genre      director  year
2874      2875  Movie 2875    Action   Director 30  1980
147        148   Movie 148   Romance   Director 30  1995
29          30    Movie 30    Sci-Fi  Director 378  1996
2840      2841  Movie 2841    Horror   Director 30  2015
4774      4775  Movie 4775    Horror   Director 30  1984
859        860   Movie 860    Comedy   Director 30  1984
3434      3435  Movie 3435     Drama   Director 30  2018
2266      2267  Movie 2267  Thriller   Director 30  2020
3302      3303  Movie 3303  Thriller   Director 30  2011
4409      4410  Movie 4410  Thriller   Director 30  2019

Collaborative recommendations for User 999:
   movie_id  predicted_rating       title     genre      director  year
0       827          0.012123   Movie 827    Sci-Fi  Director 416  2000
1      3821          0.011852  Movie 3821  Thriller  Director 321  2008
2       244          0.011538   Movie 244    Action  Director 3

In [9]:
df[df.genre=='Action'].sort_values(by='user_id',ascending=False)

Unnamed: 0,movie_id,title,genre,director,year,duration_min,avg_rating,user_id,user_rating,soup
2397,2398,Movie 2398,Action,Director 30,2024,144,3.4,999,2.3,Action Director 30 Movie 2398
834,835,Movie 835,Action,Director 120,2004,94,3.1,997,2.2,Action Director 120 Movie 835
2255,2256,Movie 2256,Action,Director 326,1987,115,3.3,994,4.3,Action Director 326 Movie 2256
4760,4761,Movie 4761,Action,Director 10,1980,111,1.3,994,2.8,Action Director 10 Movie 4761
1446,1447,Movie 1447,Action,Director 345,2013,119,1.6,992,2.8,Action Director 345 Movie 1447
...,...,...,...,...,...,...,...,...,...,...
1541,1542,Movie 1542,Action,Director 219,2009,93,2.0,9,1.5,Action Director 219 Movie 1542
4385,4386,Movie 4386,Action,Director 477,2018,105,3.3,8,1.4,Action Director 477 Movie 4386
103,104,Movie 104,Action,Director 181,2022,102,2.7,6,3.4,Action Director 181 Movie 104
4335,4336,Movie 4336,Action,Director 330,2008,107,4.7,2,2.6,Action Director 330 Movie 4336


In [2]:
df

Unnamed: 0,movie_id,title,genre,director,year,duration_min,avg_rating,user_id,user_rating,soup
0,1,Movie 1,Thriller,Director 359,1981,170,4.9,835,2.8,Thriller Director 359 Movie 1
1,2,Movie 2,Horror,Director 91,2022,177,1.5,312,2.6,Horror Director 91 Movie 2
2,3,Movie 3,Sci-Fi,Director 346,1993,90,2.3,808,4.3,Sci-Fi Director 346 Movie 3
3,4,Movie 4,Thriller,Director 206,2001,80,4.2,765,2.5,Thriller Director 206 Movie 4
4,5,Movie 5,Drama,Director 17,2010,92,2.1,669,1.3,Drama Director 17 Movie 5
...,...,...,...,...,...,...,...,...,...,...
4995,4996,Movie 4996,Sci-Fi,Director 468,2019,173,2.3,112,5.0,Sci-Fi Director 468 Movie 4996
4996,4997,Movie 4997,Romance,Director 448,2012,142,3.5,368,4.4,Romance Director 448 Movie 4997
4997,4998,Movie 4998,Drama,Director 409,2013,105,2.1,102,2.0,Drama Director 409 Movie 4998
4998,4999,Movie 4999,Drama,Director 412,2023,174,4.7,594,1.8,Drama Director 412 Movie 4999
