# CineMind: Hybrid & Mood-Based Movie Recommandation System

### Import Libraries

In [29]:
import numpy as np
import pandas as pd
import ast
import pickle


### Load Datasets

In [30]:

movies = pd.read_csv("tmdb_5000_movies.csv",low_memory=False)
credits = pd.read_csv("tmdb_5000_credits.csv",low_memory=False)


In [31]:
print(movies.columns)
print(credits.columns)



Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')
Index(['movie_id', 'title', 'cast', 'crew', 'Unnamed: 4', 'Unnamed: 5',
       'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9',
       ...
       'Unnamed: 1255', 'Unnamed: 1256', 'Unnamed: 1257', 'Unnamed: 1258',
       'Unnamed: 1259', 'Unnamed: 1260', 'Unnamed: 1261', 'Unnamed: 1262',
       'Unnamed: 1263', 'Unnamed: 1264'],
      dtype='object', length=1265)


In [32]:
credits = credits.loc[:, ~credits.columns.str.contains('^Unnamed')]


In [33]:
print(credits.columns)
print(credits.shape)


Index(['movie_id', 'title', 'cast', 'crew'], dtype='object')
(4813, 4)


### Clean & Merge Data

In [34]:
# Rename credits columns
credits.columns = ['movie_id', 'title', 'cast', 'crew']
# Merge datasets on title
movies = movies.merge(credits, on='title')
movies.head(2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."


In [35]:
movies.isnull().sum()

budget                     0
genres                     0
homepage                3096
id                         0
keywords                   0
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
movie_id                   0
cast                       0
crew                       4
dtype: int64

In [36]:
import ast
import pandas as pd


### Extract genres, cast, keywords, director

In [37]:
def extract_names(obj, limit=3):
    if pd.isna(obj):
        return ""
    try:
        if isinstance(obj, str):
            obj = ast.literal_eval(obj)
        return " ".join([i.get('name', '') for i in obj[:limit]])
    except:
        return ""

movies['genres'] = movies['genres'].apply(extract_names)
movies['keywords'] = movies['keywords'].apply(extract_names)
movies['cast'] = movies['cast'].apply(extract_names)
def get_director(obj):
    if pd.isna(obj):
        return ""
    try:
        if isinstance(obj, str):
            obj = ast.literal_eval(obj)
        for item in obj:
            if item.get('job') == 'Director':
                return item.get('name', '')
        return ""
    except:
        return ""

movies['director'] = movies['crew'].apply(get_director)



### Create TAGS column

In [38]:
movies['overview'] = movies['overview'].fillna("")

movies['tags'] = (
    movies['overview'] + " " +
    movies['genres'] + " " +
    movies['keywords'] + " " +
    movies['cast'] + " " +
    movies['director']
)


In [39]:
movies['tags'] 

0       In the 22nd century, a paraplegic Marine is di...
1       Captain Barbossa, long believed to be dead, ha...
2       A cryptic message from Bond’s past sends him o...
3       Following the death of District Attorney Harve...
4       John Carter is a war-weary, former military ca...
                              ...                        
4804    El Mariachi just wants to play his guitar and ...
4805    A newlywed couple's honeymoon is upended by th...
4806    "Signed, Sealed, Delivered" introduces a dedic...
4807    When ambitious New York attorney Sam is sent t...
4808    Ever since the second grade when he first saw ...
Name: tags, Length: 4809, dtype: object

### Text Vectorization + Similarity Matrix

In [40]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

cv = CountVectorizer(max_features=5000, stop_words='english')
vector = cv.fit_transform(movies['tags']).toarray()

similarity = cosine_similarity(vector)



### Content-Based Recommendation Function


In [41]:
def recommend(movie_title, n=10):
    index = movies[movies['title'] == movie_title].index[0]
    distances = similarity[index]
    movie_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:n+1]

    recommended_movies = []
    for i in movie_list:
        recommended_movies.append(movies.iloc[i[0]].title)

    return recommended_movies



### HYBRID RECOMMENDATION FUNCTION

In [42]:
def hybrid_recommend(movie_title, top_n=10):
    index = movies[movies['title'] == movie_title].index[0]

    # 1. Content-based similarity score
    content_scores = similarity[index]

    # 2. Popularity score (normalize)
    pop = movies['popularity']
    pop_norm = (pop - pop.min()) / (pop.max() - pop.min())

    # 3. Vote average score (normalize)
    vote = movies['vote_average']
    vote_norm = (vote - vote.min()) / (vote.max() - vote.min())

    # Final hybrid score
    final_score = (
        0.6 * content_scores +
        0.25 * vote_norm +
        0.15 * pop_norm
    )

    # Best movie indexes
    top_idx = final_score.argsort()[::-1][1:top_n+1]

    return movies.iloc[top_idx][['title', 'vote_average', 'popularity']]


### Trending Movies Function

In [43]:
def get_trending_movies(n=20):
    return movies.sort_values("popularity", ascending=False).head(n)


### Top Rated Movies Function

In [44]:
def get_top_rated_movies(n=20):
    return movies.sort_values("vote_average", ascending=False).head(n)



### Popular Movies Function

In [45]:
def get_popular_movies(n=20):
    return movies.sort_values("vote_count", ascending=False).head(n)


### Save Similarity for Streamlit

In [49]:
pickle.dump(similarity, open("similarity.pkl", "wb"))
movies.to_pickle("movies_df.pkl")

In [50]:
print(movies[["movie_id", "title"]].head())


  movie_id                                     title
0    19995                                    Avatar
1      285  Pirates of the Caribbean: At World's End
2   206647                                   Spectre
3    49026                     The Dark Knight Rises
4    49529                               John Carter
