## Import Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

## Load Dataset

In [3]:
ratings = pd.read_csv("ratings.csv")
movies = pd.read_csv("movies.csv")

df = pd.merge(ratings, movies, on="movieId")
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1,3,4.0,964981247,Grumpier Old Men (1995),Comedy|Romance
2,1,6,4.0,964982224,Heat (1995),Action|Crime|Thriller
3,1,47,5.0,964983815,Seven (a.k.a. Se7en) (1995),Mystery|Thriller
4,1,50,5.0,964982931,"Usual Suspects, The (1995)",Crime|Mystery|Thriller


## User-Movie Matrix (Collaborative Part)

In [4]:
user_movie_matrix = df.pivot_table(
    index='userId',
    columns='title',
    values='rating'
)

user_movie_matrix.fillna(0, inplace=True)

## Item-Based Similarity (Collaborative Filtering)

In [5]:
item_similarity = cosine_similarity(user_movie_matrix.T)

item_similarity_df = pd.DataFrame(
    item_similarity,
    index=user_movie_matrix.columns,
    columns=user_movie_matrix.columns
)

## Content-Based Similarity (Genres)

In [6]:
tfidf = TfidfVectorizer(stop_words='english')

movies['genres'] = movies['genres'].replace("(no genres listed)", "")
tfidf_matrix = tfidf.fit_transform(movies['genres'])

content_similarity = cosine_similarity(tfidf_matrix)

content_similarity_df = pd.DataFrame(
    content_similarity,
    index=movies['title'],
    columns=movies['title']
)

## Hybrid Recommendation Function (REAL WORLD CORE)

In [17]:
def hybrid_recommendation(user_id, movie_name, n=5, alpha=0.5):
    if movie_name not in item_similarity_df.columns:
        return "Movie not found"

    # Collaborative score
    collab_scores = item_similarity_df[movie_name]

    # Content score
    content_scores = content_similarity_df[movie_name]

    # Combine scores (Hybrid)
    hybrid_scores = (alpha * collab_scores) + ((1 - alpha) * content_scores)

    # Remove movies already watched
    watched = user_movie_matrix.loc[user_id]
    watched = watched[watched > 0].index

    recommendations = hybrid_scores.drop(watched).sort_values(ascending=False)
    return recommendations.head(n).index.tolist()

## Test Hybrid Recommendation

In [19]:
hybrid_recommendation(
    user_id=1,
    movie_name="Die Hard (1988)",
    n=5
)

['Die Hard (1988)',
 'Die Hard: With a Vengeance (1995)',
 'Ronin (1998)',
 'Kill Bill: Vol. 1 (2003)',
 'Bourne Supremacy, The (2004)']