In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Load datasets
# `movies` is read below for its 'movieId', 'title' and 'genres' columns;
# `ratings` for its 'userId', 'movieId' and 'rating' columns.
# NOTE(review): both paths are absolute and point into one user's Windows
# profile, so this cell only runs on a machine where these exact files exist.
movies = pd.read_csv(r"C:\Users\udayv\OneDrive\Desktop\edurekha project\castrophone project\Project Dataset\movies.csv")
ratings = pd.read_csv(r"C:\Users\udayv\OneDrive\Desktop\edurekha project\castrophone project\Project Dataset\ratings.csv")

# Attach each rating to its movie's metadata (inner join on movieId, so
# ratings for movies missing from `movies` are dropped).
movie_ratings = ratings.merge(movies, on='movieId')

# Per-movie summary: mean rating and number of ratings received.
movie_stats = (
    movie_ratings.groupby('movieId')['rating']
    .agg(avg_rating='mean', num_reviews='count')
    .reset_index()
)

# Define Popularity-Based Recommender
def popularity_based_recommendation(genre, min_reviews, num_recommendations):
    """Return the best-rated movies of a genre that have enough reviews.

    Args:
        genre: Text to look for inside the '|'-separated ``genres`` column.
            It is matched literally (not as a regular expression) and
            case-insensitively, so input such as ``"sci-fi"`` or ``"("``
            is safe.
        min_reviews: Minimum number of ratings a movie needs to qualify.
        num_recommendations: Maximum number of rows to return. Zero or a
            negative value yields an empty result.

    Returns:
        DataFrame with columns ``title``, ``avg_rating`` and ``num_reviews``,
        ordered by average rating (highest first); ties are broken by the
        larger review count so the output is deterministic.
    """
    # regex=False: the genre comes straight from user input, and characters
    # like '(' or '+' would otherwise be parsed as a regex and may raise.
    genre_mask = movies['genres'].str.contains(
        genre, case=False, na=False, regex=False
    )
    genre_movie_stats = pd.merge(movies[genre_mask], movie_stats, on='movieId')

    popular_movies = genre_movie_stats[
        genre_movie_stats['num_reviews'] >= min_reviews
    ]
    popular_movies = popular_movies.sort_values(
        by=['avg_rating', 'num_reviews'], ascending=[False, False]
    )

    # head() with a negative n means "all but the last n rows", which is
    # never what a caller asking for recommendations wants.
    limit = max(num_recommendations, 0)
    return popular_movies[['title', 'avg_rating', 'num_reviews']].head(limit)

# Define Content-Based Recommender with updated error handling
def _locate_title(movie_title):
    """Return the row position of ``movie_title`` in ``movies``, or None.

    An exact match wins. Otherwise the comparison ignores case, surrounding
    whitespace and a trailing "(YYYY)" year suffix, so "ghost" finds
    "Ghost (1990)".
    """
    titles = movies['title'].fillna('').astype(str)

    exact = np.flatnonzero((titles == movie_title).to_numpy())
    if exact.size:
        return int(exact[0])

    wanted = str(movie_title).strip().casefold()
    if not wanted:
        return None

    normalised = titles.str.strip().str.casefold()
    without_year = normalised.str.replace(r'\s*\(\d{4}\)$', '', regex=True)
    loose = np.flatnonzero(
        ((normalised == wanted) | (without_year == wanted)).to_numpy()
    )
    return int(loose[0]) if loose.size else None


def content_based_recommendation(movie_title, num_recommendations):
    """Recommend movies whose genre set is most similar to a given movie.

    Genres are split on '|' and turned into count vectors; candidates are
    ranked by cosine similarity to the queried movie's vector, with ties
    kept in dataset order.

    Args:
        movie_title: Title to look up. An exact match is preferred; failing
            that, case and a trailing "(YYYY)" suffix are ignored.
        num_recommendations: Maximum number of titles to return. Zero or a
            negative value yields an empty result.

    Returns:
        DataFrame with a single ``title`` column. It is empty (and a message
        is printed) when the movie cannot be found.
    """
    no_match = pd.DataFrame(columns=['title'])  # same shape as a real result

    query_pos = _locate_title(movie_title)
    if query_pos is None:
        print(f"Movie '{movie_title}' not found in the dataset.")
        return no_match
    if num_recommendations <= 0:
        return no_match

    # token_pattern=None: a custom tokenizer is supplied, so the default
    # pattern is unused; passing None avoids sklearn's warning about it.
    count_vectorizer = CountVectorizer(
        tokenizer=lambda text: text.split('|'), token_pattern=None
    )
    # Missing genres would make the vectorizer raise, so treat them as empty.
    genre_matrix = count_vectorizer.fit_transform(movies['genres'].fillna(''))

    # Only the queried movie's row is needed; the full N x N matrix would be
    # built and thrown away.
    similarity = cosine_similarity(genre_matrix[query_pos], genre_matrix).ravel()

    # Stable sort keeps dataset order among equally similar movies. The
    # queried movie is removed by position: other movies with identical
    # genres also score 1.0, so it is not guaranteed to be ranked first.
    ranked = np.argsort(-similarity, kind='stable')
    ranked = ranked[ranked != query_pos][:num_recommendations]
    return movies.iloc[ranked][['title']]

# Define Collaborative-Based Recommender
def collaborative_based_recommendation(user_id, num_recommendations, k):
    """Recommend movies liked by the users most similar to ``user_id``.

    A user x movie rating matrix is built (unrated = 0), the ``k`` nearest
    other users by cosine distance are found, and the movies the target
    user has not rated are ranked by those neighbours' mean rating.

    Args:
        user_id: ``userId`` value of the user to recommend for.
        num_recommendations: Maximum number of titles to return.
        k: Number of similar users to average over. Values larger than the
            number of other users are clamped.

    Returns:
        DataFrame with a single ``title`` column, best recommendation first.
        It is empty when the user is unknown (a message is printed), when
        ``num_recommendations`` or ``k`` is not positive, or when no
        neighbour rated anything the user has not already rated.
    """
    no_result = pd.DataFrame(columns=['title'])

    if user_id not in ratings['userId'].values:
        print(f"User ID {user_id} not found in the dataset.")
        return no_result
    if num_recommendations <= 0 or k <= 0:
        return no_result

    user_movie_matrix = ratings.pivot(
        index='userId', columns='movieId', values='rating'
    ).fillna(0)
    n_users = len(user_movie_matrix)
    if n_users < 2:
        return no_result  # nobody else to compare against

    user_pos = user_movie_matrix.index.get_loc(user_id)
    user_ratings = user_movie_matrix.iloc[user_pos]

    # Ask for one extra neighbour because the user is their own nearest
    # neighbour (distance 0), and never request more rows than exist.
    n_neighbors = min(k + 1, n_users)
    knn = NearestNeighbors(
        metric='cosine', algorithm='brute', n_neighbors=n_neighbors, n_jobs=-1
    )
    features = user_movie_matrix.to_numpy()
    knn.fit(features)
    _, indices = knn.kneighbors(features[[user_pos]])

    # kneighbors returns row positions in the fitted matrix, not userId
    # labels, so they must be used with .iloc.
    neighbour_positions = [
        pos for pos in indices.ravel() if pos != user_pos
    ][:k]
    similar_ratings = user_movie_matrix.iloc[neighbour_positions].mean(axis=0)

    # Only movies the user has not rated, and only those at least one
    # neighbour actually rated (a zero mean carries no signal).
    candidates = similar_ratings[user_ratings == 0]
    candidates = candidates[candidates > 0]
    top = candidates.sort_values(ascending=False, kind='mergesort').head(
        num_recommendations
    )

    # Return titles in ranking order rather than in dataset order.
    rank = {movie_id: position for position, movie_id in enumerate(top.index)}
    picked = movies[movies['movieId'].isin(top.index)]
    ordered = picked.assign(_rank=picked['movieId'].map(rank)).sort_values(
        '_rank', kind='mergesort'
    )
    return ordered[['title']]

# User Interface for Testing
def main():
    """Run the interactive console menu until the user picks "Exit".

    Every pass prints the menu, reads a numeric choice, collects the inputs
    the chosen recommender needs and prints what it returns. A ValueError
    raised anywhere during a pass (for example by a non-numeric answer) is
    reported and the menu is shown again.
    """
    menu_text = "1: Popularity-Based\n2: Content-Based\n3: Collaborative-Based\n4: Exit"

    while True:
        print("\nChoose Recommendation System:")
        print(menu_text)

        try:
            selection = int(input("Enter choice (1, 2, 3, or 4 to exit): "))

            # Handle the exit option first so the rest reads as a dispatch.
            if selection == 4:
                print("Exiting the recommendation system.")
                break

            if selection == 1:
                wanted_genre = input("Enter Genre (e.g., Comedy): ")
                review_floor = int(input("Enter Minimum Reviews Threshold: "))
                how_many = int(input("Enter Number of Recommendations: "))
                print(popularity_based_recommendation(wanted_genre, review_floor, how_many))
            elif selection == 2:
                seed_title = input("Enter Movie Title (e.g., Toy Story): ")
                how_many = int(input("Enter Number of Recommendations: "))
                print(content_based_recommendation(seed_title, how_many))
            elif selection == 3:
                target_user = int(input("Enter User ID: "))
                how_many = int(input("Enter Number of Recommendations: "))
                neighbour_count = int(input("Enter Threshold for Similar Users: "))
                print(collaborative_based_recommendation(target_user, how_many, neighbour_count))
            else:
                print("Invalid choice, please enter a number between 1 and 4.")

        except ValueError:
            print("Invalid input. Please enter a valid number.")


if __name__ == "__main__":
    main()



Choose Recommendation System:
1: Popularity-Based
2: Content-Based
3: Collaborative-Based
4: Exit


Enter choice (1, 2, 3, or 4 to exit):  2
Enter Movie Title (e.g., Toy Story):  Ghost
Enter Number of Recommendations:  3


Movie 'Ghost' not found in the dataset.
Empty DataFrame
Columns: [title]
Index: []

Choose Recommendation System:
1: Popularity-Based
2: Content-Based
3: Collaborative-Based
4: Exit


Enter choice (1, 2, 3, or 4 to exit):  2
Enter Movie Title (e.g., Toy Story):  Good Will Hunting
Enter Number of Recommendations:  3


Movie 'Good Will Hunting' not found in the dataset.
Empty DataFrame
Columns: [title]
Index: []

Choose Recommendation System:
1: Popularity-Based
2: Content-Based
3: Collaborative-Based
4: Exit


Enter choice (1, 2, 3, or 4 to exit):  1
Enter Genre (e.g., Comedy):  comedy
Enter Minimum Reviews Threshold:  4
Enter Number of Recommendations:  6


                                            title  avg_rating  num_reviews
3138                 Louis C.K.: Shameless (2007)    4.611111            9
3137                 Louis C.K.: Chewed Up (2008)    4.555556            9
2825          Blackadder's Christmas Carol (1988)    4.500000            5
3011           Ricky Gervais Live: Animals (2003)    4.500000            5
3465  Louis C.K.: Live at The Comedy Store (2015)    4.500000            6
3441                            Wild Tales (2014)    4.500000            4

Choose Recommendation System:
1: Popularity-Based
2: Content-Based
3: Collaborative-Based
4: Exit
