In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
import numpy as np

# Load the raw ratings in long format (one row per user/movie rating).
ratings = pd.read_csv('movie_ratings.csv')  # Assumed to have columns: user_id, movie_id, rating


def _to_user_item_matrix(ratings_frame):
    """Pivot long-format ratings into a user x movie matrix.

    Rows are user_id values, columns are movie_id values, and cells hold the
    rating. User/movie pairs with no rating in `ratings_frame` are filled
    with 0.
    """
    wide = ratings_frame.pivot(index='user_id', columns='movie_id', values='rating')
    return wide.fillna(0)


# User-item matrix built from every available rating.
user_item_matrix = _to_user_item_matrix(ratings)

# Hold out 20% of the rating rows for testing; the fixed seed keeps the split reproducible.
train_data, test_data = train_test_split(ratings, test_size=0.2, random_state=42)

# Each matrix only has rows/columns for the users/movies present in its own
# subset, so the train and test matrices are not guaranteed to share a shape.
train_matrix = _to_user_item_matrix(train_data)
test_matrix = _to_user_item_matrix(test_data)


In [None]:
## Compute User-User Similarities

In [None]:
# Pairwise cosine similarity between the rows (users) of the training matrix.
user_similarity = cosine_similarity(train_matrix)

# Wrap the raw array in a DataFrame labelled by user id on both axes so that
# similarities can be looked up by id.
user_similarity_df = pd.DataFrame(
    data=user_similarity,
    index=train_matrix.index,
    columns=train_matrix.index,
)

# Preview the first rows of the similarity matrix.
print(user_similarity_df.head())


In [None]:
## Predict Ratings

In [None]:
# Function to predict ratings
def predict_ratings(user_item_matrix, user_similarity):
    """Predict ratings with mean-centred, similarity-weighted user-based CF.

    For every user the prediction is that user's mean rating plus the
    similarity-weighted average of all users' deviations from their own mean.

    Parameters
    ----------
    user_item_matrix : array-like of shape (n_users, n_items)
        Rating matrix; anything accepted by ``np.asarray`` (ndarray or
        DataFrame).
    user_similarity : array-like of shape (n_users, n_users)
        Pairwise user similarity matrix, rows ordered like `user_item_matrix`.

    Returns
    -------
    numpy.ndarray of shape (n_users, n_items)
        Predicted ratings. A user whose absolute similarities sum to zero
        receives their own mean rating for every item instead of NaN.

    Notes
    -----
    The per-user mean is taken over every column, so if the caller encodes
    "not rated" as 0 those zeros are included in the mean.
    """
    ratings = np.asarray(user_item_matrix, dtype=float)
    similarity = np.asarray(user_similarity, dtype=float)

    # keepdims keeps a column vector so it broadcasts across the item axis.
    mean_user_rating = ratings.mean(axis=1, keepdims=True)
    ratings_diff = ratings - mean_user_rating

    weighted_diff = similarity.dot(ratings_diff)
    similarity_norm = np.abs(similarity).sum(axis=1, keepdims=True)

    # Skip the division where the normaliser is zero (user similar to nobody);
    # the adjustment stays 0 there, so the prediction falls back to the mean
    # rather than becoming NaN/inf.
    adjustment = np.divide(
        weighted_diff,
        similarity_norm,
        out=np.zeros_like(weighted_diff),
        where=similarity_norm != 0,
    )
    return mean_user_rating + adjustment

# Run the predictor on the raw training array and the user-user similarities.
predictions = predict_ratings(train_matrix.to_numpy(), user_similarity)

# Re-attach the user ids (rows) and movie ids (columns) of the training matrix.
predictions_df = pd.DataFrame(
    data=predictions,
    index=train_matrix.index,
    columns=train_matrix.columns,
)
print(predictions_df.head())


In [None]:
## Recommend Movies

In [None]:
def recommend_movies(user_id, predictions_df, user_item_matrix, n=5):
    """Return the top-`n` predicted ratings for movies the user has not rated.

    Parameters
    ----------
    user_id : hashable
        Row label of the user in both `predictions_df` and `user_item_matrix`.
    predictions_df : pandas.DataFrame
        Predicted ratings, indexed by user id with one column per movie id.
    user_item_matrix : pandas.DataFrame
        Known ratings, indexed by user id with one column per movie id; a
        value of 0 means "not rated".
    n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Predicted ratings indexed by movie id, sorted from highest to lowest,
        restricted to movies whose known rating is 0.

    Raises
    ------
    KeyError
        If `user_id` is missing from either frame.
    """
    predicted_ratings = predictions_df.loc[user_id, :]

    # Align the known ratings to the predicted movies explicitly. A movie that
    # has a prediction but no column in user_item_matrix is treated as unrated
    # (0) instead of triggering pandas' "unalignable boolean Series" error.
    user_ratings = user_item_matrix.loc[user_id, :].reindex(
        predicted_ratings.index, fill_value=0
    )

    # Recommend only movies the user hasn't rated yet.
    not_yet_rated = user_ratings == 0
    return predicted_ratings[not_yet_rated].sort_values(ascending=False).head(n)

# Example: default-sized (n=5) recommendation list for the user with id 1.
# Known ratings come from the full user-item matrix, predictions from the
# training-based prediction frame.
recommendations = recommend_movies(1, predictions_df, user_item_matrix)
print("Recommended Movies:\n", recommendations)


In [None]:
## Content-Based Filtering Approach

In [None]:
# Example movie dataset
movies = pd.read_csv('movies.csv')  # Columns: movie_id, title, genre, director, etc.

# Example user preferences (assumed to be ratings)
user_preferences = {
    'Action': 5,
    'Comedy': 3,
    'Drama': 2
}


def _encode_genres(genre_field, genres):
    """Return a 0/1 list marking which of `genres` occur in `genre_field`.

    `genre_field` is expected to be a comma-separated string such as
    "Action,Comedy". Whitespace around each entry is ignored, so
    "Action, Comedy" is encoded the same way. A non-string value (e.g. the NaN
    pandas uses for a missing genre) yields an all-zero vector instead of
    raising. The output follows the iteration order of `genres`.
    """
    if not isinstance(genre_field, str):
        return [0] * len(genres)
    # Split once per movie and use a set for the membership checks.
    present = {token.strip() for token in genre_field.split(',')}
    return [1 if genre in present else 0 for genre in genres]


# Feature extraction (using genres as an example): one 0/1 entry per preferred
# genre, in the key order of `user_preferences`.
movies['genre_vector'] = movies['genre'].apply(
    lambda field: _encode_genres(field, user_preferences.keys())
)


In [None]:
## Calculate Similarities