# Import Required Libraries
Import the required libraries: pandas, numpy, matplotlib, SciPy (sparse matrices), and scikit-learn (nearest-neighbour search).

In [None]:
# Import Required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Load Dataset
Load the ratings and movies datasets using pandas.

In [None]:
# Load Dataset
# Locations of the two CSV files.
# NOTE(review): these are absolute, machine-specific paths — adjust before running elsewhere.
ratings_csv = r"C:\Users\yagiz\Downloads\ml-latest\ml-latest\ratings.csv"
movies_csv = r"C:\Users\yagiz\Downloads\ml-latest\ml-latest\movies.csv"

# Read each file into its own DataFrame
ratings = pd.read_csv(ratings_csv)
movies = pd.read_csv(movies_csv)

# Preview the first rows of both tables
ratings.head(), movies.head()

# Create User-Movie Matrix
Create a sparse user-movie matrix using the ratings data.

In [None]:
# Build the sparse user-movie matrix from (value, (row, column)) triples:
# rows are indexed by userId, columns by movieId, entries are the ratings
row_ids = ratings['userId']
col_ids = ratings['movieId']
rating_values = ratings['rating']
user_movie_matrix_sparse = csr_matrix((rating_values, (row_ids, col_ids)))

# Show the dimensions of the resulting matrix
user_movie_matrix_sparse.shape

# Build Recommendation Model
Build a recommendation model using NearestNeighbors from sklearn.

In [None]:
# Build Recommendation Model

# Brute-force nearest-neighbour index over the rows (users) of the
# user-movie matrix, compared by cosine distance; fit() returns the estimator
model_knn = NearestNeighbors(metric='cosine', algorithm='brute').fit(user_movie_matrix_sparse)

# Function to recommend movies
def recommend_movies(user_id, model_knn, user_movie_matrix_sparse, movies, top_n):
    """Recommend unseen movies to a user from the ratings of similar users.

    Every movie is scored with the similarity-weighted average of the ratings
    given by the user's nearest neighbours (a missing rating counts as 0).

    Args:
        user_id: Row index of the target user in ``user_movie_matrix_sparse``.
        model_knn: Fitted neighbour model exposing
            ``kneighbors(X, n_neighbors=...)`` that returns
            ``(distances, indices)``; ``1 - distance`` is used as similarity.
        user_movie_matrix_sparse: Sparse rating matrix with one row per user
            and one column per movie id.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
            A ``year`` column is carried into the result when present.
        top_n: Number of recommendations to return. The same value is used
            as the number of neighbours requested from ``model_knn``.

    Returns:
        DataFrame with at most ``top_n`` rows ordered by descending score.
        Columns are ``movieId``, ``0`` (the score), ``title`` and, when
        ``movies`` has it, ``year``.
    """
    print(f"Generating recommendations for user {user_id}...")

    # Ratings of the target user; a stored value of 0 means "not rated"
    user_row = user_movie_matrix_sparse[user_id]
    user_ratings = user_row.toarray().flatten()
    unseen_movies = (user_ratings == 0).nonzero()[0]

    # Nearest neighbours of the target user and their distances
    distances, indices = model_knn.kneighbors(user_row, n_neighbors=top_n)
    distances = np.asarray(distances).ravel()
    indices = np.asarray(indices).ravel()

    # Exclude the target user by id instead of by `distance > 0`: the
    # self-distance can come back as a tiny positive float, and a distance
    # test also throws away other users whose ratings are identical.
    is_other_user = indices != user_id
    neighbour_ids = indices[is_other_user]
    similarities = 1 - distances[is_other_user]

    # Similarity-weighted sum of the neighbours' ratings, per movie
    weighted_sum = user_movie_matrix_sparse[neighbour_ids].T.dot(similarities)
    similarity_sum = similarities.sum()

    # Normalise by the total similarity; with no usable neighbours the
    # division yields NaN/inf, which is mapped to a score of 0
    with np.errstate(divide='ignore', invalid='ignore'):
        recommendations = np.true_divide(weighted_sum, similarity_sum)
        recommendations[~np.isfinite(recommendations)] = 0

    # Keep only unseen movies, best score first
    recommendations = pd.Series(recommendations[unseen_movies], index=unseen_movies).sort_values(ascending=False)

    # Attach the title (and the release year when `movies` provides it);
    # the inner merge drops column indices that are not real movie ids
    movie_columns = ['movieId', 'title']
    if 'year' in movies.columns:
        movie_columns.append('year')
    recommendations = (
        recommendations.reset_index()
        .rename(columns={'index': 'movieId'})
        .merge(movies[movie_columns], on='movieId')
    )

    return recommendations.head(top_n)

# Extract the four-digit number in parentheses from each title as the year;
# titles without one get 0 so the column can be stored as int.
# expand=False makes str.extract return a Series for the single group.
movies['year'] = movies['title'].str.extract(r'\((\d{4})\)', expand=False)
movies['year'] = movies['year'].fillna(0).astype(int)

# Example usage
target_user = 1  # Example user ID
top_n = 10  # Number of recommendations
recommended_movies = recommend_movies(target_user, model_knn, user_movie_matrix_sparse, movies, top_n)

# The result may carry only movieId/title; attach the year when it is
# missing so the column selection below cannot raise a KeyError
if 'year' not in recommended_movies.columns:
    recommended_movies = recommended_movies.merge(movies[['movieId', 'year']], on='movieId', how='left')
recommended_movies[['movieId', 'title', 'year']]

# Generate Movie Recommendations
Define a function to generate movie recommendations for a given user.

In [None]:
# Generate Movie Recommendations

# Function to recommend movies
def recommend_movies(user_id, model_knn, user_movie_matrix_sparse, movies, top_n):
    """Recommend unseen movies to a user from the ratings of similar users.

    Every movie is scored with the similarity-weighted average of the ratings
    given by the user's nearest neighbours (a missing rating counts as 0).

    Args:
        user_id: Row index of the target user in ``user_movie_matrix_sparse``.
        model_knn: Fitted neighbour model exposing
            ``kneighbors(X, n_neighbors=...)`` that returns
            ``(distances, indices)``; ``1 - distance`` is used as similarity.
        user_movie_matrix_sparse: Sparse rating matrix with one row per user
            and one column per movie id.
        movies: DataFrame with at least ``movieId`` and ``title`` columns.
            A ``year`` column is carried into the result when present.
        top_n: Number of recommendations to return. The same value is used
            as the number of neighbours requested from ``model_knn``.

    Returns:
        DataFrame with at most ``top_n`` rows ordered by descending score.
        Columns are ``movieId``, ``0`` (the score), ``title`` and, when
        ``movies`` has it, ``year``.
    """
    print(f"Generating recommendations for user {user_id}...")

    # Ratings of the target user; a stored value of 0 means "not rated"
    user_row = user_movie_matrix_sparse[user_id]
    user_ratings = user_row.toarray().flatten()
    unseen_movies = (user_ratings == 0).nonzero()[0]

    # Nearest neighbours of the target user and their distances
    distances, indices = model_knn.kneighbors(user_row, n_neighbors=top_n)
    distances = np.asarray(distances).ravel()
    indices = np.asarray(indices).ravel()

    # Exclude the target user by id instead of by `distance > 0`: the
    # self-distance can come back as a tiny positive float, and a distance
    # test also throws away other users whose ratings are identical.
    is_other_user = indices != user_id
    neighbour_ids = indices[is_other_user]
    similarities = 1 - distances[is_other_user]

    # Similarity-weighted sum of the neighbours' ratings, per movie
    weighted_sum = user_movie_matrix_sparse[neighbour_ids].T.dot(similarities)
    similarity_sum = similarities.sum()

    # Normalise by the total similarity; with no usable neighbours the
    # division yields NaN/inf, which is mapped to a score of 0
    with np.errstate(divide='ignore', invalid='ignore'):
        recommendations = np.true_divide(weighted_sum, similarity_sum)
        recommendations[~np.isfinite(recommendations)] = 0

    # Keep only unseen movies, best score first
    recommendations = pd.Series(recommendations[unseen_movies], index=unseen_movies).sort_values(ascending=False)

    # Attach the title (and the release year when `movies` provides it);
    # the inner merge drops column indices that are not real movie ids
    movie_columns = ['movieId', 'title']
    if 'year' in movies.columns:
        movie_columns.append('year')
    recommendations = (
        recommendations.reset_index()
        .rename(columns={'index': 'movieId'})
        .merge(movies[movie_columns], on='movieId')
    )

    return recommendations.head(top_n)

# Example usage
target_user = 1  # Example user ID
top_n = 10  # Number of recommendations
recommended_movies = recommend_movies(target_user, model_knn, user_movie_matrix_sparse, movies, top_n)

# The result may carry only movieId/title; attach the year when it is
# missing so the column selection below cannot raise a KeyError
if 'year' not in recommended_movies.columns:
    recommended_movies = recommended_movies.merge(movies[['movieId', 'year']], on='movieId', how='left')
recommended_movies[['movieId', 'title', 'year']]

# Define Evaluation Metrics
Define functions to calculate recall@k and precision@k.

In [None]:
# Define Evaluation Metrics

def recall_at_k(recommended_movies, relevant_movies, k):
    """Return the share of relevant movies found in the first k recommendations.

    Args:
        recommended_movies: Sliceable sequence of recommended movie ids, best first.
        relevant_movies: Sized collection of movie ids counted as relevant.
        k: Number of leading recommendations to consider.

    Returns:
        Hits divided by the number of relevant movies, or 0 when there are
        no relevant movies.
    """
    top_k = set(recommended_movies[:k])
    hits = top_k.intersection(relevant_movies)
    total_relevant = len(relevant_movies)
    if total_relevant > 0:
        return len(hits) / total_relevant
    return 0

def precision_at_k(recommended_movies, relevant_movies, k):
    """Return the share of the first k recommendations that are relevant.

    Args:
        recommended_movies: Sliceable sequence of recommended movie ids, best first.
        relevant_movies: Iterable of movie ids counted as relevant.
        k: Number of leading recommendations to consider; also the divisor,
            even when fewer than k recommendations are available.

    Returns:
        Number of distinct hits in the top k divided by k.
    """
    top_k = set(recommended_movies[:k])
    hits = top_k.intersection(relevant_movies)
    return len(hits) / k

def calculate_metrics_at_k_100_users(user_ids, user_movie_matrix_sparse, recommendations, k_values):
    """Average Recall@K and Precision@K over a set of users.

    Args:
        user_ids: Iterable of user ids; each is used as a row index into
            ``user_movie_matrix_sparse`` and as a key into ``recommendations``.
        user_movie_matrix_sparse: Sparse rating matrix; every column with a
            rating greater than 0 in a user's row counts as relevant.
        recommendations: Mapping of user id to that user's recommendations,
            best first. A value may be a DataFrame with a ``movieId`` column,
            a single integer movie id, or an iterable of sequences whose
            first element is the movie id.
        k_values: Iterable of cut-offs K to evaluate.

    Returns:
        Dict mapping each K to ``{"recall": mean, "precision": mean}``.

    NOTE(review): if the recommendations were generated from this same matrix
    and exclude already-rated movies, they cannot overlap the relevant set and
    every score is 0 — confirm that held-out ratings are used.
    """
    metrics_scores = {k: {"recall": [], "precision": []} for k in k_values}

    for user_id in user_ids:
        # Movies the user has rated are treated as the relevant set
        user_ratings = user_movie_matrix_sparse[user_id].toarray().flatten()
        relevant_movies = np.where(user_ratings > 0)[0].tolist()

        # Normalise the user's recommendations to a flat list of movie ids
        user_recs = recommendations[user_id]
        if isinstance(user_recs, pd.DataFrame):
            # Iterating a DataFrame yields column labels, not rows, so the
            # ids must be read from the movieId column explicitly
            recommended_movies = user_recs['movieId'].tolist()
        elif isinstance(user_recs, (int, np.integer)):
            recommended_movies = [int(user_recs)]
        else:
            recommended_movies = [movie[0] for movie in user_recs]

        # Calculate Recall@K and Precision@K
        for k in k_values:
            recall = recall_at_k(recommended_movies, relevant_movies, k)
            precision = precision_at_k(recommended_movies, relevant_movies, k)
            metrics_scores[k]["recall"].append(recall)
            metrics_scores[k]["precision"].append(precision)

    # Calculate the average Recall and Precision for each K
    mean_metrics_scores = {
        k: {
            "recall": np.mean(metrics_scores[k]["recall"]),
            "precision": np.mean(metrics_scores[k]["precision"])
        }
        for k in k_values
    }
    return mean_metrics_scores

# Calculate Metrics for Recommendations
Calculate recall and precision for the recommendations generated for a sample of users.

In [None]:
# Calculate Metrics for Recommendations

# Define user IDs and K values for evaluation
user_ids = ratings['userId'].unique()[:100]  # first 100 distinct user IDs, in order of appearance
k_values = [1, 5, 10, 20]

# Generate recommendations for the sample users.
# recommend_movies returns a DataFrame, while the metrics helper reads the
# movie id from position 0 of every entry, so store (movieId,) tuples.
recommendations = {}
for user_id in user_ids:
    recommended = recommend_movies(user_id, model_knn, user_movie_matrix_sparse, movies, max(k_values))
    recommendations[user_id] = [(movie_id,) for movie_id in recommended['movieId']]

# NOTE(review): recommend_movies only returns movies the user has no rating
# for, while the metrics treat the user's rated movies in the same matrix as
# relevant, so the two sets never overlap and every score is 0. Evaluate
# against held-out ratings to get meaningful numbers.

# Calculate metrics
metrics_scores = calculate_metrics_at_k_100_users(user_ids, user_movie_matrix_sparse, recommendations, k_values)

# Display metrics
metrics_scores

# Plot Metrics
Plot the recall@k and precision@k metrics using matplotlib.

In [None]:
# Plot Metrics

def plot_metrics_at_k(metrics_scores):
    """Draw Recall@K and Precision@K as two lines on a single figure.

    Args:
        metrics_scores: Mapping of K to a dict holding "recall" and
            "precision" values; the keys become the x-axis ticks.
    """
    k_values = list(metrics_scores.keys())

    # Collect the y-values of both curves before any drawing starts
    scores_by_metric = {
        metric: [metrics_scores[k][metric] for k in k_values]
        for metric in ("recall", "precision")
    }

    plt.figure(figsize=(10, 6))

    # (metric key, legend label, line style, colour) for each curve, recall first
    curves = (
        ("recall", 'Recall@K', '-', 'g'),
        ("precision", 'Precision@K', '--', 'b'),
    )
    for metric, label, linestyle, color in curves:
        plt.plot(k_values, scores_by_metric[metric], marker='o', linestyle=linestyle, color=color, label=label)

    plt.title('Recall@K and Precision@K Plot')
    plt.xlabel('K Values')
    plt.ylabel('Score')
    plt.grid(alpha=0.5)
    plt.legend()
    plt.xticks(k_values)
    plt.show()

# Plot the averaged Recall@K and Precision@K values held in metrics_scores
plot_metrics_at_k(metrics_scores)