In [1]:
!pip install scikit-learn

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from google.colab import files

uploaded = files.upload()

file_path = next(iter(uploaded))
data = pd.read_csv(file_path)

print(data.head())

data.dropna(subset=['name', 'genre', 'type', 'rating', 'members'], inplace=True)

genre_encoded = data['genre'].str.get_dummies(sep=',')

type_encoded = pd.get_dummies(data['type'], prefix='type')

features = pd.concat([genre_encoded, type_encoded, data[['rating', 'members']]], axis=1)

scaler = MinMaxScaler()
features[['rating', 'members']] = scaler.fit_transform(features[['rating', 'members']])

print(features.head())




Saving anime.csv to anime.csv
   anime_id                              name  \
0     32281                    Kimi no Na wa.   
1      5114  Fullmetal Alchemist: Brotherhood   
2     28977                          Gintama°   
3      9253                       Steins;Gate   
4      9969                     Gintama&#039;   

                                               genre   type episodes  rating  \
0               Drama, Romance, School, Supernatural  Movie        1    9.37   
1  Action, Adventure, Drama, Fantasy, Magic, Mili...     TV       64    9.26   
2  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.25   
3                                   Sci-Fi, Thriller     TV       24    9.17   
4  Action, Comedy, Historical, Parody, Samurai, S...     TV       51    9.16   

   members  
0   200630  
1   793665  
2   114262  
3   673572  
4   151266  
    Adventure   Cars   Comedy   Dementia   Demons   Drama   Ecchi   Fantasy  \
0           0      0        0        

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

def recommend_anime(target_anime, data, features, top_n=5):
    if target_anime not in data['name'].values:
        return "Anime not found in the dataset."

    # Get the index of the target anime
    target_index = data[data['name'] == target_anime].index[0]

    # Compute cosine similarity
    similarity_scores = cosine_similarity([features.iloc[target_index]], features)
    similarity_scores = similarity_scores.flatten()

    # Get top N similar anime indices
    similar_indices = similarity_scores.argsort()[-top_n-1:-1][::-1]

    # Get anime names
    similar_anime = data.iloc[similar_indices]['name'].values
    return similar_anime

# Example usage
example_recommendations = recommend_anime("Naruto", data, features)
print("Recommendations for 'Naruto':", example_recommendations)

Recommendations for 'Naruto': ['Naruto: Shippuuden' 'Katekyo Hitman Reborn!' 'Dragon Ball Z' 'Bleach'
 'Dragon Ball Kai']


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Split the dataset
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

def evaluate_recommendations(data, features, train_data, test_data, top_n=5):
    y_true = []
    y_pred = []

    for anime in test_data['name']:
        if anime in train_data['name'].values:
            recommended = recommend_anime(anime, train_data, features, top_n=top_n)
            y_true.append(anime in recommended)
            y_pred.append(True)  # Assume all recommendations are positive

    precision = precision_score(y_true, y_pred, zero_division=1)
    recall = recall_score(y_true, y_pred, zero_division=1)
    f1 = f1_score(y_true, y_pred, zero_division=1)

    return precision, recall, f1

precision, recall, f1 = evaluate_recommendations(data, features, train_data, test_data)
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1-Score: {f1:.4f}")

Precision: 1.0000, Recall: 1.0000, F1-Score: 1.0000


In [None]:
#INTERVIEW QUESTIONS
1)difference between user-based and item-based collaborative filtering
#USER-BASED COLLABORATIVE FILTERING

*Recommends items to a user by finding other users with similar preferences and suggesting items those similar users liked.
*Similarity Calculation: Measure similarity between users based on their interaction history (e.g., ratings, purchases, clicks).
Common similarity metrics include cosine similarity, Pearson correlation, or Jaccard index.
*Neighborhood Formation: Identify a group of users who are most similar to the target user.
*Recommendation Generation: Predict the target user’s preference for an item by aggregating the preferences of similar users.
*Focuses on finding people with similar tastes, so recommendations are often very personalized.
*Can adapt to changes in user behavior quickly.
*Performance decreases as the number of users grows.
*New users lack sufficient interaction history to determine similarities.

#ITEM BASED COLLABORATIVE FILTERING
*Recommends items based on the similarity between items, rather than users. It suggests items similar to those the user has interacted with.
*Similarity Calculation: Measure similarity between items based on user interactions (e.g., users who rated item A highly also rated item B highly).
Metrics like cosine similarity or adjusted cosine similarity are commonly used.
*Recommendation Generation: For each item the user has interacted with, find similar items and rank them based on predicted preference.
*Typically more scalable for large user bases because the number of items is often smaller than the number of users.
*Recommendations are less affected by changes in user behavior, as item similarity remains consistent.
*Requires sufficient interaction data for each item.
*Recommendations may be less personalized compared to user-based approaches.

In [None]:
2)What is collaborative filtering, and how does it work

Collaborative filtering is a technique used in recommendation systems to predict a user's preferences by leveraging the preferences of other users.
It operates on the principle that users with similar interests or behaviors in the past are likely to have similar preferences in the future.

#Collaborative filtering can be divided into two main steps:

Identify Similarities:
It identifies patterns of similarity either between users (user-based) or between items (item-based).
This is done by analyzing historical data such as user-item interactions (e.g., ratings, purchases, clicks).

Make Predictions:
Based on these similarities, it predicts how a user will interact with an item they haven't seen or interacted with before.
It does this by aggregating the preferences of similar users or items to generate recommendations.

#Types of Collaborative Filtering
1. User-Based Collaborative Filtering:
Focuses on finding users similar to the target user and recommending items those similar users liked.
For example: "Alice and Bob both like X and Y. Bob also likes Z, so Alice might like Z too."
2. Item-Based Collaborative Filtering:
Focuses on finding items similar to those the target user has interacted with and recommending those items.
For example: "Users who liked X also liked Y. Since Alice liked X, she might like Y too."

#Techniques Used
Similarity Metrics: Collaborative filtering relies on mathematical methods to calculate similarity between users or items. Common metrics include:
Cosine Similarity:  Measures the cosine of the angle between two vectors (e.g., user preference vectors).
Pearson Correlation: Measures linear correlation between two sets of data.
Jaccard Index: Measures the similarity between sets (e.g., the overlap of items rated by two users).

#Neighborhood Methods:
Identifies a group of similar users or items (a "neighborhood") and aggregates their preferences.

#Matrix Factorization (Advanced):
Techniques like Singular Value Decomposition (SVD) or Alternating Least Squares (ALS) decompose the user-item interaction matrix into latent factors, representing users and items in a lower-dimensional space.

#Advantages of Collaborative Filtering
No Domain Knowledge Required:
Unlike content-based systems, it does not need information about the items or users. It solely relies on interaction data.
Dynamic Recommendations:
Automatically adapts as new user interactions occur.
Personalization:
Delivers personalized recommendations by learning from user behavior.

#Challenges of Collaborative Filtering
Cold Start Problem:
Struggles with new users (no data about preferences) or new items (no interaction history).
Data Sparsity:
In large datasets, many users may only interact with a small subset of items, making it hard to compute similarities.
Scalability:
Performance can degrade with very large datasets due to the computational cost of similarity calculations.
Popularity Bias:
Tends to recommend popular items, potentially overshadowing niche or diverse content.

#Applications
E-commerce (e.g., "People who bought this also bought...").
Streaming services (e.g., Netflix, Spotify recommendations).
Social networks (e.g., suggesting friends or groups).
Online learning platforms (e.g., recommending courses).
Collaborative filtering is widely used because it learns directly from user behavior, making recommendations more relevant and engaging.















