In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
# Load the dataset
anime_data = pd.read_csv("https://raw.githubusercontent.com/ygtech27/Datasets/refs/heads/main/anime.csv")
anime_data.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
anime_data = anime_data.rename(columns={'genre':'genres','name':'title'})
anime_data

Unnamed: 0,anime_id,title,genres,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [5]:
print (anime_data['genres'].info())

<class 'pandas.core.series.Series'>
RangeIndex: 12294 entries, 0 to 12293
Series name: genres
Non-Null Count  Dtype 
--------------  ----- 
12232 non-null  object
dtypes: object(1)
memory usage: 96.2+ KB
None


In [6]:
# Data preprocessing
# Handle missing values (if any)
anime_data['genres'].fillna('', inplace=True)

# Feature extraction
# Combine genres and title for a more comprehensive similarity measure
anime_data['combined_features'] = anime_data['genres'] + ' ' + anime_data['title']

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  anime_data['genres'].fillna('', inplace=True)


In [7]:
# Create TF-IDF vectors
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(anime_data['combined_features'])

# Calculate cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [8]:
# Recommendation function
def recommend_anime(anime_title):
    index = anime_data[anime_data['title'] == anime_title].index[0]
    similarity_scores = list(enumerate(cosine_sim[index]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    top_recommendations = similarity_scores[1:6]  # Recommend top 5
    recommended_anime_indices = [i[0] for i in top_recommendations]
    return anime_data['title'].iloc[recommended_anime_indices]

# Example usage
anime_to_recommend = "Death Note"
recommended_anime = recommend_anime(anime_to_recommend)
print(recommended_anime)

778     Death Note Rewrite
199           Death Parade
477        Death Billiards
38                 Monster
5382             AD Police
Name: title, dtype: object


In [9]:
import numpy as np

# Split the dataset into training and testing sets
np.random.seed(42)  # For reproducibility
shuffle_index = np.random.permutation(len(anime_data))
train_size = int(0.8 * len(anime_data))

train_index = shuffle_index[:train_size]
test_index = shuffle_index[train_size:]

train_data = anime_data.iloc[train_index]
test_data = anime_data.iloc[test_index]

In [10]:
print(train_data, test_data)

       anime_id                                             title  \
6329      17209  Suzy&#039;s Zoo: Daisuki! Witzy - Happy Birthday   
2167        173                                           Tactics   
2882       3616                                 Kamen no Maid Guy   
4700      18799                                     Take Your Way   
7258      18831                                           Rinkaku   
...         ...                                               ...   
5510       4563                        Bakkyuu HIT! Crash Bedaman   
5330       2046         Mahou no Tenshi Creamy Mami: Long Goodbye   
4531      13799             Pokemon: Meloetta no Kirakira Recital   
365        1943                                           Paprika   
12271      5559               Nijuusou: Wana ni Ochita Onna-tachi   

                                                  genres     type episodes  \
6329                                                Kids  Special        1   
2167   Comedy, 

In [12]:
from sklearn.metrics import precision_score, recall_score, f1_score
import numpy as np

# Split the dataset into training and testing sets
np.random.seed(42)  # For reproducibility
shuffle_index = np.random.permutation(len(anime_data))
train_size = int(0.8 * len(anime_data))

train_index = shuffle_index[:train_size]
test_index = shuffle_index[train_size:]

train_data = anime_data.iloc[train_index]
test_data = anime_data.iloc[test_index]

liked_threshold = 7.5
anime_data['liked'] = anime_data['rating'] >= liked_threshold

def evaluate_recommendations(target_anime, cosine_sim, anime_data):
    # Get recommendations
    # Call the recommend_anime function directly
    recommended_anime_titles = recommend_anime(target_anime)

    # Get the indices of the recommended anime
    recommended_anime_indices = anime_data[anime_data['title'].isin(recommended_anime_titles)].index

    # True likes based on the "liked" threshold
    true_likes = anime_data['liked'].values

    # Predicted likes (1 if in recommended anime indices, else 0)
    predicted_likes = [1 if i in recommended_anime_indices else 0 for i in range(len(anime_data))]

    # Calculate precision, recall, and F1-score
    precision = precision_score(true_likes, predicted_likes)
    recall = recall_score(true_likes, predicted_likes)
    f1 = f1_score(true_likes, predicted_likes)

    return precision, recall, f1

# Example: Evaluate recommendation for a popular anime
precision, recall, f1 = evaluate_recommendations("Death Note", cosine_sim, anime_data)

# Display precision, recall, and F1-score
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

Precision: 0.8
Recall: 0.0022818026240730175
F1-Score: 0.004550625711035267


In [13]:
print(anime_data)

       anime_id                                              title  \
0         32281                                     Kimi no Na wa.   
1          5114                   Fullmetal Alchemist: Brotherhood   
2         28977                                           Gintama°   
3          9253                                        Steins;Gate   
4          9969                                      Gintama&#039;   
...         ...                                                ...   
12289      9316       Toushindai My Lover: Minami tai Mecha-Minami   
12290      5543                                        Under World   
12291      5621                     Violence Gekiga David no Hoshi   
12292      6133  Violence Gekiga Shin David no Hoshi: Inma Dens...   
12293     26081                   Yasuji no Pornorama: Yacchimae!!   

                                                  genres   type episodes  \
0                   Drama, Romance, School, Supernatural  Movie        1   
1      

**1.Can you explain the difference between user-based and item-based collaborative filtering?**

 ans = User-based filtering: This method finds users similar to the target user based on their ratings or preferences. Recommendations are made by selecting items liked by similar users.

Item-based filtering: Instead of comparing users, item-based filtering compares items (in this case, anime). It recommends items that are similar to those the user has already liked.

**2.What is collaborative filtering, and how does it work**?

ans = Collaborative filtering is a method used by recommendation systems to make predictions based on user interactions (e.g., ratings or clicks). It works by either finding similarities between users (user-based) or items (item-based) to recommend items that similar users liked or similar items that a user has liked. This approach is data-driven and leverages large datasets of user behavior.