In [1]:
# Importing required libraries for data handling, scaling, similarity, and evaluation
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score


In [2]:
# Loading the anime dataset into a DataFrame
anime_df = pd.read_csv("anime.csv")


In [3]:
# Checking the first few rows to understand the structure
anime_df.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [4]:
# Getting a quick summary of columns, data types, and non-null counts
anime_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [5]:
# Descriptive stats for numerical columns like rating, episodes, members
anime_df.describe()

Unnamed: 0,anime_id,rating,members
count,12294.0,12064.0,12294.0
mean,14058.221653,6.473902,18071.34
std,11455.294701,1.026746,54820.68
min,1.0,1.67,5.0
25%,3484.25,5.88,225.0
50%,10260.5,6.57,1550.0
75%,24794.5,7.18,9437.0
max,34527.0,10.0,1013917.0


In [6]:
# Dropping rows where genre or rating is missing — these are key features for similarity
anime_df.dropna(subset=['genre', 'rating'], inplace=True)

# Filling missing 'type' values with 'Unknown' since it's not used in similarity
anime_df['type'].fillna('Unknown', inplace=True)

# Converting 'episodes' column to numeric — replacing non-numeric entries with 0
anime_df['episodes'] = pd.to_numeric(anime_df['episodes'], errors='coerce').fillna(0).astype(int)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  anime_df['type'].fillna('Unknown', inplace=True)


In [7]:
anime_df

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,1,4.15,211
12290,5543,Under World,Hentai,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Hentai,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,1,4.98,175


In [8]:
# making a copy so we don't mess up original data
anime_encoded = anime_df.copy()

# filling empty genre cells with blank string
anime_encoded['genre'] = anime_encoded['genre'].fillna('')

# making sure every row becomes a list, even if broken
genre_split_list = []
for i in range(len(anime_encoded)):
    genre_str = anime_encoded.iloc[i]['genre']
    try:
        genre_split_list.append(genre_str.split(', '))
    except:
        genre_split_list.append([])  # if error, just give empty list

# adding this list to new column
anime_encoded['genre_split'] = genre_split_list

# collecting all unique genres from the dataset
unique_genres = []
for genre_list in genre_split_list:
    for genre in genre_list:
        if genre not in unique_genres:
            unique_genres.append(genre)

# creating new columns for each genre and marking 1 if present
for genre in unique_genres:
    anime_encoded[genre] = 0
    for i in range(len(anime_encoded)):
        genre_cell = anime_encoded.iloc[i]['genre_split']
        if isinstance(genre_cell, list) and genre in genre_cell:
            anime_encoded.at[i, genre] = 1

In [9]:
anime_encoded

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,genre_split,Drama,Romance,...,Shounen Ai,Game,Dementia,Harem,Cars,Kids,Shoujo Ai,Hentai,Yaoi,Yuri
0,32281.0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1.0,9.37,200630.0,"[Drama, Romance, School, Supernatural]",1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,5114.0,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64.0,9.26,793665.0,"[Action, Adventure, Drama, Fantasy, Magic, Mil...",1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,28977.0,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51.0,9.25,114262.0,"[Action, Comedy, Historical, Parody, Samurai, ...",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,9253.0,Steins;Gate,"Sci-Fi, Thriller",TV,24.0,9.17,673572.0,"[Sci-Fi, Thriller]",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,9969.0,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51.0,9.16,151266.0,"[Action, Comedy, Historical, Parody, Samurai, ...",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11107,,,,,,,,,,,...,,,,,,,,1.0,0,0
11108,,,,,,,,,,,...,,,,,,,,1.0,0,0
11109,,,,,,,,,,,...,,,,,,,,1.0,0,0
11111,,,,,,,,,,,...,,,,,,,,1.0,0,0


In [10]:
# saving the final encoded dataframe to a CSV file
anime_encoded.to_csv('anime_encoded.csv', index=False)


In [11]:
# removing rows where name or genre is missing
anime_encoded = anime_encoded.dropna(subset=['name', 'genre'])

In [12]:
# Creating a scaler object
scaler = MinMaxScaler()

# Selecting the numeric columns to scale
numeric_cols = ['rating', 'members', 'episodes']

# Applying the scaler and storing the result in a new DataFrame
scaled_values = scaler.fit_transform(anime_encoded[numeric_cols])

# Converting scaled values back to DataFrame with same column names
scaled_df = pd.DataFrame(scaled_values, columns=numeric_cols)

# Adding these scaled columns back to anime_encoded
anime_encoded[numeric_cols] = scaled_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  anime_encoded[numeric_cols] = scaled_df


In [13]:
anime_encoded.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,genre_split,Drama,Romance,...,Shounen Ai,Game,Dementia,Harem,Cars,Kids,Shoujo Ai,Hentai,Yaoi,Yuri
0,32281.0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,0.00055,0.92437,0.197867,"[Drama, Romance, School, Supernatural]",1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,5114.0,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,0.035204,0.911164,0.782769,"[Action, Adventure, Drama, Fantasy, Magic, Mil...",1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,28977.0,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,0.028053,0.909964,0.112683,"[Action, Comedy, Historical, Parody, Samurai, ...",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,9253.0,Steins;Gate,"Sci-Fi, Thriller",TV,0.013201,0.90036,0.664323,"[Sci-Fi, Thriller]",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,9969.0,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,0.028053,0.89916,0.14918,"[Action, Comedy, Historical, Parody, Samurai, ...",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [14]:
# Creating the feature matrix for similarity calculation.
# This includes all genre columns (binary encoded) and scaled numeric features (rating, members, episodes).
# The resulting matrix will be used to compute cosine similarity between anime titles.
feature_cols = unique_genres + ['rating', 'members', 'episodes']
feature_matrix = anime_encoded[feature_cols]

In [15]:
feature_matrix

Unnamed: 0,Drama,Romance,School,Supernatural,Action,Adventure,Fantasy,Magic,Military,Shounen,...,Harem,Cars,Kids,Shoujo Ai,Hentai,Yaoi,Yuri,rating,members,episodes
0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,0.924370,0.197867,0.000550
1,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0,0,0.911164,0.782769,0.035204
2,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0,0,0.909964,0.112683,0.028053
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,0.900360,0.664323,0.013201
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0,0,0.899160,0.149180,0.028053
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12289,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,,,
12290,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,,,
12291,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,,,
12292,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0,0,,,


In [16]:
feature_matrix.isnull().sum()

Unnamed: 0,0
Drama,0
Romance,0
School,0
Supernatural,0
Action,0
Adventure,0
Fantasy,0
Magic,0
Military,0
Shounen,0


In [17]:
feature_matrix = feature_matrix.fillna(0)


In [18]:
# Compute similarity matrix using the feature matrix
similarity_matrix = cosine_similarity(feature_matrix)

# Function to recommend anime based on similarity scores
def recommend_anime(title, top_n=5, threshold=None):
    # Get the index of the anime with the given title
    try:
        index = anime_encoded[anime_encoded['name'] == title].index[0]
    except IndexError:
        return f"Anime titled '{title}' not found."

    # Get similarity scores for the selected anime
    sim_scores = list(enumerate(similarity_matrix[index]))

    # Sort scores in descending order and skip the anime itself
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:]

    # Apply threshold if specified
    if threshold is not None:
        sim_scores = [x for x in sim_scores if x[1] >= threshold]

    # Get top N similar anime
    top_indices = [i[0] for i in sim_scores[:top_n]]

    # Return selected anime details
    return anime_encoded.iloc[top_indices][['name', 'genre', 'rating']]


In [19]:
recommend_anime("Naruto", top_n=5)

Unnamed: 0,name,genre,rating
615,Naruto: Shippuuden,"Action, Comedy, Martial Arts, Shounen, Super P...",0.752701
1472,Naruto: Shippuuden Movie 4 - The Lost Tower,"Action, Comedy, Martial Arts, Shounen, Super P...",0.703481
1573,Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...,"Action, Comedy, Martial Arts, Shounen, Super P...",0.69988
486,Boruto: Naruto the Movie,"Action, Comedy, Martial Arts, Shounen, Super P...",0.763505
1343,Naruto x UT,"Action, Comedy, Martial Arts, Shounen, Super P...",0.709484


In [20]:
recommend_anime("One Piece",top_n=5)

Unnamed: 0,name,genre,rating
241,One Piece: Episode of Nami - Koukaishi no Nami...,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",0.792317
231,One Piece: Episode of Merry - Mou Hitori no Na...,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",0.794718
896,One Piece: Episode of Sabo - 3 Kyoudai no Kizu...,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",0.733493
143,One Piece Film: Strong World,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",0.810324
163,One Piece Film: Z,"Action, Adventure, Comedy, Drama, Fantasy, Sho...",0.806723


In [21]:
anime_names = anime_encoded['name'].unique().tolist()


In [22]:
for name in anime_names:
    print(name)

Kimi no Na wa.
Fullmetal Alchemist: Brotherhood
Gintama°
Steins;Gate
Gintama&#039;
Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou
Hunter x Hunter (2011)
Ginga Eiyuu Densetsu
Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare
Gintama&#039;: Enchousen
Clannad: After Story
Koe no Katachi
Gintama
Code Geass: Hangyaku no Lelouch R2
Haikyuu!! Second Season
Sen to Chihiro no Kamikakushi
Shigatsu wa Kimi no Uso
Mushishi Zoku Shou 2nd Season
Ookami Kodomo no Ame to Yuki
Code Geass: Hangyaku no Lelouch
Hajime no Ippo
Rurouni Kenshin: Meiji Kenkaku Romantan - Tsuioku-hen
Cowboy Bebop
One Punch Man
Mononoke Hime
Suzumiya Haruhi no Shoushitsu
Monogatari Series: Second Season
Mushishi Zoku Shou
Mushishi
Tengen Toppa Gurren Lagann
Great Teacher Onizuka
Natsume Yuujinchou Go
Hajime no Ippo: New Challenger
Mushishi Zoku Shou: Suzu no Shizuku
Natsume Yuujinchou Shi
Howl no Ugoku Shiro
Fate/Zero 2nd Season
Kizumonogatari II: Nekketsu-hen
Monster
Bakuman. 3rd Season
Death Note
Gintama°: Aizome

In [23]:
import random

# Step 1: Create a list of all anime titles
all_titles = anime_encoded['name'].tolist()

# Step 2: Randomly select 100 titles for testing
test_titles = random.sample(all_titles, 100)

# Step 3: Create training set by excluding test titles
train_data = anime_encoded[~anime_encoded['name'].isin(test_titles)].reset_index(drop=True)
test_data = anime_encoded[anime_encoded['name'].isin(test_titles)].reset_index(drop=True)

In [24]:
train_data

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,genre_split,Drama,Romance,...,Shounen Ai,Game,Dementia,Harem,Cars,Kids,Shoujo Ai,Hentai,Yaoi,Yuri
0,32281.0,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,0.000550,0.924370,0.197867,"[Drama, Romance, School, Supernatural]",1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,5114.0,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,0.035204,0.911164,0.782769,"[Action, Adventure, Drama, Fantasy, Magic, Mil...",1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,28977.0,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,0.028053,0.909964,0.112683,"[Action, Comedy, Historical, Parody, Samurai, ...",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,9253.0,Steins;Gate,"Sci-Fi, Thriller",TV,0.013201,0.900360,0.664323,"[Sci-Fi, Thriller]",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,9969.0,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,0.028053,0.899160,0.149180,"[Action, Comedy, Historical, Parody, Samurai, ...",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11912,9316.0,Toushindai My Lover: Minami tai Mecha-Minami,Hentai,OVA,,,,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
11913,5543.0,Under World,Hentai,OVA,,,,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
11914,5621.0,Violence Gekiga David no Hoshi,Hentai,OVA,,,,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
11915,6133.0,Violence Gekiga Shin David no Hoshi: Inma Dens...,Hentai,OVA,,,,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [25]:
test_data

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,genre_split,Drama,Romance,...,Shounen Ai,Game,Dementia,Harem,Cars,Kids,Shoujo Ai,Hentai,Yaoi,Yuri
0,31043.0,Boku dake ga Inai Machi,"Mystery, Psychological, Seinen, Supernatural",TV,0.006601,0.837935,0.396851,"[Mystery, Psychological, Seinen, Supernatural]",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,16664.0,Kaguya-hime no Monogatari,"Fantasy, Historical",Movie,0.000550,0.804322,0.044911,"[Fantasy, Historical]",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,16049.0,Toaru Kagaku no Railgun S,"Action, Sci-Fi, Super Power",TV,0.013201,0.779112,0.135213,"[Action, Sci-Fi, Super Power]",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,416.0,Kurenai no Buta,"Action, Adventure, Comedy, Drama, Historical, ...",Movie,0.000550,0.763505,0.073001,"[Action, Adventure, Comedy, Drama, Historical,...",1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,22265.0,Free!: Eternal Summer,"Comedy, School, Slice of Life, Sports",TV,0.007151,0.752701,0.148519,"[Comedy, School, Slice of Life, Sports]",0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,33153.0,Houkago Initiation,Hentai,OVA,0.000550,0.535414,0.001712,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0
96,7262.0,Kagirohi: Shaku Kei,Hentai,OVA,0.000550,0.482593,0.000444,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0
97,3587.0,Isaku: Respect,Hentai,OVA,0.000550,0.319328,0.000186,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0
98,4818.0,Houkago Renai Club: Koi no Étude,Hentai,OVA,,,,[Hentai],0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [26]:
# get all genre columns (from Drama to Yuri)
genre_columns = train_data.columns[train_data.columns.get_loc('Drama'):train_data.columns.get_loc('Yuri') + 1]

# add numeric features
numeric_columns = ['rating', 'members', 'episodes']

# combine them
feature_columns = list(genre_columns) + numeric_columns


In [27]:
print(train_data[feature_columns].isnull().sum())
print(test_data[feature_columns].isnull().sum())


Drama              0
Romance            0
School             0
Supernatural       0
Action             0
Adventure          0
Fantasy            0
Magic              0
Military           0
Shounen            0
Comedy             0
Historical         0
Parody             0
Samurai            0
Sci-Fi             0
Thriller           0
Sports             0
Super Power        0
Space              0
Slice of Life      0
Mecha              0
Music              0
Mystery            0
Seinen             0
Martial Arts       0
Vampire            0
Shoujo             0
Horror             0
Police             0
Psychological      0
Demons             0
Ecchi              0
Josei              0
Shounen Ai         0
Game               0
Dementia           0
Harem              0
Cars               0
Kids               0
Shoujo Ai          0
Hentai             0
Yaoi               0
Yuri               0
rating           265
members          265
episodes         265
dtype: int64
Drama            0
Ro

In [28]:
# Fill NaNs in source data before feature extraction
train_data[['rating', 'members', 'episodes']] = train_data[['rating', 'members', 'episodes']].fillna(0)
test_data[['rating', 'members', 'episodes']] = test_data[['rating', 'members', 'episodes']].fillna(0)


In [29]:
train_features = train_data[feature_columns]
test_features = test_data[feature_columns]
train_similarity = cosine_similarity(test_features, train_features)


In [30]:
from sklearn.metrics.pairwise import cosine_similarity

# build feature matrix from training data
train_features = train_data[feature_columns]
test_features = test_data[feature_columns]
train_similarity = cosine_similarity(test_features, train_features)

# get anime titles and genres
train_titles = train_data['name'].tolist()
train_genres = train_data.set_index('name')['genre_split'].to_dict()

# set top-K value
K = 5
total = 0
hits = 0

# loop through each test anime
for i, row in test_data.iterrows():
    test_title = row['name']
    test_genres = set(row['genre_split'])

    # get similarity scores for this test anime
    scores = list(enumerate(train_similarity[i]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)

    # get top-K recommended titles
    top_indices = [i[0] for i in scores[:K]]
    top_titles = [train_titles[i] for i in top_indices]

    # check for genre overlap
    match_count = 0
    for title in top_titles:
        recommended_genres = set(train_genres.get(title, []))
        if test_genres & recommended_genres:
            match_count += 1

    hits += match_count
    total += K

# calculate metrics
precision = hits / total if total > 0 else 0
recall = hits / len(test_data) if len(test_data) > 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# print results
print(f"Precision@{K}: {precision:.4f}")
print(f"Recall@{K}: {recall:.4f}")
print(f"F1-Score: {f1_score:.4f}")


Precision@5: 0.5020
Recall@5: 2.5100
F1-Score: 0.8367


**Performance Analysis**
The recommendation system gave decent results. Precision@5 was 0.5020, which means about half of the recommended anime had similar genres. Recall@5 was 2.5100, showing that each test anime matched with around 2.5 relevant titles. F1-score was 0.8367, which means the system is balanced and not leaning too much towards either precision or recall.


**Areas of Improvement**


To improve the system, we can add more features like studio, release year, or popularity. We can also give more weight to important features like genre or rating. If user data is available, we can combine this with collaborative filtering to make better recommendations.