**Import Libraries**

In [None]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

**Data Loading**

In [None]:
# Load the anime catalogue; the relative path is resolved against the notebook's working directory.
anime = pd.read_csv("anime.csv")

**EDA**

In [None]:
# Report how many distinct anime_id values the frame holds (NaN, if any, counts as a value).
print(
    "Total # of samples in anime dataframe: ",
    anime["anime_id"].nunique(dropna=False),
)

Total # of samples in anime dataframe:  12294


In [None]:
# Summarise column dtypes and non-null counts to spot missing data.
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


In [None]:
# Preview the first rows of the raw catalogue.
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [None]:
# (rows, columns) of the raw catalogue.
print(anime.shape)

(12294, 7)


**Data Preparation**

In [None]:
# Split each comma-separated genre string into a list of individual genres.
# Only str values are split: missing values pass through unchanged, and values
# that are already lists are left alone, so re-running this cell is harmless
# (`.str.split` would turn already-split lists into NaN on a second run).
anime['genre'] = anime['genre'].apply(
    lambda value: value.split(', ') if isinstance(value, str) else value
)
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"[Drama, Romance, School, Supernatural]",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic, Mil...",TV,64,9.26,793665
2,28977,Gintama°,"[Action, Comedy, Historical, Parody, Samurai, ...",TV,51,9.25,114262
3,9253,Steins;Gate,"[Sci-Fi, Thriller]",TV,24,9.17,673572
4,9969,Gintama&#039;,"[Action, Comedy, Historical, Parody, Samurai, ...",TV,51,9.16,151266


In [None]:
# Count missing values per column before imputation.
anime.isnull().sum()

anime_id      0
name          0
genre        62
type         25
episodes      0
rating      230
members       0
dtype: int64

In [None]:
# Impute missing values.
# 'genre' holds lists of genre names at this point, so the placeholder must be
# a list as well: a bare 'Unknown' string would be iterated character by
# character when the genres are later joined into `genre_str`, and the
# resulting one-letter tokens are discarded by the TF-IDF tokenizer.
# `fillna` does not accept a list as the fill value, hence the element-wise apply.
anime['genre'] = anime['genre'].apply(
    lambda genres: genres if isinstance(genres, list) else ['Unknown']
)
anime['type'] = anime['type'].fillna('Unknown')
# Missing ratings are replaced by the mean of the observed ratings.
anime['rating'] = anime['rating'].fillna(anime['rating'].mean())

In [None]:
# Re-check missing values per column after imputation.
anime.isnull().sum()

anime_id    0
name        0
genre       0
type        0
episodes    0
rating      0
members     0
dtype: int64

In [None]:
# Flatten the per-anime genre lists into one row per (anime, genre) pair so the
# distinct genre labels can be collected across the whole catalogue.
exploded_genres = anime.explode(column='genre')
unique_genres = pd.unique(exploded_genres['genre'])
print("Total # of genres: ", unique_genres.size)
print("List of all unique genres: ", unique_genres)

Total # of genres:  44
List of all unique genres:  ['Drama' 'Romance' 'School' 'Supernatural' 'Action' 'Adventure' 'Fantasy'
 'Magic' 'Military' 'Shounen' 'Comedy' 'Historical' 'Parody' 'Samurai'
 'Sci-Fi' 'Thriller' 'Sports' 'Super Power' 'Space' 'Slice of Life'
 'Mecha' 'Music' 'Mystery' 'Seinen' 'Martial Arts' 'Vampire' 'Shoujo'
 'Horror' 'Police' 'Psychological' 'Demons' 'Ecchi' 'Josei' 'Shounen Ai'
 'Game' 'Dementia' 'Harem' 'Cars' 'Kids' 'Shoujo Ai' 'Unknown' 'Hentai'
 'Yaoi' 'Yuri']


In [None]:
def _genres_to_text(genres):
    """Join genre labels into one space-separated string, one token per genre.

    Spaces inside a label are removed first ('Slice of Life' -> 'SliceofLife')
    so that a multi-word genre stays a single token. A bare string is treated
    as a single label instead of being iterated character by character.
    """
    if isinstance(genres, str):
        genres = [genres]
    return ' '.join(label.replace(' ', '') for label in genres)


# New text column used as input for the genre vectorizer.
anime['genre_str'] = anime['genre'].apply(_genres_to_text)
anime

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,genre_str
0,32281,Kimi no Na wa.,"[Drama, Romance, School, Supernatural]",Movie,1,9.37,200630,Drama Romance School Supernatural
1,5114,Fullmetal Alchemist: Brotherhood,"[Action, Adventure, Drama, Fantasy, Magic, Mil...",TV,64,9.26,793665,Action Adventure Drama Fantasy Magic Military ...
2,28977,Gintama°,"[Action, Comedy, Historical, Parody, Samurai, ...",TV,51,9.25,114262,Action Comedy Historical Parody Samurai Sci-Fi...
3,9253,Steins;Gate,"[Sci-Fi, Thriller]",TV,24,9.17,673572,Sci-Fi Thriller
4,9969,Gintama&#039;,"[Action, Comedy, Historical, Parody, Samurai, ...",TV,51,9.16,151266,Action Comedy Historical Parody Samurai Sci-Fi...
...,...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,[Hentai],OVA,1,4.15,211,Hentai
12290,5543,Under World,[Hentai],OVA,1,4.28,183,Hentai
12291,5621,Violence Gekiga David no Hoshi,[Hentai],OVA,4,4.88,219,Hentai
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,[Hentai],OVA,1,4.98,175,Hentai


In [None]:
# How many 'episodes' entries cannot be parsed as a number?
# Coercion maps every unparseable entry to NaN, and the NaNs are then counted.
non_numeric_count = (
    pd.to_numeric(anime['episodes'], errors='coerce')
    .isna()
    .sum()
)

print(f"Number of non-numeric entries in the 'episodes' column: {non_numeric_count}")

Number of non-numeric entries in the 'episodes' column: 340


In [None]:
# Convert 'episodes' to numeric; entries that cannot be parsed become NaN.
anime['episodes'] = pd.to_numeric(anime['episodes'], errors='coerce')

# Fill the NaN entries with the median of the parsed 'episodes' values.
# The result is assigned back to the column: calling
# `anime['episodes'].fillna(..., inplace=True)` operates on a selected column,
# which pandas warns about and which no longer updates the frame under
# Copy-on-Write.
median_episodes = anime['episodes'].median()
anime['episodes'] = anime['episodes'].fillna(median_episodes)

print(f"Replaced non-numeric entries with median value: {median_episodes}")


Replaced non-numeric entries with median value: 2.0


In [None]:
# Scale numerical columns (episodes, rating, members) to 0-1 range
scaler = MinMaxScaler()
anime[['episodes_scaled', 'rating_scaled', 'members_scaled']] = scaler.fit_transform(
    anime[['episodes', 'rating', 'members']]
)

***Content-Based Filtering***

In [None]:
# `anime_data` is a second name for the same DataFrame object as `anime`
# (no copy is made), so a change made through either name is visible via both.
anime_data = anime
# Fixed seed so the preview shows the same rows on every run.
anime_data.sample(5, random_state=42)

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,genre_str,episodes_scaled,rating_scaled,members_scaled
9786,22849,Nyamen,[Comedy],TV,12.0,5.62,414,Comedy,0.006054,0.47419,0.000403
7917,30367,Lemon Home Animation Gekijou,[Comedy],Special,2.0,4.65,254,Comedy,0.00055,0.357743,0.000246
4626,2729,X²: Double X,"[Drama, Music, Shoujo]",Music,1.0,6.68,2976,Drama Music Shoujo,0.0,0.601441,0.00293
5892,9598,Haiyoru! Nyaruani: Remember My Love(craft-sensei),"[Comedy, Sci-Fi]",TV,11.0,6.32,17934,Comedy Sci-Fi,0.005504,0.558223,0.017683
7179,15653,Stay the Same,"[Kids, Music]",Music,1.0,5.67,160,Kids Music,0.0,0.480192,0.000153


In [None]:
# One-hot encode the categorical 'type' column: one indicator column per
# distinct value, each named 'type_<value>'.
type_one_hot = pd.get_dummies(anime_data['type'], prefix='type')

In [None]:
# TF-IDF vectorization for genres.
# `genre_str` holds one whitespace-separated token per genre, so tokens are
# split on whitespace only. The default token pattern also breaks on
# punctuation, which splits 'Sci-Fi' into two unrelated features ('sci', 'fi').
# Like the default pattern, this one still requires at least two characters.
tfv = TfidfVectorizer(token_pattern=r"(?u)\S\S+")

tfv.fit(anime_data['genre_str'])

# Learned vocabulary (lower-cased genre tokens).
tfv.get_feature_names_out()

array(['action', 'adventure', 'cars', 'comedy', 'dementia', 'demons',
       'drama', 'ecchi', 'fantasy', 'fi', 'game', 'harem', 'hentai',
       'historical', 'horror', 'josei', 'kids', 'magic', 'martialarts',
       'mecha', 'military', 'music', 'mystery', 'parody', 'police',
       'psychological', 'romance', 'samurai', 'school', 'sci', 'seinen',
       'shoujo', 'shoujoai', 'shounen', 'shounenai', 'sliceoflife',
       'space', 'sports', 'supernatural', 'superpower', 'thriller',
       'vampire', 'yaoi', 'yuri'], dtype=object)

In [None]:
# Build the anime-by-genre-token TF-IDF matrix. `fit_transform` re-fits `tfv`
# on the same column before transforming it.
tfidf_matrix = tfv.fit_transform(anime_data['genre_str'])
tfidf_matrix.shape

(12294, 44)

In [None]:
# Assemble one dense feature row per anime by concatenating the feature groups
# column-wise. The numeric columns are read from `anime_data`, the same frame
# the genre and type features were built from, so every group refers to the
# same rows.
scaled_columns = ['episodes_scaled', 'rating_scaled', 'members_scaled']
features = np.hstack([
    tfidf_matrix.toarray(),              # genre TF-IDF weights
    type_one_hot.values,                 # one-hot encoded type
    anime_data[scaled_columns].values,   # min-max scaled episodes, rating, members
])

In [None]:
# (number of anime, total number of feature columns).
print(features.shape)

(12294, 54)


**Cosine Similarity**

In [None]:
# Compute cosine similarity between all items
cosine_sim = cosine_similarity(features)

In [None]:
# Wrap the similarity matrix in a DataFrame labelled by anime name on both
# axes, so scores can be looked up by title.
# NOTE(review): lookups by title assume names are unique — TODO confirm against the data.
cosine_sim_df = pd.DataFrame(cosine_sim, index=anime['name'], columns=anime['name'])
cosine_sim_df.head()

name,Kimi no Na wa.,Fullmetal Alchemist: Brotherhood,Gintama°,Steins;Gate,Gintama&#039;,Haikyuu!!: Karasuno Koukou VS Shiratorizawa Gakuen Koukou,Hunter x Hunter (2011),Ginga Eiyuu Densetsu,Gintama Movie: Kanketsu-hen - Yorozuya yo Eien Nare,Gintama&#039;: Enchousen,...,Super Erotic Anime,Taimanin Asagi 3,Teleclub no Himitsu,Tenshi no Habataki Jun,The Satisfaction,Toushindai My Lover: Minami tai Mecha-Minami,Under World,Violence Gekiga David no Hoshi,Violence Gekiga Shin David no Hoshi: Inma Densetsu,Yasuji no Pornorama: Yacchimae!!
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kimi no Na wa.,1.0,0.362621,0.30112,0.314156,0.300684,0.4458,0.309954,0.350094,0.645871,0.295392,...,0.124817,0.312699,0.134115,0.119703,0.121411,0.11196,0.117558,0.142879,0.147005,0.562146
Fullmetal Alchemist: Brotherhood,0.362621,1.0,0.670999,0.69943,0.678177,0.699177,0.798717,0.413199,0.337928,0.662052,...,0.112804,0.185527,0.121211,0.108194,0.10973,0.101201,0.106253,0.129158,0.132856,0.150404
Gintama°,0.30112,0.670999,1.0,0.695816,0.999746,0.711873,0.72219,0.361992,0.645053,0.999705,...,0.123996,0.203863,0.133232,0.118907,0.120605,0.111215,0.116777,0.141948,0.14603,0.165333
Steins;Gate,0.314156,0.69943,0.695816,1.0,0.701827,0.617965,0.669633,0.374876,0.354972,0.68814,...,0.114701,0.188639,0.123249,0.110014,0.111577,0.102902,0.10804,0.131318,0.135094,0.152939
Gintama&#039;,0.300684,0.678177,0.999746,0.701827,1.0,0.710881,0.725407,0.360237,0.643695,0.999078,...,0.122741,0.201805,0.131884,0.117705,0.119385,0.110091,0.115596,0.140513,0.144553,0.16366


**Recommendation Function**

In [None]:
def anime_recommendations(nama_anime, similarity_data, items, k=10):
    """Return the ``k`` anime most similar to ``nama_anime``.

    Parameters
    ----------
    nama_anime : str
        Exact title to look up; must be a column label of ``similarity_data``.
    similarity_data : pandas.DataFrame
        Similarity matrix whose index and columns are anime titles.
    items : pandas.DataFrame
        Anime metadata containing at least the columns ``name``, ``genre``,
        ``type``, ``episodes``, ``rating`` and ``members``.
    k : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``k`` rows, most similar first, with the columns
        ``Recommended Anime``, ``genre``, ``type``, ``episodes``, ``rating``
        and ``members``.

    Raises
    ------
    KeyError
        If ``nama_anime`` is not a column of ``similarity_data``.
    """
    if nama_anime not in similarity_data.columns:
        raise KeyError(f"{nama_anime!r} is not a title in the similarity matrix")

    scores = similarity_data[nama_anime]
    # A duplicated column label yields a DataFrame; use its first column so the
    # scores can still be sorted.
    if isinstance(scores, pd.DataFrame):
        scores = scores.iloc[:, 0]

    # Rank by similarity, drop the query title itself, and keep only the top k
    # before joining, so the metadata merge touches k rows instead of the whole
    # catalogue.
    closest = (
        scores.sort_values(ascending=False)
        .drop(nama_anime, errors='ignore')
        .head(k)
    )
    closest_animes = closest.rename_axis('Recommended Anime').reset_index(name='Similarity')

    # One metadata row per title: duplicate names in `items` would otherwise
    # multiply rows in the merge and repeat the same recommendation.
    details = items[['name', 'genre', 'type', 'episodes', 'rating', 'members']]
    details = details.drop_duplicates(subset='name')

    recommendations = closest_animes.merge(
        details, left_on='Recommended Anime', right_on='name', how='left'
    )

    return recommendations[['Recommended Anime', 'genre', 'type', 'episodes', 'rating', 'members']]


**Find the exact name of an anime**

In [None]:
# Ask for (part of) a title and list every anime whose name contains it,
# ignoring case. regex=False makes the typed text match literally; otherwise
# characters such as '(', '+', '?' or '.' in a title would be interpreted as
# regular-expression syntax and could mismatch or raise an error.
anime_input = input("Input anime name: ")
anime_data[anime_data['name'].str.contains(anime_input, case=False, regex=False)]

Input anime name: naruto


Unnamed: 0,anime_id,name,genre,type,episodes,rating,members,genre_str,episodes_scaled,rating_scaled,members_scaled
486,28755,Boruto: Naruto the Movie,"[Action, Comedy, Martial Arts, Shounen, Super ...",Movie,1.0,8.03,74690,Action Comedy MartialArts Shounen SuperPower,0.0,0.763505,0.07366
615,1735,Naruto: Shippuuden,"[Action, Comedy, Martial Arts, Shounen, Super ...",TV,2.0,7.94,533578,Action Comedy MartialArts Shounen SuperPower,0.00055,0.752701,0.526252
719,16870,The Last: Naruto the Movie,"[Action, Martial Arts, Romance, Shounen, Super...",Movie,1.0,7.88,85013,Action MartialArts Romance Shounen SuperPower,0.0,0.745498,0.083842
784,13667,Naruto: Shippuuden Movie 6 - Road to Ninja,"[Action, Adventure, Martial Arts, Shounen, Sup...",Movie,1.0,7.84,87369,Action Adventure MartialArts Shounen SuperPower,0.0,0.740696,0.086165
841,20,Naruto,"[Action, Comedy, Martial Arts, Shounen, Super ...",TV,220.0,7.81,683297,Action Comedy MartialArts Shounen SuperPower,0.120528,0.737095,0.673916
1103,32365,Boruto: Naruto the Movie - Naruto ga Hokage ni...,"[Action, Comedy, Martial Arts, Shounen, Super ...",Special,1.0,7.68,16868,Action Comedy MartialArts Shounen SuperPower,0.0,0.721489,0.016632
1237,10589,Naruto: Shippuuden Movie 5 - Blood Prison,"[Action, Adventure, Martial Arts, Mystery, Sho...",Movie,1.0,7.62,75660,Action Adventure MartialArts Mystery Shounen S...,0.0,0.714286,0.074617
1343,10075,Naruto x UT,"[Action, Comedy, Martial Arts, Shounen, Super ...",OVA,1.0,7.58,23465,Action Comedy MartialArts Shounen SuperPower,0.0,0.709484,0.023138
1472,8246,Naruto: Shippuuden Movie 4 - The Lost Tower,"[Action, Comedy, Martial Arts, Shounen, Super ...",Movie,1.0,7.53,84527,Action Comedy MartialArts Shounen SuperPower,0.0,0.703481,0.083362
1573,6325,Naruto: Shippuuden Movie 3 - Hi no Ishi wo Tsu...,"[Action, Comedy, Martial Arts, Shounen, Super ...",Movie,1.0,7.5,83515,Action Comedy MartialArts Shounen SuperPower,0.0,0.69988,0.082364


**Content-based recommendations for a given anime name:**

In [None]:
# Top-10 content-based recommendations for the exact title 'Naruto: Shippuuden'.
anime_recommendations('Naruto: Shippuuden', cosine_sim_df, anime_data, k=10)

Unnamed: 0,Recommended Anime,genre,type,episodes,rating,members
0,Naruto,"[Action, Comedy, Martial Arts, Shounen, Super ...",TV,220.0,7.81,683297
1,Dragon Ball Z,"[Action, Adventure, Comedy, Fantasy, Martial A...",TV,291.0,8.32,375662
2,Dragon Ball,"[Adventure, Comedy, Fantasy, Martial Arts, Sho...",TV,153.0,8.16,316102
3,Dragon Ball Kai,"[Action, Adventure, Comedy, Fantasy, Martial A...",TV,97.0,7.95,116832
4,Dragon Ball Super,"[Action, Adventure, Comedy, Fantasy, Martial A...",TV,2.0,7.4,111443
5,Rekka no Honoo,"[Action, Adventure, Martial Arts, Shounen, Sup...",TV,42.0,7.44,35258
6,Dragon Ball Kai (2014),"[Action, Adventure, Comedy, Fantasy, Martial A...",TV,61.0,8.01,42666
7,Kurokami The Animation,"[Action, Martial Arts, Super Power]",TV,23.0,7.29,72750
8,Project ARMS,"[Action, Martial Arts, Super Power]",TV,26.0,7.15,6903
9,Katekyo Hitman Reborn!,"[Action, Comedy, Shounen, Super Power]",TV,203.0,8.37,258103
