In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

## 1. importing data

### load data

In [2]:
anime = pd.read_csv('data/anime.csv')
anime.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


### check data shapes & types

In [3]:
anime.shape

(12294, 7)

In [4]:
anime.dtypes

anime_id      int64
name         object
genre        object
type         object
episodes     object
rating      float64
members       int64
dtype: object

In [5]:
anime.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12294 entries, 0 to 12293
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   anime_id  12294 non-null  int64  
 1   name      12294 non-null  object 
 2   genre     12232 non-null  object 
 3   type      12269 non-null  object 
 4   episodes  12294 non-null  object 
 5   rating    12064 non-null  float64
 6   members   12294 non-null  int64  
dtypes: float64(1), int64(2), object(4)
memory usage: 672.5+ KB


### check duplicates data

In [6]:
anime.duplicated(subset=['anime_id']).sum()

0

In [7]:
anime.duplicated(subset=['name', 'type']).sum()

0

### handle episodes with `Unknown` value

In [8]:
(anime.episodes == 'Unknown').sum()

340

In [9]:
anime.episodes.replace('Unknown', np.nan, inplace=True)

### set `anime_id` as index

In [10]:
anime = anime.set_index('anime_id', drop=True)
anime.head()

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


### create load function

In [11]:
def load_anime_data(anime_path):
    '''
    function to load anime data

    parameters
    ----------
    anime_path : str
        path of anime data (.csv)

    returns
    -------
    anime_data : pandas dataframe
        anime data
    '''
    
    # load data
    anime = pd.read_csv(anime_path)
    print('data shape:', anime.shape)
    
    # change string 'Unknown' with NaN
    anime.episodes.replace('Unknown', np.nan, inplace=True)

    # set track_id as index
    anime = anime.set_index('anime_id', drop=True)

    return anime

In [12]:
anime = load_anime_data('data/anime.csv')

data shape: (12294, 7)


In [13]:
anime.head()

Unnamed: 0_level_0,name,genre,type,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


## 2. data preparation

### numerical features

In [14]:
anime.head().T

anime_id,32281,5114,28977,9253,9969
name,Kimi no Na wa.,Fullmetal Alchemist: Brotherhood,Gintama°,Steins;Gate,Gintama&#039;
genre,"Drama, Romance, School, Supernatural","Action, Adventure, Drama, Fantasy, Magic, Mili...","Action, Comedy, Historical, Parody, Samurai, S...","Sci-Fi, Thriller","Action, Comedy, Historical, Parody, Samurai, S..."
type,Movie,TV,TV,TV,TV
episodes,1,64,51,24,51
rating,9.37,9.26,9.25,9.17,9.16
members,200630,793665,114262,673572,151266


In [15]:
# set numerical feature columns
num_cols = ['episodes', 'rating', 'members']

In [16]:
# create numerical features anime
anime_num = anime[num_cols]
anime_num.head()

Unnamed: 0_level_0,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32281,1,9.37,200630
5114,64,9.26,793665
28977,51,9.25,114262
9253,24,9.17,673572
9969,51,9.16,151266


In [17]:
anime_num.shape

(12294, 3)

In [18]:
# create & fitting object
scaler = MinMaxScaler()
scaler.fit(anime_num)

MinMaxScaler()

In [19]:
# get normalize data
anime_num_norm = pd.DataFrame(scaler.transform(anime_num))
anime_num_norm.index = anime_num.index
anime_num_norm.columns = anime_num.columns

anime_num_norm.head()

Unnamed: 0_level_0,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32281,0.0,0.92437,0.197872
5114,0.034673,0.911164,0.78277
28977,0.027518,0.909964,0.112689
9253,0.012658,0.90036,0.664325
9969,0.027518,0.89916,0.149186


In [20]:
# validation
anime_num_norm.describe()

Unnamed: 0,episodes,rating,members
count,11954.0,12064.0,12294.0
mean,0.006264,0.576699,0.017818
std,0.025793,0.123259,0.054068
min,0.0,0.0,0.0
25%,0.0,0.505402,0.000217
50%,0.00055,0.588235,0.001524
75%,0.006054,0.661465,0.009303
max,1.0,1.0,1.0


### create numerical vectorized function

In [21]:
def numerical_vectorizer(data, num_cols):
    '''
    create numerical vector from a given data

    parameters
    ----------
    data : pandas dataframe
        sample data

    num_cols : list
        choosen numerical columns

    returns
    -------
    data_num_clean : pandas dataframe
        sample data with vectorized numerical columns
    '''
    
    data = data.copy()

    # filter data
    data_num = data[num_cols]

    # transform data
    scaler = MinMaxScaler()
    scaler.fit(data_num)

    data_num_clean = pd.DataFrame(scaler.transform(data_num))
    data_num_clean.index = data_num.index
    data_num_clean.columns = data_num.columns

    print('shape of original data:', data.shape)
    print('shape of numerical data:', data_num_clean.shape)

    return data_num_clean

In [22]:
anime_num = numerical_vectorizer(data=anime, num_cols=num_cols)

shape of original data: (12294, 6)
shape of numerical data: (12294, 3)


In [23]:
anime_num.head()

Unnamed: 0_level_0,episodes,rating,members
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32281,0.0,0.92437,0.197872
5114,0.034673,0.911164,0.78277
28977,0.027518,0.909964,0.112689
9253,0.012658,0.90036,0.664325
9969,0.027518,0.89916,0.149186


### categorical features

In [24]:
# set categorical feature columns
cat_cols = ['genre', 'type']

In [25]:
# create categorical features anime
anime_cat = anime[cat_cols]
anime_cat.head()

Unnamed: 0_level_0,genre,type
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
32281,"Drama, Romance, School, Supernatural",Movie
5114,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV
28977,"Action, Comedy, Historical, Parody, Samurai, S...",TV
9253,"Sci-Fi, Thriller",TV
9969,"Action, Comedy, Historical, Parody, Samurai, S...",TV


In [26]:
anime_cat.shape

(12294, 2)

In [27]:
anime_genre = anime_cat.genre.str.get_dummies(sep=',').add_prefix('genre_')
anime_cat = anime_cat.drop('genre', axis=1)
anime_cat = anime_cat.join(anime_genre)

anime_cat.head()

Unnamed: 0_level_0,type,genre_ Adventure,genre_ Cars,genre_ Comedy,genre_ Dementia,genre_ Demons,genre_ Drama,genre_ Ecchi,genre_ Fantasy,genre_ Game,...,genre_Shoujo,genre_Shounen,genre_Slice of Life,genre_Space,genre_Sports,genre_Super Power,genre_Supernatural,genre_Thriller,genre_Vampire,genre_Yaoi
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32281,Movie,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5114,TV,1,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
28977,TV,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9253,TV,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9969,TV,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
anime_type = anime_cat.type.str.get_dummies().add_prefix('type_')
anime_cat = anime_cat.drop('type', axis=1)
anime_cat = anime_cat.join(anime_type)

anime_cat.head()

Unnamed: 0_level_0,genre_ Adventure,genre_ Cars,genre_ Comedy,genre_ Dementia,genre_ Demons,genre_ Drama,genre_ Ecchi,genre_ Fantasy,genre_ Game,genre_ Harem,...,genre_Supernatural,genre_Thriller,genre_Vampire,genre_Yaoi,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32281,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
5114,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
28977,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9253,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9969,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [29]:
anime_cat.shape

(12294, 88)

### create categorical vectorized function

In [30]:
def categorical_vectorizer(data, cat_cols):
    '''
    create categorical vector from a given data

    parameters
    ----------
    data : pandas dataframe
        sample data

    cat_cols : list
        choosen categorical columns

    returns
    -------
    data_num_clean : pandas dataframe
        sample data with vectorized categorical columns
    '''
    
    data = data.copy()

    # filter data
    data_cat = data[cat_cols]

    # one hot encoding column genre
    anime_genre = data_cat.genre.str.get_dummies(sep=',').add_prefix('genre_')
    
    # one hot encoding column type
    anime_type = data_cat.type.str.get_dummies().add_prefix('type_')
    
    data_cat_clean = data_cat.drop(['genre', 'type'], axis=1)
    data_cat_clean = data_cat_clean.join(anime_genre).join(anime_type)

    print('shape of original data:', data.shape)
    print('shape of numerical data:', data_cat_clean.shape)

    return data_cat_clean

In [31]:
anime_cat = categorical_vectorizer(data=anime, cat_cols=cat_cols)

shape of original data: (12294, 6)
shape of numerical data: (12294, 88)


In [32]:
anime_cat.head()

Unnamed: 0_level_0,genre_ Adventure,genre_ Cars,genre_ Comedy,genre_ Dementia,genre_ Demons,genre_ Drama,genre_ Ecchi,genre_ Fantasy,genre_ Game,genre_ Harem,...,genre_Supernatural,genre_Thriller,genre_Vampire,genre_Yaoi,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32281,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
5114,1,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
28977,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9253,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9969,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


### combine numerical and categorical features

In [33]:
anime_ready = pd.concat((anime_num, anime_cat), axis=1)
anime_ready.head()

Unnamed: 0_level_0,episodes,rating,members,genre_ Adventure,genre_ Cars,genre_ Comedy,genre_ Dementia,genre_ Demons,genre_ Drama,genre_ Ecchi,...,genre_Supernatural,genre_Thriller,genre_Vampire,genre_Yaoi,type_Movie,type_Music,type_ONA,type_OVA,type_Special,type_TV
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
32281,0.0,0.92437,0.197872,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
5114,0.034673,0.911164,0.78277,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
28977,0.027518,0.909964,0.112689,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9253,0.012658,0.90036,0.664325,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
9969,0.027518,0.89916,0.149186,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [34]:
anime_ready.shape

(12294, 91)

## 3. content based recommendations

In [35]:
def jaccard_similarity(vec_a, vec_b):
    '''
    calculate jaccard similarity
    '''
    
    # convert to vector
    vec_a = np.array(vec_a)
    vec_b = np.array(vec_b)

    # find intersection & number of items
    n_intersect = np.sum((vec_a==1) & (vec_b==1))
    n_item = np.sum((vec_a==1) | (vec_b==1))

    # calculate similarity
    sim = n_intersect / n_item

    return sim

In [36]:
def euclidean_distance(vec_a, vec_b):
    '''
    calculate euclidean distance between vec a and vec b
    '''
    
    # transform to vector
    vec_a = np.array(vec_a)
    vec_B = np.array(vec_b)

    # calculate distance
    inside_the_root = np.sum((vec_a - vec_b)**2)
    dist = np.sqrt(inside_the_root)

    return dist

def euclidean_similarity(vec_a, vec_b):
    '''
    compute euclidean similarity between vec a and vec b
    '''
    
    # calculate Euclidean distance
    dist_ab = euclidean_distance(vec_a, vec_b)

    # calculate similarity
    sim = 1.0 / (1.0 + dist_ab)

    return sim

In [37]:
def cosine_similarity(vec_a, vec_b):
    '''
    calculate cosine similarity between vec a and vec b
    '''
    
    # find norm
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)

    # find dot
    dot_ab = np.dot(vec_a, vec_b)

    # calculate similarity
    sim = dot_ab / (norm_a * norm_b)

    return sim

In [38]:
def pearson_similarity(vec_a, vec_b):
    '''
    calculate pearson similarity between vec a and vec b
    '''
    
    # calculate numerator & denominator
    numerator = np.cov(vec_a, vec_b)[0, 1]
    denominator = np.sqrt(np.cov(vec_a) * np.cov(vec_b))

    # calculate similarity
    sim = numerator / denominator

    return sim

In [39]:
anime_a = anime_ready.loc[9969]
anime_b = anime_ready.loc[918]

# calculate similarity
jac = jaccard_similarity(anime_a, anime_b)
euc = euclidean_similarity(anime_a, anime_b)
cos = cosine_similarity(anime_a, anime_b)
per = pearson_similarity(anime_a, anime_b)

jac, euc, cos, per

(1.0, 0.8327197458081089, 0.9977333332184977, 0.9975025888054089)

### find similar items based on user last consumed item

In [40]:
latest_anime_id = 5114
anime.loc[[latest_anime_id]].T

anime_id,5114
name,Fullmetal Alchemist: Brotherhood
genre,"Action, Adventure, Drama, Fantasy, Magic, Mili..."
type,TV
episodes,64
rating,9.26
members,793665


In [41]:
# generate similarity score
n_anime = len(anime_num.index)
similarity_score = np.zeros(n_anime)

anime_reference = anime_num.loc[latest_anime_id]

# iterate whole anime (anime_num)
for i, anime_id in enumerate(tqdm(anime_num.index)):
    # extract anime sample
    anime_sample = anime_num.loc[anime_id]

    # calculate similarity
    sim = euclidean_similarity(anime_reference, anime_sample)

    # append
    similarity_score[i] = sim

100%|███████████████████████████████████| 12294/12294 [00:07<00:00, 1702.34it/s]


In [42]:
similarity_score

array([0.63048782, 1.        , 0.59875933, ..., 0.51456744, 0.51630552,
       0.52450925])

In [43]:
# sort in descending orders of similarity_score
sorted_id = np.argsort(similarity_score)[::-1]

# return n top similar anime
n = 5
top_anime_id = anime_ready.index[sorted_id[1:n+1]]
top_anime_id

Int64Index([1575, 9253, 6547, 16498, 4224], dtype='int64', name='anime_id')

In [44]:
anime.loc[[latest_anime_id] + list(top_anime_id)].T

anime_id,5114,1575,9253,6547,16498,4224
name,Fullmetal Alchemist: Brotherhood,Code Geass: Hangyaku no Lelouch,Steins;Gate,Angel Beats!,Shingeki no Kyojin,Toradora!
genre,"Action, Adventure, Drama, Fantasy, Magic, Mili...","Action, Mecha, Military, School, Sci-Fi, Super...","Sci-Fi, Thriller","Action, Comedy, Drama, School, Supernatural","Action, Drama, Fantasy, Shounen, Super Power","Comedy, Romance, School, Slice of Life"
type,TV,TV,TV,TV,TV,TV
episodes,64,25,24,13,25,25
rating,9.26,8.83,9.17,8.39,8.54,8.45
members,793665,715151,673572,717796,896229,633817


### create anime recomendation function

In [45]:
def anime_recommendation(anime_id, n, anime_data, similarity_func):
    '''
    recommend n item based on latest watch anime_id
    '''
    
    # generate similarity score
    n_anime = len(anime_data.index)
    similarity_score = np.zeros(n_anime)

    anime_reference = anime_data.loc[latest_anime_id]
    
    # iterate whole anime (anime_num)
    for i, anime_id in enumerate(tqdm(anime_data.index)):
        # extract anime sample
        anime_sample = anime_data.loc[anime_id]

        # calculate similarity
        sim = similarity_func(anime_reference, anime_sample)

        # append
        similarity_score[i] = sim
    
    # sort in descending orders of similarity_score
    sorted_id = np.argsort(similarity_score)[::-1]

    # return n top similar anime
    top_anime_id = anime_ready.index[sorted_id[1:n+1]]

    return top_anime_id

In [46]:
reference_anime_id = 5114
anime.loc[[reference_anime_id]].T

anime_id,5114
name,Fullmetal Alchemist: Brotherhood
genre,"Action, Adventure, Drama, Fantasy, Magic, Mili..."
type,TV
episodes,64
rating,9.26
members,793665


### categorical features + jaccard similarity function

In [47]:
top_anime_id = anime_recommendation(
    anime_id=reference_anime_id,
    n=10,
    anime_data=anime_cat,
    similarity_func=jaccard_similarity
)

anime.loc[[reference_anime_id] + list(top_anime_id)].T

100%|███████████████████████████████████| 12294/12294 [00:02<00:00, 4534.28it/s]


anime_id,5114,121,31741,1775,8086,18115,14513,28833,473,9135,6702
name,Fullmetal Alchemist: Brotherhood,Fullmetal Alchemist,Magi: Sinbad no Bouken (TV),Jikuu Tenshou Nazca,Densetsu no Yuusha no Densetsu,Magi: The Kingdom of Magic,Magi: The Labyrinth of Magic,Chain Chronicle: Haecceitas no Hikari,Tide-Line Blue,Fullmetal Alchemist: The Sacred Star of Milos,Fairy Tail
genre,"Action, Adventure, Drama, Fantasy, Magic, Mili...","Action, Adventure, Comedy, Drama, Fantasy, Mag...","Action, Adventure, Fantasy, Magic, Shounen","Action, Adventure, Drama, Fantasy, Magic","Action, Adventure, Fantasy, Magic, Shounen","Action, Adventure, Fantasy, Magic, Shounen","Action, Adventure, Fantasy, Magic, Shounen","Action, Adventure, Fantasy, Magic, Shounen","Action, Adventure, Drama, Military, Shounen","Action, Adventure, Comedy, Drama, Fantasy, Mag...","Action, Adventure, Comedy, Fantasy, Magic, Sho..."
type,TV,TV,TV,TV,TV,TV,TV,TV,TV,Movie,TV
episodes,64,51,13,12,24,25,25,12,12,1,175
rating,9.26,8.33,8.22,5.88,7.83,8.5,8.24,,6.77,7.5,8.22
members,793665,600384,73480,3080,130689,245026,317513,5678,4670,87944,584590


### numerical features + euclidean similarity function

In [48]:
top_anime_id = anime_recommendation(
    anime_id=reference_anime_id,
    n=10,
    anime_data=anime_num,
    similarity_func=euclidean_similarity
)

anime.loc[[reference_anime_id] + list(top_anime_id)].T

100%|███████████████████████████████████| 12294/12294 [00:06<00:00, 1802.29it/s]


anime_id,5114,1575,9253,6547,16498,4224,10620,11757,19815,121,2904
name,Fullmetal Alchemist: Brotherhood,Code Geass: Hangyaku no Lelouch,Steins;Gate,Angel Beats!,Shingeki no Kyojin,Toradora!,Mirai Nikki (TV),Sword Art Online,No Game No Life,Fullmetal Alchemist,Code Geass: Hangyaku no Lelouch R2
genre,"Action, Adventure, Drama, Fantasy, Magic, Mili...","Action, Mecha, Military, School, Sci-Fi, Super...","Sci-Fi, Thriller","Action, Comedy, Drama, School, Supernatural","Action, Drama, Fantasy, Shounen, Super Power","Comedy, Romance, School, Slice of Life","Action, Mystery, Psychological, Shounen, Super...","Action, Adventure, Fantasy, Game, Romance","Adventure, Comedy, Ecchi, Fantasy, Game, Super...","Action, Adventure, Comedy, Drama, Fantasy, Mag...","Action, Drama, Mecha, Military, Sci-Fi, Super ..."
type,TV,TV,TV,TV,TV,TV,TV,TV,TV,TV,TV
episodes,64,25,24,13,25,25,26,25,12,51,25
rating,9.26,8.83,9.17,8.39,8.54,8.45,8.07,7.83,8.47,8.33,8.98
members,793665,715151,673572,717796,896229,633817,657190,893100,602291,600384,572888


### numerical + categorical features + pearson similarity function

In [49]:
top_anime_id = anime_recommendation(
    anime_id=reference_anime_id,
    n=10,
    anime_data=anime_ready,
    similarity_func=pearson_similarity
)

anime.loc[[reference_anime_id] + list(top_anime_id)].T

100%|███████████████████████████████████| 12294/12294 [00:06<00:00, 1763.51it/s]


anime_id,5114,31445,31422,33737,33788,33479,32892,32518,33026,32900,1735
name,Fullmetal Alchemist: Brotherhood,Minna Atsumare! Falcom Gakuen 3rd Season,Minami Kamakura Koukou Joshi Jitensha-bu,Megaton-kyuu Musashi,Marginal#4: Kiss kara Tsukuru Big Bang,Muzumuzu Eighteen,Himitsukessha Taka no Tsume GT,Shoujo kara Shoujo e...,Time Bokan 24,Mahouka Koukou no Rettousei Movie: Hoshi wo Yo...,Naruto: Shippuuden
genre,"Action, Adventure, Drama, Fantasy, Magic, Mili...","Comedy, Parody, School, Seinen","School, Shounen, Sports","Mecha, School","Music, Shoujo",Slice of Life,"Comedy, Parody, Super Power",Hentai,"Adventure, Comedy, Mecha","Action, Magic, Romance, School, Sci-Fi, Supern...","Action, Comedy, Martial Arts, Shounen, Super P..."
type,TV,TV,TV,TV,TV,TV,ONA,OVA,TV,Movie,TV
episodes,64,,,,,,,,,1,
rating,9.26,,,,,3.8,5.14,7.08,5.28,,7.94
members,793665,245,2216,173,1766,72,80,1964,2243,13884,533578
