In [3]:
# Libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import json
# Load the two CSV exports: the anime catalogue and the per-user ratings.
anime_data = pd.read_csv('data/anime_from_anime_recommendations.csv')
rating_data = pd.read_csv('data/rating_from_anime_recommendations.csv')

# Join the ratings onto the catalogue by anime_id (pd.merge's default inner join).
# A column name present in both frames is kept as-is on the catalogue side and
# gets a '_user' suffix on the ratings side; the rename then gives the title and
# the per-user rating self-describing names.
anime_fulldata = (
    pd.merge(anime_data, rating_data, on='anime_id', suffixes=['', '_user'])
    .rename(columns={'name': 'anime_title', 'rating_user': 'user_rating'})
)

# Number of rating rows per title, most frequent first.
anime_fulldata['anime_title'].value_counts()

Death Note                                                        39340
Sword Art Online                                                  30583
Shingeki no Kyojin                                                29584
Code Geass: Hangyaku no Lelouch                                   27718
Elfen Lied                                                        27506
Angel Beats!                                                      27183
Naruto                                                            25925
Fullmetal Alchemist                                               25032
Fullmetal Alchemist: Brotherhood                                  24574
Toradora!                                                         24283
Code Geass: Hangyaku no Lelouch R2                                24242
Highschool of the Dead                                            23065
Sen to Chihiro no Kamikakushi                                     22974
Mirai Nikki (TV)                                                

In [4]:
# Users with fewer ratings than this are dropped before building the model.
MIN_RATINGS_PER_USER = 200

# Work on a copy so the merged frame stays available unchanged.
anime_feature = anime_fulldata.copy()

# Replace -1 with NaN in the user_rating column (presumably -1 marks
# "no rating given" -- confirm against the dataset description).
# The result is assigned back rather than calling replace(inplace=True) on the
# column selection: that chained in-place form warns in recent pandas and does
# not modify the frame at all once Copy-on-Write is enabled.
anime_feature["user_rating"] = anime_feature["user_rating"].replace({-1: np.nan})

# Drop every row that has a null in ANY column (not only user_rating).
anime_feature = anime_feature.dropna(axis=0, how='any')

# Keep only the ratings made by users with at least MIN_RATINGS_PER_USER ratings.
counts = anime_feature['user_id'].value_counts()
active_users = counts[counts >= MIN_RATINGS_PER_USER].index
anime_feature = anime_feature[anime_feature['user_id'].isin(active_users)]

In [5]:
# Title-by-user rating table: one row per anime title, one column per user_id.
# Cells with no rating for that (title, user) pair are filled with 0.
anime_pivot = (
    anime_feature
    .pivot_table(index='anime_title', columns='user_id', values='user_rating')
    .fillna(0)
)
anime_pivot.head()

user_id,5,7,17,38,43,46,123,129,139,160,...,73406,73417,73422,73457,73460,73476,73499,73502,73503,73507
anime_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
&quot;0&quot;,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Kyou no Oyatsu: Hatsukoi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Memoire,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
&quot;Bungaku Shoujo&quot; Movie,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0
&quot;Eiji&quot;,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Hold the zero-filled title-by-user table in compressed sparse row form.
anime_matrix = csr_matrix(anime_pivot.values)

# Brute-force nearest-neighbour search over the title rows, using cosine distance.
model_knn = NearestNeighbors(algorithm='brute', metric='cosine')
model_knn.fit(anime_matrix)

NearestNeighbors(algorithm='brute', metric='cosine')

## User is allowed to enter input instead of it being randomly chosen.

In [105]:
# Title lookup list for string queries: position i holds the title of row i
# of anime_pivot, so a title can be translated into a row position.
arr = list(anime_pivot.index)

# Default query for integer lookups: a randomly drawn row position.
query_index = np.random.choice(len(anime_pivot))

def recommend(query_index, n_recommendations=5):
    """Print the titles closest to one anime according to the fitted KNN model.

    Parameters
    ----------
    query_index : str or int
        Either an anime title exactly as it appears in ``anime_pivot.index``,
        or the integer row position of the anime in ``anime_pivot``.
    n_recommendations : int, default 5
        How many similar titles to print.

    Returns
    -------
    None
        The recommendations are printed, not returned.

    Raises
    ------
    ValueError
        If ``query_index`` is a title that is not in ``anime_pivot.index``, or
        if ``n_recommendations`` is smaller than 1.

    Notes
    -----
    Reads the notebook globals ``anime_pivot`` and ``model_knn``.
    """
    if n_recommendations < 1:
        raise ValueError('n_recommendations must be at least 1')

    if isinstance(query_index, str):
        # Resolve the title through the pivot's own index so the lookup cannot
        # drift out of sync with a separately built title list.
        try:
            idx = anime_pivot.index.get_loc(query_index)
        except KeyError:
            raise ValueError(
                '{0!r} is not a title in anime_pivot'.format(query_index)
            ) from None
    else:
        idx = query_index

    n_rows = anime_pivot.shape[0]
    # Non-negative row position of the query, used to recognise it among the neighbours.
    row_pos = idx + n_rows if idx < 0 else idx

    # Ask for one extra neighbour because the query row is itself part of the
    # fitted data; never ask for more neighbours than there are rows.
    n_neighbors = min(n_recommendations + 1, n_rows)
    query_vector = anime_pivot.iloc[idx, :].values.reshape(1, -1)
    distances, indices = model_knn.kneighbors(query_vector, n_neighbors=n_neighbors)

    # Remove the query by row position instead of assuming it is the first hit:
    # when several rows are equally close, the query need not come back first.
    neighbours = [
        (pos, dist)
        for pos, dist in zip(indices.flatten(), distances.flatten())
        if pos != row_pos
    ][:n_recommendations]

    print('Recommendations for {0} with id {1}:'.format(anime_pivot.index[idx], idx))
    for rank, (pos, dist) in enumerate(neighbours, start=1):
        print('{0}: {1}, with distance of {2}:'.format(rank, anime_pivot.index[pos], round(dist, 4)))
    print()

In [92]:
# Query by row position, using the randomly drawn query_index.
recommend(query_index)

# Query by title; the string must match an entry of anime_pivot.index exactly.
recommend('Sinbad: Soratobu Hime to Himitsu no Shima')

Recommendations for Yozakura Quartet with id 9575:
1: Yozakura Quartet: Hoshi no Umi, with distance of 0.5378614501937693:
2: Yozakura Quartet: Hana no Uta, with distance of 0.614989001018958:
3: Toaru Majutsu no Index, with distance of 0.6302324331148718:
4: Shakugan no Shana, with distance of 0.6469201431371681:
5: Fate/stay night, with distance of 0.6570393599659738:

Recommendations for Sinbad: Soratobu Hime to Himitsu no Shima with id 7959:
1: Sinbad: Mahou no Lamp to Ugoku Shima, with distance of 0.4265376556366717:
2: Tiger &amp; Bunny: Too Many Cooks Spoil the Broth, with distance of 0.5084608476885757:
3: Hikari: Kariya wo Tsunagu Monogatari, with distance of 0.5084608476885757:
4: Kindaichi Shounen no Jikenbo: Shinigami Byouin Satsujin Jiken, with distance of 0.7596739523591647:
5: Juusenshi Gulkeeva, with distance of 0.7626332250015028:



In [106]:
# Check that querying by title string and by integer row position agree:
# in the run shown below, 9575 is the row position of 'Yozakura Quartet',
# so both calls should print the same recommendations.
recommend('Yozakura Quartet')
recommend(9575)

Recommendations for Yozakura Quartet with id 9575:
1: Yozakura Quartet: Hoshi no Umi, with distance of 0.5379:
2: Yozakura Quartet: Hana no Uta, with distance of 0.615:
3: Toaru Majutsu no Index, with distance of 0.6302:
4: Shakugan no Shana, with distance of 0.6469:
5: Fate/stay night, with distance of 0.657:

Recommendations for Yozakura Quartet with id 9575:
1: Yozakura Quartet: Hoshi no Umi, with distance of 0.5379:
2: Yozakura Quartet: Hana no Uta, with distance of 0.615:
3: Toaru Majutsu no Index, with distance of 0.6302:
4: Shakugan no Shana, with distance of 0.6469:
5: Fate/stay night, with distance of 0.657:



In [None]:
# Ask for an integer query. The "anime id" here is the row position in
# anime_pivot (the value recommend() passes to .iloc), not the anime_id column.
# int() raises ValueError if the entry is not a whole number.
inp = int(input("enter an anime id:\n"))
recommend(inp)

In [123]:
# Ask for a title. input() returns a str, so recommend() looks the anime up
# by name; the text must match the stored title exactly.
inp = input("enter an anime title:\n")
recommend(inp)

enter an anime title:
Yozakura Quartet
Recommendations for Yozakura Quartet with id 9575:
1: Yozakura Quartet: Hoshi no Umi, with distance of 0.5379:
2: Yozakura Quartet: Hana no Uta, with distance of 0.615:
3: Toaru Majutsu no Index, with distance of 0.6302:
4: Shakugan no Shana, with distance of 0.6469:
5: Fate/stay night, with distance of 0.657:

