In [1]:
# Libraries
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import json
import pickle
# Load the anime catalogue and the per-user ratings.
anime_data = pd.read_csv('data/anime_from_anime_recommendations.csv')
rating_data = pd.read_csv('data/rating_from_anime_recommendations.csv')

# Join ratings onto the catalogue by anime_id. Columns present in both frames
# keep their name on the catalogue side and get a `_user` suffix on the
# ratings side; `rating_user` is then renamed to `user_rating`.
anime_fulldata = (
    anime_data
    .merge(rating_data, on='anime_id', suffixes=['', '_user'])
    .rename(columns={'name': 'anime_title', 'rating_user': 'user_rating'})
)

# Number of merged rows per title, most frequent first.
anime_fulldata['anime_title'].value_counts()

Death Note                            39340
Sword Art Online                      30583
Shingeki no Kyojin                    29584
Code Geass: Hangyaku no Lelouch       27718
Elfen Lied                            27506
                                      ...  
Patchin shite! Obaachan                   1
Ikkyuu-san: Haru Da! Yancha Hime          1
Yuuto-kun ga Iku Movie                    1
Goi-sensei to Tarou                       1
Ganbare Goemon: Jigen Jou no Akumu        1
Name: anime_title, Length: 11196, dtype: int64

In [2]:
# Build the feature table: rows with a real rating, from sufficiently active users.
MIN_RATINGS_PER_USER = 200  # users with fewer remaining rating rows are dropped

anime_feature = anime_fulldata.copy()

# -1 in user_rating becomes NaN so the dropna below removes those rows.
# The result is assigned back rather than calling replace(..., inplace=True)
# on the selected column: that chained in-place form is deprecated and does
# not update the parent frame once pandas Copy-on-Write is active.
anime_feature["user_rating"] = anime_feature["user_rating"].replace({-1: np.nan})

# Drop every row that has a missing value in ANY column (not only user_rating).
anime_feature = anime_feature.dropna(axis=0, how='any')

# Keep only rows belonging to users with at least MIN_RATINGS_PER_USER rows left.
counts = anime_feature['user_id'].value_counts()
active_users = counts[counts >= MIN_RATINGS_PER_USER].index
anime_feature = anime_feature[anime_feature['user_id'].isin(active_users)]

# Context manager so the file handle is flushed and closed immediately.
with open('models/anime_feature.pkl', 'wb') as fh:
    pickle.dump(anime_feature, fh)

In [3]:
# Title x user matrix of ratings. pivot_table aggregates duplicate
# (title, user) pairs with its default aggfunc (mean); combinations with no
# rating are filled with 0.
anime_pivot = anime_feature.pivot_table(
    index='anime_title', columns='user_id', values='user_rating'
).fillna(0)

# Context manager so the file handle is flushed and closed immediately.
with open('models/anime_pivot.pkl', 'wb') as fh:
    pickle.dump(anime_pivot, fh)

# Last expression of the cell, so the preview is actually displayed.
anime_pivot.head()

In [4]:
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Sparse (CSR) copy of the pivot values; one row per anime title.
anime_matrix = csr_matrix(anime_pivot.values)

# Brute-force nearest-neighbour search using cosine distance between rows.
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(anime_matrix)

# Context manager so the file handle is flushed and closed immediately.
with open('models/model_knn.pkl', 'wb') as fh:
    pickle.dump(model_knn, fh)

## The user can supply the query (an anime title or id) instead of it being chosen at random.

In [5]:
# Row labels of the pivot as a plain list: a title's position in this list
# equals its row number in anime_pivot (used for string queries).
arr = anime_pivot.index.tolist()

# A randomly drawn row number (used for integer queries).
query_index = np.random.choice(len(anime_pivot))

def recommend(query_index, n_recommendations=5):
    """Print the animes whose rating vectors are nearest to the query anime.

    Uses the module-level ``anime_pivot`` (one row per title) and the fitted
    ``model_knn``.

    Parameters
    ----------
    query_index : str or int
        Either an anime title (a label of ``anime_pivot.index``, matched
        exactly) or the positional row number of the anime in ``anime_pivot``.
    n_recommendations : int, default 5
        Maximum number of neighbours to print.

    Raises
    ------
    ValueError
        If ``query_index`` is a string that is not in ``anime_pivot.index``.
    """
    if isinstance(query_index, str):
        # Resolve the title to its row position via the index's hash lookup.
        try:
            idx = anime_pivot.index.get_loc(query_index)
        except KeyError:
            raise ValueError(
                '{0!r} is not a title in anime_pivot'.format(query_index)
            ) from None
    else:
        idx = query_index

    query_vector = anime_pivot.iloc[idx, :].values.reshape(1, -1)

    # Request one extra neighbour because the query anime itself normally
    # comes back as a match; never ask for more rows than the pivot has.
    k = min(n_recommendations + 1, anime_pivot.shape[0])
    distances, indices = model_knn.kneighbors(query_vector, n_neighbors=k)

    # Drop the query by position rather than assuming it is the first hit:
    # with tied distances it can appear in a later slot.
    neighbours = [
        (dist, pos)
        for dist, pos in zip(distances.ravel(), indices.ravel())
        if pos != idx
    ][:n_recommendations]

    print('Recommendations for {0} with id {1}:'.format(anime_pivot.index[idx], idx))
    for rank, (dist, pos) in enumerate(neighbours, start=1):
        print('{0}: {1}, with distance of {2}:'.format(rank, anime_pivot.index[pos], round(dist, 4)))
    print()

In [6]:
# Query once by title, then with the randomly drawn row number.
recommend("Death Note")
recommend(query_index)

# Another query by title.
recommend('Sinbad: Soratobu Hime to Himitsu no Shima')

Recommendations for Death Note with id 1654:
1: Code Geass: Hangyaku no Lelouch, with distance of 0.1898:
2: Code Geass: Hangyaku no Lelouch R2, with distance of 0.2079:
3: Fullmetal Alchemist: Brotherhood, with distance of 0.2334:
4: Elfen Lied, with distance of 0.2346:
5: Shingeki no Kyojin, with distance of 0.2364:

Recommendations for Hokuto no Ken 2 with id 3403:
1: Hokuto no Ken, with distance of 0.2699:
2: Hokuto no Ken Zero: Kenshirou Den, with distance of 0.5149:
3: Hokuto no Ken: Toki-den, with distance of 0.5349:
4: Hokuto no Ken: Yuria-den, with distance of 0.5353:
5: Shin Hokuto no Ken, with distance of 0.5379:

Recommendations for Sinbad: Soratobu Hime to Himitsu no Shima with id 7959:
1: Sinbad: Mahou no Lamp to Ugoku Shima, with distance of 0.4265:
2: Tiger &amp; Bunny: Too Many Cooks Spoil the Broth, with distance of 0.5085:
3: Hikari: Kariya wo Tsunagu Monogatari, with distance of 0.5085:
4: Kindaichi Shounen no Jikenbo: Shinigami Byouin Satsujin Jiken, with distance 

In [21]:
# Sanity check: querying by title and by the matching row id should print the
# same recommendations. 9575 is the id printed for 'Yozakura Quartet' in the
# output below; it depends on the current contents of anime_pivot.
recommend('Yozakura Quartet')
recommend(9575)

Recommendations for Yozakura Quartet with id 9575:
1: Yozakura Quartet: Hoshi no Umi, with distance of 0.5379:
2: Yozakura Quartet: Hana no Uta, with distance of 0.615:
3: Toaru Majutsu no Index, with distance of 0.6302:
4: Shakugan no Shana, with distance of 0.6469:
5: Fate/stay night, with distance of 0.657:

Recommendations for Yozakura Quartet with id 9575:
1: Yozakura Quartet: Hoshi no Umi, with distance of 0.5379:
2: Yozakura Quartet: Hana no Uta, with distance of 0.615:
3: Toaru Majutsu no Index, with distance of 0.6302:
4: Shakugan no Shana, with distance of 0.6469:
5: Fate/stay night, with distance of 0.657:



In [None]:
# Prompt for a positional row id; int() raises ValueError on non-numeric input.
inp = int(input("enter an anime id:\n"))
recommend(inp)

In [123]:
# Prompt for a title; input() returns a str, so recommend() treats it as a
# title, which has to match an anime_pivot row label exactly.
inp = input("enter an anime title:\n")
recommend(inp)

enter an anime title:
Yozakura Quartet
Recommendations for Yozakura Quartet with id 9575:
1: Yozakura Quartet: Hoshi no Umi, with distance of 0.5379:
2: Yozakura Quartet: Hana no Uta, with distance of 0.615:
3: Toaru Majutsu no Index, with distance of 0.6302:
4: Shakugan no Shana, with distance of 0.6469:
5: Fate/stay night, with distance of 0.657:

