In [71]:
import pandas as pd 
import numpy as np 


## Book data set

In [72]:
books = pd.read_csv('books.csv', encoding='latin-1', low_memory=False)

In [74]:
books.shape

(271360, 8)

In [75]:
books.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271360 entries, 0 to 271359
Data columns (total 8 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   ISBN                 271360 non-null  object
 1   Book-Title           271360 non-null  object
 2   Book-Author          271358 non-null  object
 3   Year-Of-Publication  271360 non-null  object
 4   Publisher            271358 non-null  object
 5   Image-URL-S          271360 non-null  object
 6   Image-URL-M          271360 non-null  object
 7   Image-URL-L          271357 non-null  object
dtypes: object(8)
memory usage: 16.6+ MB


In [76]:
books.columns

Index(['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
       'Image-URL-S', 'Image-URL-M', 'Image-URL-L'],
      dtype='object')

In [77]:
books = books[['ISBN','Book-Title','Book-Author', 'Year-Of-Publication', 'Publisher',]]

In [81]:
books.rename(columns={'Book-Title':'title','Book-Author':'author','Year-Of-Publication':'year','Publisher':'publisher'} , inplace=True)

In [82]:
books.columns

Index(['ISBN', 'title', 'author', 'year', 'publisher'], dtype='object')

## user data set

In [83]:
users = pd.read_csv('Users.csv')

In [85]:
users.shape

(278858, 3)

In [86]:
users.rename(columns={'User-ID':'user_id', 'Location':'location', 'Age':'age'}, inplace=True)

In [87]:
users.isnull().sum()

user_id          0
location         0
age         110762
dtype: int64

# Rating data set

In [88]:
ratings = pd.read_csv('Ratings.csv')

In [89]:
ratings.columns

Index(['User-ID', 'ISBN', 'Book-Rating'], dtype='object')

In [90]:
ratings.rename(columns={'User-ID':'user_id', 'Book-Rating':'rating'}, inplace=True)

In [94]:
ratings.shape

(1149780, 3)

In [93]:
x = ratings['user_id'].value_counts()>200
x.shape

(105283,)

In [95]:
y =x[x].index

In [97]:
y.shape

(899,)

In [100]:
ratings = ratings[ratings['user_id'].isin(y)]

In [101]:
ratings.shape

(526356, 3)

In [102]:
ratings.head()

Unnamed: 0,user_id,ISBN,rating
1456,277427,002542730X,10
1457,277427,0026217457,0
1458,277427,003008685X,8
1459,277427,0030615321,0
1460,277427,0060002050,0


In [103]:
ratings_with_books=ratings.merge(books,on='ISBN')

In [104]:
ratings_with_books.shape

(487671, 7)

In [105]:
number_rating = ratings_with_books.groupby('title')['rating'].count().reset_index()

In [106]:
number_rating.head()

Unnamed: 0,title,rating
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [107]:
number_rating.rename(columns={'rating':'number of ratings'}, inplace=True)

In [108]:
number_rating.head()

Unnamed: 0,title,number of ratings
0,A Light in the Storm: The Civil War Diary of ...,2
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,Beyond IBM: Leadership Marketing and Finance ...,1
4,Clifford Visita El Hospital (Clifford El Gran...,1


In [109]:
final_rating = ratings_with_books.merge(number_rating,on='title')

In [110]:
final_rating.head(2)

Unnamed: 0,user_id,ISBN,rating,title,author,year,publisher,number of ratings
0,277427,002542730X,10,Politically Correct Bedtime Stories: Modern Ta...,James Finn Garner,1994,John Wiley &amp; Sons Inc,82
1,277427,0026217457,0,Vegetarian Times Complete Cookbook,Lucy Moll,1995,John Wiley &amp; Sons,7


In [111]:
final_rating.shape

(487671, 8)

In [112]:
final_rating=final_rating[final_rating['number of ratings'] >= 50]

In [113]:
final_rating.shape

(61853, 8)

In [114]:
final_rating.drop_duplicates(['user_id','title'] , inplace=True)

In [115]:
final_rating.shape

(59850, 8)

In [116]:
book_pivot = final_rating.pivot_table(columns='user_id', index='title',values='rating')

In [118]:
book_pivot.head(3)

user_id,254,2276,2766,2977,3363,3757,4017,4385,6242,6251,...,274004,274061,274301,274308,274808,275970,277427,277478,277639,278418
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1984,9.0,,,,,,,,,,...,,,,,,0.0,,,,
1st to Die: A Novel,,,,,,,,,,,...,,,,,,,,,,
2nd Chance,,10.0,,,,,,,,,...,,,,0.0,,,,,0.0,


In [119]:
book_pivot.shape

(742, 888)

In [120]:
book_pivot.fillna(0,inplace=True)

In [126]:
from scipy.sparse import csr_matrix 
book_sparse = csr_matrix(book_pivot)

In [127]:
type(book_sparse)

scipy.sparse._csr.csr_matrix

In [128]:
from sklearn.neighbors import NearestNeighbors 
model = NearestNeighbors(algorithm='brute')

In [129]:
model.fit(book_sparse)

In [146]:
distances, suggestions=model.kneighbors(book_pivot.iloc[237, :]
                                        .values
                                        .reshape(1,-1),
                                        n_neighbors=10)

In [147]:
distances

array([[ 0.        , 67.75691847, 68.05145112, 72.277244  , 75.81556568,
        76.30203143, 76.32823855, 76.62245102, 76.64202503, 76.75285011]])

In [148]:
suggestions

array([[237, 238, 240, 241, 184, 536, 291, 686, 675, 689]], dtype=int64)

In [149]:
for i in range(len(suggestions)):
    print(book_pivot.index[suggestions[i]])
    

Index(['Harry Potter and the Chamber of Secrets (Book 2)',
       'Harry Potter and the Goblet of Fire (Book 4)',
       'Harry Potter and the Prisoner of Azkaban (Book 3)',
       'Harry Potter and the Sorcerer's Stone (Book 1)', 'Exclusive',
       'The Cradle Will Fall', 'Jacob Have I Loved',
       'Tom Clancy's Op-Center (Tom Clancy's Op Center (Paperback))',
       'The Witness', 'Toxin'],
      dtype='object', name='title')


In [152]:
np.where(book_pivot.index=='Animal Farm')[0][0]

54

In [161]:
def recommend_book(book_name): 
    book_id=np.where(book_pivot.index==book_name)[0][0] 
    distances, suggestions=model.kneighbors(book_pivot.iloc[book_id, :]
                                        .values
                                        .reshape(1,-1),
                                        n_neighbors=10)
    for i in range(len(suggestions)): 
        if i== 0: 
            print("The suggestions for", book_name, "are : ")
        if not i: 
            print(book_pivot.index[suggestions[i]])

In [163]:
recommend_book('Exclusive')

The suggestions for Exclusive are : 
Index(['Exclusive', 'The Cradle Will Fall', 'Jacob Have I Loved',
       'Fine Things', 'The Long Road Home', 'No Safe Place', 'Eyes of a Child',
       'Deck the Halls (Holiday Classics)', 'Long After Midnight',
       'Unspeakable'],
      dtype='object', name='title')
