## Importing necessary libraries

In [18]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances

## Import Data

In [2]:
Book = pd.read_csv("book.csv",encoding='latin-1')
Book

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6
...,...,...,...,...
9995,9996,162121,American Fried: Adventures of a Happy Eater.,7
9996,9997,162121,Cannibal In Manhattan,9
9997,9998,162121,How to Flirt: A Practical Guide,7
9998,9999,162121,Twilight,8


## Data understanding

In [3]:
Book.shape

(10000, 4)

In [4]:
Book.describe(include='all')

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
count,10000.0,10000.0,10000,10000.0
unique,,,9659,
top,,,Fahrenheit 451,
freq,,,5,
mean,5000.5,95321.2498,,7.5663
std,2886.89568,117645.703609,,1.82152
min,1.0,8.0,,1.0
25%,2500.75,2103.0,,7.0
50%,5000.5,3757.0,,8.0
75%,7500.25,162052.0,,9.0


In [5]:
Book.dtypes

Unnamed: 0      int64
User.ID         int64
Book.Title     object
Book.Rating     int64
dtype: object

In [6]:
Book.isnull().sum()

Unnamed: 0     0
User.ID        0
Book.Title     0
Book.Rating    0
dtype: int64

## Data Cleaning

In [7]:
del Book['Unnamed: 0']

In [8]:
Book = Book.rename(columns={'User.ID':'UserID','Book.Title':'Book_Title','Book.Rating':'Rating'})
Book

Unnamed: 0,UserID,Book_Title,Rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6
3,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,276737,The Mummies of Urumchi,6
...,...,...,...
9995,162121,American Fried: Adventures of a Happy Eater.,7
9996,162121,Cannibal In Manhattan,9
9997,162121,How to Flirt: A Practical Guide,7
9998,162121,Twilight,8


In [9]:
Book['UserID'].nunique()

2182

In [10]:
Book['UserID'].unique()

array([276726, 276729, 276736, ..., 162113, 162121, 162129], dtype=int64)

In [11]:
Book['Book_Title'].nunique()

9659

In [12]:
Book['Book_Title'].unique()

array(['Classical Mythology', 'Clara Callan', 'Decision in Normandy', ...,
       'How to Flirt: A Practical Guide', 'Twilight',
       'Kids Say the Darndest Things'], dtype=object)

## Data prepartaion

In [13]:
Book_recommendation = pd.pivot_table(data = Book,index = 'UserID', columns = 'Book_Title',values = 'Rating').reset_index(drop=True)
Book_recommendation.head(20)

Book_Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",...,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi,stardust,Ã?Â?bermorgen.
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,
7,,,,,,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,


In [15]:
Book_recommendation.fillna(value=0,axis=0,inplace=True)
Book_recommendation

Book_Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,...AND THE HORSE HE RODE IN ON : THE PEOPLE V. KENNETH STARR,01-01-00: A Novel of the Millennium,"1,401 More Things That P*Ss Me Off",10 Commandments Of Dating,"100 Great Fantasy Short, Short Stories",...,Zora Hurston and the Chinaberry Tree (Reading Rainbow Book),\Even Monkeys Fall from Trees\ and Other Japanese Proverbs,\I Won't Learn from You\: And Other Thoughts on Creative Maladjustment,"\More More More,\ Said the Baby",\O\ Is for Outlaw,"\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character","\Well, there's your problem\: Cartoons",iI Paradiso Degli Orchi,stardust,Ã?Â?bermorgen.
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2177,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2178,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2179,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0
2180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
Book_recommendation.dtypes

Book_Title
 Jason, Madison &amp                                                       float64
 Other Stories;Merril;1985;McClelland &amp                                 float64
 Repairing PC Drives &amp                                                  float64
'48                                                                        float64
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities      float64
                                                                            ...   
\Surely You're Joking, Mr. Feynman!\: Adventures of a Curious Character    float64
\Well, there's your problem\: Cartoons                                     float64
iI Paradiso Degli Orchi                                                    float64
stardust                                                                   float64
Ã?Â?bermorgen.                                                             float64
Length: 9659, dtype: object

## Model Building and Identifying the similar users for recommedation

In [17]:
Book_recommendation.values

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 7., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [19]:
Book_similarity = pairwise_distances(X = Book_recommendation.values)
Book_similarity

array([[ 0.        , 16.03121954, 16.03121954, ..., 29.05167809,
        16.88194302, 22.737634  ],
       [16.03121954,  0.        ,  8.48528137, ..., 25.67099531,
        10.        , 18.22086716],
       [16.03121954,  8.48528137,  0.        , ..., 25.67099531,
        10.        , 18.22086716],
       ...,
       [29.05167809, 25.67099531, 25.67099531, ...,  0.        ,
        26.21068484, 30.31501278],
       [16.88194302, 10.        , 10.        , ..., 26.21068484,
         0.        , 18.97366596],
       [22.737634  , 18.22086716, 18.22086716, ..., 30.31501278,
        18.97366596,  0.        ]])

In [20]:
New_Book_df = pd.DataFrame(Book_similarity)
New_Book_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2172,2173,2174,2175,2176,2177,2178,2179,2180,2181
0,0.000000,16.031220,16.031220,17.916473,17.521415,17.378147,18.439089,16.431677,16.431677,20.049938,...,17.916473,22.825424,16.881943,37.854986,18.493242,16.881943,17.378147,29.051678,16.881943,22.737634
1,16.031220,0.000000,8.485281,11.661904,11.045361,10.816654,12.449900,9.219544,9.219544,14.730920,...,11.661904,18.330303,10.000000,35.327043,12.529964,10.000000,10.816654,25.670995,10.000000,18.220867
2,16.031220,8.485281,0.000000,11.661904,11.045361,10.816654,12.449900,9.219544,9.219544,14.730920,...,11.661904,18.330303,10.000000,35.327043,12.529964,10.000000,10.816654,25.670995,10.000000,18.220867
3,17.916473,11.661904,11.661904,0.000000,13.638182,13.453624,14.798649,12.206556,12.206556,16.763055,...,14.142136,20.000000,12.806248,36.221541,14.866069,12.806248,13.453624,26.888659,12.806248,19.899749
4,17.521415,11.045361,11.045361,13.638182,0.000000,12.922848,14.317821,11.618950,11.618950,16.340135,...,13.638182,19.646883,12.247449,36.027767,14.387495,12.247449,12.922848,26.627054,12.247449,19.544820
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2177,16.881943,10.000000,10.000000,12.806248,12.247449,12.041595,13.527749,10.630146,10.630146,15.652476,...,12.806248,19.078784,11.313708,35.721142,13.601471,0.000000,12.041595,26.210685,11.313708,18.973666
2178,17.378147,10.816654,10.816654,13.453624,12.922848,12.727922,14.142136,11.401754,11.401754,16.186414,...,13.453624,19.519221,12.041595,35.958309,14.212670,12.041595,0.000000,26.532998,12.041595,19.416488
2179,29.051678,25.670995,25.670995,26.888659,26.627054,26.532998,27.239677,25.922963,25.922963,28.354894,...,26.888659,30.380915,26.210685,42.836900,27.276363,26.210685,26.532998,0.000000,26.210685,30.315013
2180,16.881943,10.000000,10.000000,12.806248,12.247449,12.041595,13.527749,10.630146,10.630146,15.652476,...,12.806248,19.078784,11.313708,35.721142,13.601471,11.313708,12.041595,26.210685,0.000000,18.973666


In [23]:
#Replace the index

New_Book_df.index = Book['UserID'].unique()
New_Book_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2172,2173,2174,2175,2176,2177,2178,2179,2180,2181
276726,0.0,16.03122,16.03122,17.916473,17.521415,17.378147,18.439089,16.431677,16.431677,20.049938,...,17.916473,22.825424,16.881943,37.854986,18.493242,16.881943,17.378147,29.051678,16.881943,22.737634
276729,16.03122,0.0,8.485281,11.661904,11.045361,10.816654,12.4499,9.219544,9.219544,14.73092,...,11.661904,18.330303,10.0,35.327043,12.529964,10.0,10.816654,25.670995,10.0,18.220867
276736,16.03122,8.485281,0.0,11.661904,11.045361,10.816654,12.4499,9.219544,9.219544,14.73092,...,11.661904,18.330303,10.0,35.327043,12.529964,10.0,10.816654,25.670995,10.0,18.220867
276737,17.916473,11.661904,11.661904,0.0,13.638182,13.453624,14.798649,12.206556,12.206556,16.763055,...,14.142136,20.0,12.806248,36.221541,14.866069,12.806248,13.453624,26.888659,12.806248,19.899749
276744,17.521415,11.045361,11.045361,13.638182,0.0,12.922848,14.317821,11.61895,11.61895,16.340135,...,13.638182,19.646883,12.247449,36.027767,14.387495,12.247449,12.922848,26.627054,12.247449,19.54482


In [25]:
#Replace the columns

New_Book_df.columns = Book['UserID'].unique()
New_Book_df.head()

Unnamed: 0,276726,276729,276736,276737,276744,276745,276747,276748,276751,276754,...,162085,162091,162092,162095,162103,162107,162109,162113,162121,162129
276726,0.0,16.03122,16.03122,17.916473,17.521415,17.378147,18.439089,16.431677,16.431677,20.049938,...,17.916473,22.825424,16.881943,37.854986,18.493242,16.881943,17.378147,29.051678,16.881943,22.737634
276729,16.03122,0.0,8.485281,11.661904,11.045361,10.816654,12.4499,9.219544,9.219544,14.73092,...,11.661904,18.330303,10.0,35.327043,12.529964,10.0,10.816654,25.670995,10.0,18.220867
276736,16.03122,8.485281,0.0,11.661904,11.045361,10.816654,12.4499,9.219544,9.219544,14.73092,...,11.661904,18.330303,10.0,35.327043,12.529964,10.0,10.816654,25.670995,10.0,18.220867
276737,17.916473,11.661904,11.661904,0.0,13.638182,13.453624,14.798649,12.206556,12.206556,16.763055,...,14.142136,20.0,12.806248,36.221541,14.866069,12.806248,13.453624,26.888659,12.806248,19.899749
276744,17.521415,11.045361,11.045361,13.638182,0.0,12.922848,14.317821,11.61895,11.61895,16.340135,...,13.638182,19.646883,12.247449,36.027767,14.387495,12.247449,12.922848,26.627054,12.247449,19.54482


In [26]:
Book_similarity = 1 - pairwise_distances(X = Book_recommendation.values, metric='cosine')
Book_similarity

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [27]:
New_Book_df = pd.DataFrame(Book_similarity)
New_Book_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2172,2173,2174,2175,2176,2177,2178,2179,2180,2181
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
#Replace the index

New_Book_df.index = Book['UserID'].unique()
New_Book_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2172,2173,2174,2175,2176,2177,2178,2179,2180,2181
276726,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276729,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276736,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276737,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276744,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
#Replace the columns

New_Book_df.columns = Book['UserID'].unique()
New_Book_df.head()

Unnamed: 0,276726,276729,276736,276737,276744,276745,276747,276748,276751,276754,...,162085,162091,162092,162095,162103,162107,162109,162113,162121,162129
276726,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276729,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276736,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276737,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276744,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
# Comparing 1st row 1st column or 2nd row 2nd column gives as 1 and that is not usefull to us
#so lets fill value to all diagonal

np.fill_diagonal(a = Book_similarity, val = 0)

In [36]:
New_Book_df.head()

Unnamed: 0,276726,276729,276736,276737,276744,276745,276747,276748,276751,276754,...,162085,162091,162092,162095,162103,162107,162109,162113,162121,162129
276726,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276729,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276737,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
276744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [42]:
#To findout most similar users
New_Book_df.idxmax(axis=1)

276726    276726
276729    276726
276736    276726
276737    276726
276744    276726
           ...  
162107    276726
162109    276726
162113    161453
162121    276726
162129    276726
Length: 2182, dtype: int64

In [44]:
# extract the books which userId 162107 & 276726 have watched

Book[(Book['UserID']==162107) | (Book['UserID']==276726)]

Unnamed: 0,UserID,Book_Title,Rating
0,276726,Classical Mythology,5
9987,162107,What's Bred in the Bone,7


In [46]:
# extract the books which userId 276729 & 276726 have watched

Book[(Book['UserID']==276729) | (Book['UserID']==276726)]

Unnamed: 0,UserID,Book_Title,Rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6


In [47]:
user_1=Book[(Book['UserID']==276729)]
user_2=Book[(Book['UserID']==276726)]

In [48]:
user_1

Unnamed: 0,UserID,Book_Title,Rating
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6


In [49]:
user_2

Unnamed: 0,UserID,Book_Title,Rating
0,276726,Classical Mythology,5


In [51]:
pd.merge(user_1,user_2,on='Book_Title',how='outer')

Unnamed: 0,UserID_x,Book_Title,Rating_x,UserID_y,Rating_y
0,276729.0,Clara Callan,3.0,,
1,276729.0,Decision in Normandy,6.0,,
2,,Classical Mythology,,276726.0,5.0
