### 0.Imports

In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### 1.Load processed data

In [2]:
data = pd.read_csv('data.csv')

In [3]:
# making the new column containing combination of all the features
data['comb'] = data['author_genres'] + ' ' + data['author_name'] + ' '+ data['book_title'] + ' '+ data['genre_1'] +' ' + data['genre_2']

In [4]:
data.head()

Unnamed: 0,author_genres,author_name,book_title,genre_1,genre_2,comb
0,historical fiction,victoria thompson,murder on st. mark's place,mystery,historical,historical fiction victoria thompson murder on...
1,literature fiction mystery thrillers,stieg larsson,the girl with the dragon tattoo,fiction,mystery,literature fiction mystery thrillers stieg lar...
2,romance,mimi jean pamfiloff,tailored for trouble,romance,contemporary,romance mimi jean pamfiloff tailored for troub...
3,fiction memoir,josé donoso,the obscene bird of night,fiction,magical realism,fiction memoir josé donoso the obscene bird of...
4,young adult fantasy,patricia c. wrede,sorcery & cecelia or the enchanted chocolate pot,fantasy,young adult,young adult fantasy patricia c. wrede sorcery ...


### 2.Feature Encoding

In [5]:
# creating a count matrix
cv = CountVectorizer()
count_matrix = cv.fit_transform(data['comb'])

### 3.Compute the similarity score

In [6]:
# creating a similarity score matrix
sim = cosine_similarity(count_matrix)

### 4.Save weights and modified dataframe

In [7]:
# saving the similarity score matrix in a file for later use
np.save('similarity_matrix', sim)

In [8]:
# saving dataframe to csv for later use in main file
data.to_csv('data.csv',index=False)

### 5.Find the recommendations

In [9]:
#provide the book name
b = 'Harry Potter and the Half Blood Prince'
#convert to lowercase
b = b.lower()

In [10]:
#check if book exits
b in data['book_title'].unique()

True

In [11]:
#get the index value of the book in data dataframe
i = data.loc[data['book_title'] == b].index[0]
print(i)

9939


In [12]:
#get the score in the similarity matrix for this particular book
sim[i]

array([0.08006408, 0.25819889, 0.        , ..., 0.10206207, 0.19245009,
       0.08333333])

In [23]:
#we will enumerate through the array and store it in lst
lst = list(enumerate(sim[i]))
# sorting this list in decreasing order based on the similarity score
lst = sorted(lst, key = lambda x:x[1] ,reverse=True)
# taking top 1- book scores
# not taking the first index since it is the same book
lst = lst[1:11]

In [16]:
lst

[(2960, 0.7500000000000002),
 (8276, 0.7453559924999299),
 (128, 0.6390096504226939),
 (2327, 0.6019292654288462),
 (12121, 0.5960395606792697),
 (13175, 0.5896618941607872),
 (8933, 0.560112033611204),
 (3963, 0.5555555555555557),
 (3692, 0.5539117094069973),
 (2553, 0.5527707983925667)]

In [17]:
# making an empty list that will containg all 10 book recommendations
l = []
for i in range(len(lst)):
    a = lst[i][0]
    l.append(data['book_title'][a])

In [21]:
#recommendation list
l

['harry potter and the prisoner of azkaban',
 'harry potter and the order of the phoenix',
 'harry potter series box set',
 'the seer and the sword',
 'the blood keeper',
 "james potter and the hall of elders' crossing",
 'the hero and the crown',
 'thirst no. 1  the last vampire  black blood  and red dice',
 'tell the wind and fire',
 'the princess  the crone  and the dung cart knight']

In [22]:
#Lets check complete information about these books also see if these recommendations make sense.
data[data['book_title'].isin(l)]


Unnamed: 0,author_genres,author_name,book_title,genre_1,genre_2,comb
128,fiction,j.k. rowling,harry potter series box set,fantasy,young adult,fiction j.k. rowling harry potter series box s...
2327,young adult science fiction fantasy,victoria hanley,the seer and the sword,fantasy,young adult,young adult science fiction fantasy victoria h...
2553,science fiction fantasy children s books young...,gerald morris,the princess the crone and the dung cart knight,fantasy,young adult,science fiction fantasy children s books young...
2960,fiction,j.k. rowling,harry potter and the prisoner of azkaban,fantasy,young adult,fiction j.k. rowling harry potter and the pris...
3692,science fiction fantasy young adult,sarah rees brennan,tell the wind and fire,fantasy,young adult,science fiction fantasy young adult sarah rees...
3963,young adult horror science fiction fantasy,christopher pike,thirst no. 1 the last vampire black blood a...,fantasy,young adult,young adult horror science fiction fantasy chr...
8276,fiction,j.k. rowling,harry potter and the order of the phoenix,fantasy,young adult,fiction j.k. rowling harry potter and the orde...
8933,science fiction fantasy,robin mckinley,the hero and the crown,fantasy,young adult,science fiction fantasy robin mckinley the her...
12121,young adult science fiction fantasy,tessa gratton,the blood keeper,young adult,fantasy,young adult science fiction fantasy tessa grat...
13175,science fiction fantasy young adult thriller a...,g. norman lippert,james potter and the hall of elders' crossing,fantasy,young adult,science fiction fantasy young adult thriller a...
