In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the books, users and ratings tables from CSV files in the working
# directory (read in that order).
books, users, ratings = map(
    pd.read_csv,
    ('books.csv', 'users.csv', 'ratings.csv'),
)

In [3]:
# Upgrade the cover-image links from http:// to https://.
# The anchored replace only touches values that start with "http://", so
# re-running this cell is a no-op (the original slice-and-insert produced
# "httpss://" on a second run) and missing values pass through as NaN
# instead of raising TypeError inside the lambda.
books['Image-URL-M'] = books['Image-URL-M'].str.replace(
    r'^http://', 'https://', regex=True
)
books['Image-URL-M'][0]

In [4]:
# Preview the first rows of the users table.
users.head()

In [5]:
# Preview the first rows of the ratings table.
ratings.head()

In [6]:
# Print the (rows, columns) shape of each table, one per line, in the order
# books, ratings, users.
for frame in (books, ratings, users):
    print(frame.shape)

In [7]:
# Number of missing values in each column of books.
books.isnull().sum()

In [8]:
# Number of missing values in each column of ratings.
ratings.isnull().sum()

In [9]:
# Number of rows in books that repeat an earlier row in every column.
books.duplicated().sum()

In [10]:
# Number of rows in ratings that repeat an earlier row in every column.
ratings.duplicated().sum()

In [11]:
# Number of rows in users that repeat an earlier row in every column.
users.duplicated().sum()

# popularity based recommender

In [12]:
# This is a formula-based recommender system. Here we use the average rating; you can use another measure depending on your logic.

In [13]:
# Attach the book columns to every rating by joining on ISBN. merge defaults
# to an inner join, so ratings whose ISBN is absent from books are dropped.
ratings_with_name = pd.merge(ratings, books, on='ISBN')

In [14]:
# Number of ratings per title.
# 'Book-Rating' is selected before aggregating so only that column is counted
# (the original counted every column and then discarded all but one), and the
# result column is named directly instead of being renamed in place.
num_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)
num_rating_df

In [15]:
# Mean rating per title.
# 'Book-Rating' is selected before calling mean(): aggregating the whole
# frame raises TypeError on pandas >= 2.0 because the merged table also holds
# string columns (e.g. 'Image-URL-M') that cannot be averaged.
avg_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_rating')
)
avg_rating_df

In [16]:
# Combine the rating count and the mean rating into one row per title.
popular_df = pd.merge(num_rating_df, avg_rating_df, on='Book-Title')
popular_df

In [17]:
# Final popularity table: one row per title with author, cover URL, rating
# count and mean rating.
# It is rebuilt from num_rating_df / avg_rating_df rather than from popular_df
# itself so the cell can be re-run: on a second run the original merged books
# into the already-enriched popular_df, which suffixes the overlapping
# columns (_x/_y) and makes the column selection raise KeyError.
popular_df = (
    num_rating_df
    .merge(avg_rating_df, on='Book-Title')
    .merge(books, on='Book-Title')
    .drop_duplicates('Book-Title')  # keep the first books row for each title
    .loc[:, ['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']]
)

In [18]:
# Cover URL of the row whose index label is 0 (a label lookup, not a
# positional one).
popular_df.loc[0, 'Image-URL-M']

# Collaborative recommender system

In [19]:
# Here we consider only the books that have at least 50 ratings (from the retained users), and
# the users who have given more than 200 ratings.

In [20]:
# Boolean mask indexed by User-ID: True when the user has more than 200 ratings.
ratings_per_user = ratings_with_name.groupby('User-ID')['Book-Rating'].count()
x = ratings_per_user > 200
# IDs of the users that pass the threshold.
padhe_likhe_users = x[x].index

In [21]:
# Keep only the ratings made by users listed in padhe_likhe_users.
is_heavy_rater = ratings_with_name['User-ID'].isin(padhe_likhe_users)
filtered_rating = ratings_with_name[is_heavy_rater]

In [22]:
# Boolean mask indexed by Book-Title: True when the title has at least 50
# ratings within filtered_rating.
ratings_per_title = filtered_rating.groupby('Book-Title')['Book-Rating'].count()
y = ratings_per_title >= 50
# Titles that pass the threshold.
famous_books = y[y].index

In [23]:
# Keep only the ratings whose title is listed in famous_books.
is_famous = filtered_rating['Book-Title'].isin(famous_books)
final_ratings = filtered_rating[is_famous]

In [24]:
# Title x user matrix of ratings: one row per Book-Title, one column per
# User-ID. Cells without a rating are NaN at this point. Repeated
# (title, user) pairs are combined by pivot_table's default aggfunc (mean).
pt = final_ratings.pivot_table(index='Book-Title',columns='User-ID',values='Book-Rating')

In [25]:
# Replace missing (title, user) cells with 0 so every row is a dense vector.
pt = pt.fillna(0)

In [26]:
# Display the title x user rating matrix.
pt

In [27]:
from sklearn.metrics.pairwise import cosine_similarity

In [28]:
# Pairwise cosine similarity between the rows of pt (one row per title), so
# entry [i, j] compares pt.index[i] with pt.index[j].
similarity_scores = cosine_similarity(pt)

In [29]:
# Shape of the similarity matrix.
similarity_scores.shape

In [30]:
def recommend(book_name, top_n=4):
    """Return the books most similar to ``book_name``.

    Similarity is read from the module-level ``similarity_scores`` matrix,
    whose rows and columns follow the order of ``pt.index``. Author and cover
    URL are looked up in the module-level ``books`` table.

    Args:
        book_name: Title to find neighbours for; must be present in
            ``pt.index``.
        top_n: Maximum number of recommendations to return. Defaults to 4,
            which matches the previously hard-coded slice.

    Returns:
        A list of up to ``top_n`` items ordered from most to least similar.
        Each item is ``[title, author, image_url]``; an item is an empty list
        if the title has no row in ``books``.

    Raises:
        IndexError: If ``book_name`` is not present in ``pt.index``.
    """
    # Locate the row of the query title in the similarity matrix.
    matches = np.flatnonzero(pt.index == book_name)
    if matches.size == 0:
        raise IndexError(f"book {book_name!r} not found in pt.index")
    index = matches[0]

    # Exclude the query book by position instead of assuming it sorts first:
    # when another title ties with it at the top, slicing [1:5] would return
    # the query book itself and drop a genuine neighbour.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_scores[index])
        if position != index
    ]
    # The sort is stable, so equally scored titles keep their pt.index order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    data = []
    for position, _score in candidates[:max(top_n, 0)]:
        # A single books row per title is enough; compute it once instead of
        # once per extracted column.
        book_row = books[books['Book-Title'] == pt.index[position]].drop_duplicates('Book-Title')

        item = []
        item.extend(list(book_row['Book-Title'].values))
        item.extend(list(book_row['Book-Author'].values))
        item.extend(list(book_row['Image-URL-M'].values))

        data.append(item)

    return data

In [31]:
# Example call: recommendations for the title '1984'.
recommend('1984')

In [32]:
# Title stored at position 545 of pt.index; raises IndexError if pt has fewer
# than 546 rows.
pt.index[545]

In [33]:
import pickle
# Persist the popularity table. The context manager closes the file as soon
# as the dump finishes, so the pickle is flushed to disk before the cell
# returns (the original never closed the handle explicitly).
with open('popular.pkl', 'wb') as fh:
    pickle.dump(popular_df, fh)

In [34]:
# NOTE(review): the de-duplicated frame is only displayed, not assigned, so
# `books` itself is unchanged and may still contain repeated titles when it
# is pickled later — confirm whether that is intended.
books.drop_duplicates('Book-Title')

In [35]:
# Persist the rating matrix, the books table and the similarity matrix.
# Each file is opened in a context manager so it is closed (and flushed) as
# soon as its dump finishes; the original never closed the handles explicitly.
for filename, obj in (
    ('pt.pkl', pt),
    ('books.pkl', books),
    ('similarity_scores.pkl', similarity_scores),
):
    with open(filename, 'wb') as fh:
        pickle.dump(obj, fh)