# 📚 Book Recommendation System
This notebook builds a Book Recommendation System using:
- Popularity Based Filtering
- Collaborative Filtering
- Cosine Similarity

In [1]:
# Import required libraries
import numpy as np
import pandas as pd

In [2]:
# Read the three source tables from CSV files in the working directory,
# in the order books -> users -> ratings.
books, users, ratings = map(pd.read_csv, ('books.csv', 'users.csv', 'ratings.csv'))

In [3]:
# Report the (rows, columns) shape of each table, one per line
print(books.shape, ratings.shape, users.shape, sep='\n')

In [4]:
# Check missing values
# A notebook cell only renders its final expression, so evaluating the three
# per-table results on separate lines would silently discard the first two.
# They are combined into a single Series instead (outer index level = table
# name, inner level = column name) so every count is displayed.
pd.concat({
    'books': books.isnull().sum(),
    'users': users.isnull().sum(),
    'ratings': ratings.isnull().sum(),
})

In [5]:
# Check duplicate values
# Count fully duplicated rows per table. The three counts are collected into
# one Series so that all of them are displayed; as separate bare expressions
# only the last one would be rendered by the notebook.
pd.Series(
    {
        'books': books.duplicated().sum(),
        'ratings': ratings.duplicated().sum(),
        'users': users.duplicated().sum(),
    },
    name='duplicate_rows',
)

## ✅ Popularity Based Recommendation System

In [6]:
# Attach book metadata to every rating by joining on ISBN
# (default inner join: ratings whose ISBN is missing from `books` are dropped).
ratings_with_name = pd.merge(ratings, books, on='ISBN')

In [7]:
# Number of (non-null) ratings received by each book title
num_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='num_ratings')
)

In [8]:
# Calculate average rating per book
# The rating column is selected *before* aggregating: calling .mean() on the
# whole grouped frame tries to average the non-numeric columns as well (ISBN,
# author, image URLs, ...), which raises TypeError on pandas >= 2.0 where
# numeric_only defaults to False.
avg_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_rating')
)

In [9]:
# Combine the per-title rating count and average rating into one table
popular_df = pd.merge(num_rating_df, avg_rating_df, on='Book-Title')

In [10]:
# Keep titles with at least 250 ratings, rank them by average rating
# (highest first) and retain the first 50.
popular_df = (
    popular_df
    .loc[lambda frame: frame['num_ratings'] >= 250]
    .sort_values(by='avg_rating', ascending=False)
    .iloc[:50]
)

In [11]:
# Add author and image info
# Start from the three base columns only. Without this, re-running the cell
# merges `books` onto a frame that already carries Book-Author / Image-URL-M,
# pandas suffixes the clashing columns (_x / _y) and the final column selection
# fails with KeyError. On a first run the result is unchanged.
popular_df = (
    popular_df[['Book-Title', 'num_ratings', 'avg_rating']]
    .merge(books, on='Book-Title')
    # A title can appear under several ISBNs; keep the first matching book row.
    .drop_duplicates('Book-Title')
    [['Book-Title', 'Book-Author', 'Image-URL-M', 'num_ratings', 'avg_rating']]
)

## ✅ Collaborative Filtering Based Recommendation System

In [12]:
# Boolean flag per user: True when the user has more than 200 ratings
x = ratings_with_name.groupby('User-ID')['Book-Rating'].count().gt(200)
# User-IDs of those heavy raters
padhe_likhe_users = x.index[x.to_numpy()]

In [13]:
# Restrict the merged ratings to rows written by the heavy raters selected above
filtered_rating = ratings_with_name.loc[
    lambda frame: frame['User-ID'].isin(padhe_likhe_users)
]

In [14]:
# Boolean flag per title: True when the heavy raters gave it at least 50 ratings
y = filtered_rating.groupby('Book-Title')['Book-Rating'].count().ge(50)
# Titles that pass the threshold
famous_books = y.index[y.to_numpy()]

In [15]:
# Final working set: ratings by heavy raters, limited to the well-rated titles
final_ratings = filtered_rating.loc[
    lambda frame: frame['Book-Title'].isin(famous_books)
]

In [16]:
# Book x user rating matrix: one row per title, one column per user.
# Cells without a rating come out of the pivot as NaN and are replaced by 0.
pt = (
    final_ratings
    .pivot_table(index='Book-Title', columns='User-ID', values='Book-Rating')
    .fillna(0)
)

In [17]:
# Pairwise cosine similarity between the rows of `pt` (one row per book title).
# similarity_scores[i][j] is the similarity between pt.index[i] and pt.index[j];
# recommend() relies on this positional alignment with pt.index.
# NOTE(review): missing ratings in `pt` were filled with 0, so "not rated" and
# an actual rating of 0 are indistinguishable here — confirm that is intended.
from sklearn.metrics.pairwise import cosine_similarity
similarity_scores = cosine_similarity(pt)

In [18]:
# Recommendation function used in Flask backend
def recommend(book_name, top_n=4):
    """Return display data for the books most similar to ``book_name``.

    The title is looked up in the module-level ``pt`` index, every *other*
    title is ranked by its entry in ``similarity_scores`` and the best
    ``top_n`` are returned together with author and cover image taken from
    ``books``.

    Parameters
    ----------
    book_name : str
        Title exactly as it appears in ``pt.index``.
    top_n : int, default 4
        Maximum number of recommendations to return.

    Returns
    -------
    list of list
        One ``[title, author, image_url]`` list per recommendation, most
        similar first. The queried book itself is never included.

    Raises
    ------
    IndexError
        If ``book_name`` is not present in ``pt.index``.
    """
    matches = np.where(pt.index == book_name)[0]
    if len(matches) == 0:
        # Same exception type as the bare [0][0] lookup produced, but with a
        # message that names the actual problem.
        raise IndexError(f"{book_name!r} is not present in pt.index")
    index = int(matches[0])

    # Exclude the queried book by position instead of assuming it sorts first.
    # With ties at the top score (or an all-zero rating vector) skipping
    # element 0 could drop a genuine neighbour and recommend the book itself.
    candidates = [
        (position, score)
        for position, score in enumerate(similarity_scores[index])
        if position != index
    ]
    similar_items = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

    data = []
    for position, _score in similar_items:
        # A title may be listed under several ISBNs; keep its first row only.
        details = books[books['Book-Title'] == pt.index[position]].drop_duplicates('Book-Title')
        item = []
        for column in ('Book-Title', 'Book-Author', 'Image-URL-M'):
            item.extend(details[column].tolist())
        data.append(item)
    return data

## ✅ Save Model Files for Flask App

In [19]:
import pickle

# Persist the objects the Flask app loads at start-up.
# Each file is opened in a `with` block so the handle is flushed and closed as
# soon as the dump finishes; passing a bare open(...) to pickle.dump leaves the
# handle open until it happens to be garbage-collected.
for filename, artifact in (
    ('popular.pkl', popular_df),
    ('pt.pkl', pt),
    ('books.pkl', books),
    ('similarity_scores.pkl', similarity_scores),
):
    with open(filename, 'wb') as handle:
        pickle.dump(artifact, handle)