In [65]:
import pandas as pd
import numpy as np

In [66]:
# Read books.csv (path is relative to the working directory) into a DataFrame
# and preview the first rows.
books = pd.read_csv('books.csv')
books.head()

Unnamed: 0,bookID,title,authors,average_rating,isbn,isbn13,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,439785960,9780440000000.0,eng,652,2095690,27591,9/16/2006,Scholastic Inc.
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,439358078,9780440000000.0,eng,870,2153167,29221,9/1/2004,Scholastic Inc.
2,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.42,439554896,9780440000000.0,eng,352,6333,244,11/1/2003,Scholastic
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,9780440000000.0,eng,435,2339585,36325,5/1/2004,Scholastic Inc.
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,4.78,439682584,9780440000000.0,eng,2690,41428,164,9/13/2004,Scholastic


In [67]:
# Show the dtype pandas inferred for each column when reading the CSV.
books.dtypes

bookID                  int64
title                  object
authors                object
average_rating        float64
isbn                   object
isbn13                float64
language_code          object
  num_pages             int64
ratings_count           int64
text_reviews_count      int64
publication_date       object
publisher              object
dtype: object

In [68]:
# Drop the isbn13 column.
# Reassigning the result (rather than inplace=True) and passing errors='ignore'
# keeps this cell safe to re-run: a second execution no longer raises KeyError
# because the column is already gone.
books = books.drop(columns='isbn13', errors='ignore')

In [69]:
# The CSV header carries leading spaces in front of "num_pages"; map the raw
# header to the clean column name.
column_fixes = {'  num_pages': 'num_pages'}
books.rename(columns=column_fixes, inplace=True)

In [70]:
# Count the missing values in each column.
books.isna().sum(axis=0)

bookID                0
title                 0
authors               0
average_rating        0
isbn                  0
language_code         0
num_pages             0
ratings_count         0
text_reviews_count    0
publication_date      0
publisher             0
dtype: int64

## Publication date
- Change the data type of the `publication_date` column to `datetime`.

In [71]:
# Convert publication_date from month/day/year strings to datetime values.
# errors='coerce' turns any value that does not match the format into NaT
# instead of raising.
parsed_dates = pd.to_datetime(
    books['publication_date'],
    format='%m/%d/%Y',
    errors='coerce',
)
books['publication_date'] = parsed_dates
books.dtypes


bookID                         int64
title                         object
authors                       object
average_rating               float64
isbn                          object
language_code                 object
num_pages                      int64
ratings_count                  int64
text_reviews_count             int64
publication_date      datetime64[ns]
publisher                     object
dtype: object

In [72]:
# Count missing values per column again; dates that could not be parsed show
# up here as missing.
missing_per_column = books.isna().sum()
print(missing_per_column)

bookID                0
title                 0
authors               0
average_rating        0
isbn                  0
language_code         0
num_pages             0
ratings_count         0
text_reviews_count    0
publication_date      2
publisher             0
dtype: int64


In [73]:
# Fill the missing publication dates with the most frequent (mode) date.
# The mode is computed from the column itself rather than hard-coded as a
# string, so the fill value is a real Timestamp and stays correct if the data
# changes. Series.mode() ignores missing values by default; if several dates
# tie, the first one returned is used.
most_common_date = books['publication_date'].mode().iloc[0]
books['publication_date'] = books['publication_date'].fillna(most_common_date)
print(books.isna().sum())

bookID                0
title                 0
authors               0
average_rating        0
isbn                  0
language_code         0
num_pages             0
ratings_count         0
text_reviews_count    0
publication_date      0
publisher             0
dtype: int64


## Average Rating
- Create a categorical column that groups the average rating into labelled bands.

In [74]:
# Print the smallest and largest average rating, one per line.
rating_values = books['average_rating']
print(rating_values.min(), rating_values.max(), sep='\n')


0.0
5.0


In [75]:
# Bin the 0-5 average rating into five labelled bands.
# pd.cut builds right-closed intervals, so without include_lowest=True the
# first band is (0, 1] and a rating of exactly 0.0 (the column minimum) would
# silently become NaN instead of landing in the lowest band.
bins = [0, 1, 2, 3, 4, 5]
bins_labes = ['didn\'t like it', 'it was OK', 'liked it', 'Really liked it', 'it was amazing']

books['avg_book_rating'] = pd.cut(
    books['average_rating'],
    bins,
    labels=bins_labes,
    include_lowest=True,
)
books.head()

Unnamed: 0,bookID,title,authors,average_rating,isbn,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher,avg_book_rating
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,439785960,eng,652,2095690,27591,2006-09-16,Scholastic Inc.,it was amazing
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,439358078,eng,870,2153167,29221,2004-09-01,Scholastic Inc.,it was amazing
2,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.42,439554896,eng,352,6333,244,2003-11-01,Scholastic,it was amazing
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,043965548X,eng,435,2339585,36325,2004-05-01,Scholastic Inc.,it was amazing
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,4.78,439682584,eng,2690,41428,164,2004-09-13,Scholastic,it was amazing


In [76]:
# Store avg_book_rating as an ordered categorical, ranked from the lowest
# label to the highest.
order = [
    'didn\'t like it',
    'it was OK',
    'liked it',
    'Really liked it',
    'it was amazing',
]
ordered_ratings = pd.Categorical(books['avg_book_rating'], categories=order, ordered=True)
books['avg_book_rating'] = ordered_ratings
books.dtypes

bookID                         int64
title                         object
authors                       object
average_rating               float64
isbn                          object
language_code                 object
num_pages                      int64
ratings_count                  int64
text_reviews_count             int64
publication_date      datetime64[ns]
publisher                     object
avg_book_rating             category
dtype: object

In [77]:
# Rearrange the columns so the rating category sits directly after the
# numeric average rating.
column_order = [
    'bookID',
    'title',
    'authors',
    'average_rating',
    'avg_book_rating',
    'isbn',
    'language_code',
    'num_pages',
    'ratings_count',
    'text_reviews_count',
    'publication_date',
    'publisher',
]
books = books[column_order]
books.head()

Unnamed: 0,bookID,title,authors,average_rating,avg_book_rating,isbn,language_code,num_pages,ratings_count,text_reviews_count,publication_date,publisher
0,1,Harry Potter and the Half-Blood Prince (Harry ...,J.K. Rowling/Mary GrandPré,4.57,it was amazing,439785960,eng,652,2095690,27591,2006-09-16,Scholastic Inc.
1,2,Harry Potter and the Order of the Phoenix (Har...,J.K. Rowling/Mary GrandPré,4.49,it was amazing,439358078,eng,870,2153167,29221,2004-09-01,Scholastic Inc.
2,4,Harry Potter and the Chamber of Secrets (Harry...,J.K. Rowling,4.42,it was amazing,439554896,eng,352,6333,244,2003-11-01,Scholastic
3,5,Harry Potter and the Prisoner of Azkaban (Harr...,J.K. Rowling/Mary GrandPré,4.56,it was amazing,043965548X,eng,435,2339585,36325,2004-05-01,Scholastic Inc.
4,8,Harry Potter Boxed Set Books 1-5 (Harry Potte...,J.K. Rowling/Mary GrandPré,4.78,it was amazing,439682584,eng,2690,41428,164,2004-09-13,Scholastic
