In [12]:
import pandas as pd

def clean_books(books):
    """Return a cleaned copy of *books* with numeric rating columns.

    'ratings_count' and 'average_rating' are both parsed with
    ``errors='coerce'`` so that a malformed value becomes NaN instead of
    raising; rows missing a title or either numeric value are then dropped.
    The input frame is not modified.

    Args:
        books: DataFrame with 'title', 'ratings_count' and 'average_rating'
            columns.

    Returns:
        A new DataFrame containing only the rows where all three columns
        hold usable values; 'average_rating' has float dtype.
    """
    cleaned = books.copy()
    cleaned['ratings_count'] = pd.to_numeric(
        cleaned['ratings_count'], errors='coerce'
    )
    # Coerce first, then cast: a bare astype(float) would abort the whole
    # run on the first unparsable rating string.
    cleaned['average_rating'] = pd.to_numeric(
        cleaned['average_rating'], errors='coerce'
    ).astype(float)
    return cleaned.dropna(subset=['title', 'ratings_count', 'average_rating'])


def select_harry_potter(books):
    """Return the rows of *books* whose title contains "Harry Potter".

    The match is a literal, case-sensitive substring test. Rows whose title
    is missing are treated as non-matching rather than producing NaN in the
    boolean mask.
    """
    is_harry_potter = books['title'].str.contains(
        "Harry Potter", regex=False, na=False
    )
    return books[is_harry_potter]


# In a notebook cell or a script run, __name__ is "__main__", so this section
# executes and leaves the same module-level names defined as before. The guard
# only keeps the file read from firing when the helpers above are imported.
if __name__ == "__main__":
    # Load the dataset, then clean and preprocess it
    df = clean_books(pd.read_csv('books.csv'))

    # Focus on Harry Potter book series
    harry_potter_books = select_harry_potter(df)

    # Rank the Harry Potter titles by 'ratings_count'.
    # NOTE(review): the output labels this "Most Selling", but the ranking
    # column is a ratings count -- presumably a popularity proxy rather than
    # copies sold; confirm against the dataset description.
    most_selling_harry_potter = harry_potter_books.nlargest(5, 'ratings_count')

    # Unweighted mean of 'average_rating' over every matching row
    average_rating_harry_potter = harry_potter_books['average_rating'].mean()

    # Display the results
    print("Most Selling Harry Potter Books:")
    print(most_selling_harry_potter[['title', 'ratings_count']])
    print("\nAverage Rating of Harry Potter Books:")
    print(average_rating_harry_potter)


Most Selling Harry Potter Books:
                                                title  ratings_count
1   Harry Potter and the Sorcerer's Stone (Harry P...        4602479
6   Harry Potter and the Prisoner of Azkaban (Harr...        1832823
9   Harry Potter and the Chamber of Secrets (Harry...        1779331
10  Harry Potter and the Goblet of Fire (Harry Pot...        1753043
11  Harry Potter and the Deathly Hallows (Harry Po...        1746574

Average Rating of Harry Potter Books:
4.482727272727273
