In [7]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

base_url = "https://books.toscrape.com/catalogue/page-{}.html"

all_books_data = []
for page_num in range(1, 51):
    url = base_url.format(page_num)
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    for book in soup.find_all('article', class_='product_pod'):
        title = book.h3.a['title']
        rating = book.p['class'][1]
        price = book.select('.price_color')[0].text
        availability = book.select('.availability')[0].text.strip()

        all_books_data.append([title, rating, price, availability])

columns = ['Title', 'Rating', 'Price', 'Availability']
df = pd.DataFrame(all_books_data, columns=columns)

df.to_csv('all_books_data.csv', index=False)

df.to_excel('all_books_data.xlsx', index=False)


Unnamed: 0,Title,Rating,Price,Availability
0,A Light in the Attic,Three,51.77,In stock
1,Tipping the Velvet,One,53.74,In stock
2,Soumission,One,50.10,In stock
3,Sharp Objects,Four,47.82,In stock
4,Sapiens: A Brief History of Humankind,Five,54.23,In stock
...,...,...,...,...
995,Alice in Wonderland (Alice's Adventures in Won...,One,55.53,In stock
996,"Ajin: Demi-Human, Volume 1 (Ajin: Demi-Human #1)",Four,57.06,In stock
997,A Spy's Devotion (The Regency Spies of London #1),Five,16.97,In stock
998,1st to Die (Women's Murder Club #1),One,53.98,In stock


## Tansformations

In [11]:
df['Price'] = df['Price'].str.extract('(\d+\.\d+)').astype(float)

AttributeError: Can only use .str accessor with string values!

In [12]:
# Transform the Ratings column from words to numbers
rating_mapping = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}
df['Rating'] = df['Rating'].map(rating_mapping)


In [13]:

df.to_csv('transformed_books_data.csv', index=False)