In [18]:
from bs4 import BeautifulSoup
import requests, openpyxl, time
import pandas as pd

In [23]:
# Create Excel workbook and sheet
excel = openpyxl.Workbook()
sheet = excel.active
sheet.title = "Books"
sheet.append(['Title', 'Star Rating', 'Price', 'Availability'])

base_url = "https://books.toscrape.com/catalogue/page-{}.html"

try:
    for page in range(1, 51):  # Loop through pages 1 to 50
        print(f"Scraping page {page}...")

        url = base_url.format(page)
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"
        }

        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')

        books = soup.find_all('article', class_='product_pod')

        for book in books:
            title = book.h3.a['title']
            star_rating = book.p['class'][1]
            price = book.find('p', class_='price_color').text.strip()
            stock = book.find('p', class_='instock availability').text.strip()

            sheet.append([title, star_rating, price, stock])

        time.sleep(1)  # Be polite to the server (avoid too many rapid requests)

except Exception as e:
    print(e)

# Save Excel file
excel.save('All_Books.xlsx')
print("Scraping completed and data saved as 'All_Books.xlsx'")


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Scraping page 16...
Scraping page 17...
Scraping page 18...
Scraping page 19...
Scraping page 20...
Scraping page 21...
Scraping page 22...
Scraping page 23...
Scraping page 24...
Scraping page 25...
Scraping page 26...
Scraping page 27...
Scraping page 28...
Scraping page 29...
Scraping page 30...
Scraping page 31...
Scraping page 32...
Scraping page 33...
Scraping page 34...
Scraping page 35...
Scraping page 36...
Scraping page 37...
Scraping page 38...
Scraping page 39...
Scraping page 40...
Scraping page 41...
Scraping page 42...
Scraping page 43...
Scraping page 44...
Scraping page 45...
Scraping page 46...
Scraping page 47...
Scraping page 48...
Scraping page 49...
Scraping page 50...
Scraping 

In [21]:
# Create Excel workbook
excel = openpyxl.Workbook()
sheet = excel.active
sheet.title = "Books by Genre"
sheet.append(['Genre', 'Title', 'Star Rating', 'Price', 'Availability'])

base_url = "https://books.toscrape.com/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"
}

try:
    # Step 1: Get all genre links
    response = requests.get(base_url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')

    genre_list = soup.find('ul', class_='nav-list').find('ul').find_all('li')

    for genre in genre_list:
        genre_name = genre.a.text.strip()
        genre_url = base_url + genre.a['href']

        print(f"Scraping genre: {genre_name}")

        page_num = 1
        while True:
            print(f" - Page {page_num} of {genre_name}")
            paged_url = genre_url.replace('index.html', f'page-{page_num}.html')
            
            response = requests.get(paged_url, headers=headers)
            
            if response.status_code == 404:
                # No more pages for this genre
                break
            
            soup = BeautifulSoup(response.content, 'html.parser')
            books = soup.find_all('article', class_='product_pod')

            for book in books:
                title = book.h3.a['title']
                star_rating = book.p['class'][1]
                price = book.find('p', class_='price_color').text.strip()
                stock = book.find('p', class_='instock availability').text.strip()

                sheet.append([genre_name, title, star_rating, price, stock])

            page_num += 1
            time.sleep(1)  # Be polite to the server

except Exception as e:
    print(e)

# Save the Excel file
excel.save('Books_By_Genre.xlsx')
print("Scraping completed and saved as 'Books_By_Genre.xlsx'")


Scraping genre: Travel
 - Page 1 of Travel
Scraping genre: Mystery
 - Page 1 of Mystery
 - Page 2 of Mystery
 - Page 3 of Mystery
Scraping genre: Historical Fiction
 - Page 1 of Historical Fiction
 - Page 2 of Historical Fiction
 - Page 3 of Historical Fiction
Scraping genre: Sequential Art
 - Page 1 of Sequential Art
 - Page 2 of Sequential Art
 - Page 3 of Sequential Art
 - Page 4 of Sequential Art
 - Page 5 of Sequential Art
Scraping genre: Classics
 - Page 1 of Classics
Scraping genre: Philosophy
 - Page 1 of Philosophy
Scraping genre: Romance
 - Page 1 of Romance
 - Page 2 of Romance
 - Page 3 of Romance
Scraping genre: Womens Fiction
 - Page 1 of Womens Fiction
Scraping genre: Fiction
 - Page 1 of Fiction
 - Page 2 of Fiction
 - Page 3 of Fiction
 - Page 4 of Fiction
 - Page 5 of Fiction
Scraping genre: Childrens
 - Page 1 of Childrens
 - Page 2 of Childrens
 - Page 3 of Childrens
Scraping genre: Religion
 - Page 1 of Religion
Scraping genre: Nonfiction
 - Page 1 of Nonfiction
 -

In [22]:
# Load datasets
books1 = pd.read_excel('All_Books.xlsx')
books2 = pd.read_excel('Books_By_Genre.xlsx')

# Concatenate vertically (stack rows)
combined_books = pd.concat([books1, books2], ignore_index=True)

# Display result
print(combined_books)

# Optionally, save to a new CSV
combined_books.to_excel('Books.xlsx', index=False)


                                                  Title Star Rating   Price  \
0                                  A Light in the Attic       Three  £51.77   
1                                    Tipping the Velvet         One  £53.74   
2                                            Soumission         One  £50.10   
3                                         Sharp Objects        Four  £47.82   
4                 Sapiens: A Brief History of Humankind        Five  £54.23   
...                                                 ...         ...     ...   
1718  Mexican Today: New and Rediscovered Recipes fo...        Five  £24.91   
1719  Vegan Vegetarian Omnivore: Dinner for Everyone...         Two  £13.66   
1720                       The Smitten Kitchen Cookbook         One  £23.59   
1721  The Art of Simple Food: Notes, Lessons, and Re...       Three  £34.32   
1722  Hungry Girl Clean & Hungry: Easy All-Natural R...       Three  £33.14   

     Availability           Genre  
0        In sto