In [2]:
import requests
from bs4 import BeautifulSoup
import csv
import re
import time
from urllib.parse import urljoin

# Compiled once at import time; both patterns are reused for every book.
_PRICE_RE = re.compile(r"[\d.]+")
_RATING_CLASS_RE = re.compile("star-rating")


def _parse_book(book, rating_map):
    """Turn one ``<article class="product_pod">`` tag into a record dict.

    Args:
        book: BeautifulSoup tag for a single book listing.
        rating_map: Mapping from the rating word used as a CSS class
            (e.g. ``"Three"``) to its numeric value.

    Returns:
        A dict with keys ``"title"`` (str), ``"price"`` (float) and
        ``"rating"`` (int, ``0`` when no known rating class is present).

    Raises:
        ValueError: If the title link, the price or the rating tag is
            missing, so the caller reports what was wrong instead of an
            opaque ``AttributeError`` on ``None``.
    """
    heading = book.h3
    link = heading.a if heading is not None else None
    title = link.get("title") if link is not None else None
    if title is None:
        raise ValueError("book entry has no title link")

    price_tag = book.find("p", class_="price_color")
    price_match = _PRICE_RE.search(price_tag.text) if price_tag is not None else None
    if price_match is None:
        raise ValueError(f"no price found for book {title!r}")
    price = float(price_match.group())

    rating_tag = book.find("p", class_=_RATING_CLASS_RE)
    if rating_tag is None:
        raise ValueError(f"no rating found for book {title!r}")
    # Look for whichever class is a known rating word instead of assuming
    # it is always the second class on the tag.
    rating = next(
        (rating_map[name] for name in rating_tag.get("class", []) if name in rating_map),
        0,
    )

    return {"title": title, "price": price, "rating": rating}


def _save_books(books, output_csv_file):
    """Write the scraped records to ``output_csv_file`` as UTF-8 CSV."""
    print(f"\n💾 Saving {len(books)} books to '{output_csv_file}'...")
    # newline="" lets the csv module control line endings itself.
    with open(output_csv_file, "w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=["title", "price", "rating"])
        writer.writeheader()
        writer.writerows(books)
    print("   Save complete.")


def _print_summary(books):
    """Print count, average price and best-rating statistics for ``books``.

    ``books`` must be non-empty; the caller guards against the empty case.
    """
    average_price = sum(book["price"] for book in books) / len(books)
    max_rating = max(book["rating"] for book in books)
    best_rated_count = sum(1 for book in books if book["rating"] == max_rating)

    print("\n--- 📊 Analysis Summary ---")
    print(f"Total Books Scraped: {len(books)}")
    print(f"Average Book Price: £{average_price:.2f}")
    print(f"Highest Rating Found: {max_rating} stars")
    print(f"Number of Books with Highest Rating: {best_rated_count}")
    print("--------------------------\n")


def scrape_website():
    """Scrape every catalogue page of books.toscrape.com and export the result.

    Starting at ``page-1.html``, follows the "next" pagination link until
    there is none, collecting title, price and rating for each book. The
    records are then written to ``all_books_complete_data.csv`` and a short
    summary is printed.

    Any HTTP, network or parsing error is caught and reported on stdout;
    in that case nothing is written. The function always returns ``None``.
    """
    print("🚀 Starting the book scraping process...")

    base_url = "http://books.toscrape.com/catalogue/"
    start_page = "page-1.html"
    output_csv_file = "all_books_complete_data.csv"

    headers = {
        'User-Agent': 'MyBookScraper/1.1 (Contact: my-email@example.com) - Educational Project'
    }

    all_scraped_books = []
    rating_map = {"One": 1, "Two": 2, "Three": 3, "Four": 4, "Five": 5}

    try:
        current_url = urljoin(base_url, start_page)

        # One session for the whole crawl so the TCP connection is reused
        # across pages instead of being re-opened for every request.
        with requests.Session() as session:
            session.headers.update(headers)

            while current_url:
                print(f"📡 Scraping page: {current_url}")

                response = session.get(current_url, timeout=10)
                response.raise_for_status()

                # Pass raw bytes so BeautifulSoup determines the encoding
                # from the document itself.
                soup = BeautifulSoup(response.content, "html.parser")

                books_on_page = soup.find_all("article", class_="product_pod")
                if not books_on_page:
                    print("⚠️ No books found on this page. Exiting loop.")
                    break

                all_scraped_books.extend(
                    _parse_book(book, rating_map) for book in books_on_page
                )

                next_li = soup.find("li", class_="next")
                next_anchor = next_li.a if next_li is not None else None
                next_href = next_anchor.get("href") if next_anchor is not None else None
                if next_href:
                    # Relative links are relative to the page they appear
                    # on, so resolve against the current page URL.
                    current_url = urljoin(current_url, next_href)
                    time.sleep(1)  # be polite to the server between pages
                else:
                    print("✅ Reached the last page.")
                    current_url = None

        _save_books(all_scraped_books, output_csv_file)

        if all_scraped_books:
            _print_summary(all_scraped_books)

        print("🎉 Scraping process finished successfully!")

    except requests.exceptions.HTTPError as e:
        print(f"❌ HTTP Error occurred: {e}")
    except requests.exceptions.RequestException as e:
        print(f"❌ A network error occurred: {e}")
    except Exception as e:
        # Top-level boundary of the script: report instead of crashing.
        print(f"❌ An unexpected error occurred: {e}")

# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    scrape_website()

🚀 Starting the book scraping process...
📡 Scraping page: http://books.toscrape.com/catalogue/page-1.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-2.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-3.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-4.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-5.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-6.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-7.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-8.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-9.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-10.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-11.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-12.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-13.html
📡 Scraping page: http://books.toscrape.com/catalogue/page-14.html
📡 Scraping page: http://books.toscrape.com/ca