In [50]:
import requests
from bs4 import BeautifulSoup
import csv
import time

In [51]:
BASE_URL = "https://quotes.toscrape.com/page/{}/"
HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
OUTPUT_FILE = "quotes_dataset.csv"

In [52]:
def fetch_page(url):
    try:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching URL: {url}\n{e}")
        return None


In [53]:
def parse_quotes(html):
    soup = BeautifulSoup(html, "lxml")
    quote_cards = soup.find_all("div", class_="quote")

    extracted_data = []

    for card in quote_cards:
        text = card.find("span", class_="text").text.strip()
        author = card.find("small", class_="author").text.strip()
        tags = [tag.text for tag in card.find_all("a", class_="tag")]


        extracted_data.append({
            "text": text,
            "author": author,
            "tags": ", ".join(tags)
        })
        return extracted_data

In [54]:
def save_to_csv(data, filename):
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["text", "author", "tags"])
        writer.writeheader()
        writer.writerows(data)

In [55]:
def run_scraper(pages=10):
    all_quotes = []


    for page in range(1, pages + 1):
        url = BASE_URL.format(page)
        print(f"Scraping page {page}: {url}")


        html = fetch_page(url)
        if html is None:
            continue


        page_data = parse_quotes(html)
        if not page_data:
            print("No more quotes found. Stopping.")
            break


        all_quotes.extend(page_data)
        time.sleep(1) # polite delay


    save_to_csv(all_quotes, OUTPUT_FILE)
    print(f"Scraping completed! Saved {len(all_quotes)} quotes to {OUTPUT_FILE}")

In [56]:
    if __name__ == "__main__":
        run_scraper(pages=5)

Scraping page 1: https://quotes.toscrape.com/page/1/
Scraping page 2: https://quotes.toscrape.com/page/2/
Scraping page 3: https://quotes.toscrape.com/page/3/
Scraping page 4: https://quotes.toscrape.com/page/4/
Scraping page 5: https://quotes.toscrape.com/page/5/
Scraping completed! Saved 5 quotes to quotes_dataset.csv
