In [2]:
# Assignment 4 - Web Scraping
# ============================================================
# Q1: Scrape all available books from https://books.toscrape.com/
# ============================================================

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Parallel lists: index i in each list describes the same book.
book_titles = []
book_prices = []
book_availability = []
book_ratings = []

base_url = "https://books.toscrape.com/catalogue/page-{}.html"

# Walk the paginated catalogue and collect title, price, availability and
# star rating for every book. One Session is reused for all pages so the
# underlying connection is kept alive instead of being re-opened 50 times.
with requests.Session() as session:
    for page in range(1, 51):  # 50 pages total
        url = base_url.format(page)

        # A timeout keeps a stalled connection from hanging the cell forever.
        response = session.get(url, timeout=10)

        # Running past the last page is the normal stop condition, not an error.
        if response.status_code == 404:
            break
        response.raise_for_status()

        # Hand BeautifulSoup the raw bytes so it detects the encoding from the
        # document itself. `response.text` falls back to ISO-8859-1 when the
        # server sends no charset, which would turn "£" into "Â£" in prices.
        soup = BeautifulSoup(response.content, "html.parser")
        books = soup.find_all("article", class_="product_pod")

        if not books:
            break

        for book in books:
            # Title: the full title lives in the link's "title" attribute.
            title = book.h3.a["title"]

            # Price
            price = book.find("p", class_="price_color").text.strip()

            # Availability: match on the "availability" class alone and
            # tolerate a missing tag instead of raising AttributeError.
            availability_tag = book.find("p", class_="availability")
            availability = availability_tag.text.strip() if availability_tag else ""

            # Star Rating: the rating word (e.g. "Three") is the class that
            # accompanies "star-rating" on the rating paragraph.
            rating_tag = book.find("p", class_="star-rating")
            rating_classes = rating_tag.get("class", []) if rating_tag else []
            rating_class = next(
                (name for name in rating_classes if name != "star-rating"), ""
            )

            book_titles.append(title)
            book_prices.append(price)
            book_availability.append(availability)
            book_ratings.append(rating_class)

books_df = pd.DataFrame({
    "Title": book_titles,
    "Price": book_prices,
    "Availability": book_availability,
    "Star Rating": book_ratings
})

books_df.to_csv("books.csv", index=False)
print("✅ Q1 Completed: books.csv file created successfully!")

KeyboardInterrupt: 

In [None]:
# ============================================================
# Q2: Scrape IMDB Top 250 Movies
# ============================================================

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time

# Base class of Selenium's own errors (missing element, stale element, ...).
from selenium.common.exceptions import WebDriverException

# Scrape rank, title, year and rating for each row of the IMDB Top 250 chart
# using headless Chrome, then write the result to imdb_top250.csv.
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

url = "https://www.imdb.com/chart/top/"

ranks = []
titles = []
years = []
ratings = []
skipped_rows = 0  # rows dropped because a field could not be read

# try/finally guarantees the browser process is shut down even if loading
# or parsing the page raises.
try:
    driver.get(url)
    time.sleep(3)  # give the page's JavaScript time to render the list

    # NOTE(review): these selectors assume a <tbody>/<tr> chart layout;
    # confirm against the live page if zero rows come back.
    movies = driver.find_elements(By.XPATH, '//tbody[@class="ipc-metadata-list-summary"]/tr')

    for movie in movies:
        try:
            rank = movie.find_element(By.XPATH, './/span[@class="ipc-metadata-list-summary-item__rank"]').text.strip('.')
            title = movie.find_element(By.XPATH, './/h3').text
            year = movie.find_element(By.XPATH, './/span[contains(@class,"title")]').text.strip('()')
            rating = movie.find_element(By.XPATH, './/span[@class="ipc-rating-star"]').text
        except WebDriverException:
            # Best effort: skip rows with unreadable fields, but only for
            # Selenium errors so KeyboardInterrupt and real bugs still surface.
            skipped_rows += 1
            continue

        ranks.append(rank)
        titles.append(title)
        years.append(year)
        ratings.append(rating)
finally:
    driver.quit()

imdb_df = pd.DataFrame({
    "Rank": ranks,
    "Title": titles,
    "Year": years,
    "IMDB Rating": ratings
})

# Surface partial or empty results instead of silently reporting success.
if skipped_rows:
    print(f"Warning: skipped {skipped_rows} row(s) whose fields could not be read.")
if imdb_df.empty:
    print("Warning: no movies were scraped; the page layout or selectors may have changed.")

imdb_df.to_csv("imdb_top250.csv", index=False)
print("✅ Q2 Completed: imdb_top250.csv file created successfully!")


In [None]:

# ============================================================
# Q3: Scrape Weather Information from TimeandDate.com
# ============================================================

# Scrape the world weather table from timeanddate.com and write one row per
# table row to weather.csv.
url = "https://www.timeanddate.com/weather/"

# The timeout prevents an unresponsive server from hanging the cell;
# raise_for_status stops an error page from being parsed as weather data.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

cities = []
temperatures = []
conditions = []

# Find the weather table
table = soup.find("table", class_="zebra fw tb-theme")
if table is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError(
        "Weather table not found at " + url + "; the page layout may have changed."
    )
rows = table.find_all("tr")

# The first row is skipped as the header; rows with fewer than three cells
# are ignored.
# NOTE(review): assumes cells 0/1/2 are city/temperature/condition and that
# each row holds a single city - confirm against the live table.
for row in rows[1:]:
    cols = row.find_all("td")
    if len(cols) >= 3:
        city = cols[0].text.strip()
        temp = cols[1].text.strip()
        cond = cols[2].text.strip()
        cities.append(city)
        temperatures.append(temp)
        conditions.append(cond)

weather_df = pd.DataFrame({
    "City": cities,
    "Temperature": temperatures,
    "Condition": conditions
})

weather_df.to_csv("weather.csv", index=False)
print("✅ Q3 Completed: weather.csv file created successfully!")

# ============================================================
# All Tasks Done
# ============================================================
print("\n🎉 All three tasks completed successfully! CSV files generated:\n- books.csv\n- imdb_top250.csv\n- weather.csv")