In [4]:
# Q1 - Books to Scrape
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Crawl the paginated catalogue, collect one record per book, and write
# the result to books.csv.
base_url = "https://books.toscrape.com/catalogue/page-{}.html"
REQUEST_TIMEOUT = 10  # seconds; requests applies no timeout by default and could block forever
books = []

page = 1
while True:
    url = base_url.format(page)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        # Presumably a 404 marks the page after the last catalogue page.
        # Any other status means the crawl stopped early, so report it
        # instead of silently producing a truncated CSV.
        if response.status_code != 404:
            print(f"⚠️ Stopped at page {page}: HTTP {response.status_code}")
        break

    # Parse the raw bytes so BeautifulSoup detects the document encoding
    # itself. response.text depends on the charset requests infers from
    # the HTTP headers, and a wrong guess corrupts non-ASCII characters
    # such as the currency sign in the price.
    soup = BeautifulSoup(response.content, 'html.parser')

    articles = soup.find_all('article', class_='product_pod')
    if not articles:
        # A page without product cards also ends the crawl.
        break

    for article in articles:
        title = article.h3.a['title']
        price = article.find('p', class_='price_color').text.strip()
        availability = article.find('p', class_='instock availability').text.strip()
        # The first <p> in the card carries the rating as its second CSS
        # class, e.g. 'One', 'Two', 'Three'.
        star = article.p['class'][1]
        books.append({
            "Title": title,
            "Price": price,
            "Availability": availability,
            "Star Rating": star
        })

    page += 1

# Build the frame once from the list of records (no row-by-row growth).
df_books = pd.DataFrame(books)
df_books.to_csv("books.csv", index=False)
print("✅ Books data saved to books.csv")


✅ Books data saved to books.csv


In [3]:
%pip install requests
%pip install bs4

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting beautifulsoup4 (from bs4)
  Downloading beautifulsoup4-4.14.2-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4->bs4)
  Downloading soupsieve-2.8-py3-none-any.whl.metadata (4.6 kB)
Downloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Downloading beautifulsoup4-4.14.2-py3-none-any.whl (106 kB)
Downloading soupsieve-2.8-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4, bs4

   ---------------------------------------- 0/3 [soupsieve]
   ---------------------------------------- 0/3 [soupsieve]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ------------- -------------------------- 1/3 [beautifulsoup4]
   ----------------------


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
# Q2 - IMDB Top 250
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time

# Scrape the IMDb Top 250 chart with a real browser and write it to
# imdb_top250.csv.
# Initialize driver (make sure you have ChromeDriver or use Edge/Firefox)
driver = webdriver.Chrome()
movies = []

# The try/finally guarantees the browser is shut down even when loading
# the page or locating an element raises; otherwise the browser and
# driver processes would be left running.
try:
    driver.get("https://www.imdb.com/chart/top/")
    time.sleep(3)  # fixed pause, presumably to let the chart finish rendering

    rows = driver.find_elements(By.CSS_SELECTOR, "li.ipc-metadata-list-summary-item")
    # Rank is the 1-based position of the row in the list as found.
    for i, row in enumerate(rows, start=1):
        # Despite the *_elem names, these hold the elements' text.
        title_elem = row.find_element(By.CSS_SELECTOR, "h3.ipc-title__text").text
        # find_element returns the first match inside the row.
        # NOTE(review): assumed to be the release year; confirm against the page.
        year_elem = row.find_element(By.CSS_SELECTOR, "span.cli-title-metadata-item").text
        rating_elem = row.find_element(By.CSS_SELECTOR, "span.ipc-rating-star--rating").text
        movies.append({
            "Rank": i,
            "Title": title_elem,
            "Year": year_elem,
            "IMDB Rating": rating_elem
        })
finally:
    driver.quit()

df_imdb = pd.DataFrame(movies)
df_imdb.to_csv("imdb_top250.csv", index=False)
print("✅ IMDB data saved to imdb_top250.csv")


✅ IMDB data saved to imdb_top250.csv


In [6]:
%pip install selenium

Collecting selenium
  Downloading selenium-4.38.0-py3-none-any.whl.metadata (7.5 kB)
Collecting trio<1.0,>=0.31.0 (from selenium)
  Downloading trio-0.31.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket<1.0,>=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting typing_extensions<5.0,>=4.15.0 (from selenium)
  Downloading typing_extensions-4.15.0-py3-none-any.whl.metadata (3.3 kB)
Collecting websocket-client<2.0,>=1.8.0 (from selenium)
  Downloading websocket_client-1.9.0-py3-none-any.whl.metadata (8.3 kB)
Collecting sortedcontainers (from trio<1.0,>=0.31.0->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio<1.0,>=0.31.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting sniffio>=1.3.0 (from trio<1.0,>=0.31.0->selenium)
  Downloading sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting wsproto>=0.14 (from tri


[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
# Q3 - Weather Data
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch the weather overview page, extract one record per table row, and
# write the result to weather.csv.
url = "https://www.timeanddate.com/weather/"
# requests applies no timeout by default; bound the wait and fail loudly
# on an HTTP error instead of parsing an error page as if it were data.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

cities = []
# A multi-class string given to class_ is matched against the exact
# value of the class attribute, so this lookup is sensitive to the
# order of the class names in the page markup.
table = soup.find('table', class_='zebra tb-theme fw tb-wt zebra va-m')

if table:
    rows = table.find_all('tr')[1:]  # skip header
    for row in rows:
        cols = row.find_all('td')
        # Rows with fewer than four cells are skipped.
        # NOTE(review): only the first three cells are read; confirm
        # that they really are city / temperature / condition in the
        # live table layout.
        if len(cols) >= 4:
            city = cols[0].text.strip()
            temperature = cols[1].text.strip()
            condition = cols[2].text.strip()
            cities.append({
                "City": city,
                "Temperature": temperature,
                "Condition": condition
            })

if not cities:
    # Make an empty result visible; the CSV is still written below so
    # anything reading weather.csv keeps working.
    print("⚠️ No weather rows were extracted; weather.csv will be empty")

df_weather = pd.DataFrame(cities)
df_weather.to_csv("weather.csv", index=False)
print("✅ Weather data saved to weather.csv")


✅ Weather data saved to weather.csv
