# In [1]:
import pandas as pd
import sqlite3
from bs4 import BeautifulSoup
import requests

# 1. Setup: download the page and parse it.
url = "https://www.scrapethissite.com/pages/simple/"
# requests applies no timeout by default, so an unresponsive server would
# hang the script forever; 30 seconds is an arbitrary but generous ceiling.
response = requests.get(url, timeout=30)
# Stop on 4xx/5xx responses instead of parsing an error page, which would
# silently produce empty output files further down.
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Accumulates one dict per successfully parsed country card.
data = []

# 2. Select every country card directly by its CSS class; each card holds
# the name, capital, population and area of a single country.
countries = soup.find_all("div", class_="country")

for country in countries:
    try:
        name = country.find("h3").text.strip()
        capital = country.find("span", class_="country-capital").text.strip()
        population = country.find("span", class_="country-population").text.strip()
        area = country.find("span", class_="country-area").text.strip()

        data.append({
            "Country": name,
            "Capital": capital,
            "Population": int(population),
            "Area": float(area),
        })
    except AttributeError:
        # find() returned None because an expected tag is absent from the card.
        print("Skipping a country due to missing data")
    except ValueError as exc:
        # int()/float() rejected the text (empty or malformed number); skip
        # this card rather than letting one bad record abort the whole run.
        print(f"Skipping a country due to invalid numeric data: {exc}")

# 3. Build a table from the collected records (one row per country dict).
df = pd.DataFrame(data=data)

# 4. Write the same table to CSV, Excel and JSON, omitting the row index
# from the tabular formats; JSON is emitted as an indented list of records.
df.to_csv(path_or_buf="Country.csv", index=False)
df.to_excel(excel_writer="Country.xlsx", index=False)
df.to_json(path_or_buf="Country.json", orient="records", indent=4)

# 5. Save to SQLite.
# NOTE: using a sqlite3 connection as a context manager only commits (or
# rolls back on error) the transaction; it does NOT close the connection.
# The explicit close() in `finally` is what actually releases the handle.
connection = sqlite3.connect("country.db")
try:
    with connection:
        df.to_sql("Countries_data", connection, if_exists="replace", index=False)
    # Reached only after the transaction above has been committed.
    print("All data saved successfully!")
finally:
    connection.close()

# Output: All data saved successfully!
