In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Helper and function to extract table data
def _clean_heading(tag):
    """Return the text of a heading tag without the '[edit]' link label."""
    # Remove the marker *before* stripping: otherwise the whitespace that
    # separated the title from '[edit]' survives as a trailing space.
    return tag.text.replace('[edit]', '').strip()


def extract_airport_data(soup):
    """Collect airport rows from every ``wikitable`` found in *soup*.

    The document is walked in order. ``h2`` headings are recorded as the
    current continent, ``h3`` as the current region and ``h4`` as the current
    country; each data row of a following ``table`` whose class list contains
    ``wikitable`` is tagged with whatever headings are current at that point.

    Args:
        soup: A parsed document (or any object exposing a compatible
            ``find_all``) whose tags provide ``name``, ``text``, ``get`` and
            ``find_all``.

    Returns:
        A list of dicts with the keys ``'Location'``, ``'Airport'``,
        ``'IATA Code'``, ``'Country'``, ``'Continent'`` and ``'Region'``.
        A heading level that has not been seen in the current section is
        reported as ``None``.
    """
    continent = None
    region = None
    country = None
    data = []

    # Iterate through the tags to extract the continent, region, and table information
    for tag in soup.find_all(['h2', 'h3', 'h4', 'table']):
        if tag.name == 'h2':  # Continent
            continent = _clean_heading(tag)
            # A new continent invalidates the region/country of the previous
            # section; keeping them would mislabel tables that appear before
            # the next lower-level heading.
            region = None
            country = None

        elif tag.name == 'h3':  # Region
            region = _clean_heading(tag)
            # Same reasoning: the previous region's last country is stale now.
            country = None

        elif tag.name == 'h4':  # Country
            country = _clean_heading(tag)

        elif tag.name == 'table' and 'wikitable' in (tag.get('class') or []):  # Airport tables
            # The first row is treated as the header and skipped.
            for row in tag.find_all('tr')[1:]:
                cols = row.find_all('td')
                # Rows without at least three data cells cannot provide
                # location, airport and IATA code, so they are ignored.
                if len(cols) < 3:
                    continue

                data.append({
                    'Location': cols[0].text.strip(),
                    'Airport': cols[1].text.strip(),
                    'IATA Code': cols[2].text.strip(),
                    'Country': country,
                    'Continent': continent,
                    'Region': region,
                })

    return data

# URL of the Wikipedia page
url = "https://en.wikipedia.org/wiki/List_of_international_airports"

# Request the page content.
# - A timeout keeps the script from hanging forever on a stalled connection.
# - A descriptive User-Agent identifies the client to the server; Wikimedia's
#   policy asks automated clients to send one.
response = requests.get(
    url,
    headers={"User-Agent": "international-airports-scraper/1.0 (python-requests)"},
    timeout=30,
)

# Stop here on an HTTP error instead of silently parsing an error page
# (which would just produce an empty DataFrame further down).
response.raise_for_status()

# Parse the page content using BeautifulSoup.
# The raw bytes are passed on purpose: BeautifulSoup detects the document
# encoding itself, so overriding `response.encoding` (which only affects
# `response.text`) would have no effect on the parsed result.
soup = BeautifulSoup(response.content, 'html.parser')

# Extract the data
airport_data = extract_airport_data(soup)

# Convert the data into a pandas DataFrame
df = pd.DataFrame(airport_data)

# Output the data
print(df)

# Save the DataFrame to a CSV file
df.to_csv('international_airports.csv', index=False)

# Save the DataFrame to an Excel file (XLSX format)
df.to_excel('international_airports.xlsx', index=False, engine='openpyxl')
