# Section 1: Downloading Flight datasets

This section downloads both the departure and arrival datasets from [tablebuilder.singstat](https://tablebuilder.singstat.gov.sg/), the Department of Statistics Singapore website. The files were downloaded to our Google Drive and are retrieved from there in the code.

In [None]:
# Install gdown, the package imported in the next cell to download files from Google Drive.
# NOTE(review): a bare `pip install` is not Python syntax; it only runs inside
# IPython/Jupyter through automagic. `%pip install gdown` is the explicit form.
pip install gdown

## Section 1.1: Downloading Flight Departures

In [None]:
import gdown
import pandas as pd

# Google Drive file ID of the 23/24 departure dataset and the local file name
# it is saved under.
file_id = "1CfEgn8RMfwpG_0RyUc3RvQFf03_LWQRQ"
file_name = "departure-23-24.csv"

# Fetch the CSV from Google Drive into the working directory (progress shown).
gdown.download(
    url=f"https://drive.google.com/uc?id={file_id}",
    output=file_name,
    quiet=False,
)

# Read the CSV, taking column names from row index 9 (0-based; everything
# before that row is ignored), then drop every row that has a missing value.
df = pd.read_csv(file_name, header=9).dropna()

# Index by the first column so that, once transposed, its values become the
# column labels (months -> columns, countries -> rows). reset_index() then
# exposes the former column labels as an "index" column, renamed "Country".
df = (
    df.set_index(df.columns[0])
    .T.reset_index()
    .rename(columns={"index": "Country"})
)

# Unpivot to long format: one row per (Country, month) pair.
df_long = pd.melt(
    df,
    id_vars=["Country"],
    var_name="Month (YYYY-MM)",
    value_name="Departures",
)

# Persist the reshaped table and preview its first rows.
df_long.to_csv("departures_refactor.csv", index=False)
print(df_long.head())


In [None]:

# Lift pandas' display limits so printed frames show full cell contents,
# every row and every column.
for _option in ("display.max_colwidth", "display.max_rows", "display.max_columns"):
    pd.set_option(_option, None)

# Strip the shared series-name prefix and the " (Number)" unit marker from the
# 'Country' labels. Both are literal (non-regex) replacements.
df_long["Country"] = (
    df_long["Country"]
    .str.replace("Number Of Air Passenger Departures -> ", "", regex=False)
    .str.replace(" (Number)", "", regex=False)
)

# Preview the first 30 rows of the cleaned frame.
print(df_long.head(30))

In [None]:
# Keep only the rows whose label still contains a '->' separator, working on an
# independent copy so df_long itself is left untouched.
df_countries = df_long.loc[df_long["Country"].str.contains("->")].copy()

# Reduce each label to the text after its last '->', trimmed of whitespace.
df_countries["Country"] = df_countries["Country"].map(
    lambda label: label.rpartition("->")[2].strip()
)

# Show the resulting DataFrame as the cell output.
df_countries