## Predicting future air traffic from past growth rates

Import the annual growth rate dataset (`Annual_Air_Traffic.csv`) from the repository that accompanies this Jupyter notebook into a `pandas` dataframe.

In [None]:
import warnings

import pandas as pd

# silence every warning for the remainder of the session
warnings.filterwarnings(action='ignore')

# load the air-traffic records from the CSV file in the working directory
# (comma-separated, which is the pandas default)
growth_rates_df = pd.read_csv('Annual_Air_Traffic.csv')

# show the loaded dataframe as the cell output
growth_rates_df

Calculate the lower quartile, median and upper quartile growth rate (for the total passengers) for each airport.

In [None]:
# Summarise each airport's total-passenger growth rate by its lower quartile,
# median and upper quartile.
#
# A single groupby pass is used instead of building the table row by row:
#   * grouping collects every row of an airport, even when those rows are not
#     contiguous in the input, so each airport appears exactly once;
#   * the statistics are computed straight into the new dataframe rather than
#     being written with chained indexing (df[col][row] = value), which pandas
#     does not guarantee will modify the dataframe.
# sort=False keeps the airports in their order of first appearance in the data.
rates_by_airport = growth_rates_df.groupby('Airport', sort=False)['Total_Growth_Rate']

# each quantile() call returns one value per airport, indexed by airport name;
# the three results share that index, so they line up column-wise
growth_stats_df = pd.DataFrame({
    'Lower_Quartile': rates_by_airport.quantile(0.25),
    'Median': rates_by_airport.quantile(0.5),
    'Upper_Quartile': rates_by_airport.quantile(0.75),
}).rename_axis('Airport').reset_index()  # airport name back to a column, 0..n-1 index

growth_stats_df

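Predict future air traffic at each airport for an additional 30 years based on the range of growth rates. Starting from the total number of passengers $P_0$ in the latest year of the dataset, the traffic $k$ years later is projected by compound growth, $P_k = P_0 \left(1 + g/100\right)^k$, where the annual growth rate $g$ (in percent) is the airport's lower quartile, median or upper quartile growth rate for the low, medium and high scenarios respectively.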

In [None]:
# number of years to project beyond the latest year in the dataset
nyears = 30

# Start from the observed data: keep the identifying columns and seed the three
# scenario columns with the observed totals, so that historical rows carry the
# same value under every scenario.  Selecting the columns explicitly fixes the
# five-column layout that the projected rows below are built to match.
future_traffic_df = growth_rates_df[['Airport', 'Year']].copy()
for scenario in ('low', 'medium', 'high'):
    future_traffic_df['Passengers_' + scenario] = growth_rates_df['Total_Passengers']

# find the latest year in the database; every projection starts from this year
current_year = future_traffic_df['Year'].max()
in_current_year = future_traffic_df['Year'] == current_year

# Collect the projected rows in a list and append them in one step; growing the
# dataframe one row at a time copies it on every insertion.
projected_rows = []
for stats in growth_stats_df.itertuples(index=False):
    # passengers recorded for this airport in the latest year
    baseline = future_traffic_df.loc[
        in_current_year & (future_traffic_df['Airport'] == stats.Airport),
        'Passengers_medium']

    # Exactly one baseline value is needed.  Fail with an explicit message
    # instead of relying on int() applied to an array, which is deprecated for
    # one-element arrays and raises an unhelpful TypeError for any other size.
    if len(baseline) != 1:
        raise ValueError(
            "expected exactly one record for airport {!r} in {}, found {}".format(
                stats.Airport, current_year, len(baseline)))
    current_passengers = int(baseline.iloc[0])

    # compound growth: passengers * (1 + rate/100) ** years_ahead, truncated to
    # a whole number of passengers
    for years_ahead in range(1, nyears + 1):
        projected_rows.append({
            'Airport': stats.Airport,
            'Year': current_year + years_ahead,
            'Passengers_low': int(current_passengers*(1 + stats.Lower_Quartile/100)**years_ahead),
            'Passengers_medium': int(current_passengers*(1 + stats.Median/100)**years_ahead),
            'Passengers_high': int(current_passengers*(1 + stats.Upper_Quartile/100)**years_ahead),
        })

# skip the append when there is nothing to add (nyears == 0 or no airports), so
# the dtypes of the observed data are left untouched
if projected_rows:
    future_traffic_df = pd.concat(
        [future_traffic_df, pd.DataFrame(projected_rows, columns=future_traffic_df.columns)],
        ignore_index=True)

# sort the dataframe by the `Airport' name then `Year', so it is ordered again
future_traffic_df = future_traffic_df.sort_values(by=['Airport', 'Year'])

future_traffic_df

Write the `pandas` dataframe to a comma-separated values (CSV) file named 'Future_Air_Traffic.csv', which can be opened in `Microsoft Excel`.

In [None]:
# save the projections as CSV, leaving out the dataframe's row index
future_traffic_df.to_csv(path_or_buf='Future_Air_Traffic.csv', index=False)