# Aviation Data

In [1]:
import os

# Move the working directory to the project root (two levels above the
# notebook) so the data path used below resolves. The root is resolved to an
# absolute path only once per kernel: re-running this cell then returns to the
# same directory instead of climbing two more levels each time.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("../../")
os.chdir(PROJECT_ROOT)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import itertools

In [2]:
# Read the aviation workbook; the path is built from the current working
# directory, which the first cell moves two levels up from the notebook.
aviation_path = os.getcwd() + "/data/tourism/aviation_seats_flights_pic.xlsx"
ad = pd.read_excel(aviation_path)

# Lower-case the column labels so later cells can use one naming convention.
ad.columns = [label.lower() for label in ad.columns]

# Parse the date column into pandas datetimes.
ad["date"] = pd.to_datetime(ad["date"])

ad.head(5)

Unnamed: 0,country,iso,region,date,aircraft_type,seats_arrivals_domestic,seats_arrivals_interregional,seats_arrivals_intraregional,seats_arrivals_intl,seats_arrivals_total,available_seat_kilometers,number_of_flights_domestic,number_of_flights_interregional,number_of_flights_intraregional,number_of_flights_intl,number_of_flights_total
0,Fiji,FJ,East Asia & Pacific,2019-01-01,passenger,839,273,3480,3753,4592,14304160.0,8,1,10,11,19
1,Fiji,FJ,East Asia & Pacific,2019-01-02,passenger,974,313,3471,3784,4758,14956100.0,8,1,10,11,19
2,Fiji,FJ,East Asia & Pacific,2019-01-03,passenger,1190,443,3675,4118,5308,15921430.0,10,2,12,14,24
3,Fiji,FJ,East Asia & Pacific,2019-01-04,passenger,831,586,3159,3745,4576,14573340.0,7,2,12,14,21
4,Fiji,FJ,East Asia & Pacific,2019-01-05,passenger,744,273,4752,5025,5769,17734490.0,7,1,12,13,20


In [3]:
# Print each column's dtype and non-null count to spot missing values
# before the frame is filtered in the next cell.
ad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10611 entries, 0 to 10610
Data columns (total 16 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   country                          10611 non-null  object        
 1   iso                              10611 non-null  object        
 2   region                           10611 non-null  object        
 3   date                             10611 non-null  datetime64[ns]
 4   aircraft_type                    10566 non-null  object        
 5   seats_arrivals_domestic          10611 non-null  int64         
 6   seats_arrivals_interregional     10611 non-null  int64         
 7   seats_arrivals_intraregional     10611 non-null  int64         
 8   seats_arrivals_intl              10611 non-null  int64         
 9   seats_arrivals_total             10611 non-null  int64         
 10  available_seat_kilometers        10611 non-null  float64  

In [4]:
# Columns kept for the analysis: the two keys plus the international and
# total figures for seats and flights.
select_cols = [
    "country",
    "date",
    "seats_arrivals_intl",
    "seats_arrivals_total",
    "number_of_flights_intl",
    "number_of_flights_total",
]

# Keep only rows whose aircraft type is "passenger" (rows with a missing
# aircraft type do not match and are dropped), then renumber from zero.
is_passenger = ad["aircraft_type"] == "passenger"
ad = ad.loc[is_passenger, select_cols].reset_index(drop=True)
ad.head(5)

Unnamed: 0,country,date,seats_arrivals_intl,seats_arrivals_total,number_of_flights_intl,number_of_flights_total
0,Fiji,2019-01-01,3753,4592,11,19
1,Fiji,2019-01-02,3784,4758,11,19
2,Fiji,2019-01-03,4118,5308,14,24
3,Fiji,2019-01-04,3745,4576,14,21
4,Fiji,2019-01-05,5025,5769,13,20


In [5]:
# Build the complete (date, country) grid covering every day between the
# first and last observation, so that days without any record for a country
# show up explicitly once the observed data is merged onto the grid.
daterange = pd.date_range(start=ad["date"].min(),
                          end=ad["date"].max(),
                          freq="D").tolist()
countries = ad["country"].unique().tolist()
comb = itertools.product(daterange, countries)

# Create the frame in a single constructor call. Growing it with pd.concat
# one row at a time copies the whole frame on every iteration, which is
# quadratic in the number of (date, country) pairs.
date_df = pd.DataFrame(list(comb), columns=["date", "country"])

# Make the key dtype explicit so it matches ad["date"] in the merge that follows.
date_df["date"] = pd.to_datetime(date_df["date"])

In [6]:
# Left-join the observations onto the full date x country grid. Grid rows
# without a matching observation come back as NaN and are filled with 0.
# The join keys are spelled out so the merge cannot silently pick up any
# other column name the two frames might come to share.
ad = date_df.merge(ad, how="left", on=["date", "country"]).fillna(0)

# Seed the sample so the displayed rows are the same on every run.
ad.sample(5, random_state=0)

Unnamed: 0,date,country,seats_arrivals_intl,seats_arrivals_total,number_of_flights_intl,number_of_flights_total
8158,2020-11-10,Tuvalu,0.0,0.0,0.0,0.0
12344,2021-10-25,Solomon Islands,0.0,0.0,0.0,0.0
15408,2022-07-08,Fiji,4324.0,4973.0,8.0,17.0
8808,2021-01-04,Fiji,273.0,588.0,1.0,5.0
570,2019-02-17,Papua New Guinea,723.0,3572.0,3.0,11.0


In [7]:
# Trailing 7-day sum of every metric column, computed one country at a time
# so that a window never mixes rows from two different countries.
# The metric columns are every column other than the two keys.
metric_cols = [col for col in ad.columns if col not in ("date", "country")]

country_frames = []
for country in countries:
    country_df = (ad[ad["country"] == country]
                  .sort_values(by="date")
                  .reset_index(drop=True))
    # rolling(window=7) needs seven rows, so the first six rows of each
    # country are NaN after this step.
    country_df[metric_cols] = country_df[metric_cols].rolling(window=7).sum()
    country_frames.append(country_df)

# Concatenate once at the end instead of re-copying the accumulated frame
# on every loop iteration.
ad_7dsum = pd.concat(country_frames, axis=0)

In [8]:
# Reshape to wide form: one row per date, one column per country, holding
# the 7-day sum of international seat arrivals.
sia_wide = ad_7dsum.pivot(index="date",
                          columns="country",
                          values="seats_arrivals_intl")

# Turn the date index back into a regular column and clear the leftover
# "country" label on the column axis.
sia_7dsum = sia_wide.reset_index()
sia_7dsum.columns.name = None

sia_7dsum.head(5)

Unnamed: 0,date,Fiji,Kiribati,Marshall Islands (the),Micronesia (Federated States of),Nauru,Palau,Papua New Guinea,Samoa,Solomon Islands,Tonga,Tuvalu,Vanuatu
0,2019-01-01,,,,,,,,,,,,
1,2019-01-02,,,,,,,,,,,,
2,2019-01-03,,,,,,,,,,,,
3,2019-01-04,,,,,,,,,,,,
4,2019-01-05,,,,,,,,,,,,


In [9]:
from bokeh.palettes import Category20
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models import ColumnDataSource, HoverTool, Legend

# Send bokeh output to the notebook.
output_notebook()

p = figure(
    title="International Seats Arrivals (7-Day Sum)",
    width=780,
    height=500,
    x_axis_type="datetime",
    x_axis_label="Date",
)

# Hover tooltip: the date, the hovered line's name (each line is named after
# its country below), and the value of the column with that same name.
tooltips = [
    ("date", "@date{%F}"),
    ("country", "$name"),
    ("Intl Seats Arrivals", "@$name"),
]
hover = HoverTool(tooltips=tooltips, formatters={"@date": "datetime"})
p.add_tools(hover)

# Add a legend on the right side of the figure.
p.add_layout(Legend(), "right")
source = ColumnDataSource(sia_7dsum)

# Draw one line per country from the wide frame, pairing each country with a
# colour from the 12-colour Category20 palette. zip stops at the shorter of
# the two sequences.
palette = Category20[12]
for country, color in zip(countries, palette):
    p.line(x="date", y=country, source=source,
           name=country, legend_label=country, color=color)

p.legend.label_text_font_size = "9pt"
p.legend.click_policy = "mute"
p.legend.location = "top_left"

show(p)