In [2]:
#set dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
from scipy import stats

In [3]:
# load aviation data
aviation_data = pd.read_csv("./dataset/airline_data.csv")
aviation_data.head()

Unnamed: 0,flight_date,year,month,carrier,carrier_name,airport,airport_name,arr_flights,arr_del15,carrier_ct,...,security_ct,late_aircraft_ct,arr_cancelled,arr_diverted,arr_delay,carrier_delay,weather_delay,nas_delay,security_delay,late_aircraft_delay
0,12/2023,2023,12,9E,Endeavor Air Inc.,ABE,"Allentown/Bethlehem/Easton, PA: Lehigh Valley ...",72.0,5.0,2.46,...,0.0,0.81,0.0,0.0,672.0,61.0,574.0,20.0,0.0,17.0
1,12/2023,2023,12,9E,Endeavor Air Inc.,AEX,"Alexandria, LA: Alexandria International",62.0,7.0,4.25,...,0.0,1.75,0.0,0.0,348.0,252.0,0.0,33.0,0.0,63.0
2,12/2023,2023,12,9E,Endeavor Air Inc.,AGS,"Augusta, GA: Augusta Regional at Bush Field",95.0,10.0,5.94,...,0.0,3.0,0.0,0.0,859.0,536.0,0.0,47.0,0.0,276.0
3,12/2023,2023,12,9E,Endeavor Air Inc.,ALB,"Albany, NY: Albany International",23.0,2.0,0.56,...,0.0,1.44,1.0,0.0,75.0,9.0,0.0,0.0,0.0,66.0
4,12/2023,2023,12,9E,Endeavor Air Inc.,ATL,"Atlanta, GA: Hartsfield-Jackson Atlanta Intern...",2111.0,256.0,76.88,...,0.0,117.94,1.0,0.0,21424.0,8906.0,732.0,1487.0,0.0,10299.0


In [4]:
print(aviation_data["flight_date"].unique())

['12/2023' '11/2023' '10/2023' '9/2023' '8/2023' '7/2023' '6/2023'
 '5/2023' '4/2023' '3/2023' '2/2023' '1/2023' '12/2022' '11/2022'
 '10/2022' '9/2022' '8/2022' '7/2022' '6/2022' '5/2022' '4/2022' '3/2022'
 '2/2022' '1/2022' '12/2021' '11/2021']


In [10]:
flights = aviation_data.to_dict(orient='records')

filtered_flights = []

for flight in flights:
    flight_date = flight["flight_date"]
    if flight_date.startswith("11") or flight_date.startswith("12"):
        filtered_flights.append(flight)
        
print(len(filtered_flights))

11445


In [12]:
formatted_flights = []

random_flights = random.sample(filtered_flights, min(200, len(filtered_flights)))

print(f"Number of randomly selected flights: {len(random_flights)}")

Number of randomly selected flights: 200


In [13]:
for flight in random_flights[:5]:
        print(flight["flight_date"])

11/2022
12/2022
11/2022
12/2021
12/2023


In [15]:
for flight in random_flights:
    airline_name = flight["carrier_name"]
    arrival_airport = flight["airport"]
    date = flight["flight_date"]
    airline_delay = flight["carrier_delay"]
    weather_delay = flight["weather_delay"]
    nas_delay = flight["nas_delay"]
    security_delay = flight["security_delay"]
    late_aircraft_delay = flight["late_aircraft_delay"]
    count_airline_delay = flight["carrier_ct"]
    count_weather_delay = flight["weather_ct"]
    count_nas_delay = flight["nas_ct"]
    count_security_delay = flight["security_ct"]
    count_late_aircraft_delay = flight["late_aircraft_ct"]

    # NAS is the National Aviation System
    formatted_flights.append({
        "Date": date,
        "Airline Name": airline_name,
        "Arrival Airport": arrival_airport,
        "Delay due to Airline in Mins": airline_delay,
        "Delay due to Weather in Mins": weather_delay,
        "Delay due to NAS in Mins": nas_delay,
        "Delay due to Security in Mins": security_delay,
        "Delay due to Late Aircraft in Mins": late_aircraft_delay,
        "# of Airline Delays": count_airline_delay,
        "# of Weather Delays": count_weather_delay,
        "# of NAS Delays": count_nas_delay,
        "# of Security Delays": count_security_delay,
        "# of Late Aircraft Delays": count_late_aircraft_delay
    })

In [16]:
flight_data_df = pd.DataFrame(formatted_flights)

print(flight_data_df)

        Date                             Airline Name Arrival Airport  \
0    11/2022                  Delta Air Lines Network             LAX   
1    12/2022                  Alaska Airlines Network             KTN   
2    11/2022                        Frontier Airlines             FLL   
3    12/2021  GoJet Airlines LLC d/b/a United Express             XNA   
4    12/2023                          Spirit Airlines             MCO   
..       ...                                      ...             ...   
195  11/2021                    SkyWest Airlines Inc.             BWI   
196  12/2021                       Mesa Airlines Inc.             FSD   
197  12/2023                 United Air Lines Network             OKC   
198  11/2022                  Delta Air Lines Network             FLL   
199  11/2021                 United Air Lines Network             FCA   

     Delay due to Airline in Mins  Delay due to Weather in Mins  \
0                          9910.0                       

In [17]:
flight_data_df.to_csv("flights.csv", index=True, mode='w')

flight_data_df = pd.read_csv("flights.csv", index_col=0)

flight_data_df.head()

Unnamed: 0,Date,Airline Name,Arrival Airport,Delay due to Airline in Mins,Delay due to Weather in Mins,Delay due to NAS in Mins,Delay due to Security in Mins,Delay due to Late Aircraft in Mins,# of Airline Delays,# of Weather Delays,# of NAS Delays,# of Security Delays,# of Late Aircraft Delays
0,11/2022,Delta Air Lines Network,LAX,9910.0,1413.0,4793.0,82.0,6847.0,160.61,12.54,167.61,1.92,90.33
1,12/2022,Alaska Airlines Network,KTN,877.0,138.0,603.0,0.0,1202.0,11.61,1.88,17.36,0.0,15.14
2,11/2022,Frontier Airlines,FLL,1365.0,1.0,1088.0,0.0,3796.0,19.53,0.05,19.05,0.0,34.37
3,12/2021,GoJet Airlines LLC d/b/a United Express,XNA,61.0,53.0,14.0,0.0,137.0,1.02,1.0,0.37,0.0,1.61
4,12/2023,Spirit Airlines,MCO,10885.0,213.0,14597.0,283.0,17348.0,141.81,3.47,264.12,5.32,208.28


In [None]:
#import json
#import requests

#aviation_stack_api = "c612023093ef04f3142c66defcf82a7a"
#base_url = "https://api.aviationstack.com/v1/flights"

#date_from = "2018-11-01"
#date_to = "2023-12-31"

#url = f"{base_url}?access_key={aviation_stack_api}&flight_date={date_from},{date_to}"

#response = requests.get(url)
#data = response.json()

#print(data)

In [None]:
#flights = data.get('data', [])

#filtered_flights = []

#for flight in flights:
    #flight_date = pd.to_datetime(flight["flight_date"]).date()
    #if (flight_date.year >= 2018 and flight_date.year <= 2023) and (flight_date.month in [11, 12]):
        #filtered_flights.append(flight)

#formatted_flights = []

#random_flights = random.sample(filtered_flights, min(200, len(filtered_flights)))

#for flight in random_flights:
    #flight_number = flight["flight"]["number"]
    #flight_date = flight["flight_date"]
    #airline_name = flight["airline"]["name"]
    #scheduled_departure = flight["departure"]["scheduled"]
    #estimated_departure = flight["departure"]["estimated"]
    #actual_departure = flight["departure"]["actual"]
    #dep_iata = flight["departure"]["iata"]
    #arr_iata = flight["arrival"]["iata"]
    #flight_status = flight["flight_status"]

    #formatted_flights.append({"Flight Number": flight_number,
                        #"Date": flight_date,
                        #"Airline": airline_name,
                        #"Scheduled Departure": scheduled_departure,
                        #"Estimated Departure": estimated_departure,
                        #"Actual Departure": actual_departure,
                        #"Departure Location": dep_iata,
                        #"Arrival Location": arr_iata,
                        #"Flight Status": flight_status})

### Create Visualizations for the Data