Assignment 4: 

In [40]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import pandas as pd
import numpy as np
import plotly.express as px


In [41]:
airline_df = pd.read_csv("airlinedelaycauses_DelayedFlights.csv")
airline_df = airline_df.dropna(axis=0, how='any')
airline_df.isnull().sum()

airline_df = airline_df.drop(columns=['Year', 'DayofMonth', 'DepTime', 'CRSDepTime', 'ArrTime', 'CRSArrTime', 'UniqueCarrier', 'FlightNum', 'TailNum', 'ActualElapsedTime', 'CRSElapsedTime', 'TaxiIn', 'TaxiOut', 'Cancelled', 'CancellationCode', 'Diverted'])

In [43]:
#adding state column

def get_state(iata):
    iata_to_state = {
    'IND': 'IN', 'ISP': 'NY', 'JAN': 'MS', 'JAX': 'FL', 'LAS': 'NV', 
    'LAX': 'CA', 'LBB': 'TX', 'LIT': 'AR', 'MAF': 'TX', 'MCI': 'MO', 
    'MCO': 'FL', 'MDW': 'IL', 'MHT': 'NH', 'MSY': 'LA', 'OAK': 'CA', 
    'OKC': 'OK', 'OMA': 'NE', 'ONT': 'CA', 'ORF': 'VA', 'PBI': 'FL', 
    'PDX': 'OR', 'PHL': 'PA', 'PHX': 'AZ', 'PIT': 'PA', 'PVD': 'RI', 
    'RDU': 'NC', 'RNO': 'NV', 'RSW': 'FL', 'SAN': 'CA', 'SAT': 'TX', 
    'SDF': 'KY', 'SEA': 'WA', 'SFO': 'CA', 'SJC': 'CA', 'SLC': 'UT', 
    'SMF': 'CA', 'SNA': 'CA', 'STL': 'MO', 'TPA': 'FL', 'TUL': 'OK',
    'ABQ': 'NM', 'AMA': 'TX', 'AUS': 'TX', 'BHM': 'AL', 'BNA': 'TN',
    'BOI': 'ID', 'BUF': 'NY', 'BUR': 'CA', 'BWI': 'MD', 'CMH': 'OH',
    'CRP': 'TX', 'DAL': 'TX', 'DEN': 'CO', 'ELP': 'TX', 'FLL': 'FL',
    'GEG': 'WA', 'HOU': 'TX', 'HRL': 'TX', 'IAD': 'VA', 'ALB': 'NY',
    'BDL': 'CT', 'DTW': 'MI', 'CLE': 'OH', 'ORD': 'IL', 'SYR': 'NY',
    'EWR': 'NJ', 'IAH': 'TX', 'COS': 'CO', 'MRY': 'CA', 'LGB': 'CA',
    'FAT': 'CA', 'BFL': 'CA', 'EUG': 'OR', 'ICT': 'KS', 'MEM': 'TN',
    'BTV': 'VT', 'MKE': 'WI', 'LFT': 'LA', 'BRO': 'TX', 'PWM': 'ME',
    'MSP': 'MN', 'CLT': 'NC', 'CVG': 'KY', 'GSO': 'NC', 'SHV': 'LA',
    'RIC': 'VA', 'DCA': 'DC', 'DFW': 'TX', 'BGR': 'ME', 'DAY': 'OH',
    'GRR': 'MI', 'CRW': 'WV', 'CAE': 'SC', 'GSP': 'SC', 'XNA': 'AR',
    'GPT': 'MS', 'LGA': 'NY', 'ATL': 'GA', 'SRQ': 'FL', 'LEX': 'KY',
    'LRD': 'TX', 'MOB': 'AL', 'SAV': 'GA', 'MTJ': 'CO', 'MSN': 'WI',
    'AEX': 'TX',  # Amarillo, TX
    'PNS': 'FL',  # Pensacola, FL
    'ROC': 'NY',  # Rochester, NY
    'TYS': 'TN',  # Knoxville, TN
    'HSV': 'AL',  # Huntsville, AL
    'MFE': 'MS',  # Meridian, MS
    'MLU': 'MO',  # Columbia, MO
    'DSM': 'IA',  # Des Moines, IA
    'MGM': 'MS',  # Hattiesburg, MS
    'AVL': 'NC',  # Asheville, NC
    'LCH': 'LA',  # Lake Charles, LA
    'TLH': 'FL',  # Tallahassee, FL
    'CHS': 'SC',  # Charleston, SC
    'VPS': 'FL',  # Valparaiso, FL
    'CLL': 'TX',  # College Station, TX
    'GRK': 'KS',  # Goodland, KS
    'BTR': 'LA',  # Baton Rouge, LA
    'ABE': 'PA',  # Allentown, PA
    'MYR': 'SC',  # Myrtle Beach, SC
    'CHA': 'IL',  # Champaign, IL
    'BOS': 'MA',  # Boston, MA
    'DAB': 'FL',  # Daytona Beach, FL
    'ASE': 'NM',  # Alamogordo, NM
    'ATW': 'WI',  # Appleton, WI
    'BMI': 'IL',  # Bloomington, IL
    'CAK': 'OH',  # Akron, OH
    'CID': 'IA',  # Cedar Rapids, IA
    'CPR': 'CO',  # Colorado Springs, CO
    'EGE': 'KS',  # Dodge City, KS
    'FLG': 'MT',  # Flagler, MT
    'FSD': 'SD',  # Sioux Falls, SD
    'FWA': 'OH',  # Fort Wayne, IN
    'GJT': 'CO',  # Grand Junction, CO
    'GRB': 'WI',  # Green Bay, WI
    'HNL': 'HI',  # Honolulu, HI
    'KOA': 'HI',  # Keahole, HI
    'LAN': 'CA',  # Lancaster, CA
    'LIH': 'HI',  # Lihue, HI
    'MBS': 'MI',  # Muskegon, MI
    'MDT': 'PA',  # State College, PA
    'OGG': 'HI',  # Kahului, HI
    'PSP': 'CA',  # Palm Springs, CA
    'RAP': 'SD',  # Rapid City, SD
    'SBN': 'OH',  # Sandusky, OH
    'SPI': 'IL',  # Springfield, IL
    'TEX': 'TX',  # Texarkana, TX
    'YUM': 'AZ',  # Yuma, AZ
    'AVP': 'PA',  # Wilkes-Barre, PA
    'CWA': 'IA',  # Waterloo, IA
    'DRO': 'CA',  # Redding, CA
    'JFK': 'NY',  # New York, NY
    'ROA': 'NM',  # Roswell, NM
    'TVC': 'MI',  # Traverse City, MI
    'ITO': 'HI',  # Hilo, HI
    'HDN': 'CO',  # Hayden, CO
    'ILM': 'NC',  # Wilmington, NC
    'SBA': 'CA',  # Santa Barbara, CA
    'SBP': 'CA',  # San Luis Obispo, CA
    'CLD': 'MT',  # Great Falls, MT
    'MFR': 'CA',  # Medford, OR
    'HPN': 'NY',  # Hudson Valley, NY
    'MIA': 'FL',  # Miami, FL
    'SGF': 'MO',  # Springfield, MO
    'TRI': 'TN',  # Johnson City, TN
    'SUN': 'FL',  # St. Pete, FL
    'SGU': 'VI', # St. Croix, VI
    'MSO': 'MT',  # Missoula, MT
    'BZN': 'MT',  # Bozeman, MT
    'GTF': 'MT',  # Great Falls, MT
    'BIL': 'MT',  # Billings, MT
    'JAC': 'OR',  # Jackson Hole, WY (Note: potentially ambiguous)
    'MOD': 'CA',  # Modesto, CA
    'FAR': 'ND',  # Fargo, ND
    'GUC': 'CA',  # Eureka, CA
    'AZO': 'MI',  # Kalamazoo, MI
    'PIA': 'IL',  # Peoria, IL
    'MLI': 'IL',  # Moline, IL
    'LNK': 'NE',  # Lincoln, NE
    'FCA': 'MI',  # Traverse City, MI (Cherry Capital Airport)
    'IDA': 'ID', # Idaho Falls, ID
    'PSC': 'MT', # Glasgow, MT
    'RDM': 'OR', # Redmond, OR
    'RDD': 'CA',  # Redding, CA
    'TWF': 'OR', # Twin Falls, ID (Note: potentially ambiguous)
    'SMX': 'CA',  # Santa Maria, CA
    'ACV': 'CA',  # Arcata, CA
    'CEC': 'CA', # Crescent City, CA
    'CIC': 'CA', # Chico, CA
    'PMD': 'CA', # Palmdale, CA
    'EKO': 'WA',  # Forks, WA
    'IYK': 'AK', # King Salmon, AK
    'OXR': 'AK', # Fairbanks, AK
    'IPL': 'AK', # Iliamna, AK
    'PIH': 'AK', # Petersburg, AK
    'BTM': 'AK', # Bethel, AK
    'HLN': 'AK', # Unalakleet, AK
    'BLI': 'AK', # Unalaska, AK
    'RFD': 'AK', # Red Dog, AK
    'COD': 'AK', # Cold Bay, AK
    'SLE': 'AK', # St. Paul Island, AK
    'LWS': 'WA', # Lewis County, WA
    'BIS': 'ND', # Bismarck, ND
    'CDC': 'CO', # Cortez, CO
    'YKM': 'WA', # Yakima, WA
    'SJU': 'PR', # San Juan, PR
    'STT': 'VI', # St. Thomas, VI
    'ANC': 'AK', # Anchorage, AK
    'STX': 'VI', # St. Croix, VI
    'MLB': 'FL', # Melbourne, FL
    'PHF': 'CA', # Palm Springs, CA
    'PFN': 'AK', # St. Paul Island, AK
    'FAY': 'NC', # Fayetteville, NC
    'AGS': 'GA', # Augusta, GA
    'GNV': 'FL', # Gainesville, FL
    'ABY': 'MS', # Aberdeen, MS
    'DHN': 'AL', # Dothan, AL
    'EVV': 'IN', # Evansville, IN
    'FNT': 'MI', # Flint, MI
    'OAJ': 'CA', # Oxnard, CA
    'SWF': 'NY', # Stewart, NY
    'EWN': 'NC', # New Bern, NC
    'MEI': 'MI', # Menominee, MI
    'GTR': 'PA', # Gettysburg, PA
    'LYH': 'VA', # Lynchburg, VA
    'HHH': 'HI', # Honolulu, HI
    'EYW': 'FL', # Key West, FL
    'VLD': 'CA', # Valdosta, GA
    'CSG': 'GA', # Columbus, GA
    'ACY': 'NY', # Atlantic City, NJ
    'FSM': 'MO', # Jefferson City, MO
    'MCN': 'NC', # Asheville, NC
    'CHO': 'VA', # Charlottesville, VA
    'TOL': 'OH', # Toledo, OH
    'FLO': 'SC', # Florence, SC
    'BQK': 'NY', # Binghamton, NY
    'SCE': 'CA', # San Clemente Island, CA
    'LAW': 'KS', # Lawrence, KS
    'SPS': 'MO', # Springfield, MO
    'ABI': 'TX', # Abilene, TX
    'TYR': 'TX', # Tyler, TX
    'GGG': 'GA', # Albany, GA
    'ACT': 'AK', # Tatalina, AK
    'SJT': 'TX', # San Angelo, TX
    'TXK': 'TX', # Texarkana, TX
    'CMI': 'WA', # Moses Lake, WA
    'ROW': 'ND', # Williston, ND
    'RST': 'MN', # Rochester, MN
    'MQT': 'MI', # Marquette, MI
    'LSE': 'LA', # Lafayette, LA
    'DBQ': 'IA', # Dubuque, IA
    'GFK': 'NY', # Griffiss, NY
    'DLH': 'MN', # Duluth, MN
    'MOT': 'MO', # Joplin, MO
    'SUX': 'IA', # Sioux City, IA
    'PLN': 'KS', # Plainview, KS
    'BGM': 'NY', # Binghamton, NY
    'ERI': 'PA', # Erie, PA
    'ALO': 'OR', # Albany, OR
    'CMX': 'MI', # Cadillac, MI
    'RHI': 'RI', # Block Island, RI
    'ELM': 'WA', # Ellensburg, WA
    'KTN': 'TN', # Knoxville, TN
    'JNU': 'AK', # Juneau, AK
    'SIT': 'AK', # Sitka, AK
    'PSG': 'AK', # Petersburg, AK
    'CDV': 'AK', # Cordova, AK
    'YAK': 'AK', # Yakutat, AK
    'BET': 'AK', # Bethel, AK
    'BRW': 'AK', # Barrow, AK
    'SCC': 'AK', # Wrangell, AK
    'FAI': 'AK', # Fairbanks, AK
    'ADQ': 'AK', # Kodiak, AK
    'WRG': 'AK', # Wrangell, AK
    'OME': 'AK', # Nome, AK
    'OTZ': 'AK', # Kotzebue, AK
    'ADK': 'AK', # Adak Island, AK
    'PSE': 'PR', # Ponce, PR
    'BQN': 'PR', # Aguadilla, PR
    'BPT': 'TX', # Beaumont, TX
    'RKS': 'KS', # Pittsburg, KS
    'GCC': 'KS', # Garden City, KS
    'MKG': 'KY', # Lexington, KY
    'DLG': 'AK', # Dillingham, AK
    'AKN': 'AK', # King Salmon, AK
    'LWB': 'TX', # Lubbock, TX
    'ACK': 'MA', # Nantucket, MA
    'WYS': 'WY', # Cheyenne, WY
    'BJI': 'AK', # Big Lake, AK
    'INL': 'ID', # Pocatello, ID
    'GST': 'CA', # Grants Pass, OR (Note: Potentially ambiguous)
    'PUB': 'CA', # Puebla, CA
    'OTH': 'KS', # Goodland, KS
    'LMT': 'MT', # Lewistown, MT
    'ITH': 'NY', # Ithaca, NY
    'HTS': 'CA', # Hemet, CA
    'PIR': 'OR', # Newport, OR
    }
    return iata_to_state.get(iata.upper())
    
zeros = [[0]] * len(airline_df)
if "State Origin" not in airline_df:
    airline_df.insert(0, "State Origin", zeros, False)
    airline_df.insert(1, "State Destination", zeros, False)

airline_df['State Origin'] = airline_df['Origin'].apply(get_state)
airline_df['State Destination'] = airline_df['Dest'].apply(get_state)
                

In [44]:
arrival_delays = airline_df.groupby('State Destination')['ArrDelay'].mean().reset_index()
departure_delays = airline_df.groupby('State Origin')['DepDelay'].mean().reset_index()

arrival_delays.columns = ['State', 'Average Arrival Delay']
departure_delays.columns = ['State', 'Average Departure Delay']

state_delay_df = pd.merge(departure_delays, arrival_delays, on='State', how='outer').fillna(0)
state_delay_df['Total Average Delay'] = round(state_delay_df['Average Arrival Delay'] + state_delay_df['Average Departure Delay'], 1)

In [6]:
map_fig = px.choropleth(
    state_delay_df,
    locations='State', 
    locationmode='USA-states',
    color='Total Average Delay',   
    scope='usa',  
    color_continuous_scale='temps',
    title='Total Average Delay by State',
    labels={'Total Average Delay': 'Average Delay (min)'}
)

map_fig.show()

In [45]:
new_DepAirport_data = airline_df.groupby('Origin').agg(
    AvgDepDelay=('DepDelay', 'mean'),
    FlightCount_Dep=('Origin', 'size')
).reset_index()
new_DepAirport_data.rename(columns={'Origin': 'Airport'}, inplace=True)
    
new_ArrAirport_data = airline_df.groupby('Dest').agg( 
    AvgArrDelay=('ArrDelay', 'mean'),
    FlightCount_Arr=('Dest', 'size') 
).reset_index() 
new_ArrAirport_data.rename(columns={'Dest': 'Airport'}, inplace=True)
    
new_airport_data = pd.merge(
    new_DepAirport_data, 
    new_ArrAirport_data, 
    on='Airport', 
    how='inner'  
)

new_airport_data['TotalFlightCount'] = new_airport_data['FlightCount_Dep'] + new_airport_data['FlightCount_Arr']
new_airport_data['State'] = new_airport_data['Airport'].apply(get_state)

average_delays = airline_df.groupby('Origin').agg(
    AvgCarrierDelay=('CarrierDelay', 'mean'),
    AvgWeatherDelay=('WeatherDelay', 'mean'),
    AvgNASDelay=('NASDelay', 'mean'),
    AvgSecurityDelay=('SecurityDelay', 'mean'),
    AvgLateAircraftDelay=('LateAircraftDelay', 'mean')
).reset_index()

airport_df = pd.merge(new_airport_data, average_delays, left_on='Airport', right_on='Origin', how='left')

airport_df.drop(columns=['Origin'], inplace=True, errors='ignore')

In [28]:
def create_interactive_plot(airline_df):    
    fig = px.scatter(
        new_airport_data, 
        x="AvgArrDelay", 
        y="AvgDepDelay", 
        size="TotalFlightCount",  
        color="State",  
        hover_name="Airport",  
        title="Airport Performance: Arrival vs Departure Delay",
        labels={"AveArrDelay": "Average Arrival Delay", "AveDepDelay": "Average Departure Delay"},
        size_max=60
    )
    
    fig.update_layout(
        width = 1000,  
        height = 600, 

        xaxis=dict(
            title="Average Arrival Delay (Minutes)",
            range=[30, 100],
              
        ),
        yaxis=dict(
            title="Average Departure Delay (Minutes)",
            range=[30, 100]  
        ),
        
        updatemenus=[
            {
                'buttons': [
                    {
                        'label': 'All Airports',
                        'method': 'update',
                        'args': [{'visible': [True] * len(new_airport_data)}, {'title': 'All Airport'}]
                    },
                ] + [
                    {
                        'label': state,
                        'method': 'update',
                        'args': [
                            {'visible': new_airport_data['State'] == state}, 
                            {'title': f"Airport: {state}"}
                        ]
                    }
                    for state in new_airport_data['State'].unique()
                    
                ],
                
                'direction': 'down',
                'showactive': True,
                'x': 1.17,
                'y': 1.1,
                'xanchor': 'right',
                'yanchor': 'top'
            }
        ]
    )

    fig.show()
    return new_airport_data
create_interactive_plot(airport_df)




Unnamed: 0,Airport,AvgDepDelay,FlightCount_Dep,AvgArrDelay,FlightCount_Arr,TotalFlightCount,State
0,ABE,75.726208,683,67.791077,919,1602,PA
1,ABI,92.428571,266,51.308511,564,830,TX
2,ABQ,53.894890,6184,52.619906,6993,13177,NM
3,ABY,63.424528,212,61.182979,235,447,MS
4,ACK,88.173913,138,73.472000,125,263,MA
...,...,...,...,...,...,...,...
296,WYS,58.333333,3,44.812500,16,19,WY
297,XNA,71.751935,2584,63.583815,2941,5525,AR
298,YAK,68.143791,153,66.551546,194,347,AK
299,YKM,31.500000,32,41.296296,27,59,WA


In [51]:
app = dash.Dash(__name__)

app.layout = html.Div([
    html.H1("Airport Delay Analysis Dashboard"),
    dcc.Dropdown(
        id='state-dropdown',
        options=[state for state in airport_df['State'].unique()],
        value='PA',             #Change to empty if you want to display all data in the beginning (computationally very heavy)
        clearable=False
    ),
    
    dcc.Dropdown(
        id='airport-dropdown',
        options=[],
        value='', 
        clearable=False
    ),
    dcc.Graph(id='scatter-plot'),
    dcc.Graph(id='delay-comparison-graph'),
    dcc.Graph(id='donut-chart')
])

@app.callback(
    Output('airport-dropdown', 'options'),
    Input('state-dropdown', 'value')
)

def update_airport_dropdown(selected_state):
    filtered_airports = airport_df[airport_df['State'] == selected_state]['Airport'].unique()
    return [{'label': airport, 'value': airport} for airport in filtered_airports]

@app.callback(
    Output('scatter-plot', 'figure'),
    Input('state-dropdown', 'value'),

)

def update_scatter_plot(selected_state):
    if selected_state:  
        filtered_airport = airport_df[airport_df['State'] == selected_state]
    else:  
        filtered_airport = airport_df

    fig = px.scatter(
        filtered_airport,
        x="AvgArrDelay",
        y="AvgDepDelay",
        size="TotalFlightCount",
        color="State",
        hover_name="Airport",
        title="Airport Performance: Arrival vs Departure Delay",
        labels={"AveArrDelay": "Average Arrival Delay", "AveDepDelay": "Average Departure Delay"},
        size_max=60
    )

    return fig

@app.callback(
    Output('delay-comparison-graph', 'figure'),
    [Input('state-dropdown', 'value'),
     Input('airport-dropdown', 'value')]
)
def update_graph(selected_state, selected_airport):
    filtered_df = airport_df[airport_df['State'] == selected_state]

    # This is used to only display the selected airport
    # if selected_airport:
    #     filtered_df = filtered_df[filtered_df['Airport'] == selected_airport]

    comparison_df = filtered_df.melt(
        id_vars=['Airport'], 
        value_vars=['AvgCarrierDelay', 'AvgWeatherDelay', 'AvgNASDelay', 'AvgSecurityDelay', 'AvgLateAircraftDelay'],
        var_name='Delay Type',
        value_name='Delay Time'
    )

    fig = px.line(
        comparison_df,
        x='Delay Type',
        y='Delay Time',
        color='Airport',
        title=f'Delay Comparison for {selected_state}',
        symbol="Airport",
        markers=True  
    )
    #Instead of making everything else disappears this highlights the selected airport
    if selected_airport:
        fig.update_traces(opacity=0.2)
        fig.for_each_trace(
            lambda trace: trace.update(opacity=1.0) if trace.name == selected_airport else trace
        )

    return fig

@app.callback(
    Output('donut-chart', 'figure'),
    Input('airport-dropdown', 'value')
)
def update_donut_chart(selected_airport):
    if not selected_airport:
        return px.pie(title="No Airport Selected")

    filtered_data = airport_df[airport_df['Airport'] == selected_airport]
    
    delay_data = filtered_data[['AvgCarrierDelay', 'AvgWeatherDelay', 'AvgNASDelay', 'AvgSecurityDelay', 'AvgLateAircraftDelay']].sum()

    fig = px.pie(
        names=delay_data.index,
        values=delay_data.values,
        title=f"Delay Causes Breakdown for Airport {selected_airport}",
        hole=0.4 
    )

    fig.add_annotation(
        text=f"Total Average Delay<br>{round(airport_df.loc[airport_df['Airport'] == selected_airport, 'AvgDepDelay'].squeeze(), 1)} minutes",
        x=0.5, y=0.5,  # Placement of the annotation (center)
        font=dict(size=10),
        showarrow=False
    )
    
    return fig

if __name__ == '__main__':
    app.run_server(debug=True)
    
