## Plotly

In [16]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [17]:
# Location of the airline dataset CSV loaded by this notebook.
AIRLINE_DATA_URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/airline_data.csv'

# Columns that are read as plain strings instead of letting pandas infer a dtype.
STRING_TYPED_COLUMNS = ['Div1Airport', 'Div1TailNum', 'Div2Airport', 'Div2TailNum']

# Load the whole CSV into a DataFrame; the file is decoded as ISO-8859-1.
airline_data = pd.read_csv(
    AIRLINE_DATA_URL,
    encoding="ISO-8859-1",
    dtype=dict.fromkeys(STRING_TYPED_COLUMNS, str),
)

In [18]:
# Preview the first five rows of the full dataset.
airline_data.head(n=5)

Unnamed: 0.1,Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,IATA_CODE_Reporting_Airline,...,Div4WheelsOff,Div4TailNum,Div5Airport,Div5AirportID,Div5AirportSeqID,Div5WheelsOn,Div5TotalGTime,Div5LongestGTime,Div5WheelsOff,Div5TailNum
0,1295781,1998,2,4,2,4,1998-04-02,AS,19930,AS,...,,,,,,,,,,
1,1125375,2013,2,5,13,1,2013-05-13,EV,20366,EV,...,,,,,,,,,,
2,118824,1993,3,9,25,6,1993-09-25,UA,19977,UA,...,,,,,,,,,,
3,634825,1994,4,11,12,6,1994-11-12,HP,19991,HP,...,,,,,,,,,,
4,1888125,2017,3,8,17,4,2017-08-17,UA,19977,UA,...,,,,,,,,,,


In [19]:
# (rows, columns) of the full dataset.
airline_data.shape

(27000, 110)

In [20]:
# Work on a small subset: 50 rows drawn at random from the full dataset.
# The fixed random_state makes the draw repeatable, so the same rows are
# selected on every run.
air_50 = airline_data.sample(
    n=50,
    random_state=42,
)

In [21]:
# (rows, columns) of the 50-row sample.
air_50.shape

(50, 110)

In [22]:
# Preview the first five sampled rows; the index keeps the row labels from airline_data.
air_50.head(n=5)

Unnamed: 0.1,Unnamed: 0,Year,Quarter,Month,DayofMonth,DayOfWeek,FlightDate,Reporting_Airline,DOT_ID_Reporting_Airline,IATA_CODE_Reporting_Airline,...,Div4WheelsOff,Div4TailNum,Div5Airport,Div5AirportID,Div5AirportSeqID,Div5WheelsOn,Div5TotalGTime,Div5LongestGTime,Div5WheelsOff,Div5TailNum
5312,985989,2006,1,3,29,3,2006-03-29,OO,20304,OO,...,,,,,,,,,,
18357,1782939,1993,3,8,3,2,1993-08-03,DL,19790,DL,...,,,,,,,,,,
6428,84140,1989,3,7,3,1,1989-07-03,HP,19991,HP,...,,,,,,,,,,
15414,1839736,2008,4,10,10,5,2008-10-10,UA,19977,UA,...,,,,,,,,,,
10610,1622640,2010,1,2,19,5,2010-02-19,FL,20437,FL,...,,,,,,,,,,


In [23]:
# List the column labels of the sample. As the last expression of the cell the
# Index is displayed as the cell's output, so no print() wrapper is needed.
air_50.columns

Index(['Unnamed: 0', 'Year', 'Quarter', 'Month', 'DayofMonth', 'DayOfWeek',
       'FlightDate', 'Reporting_Airline', 'DOT_ID_Reporting_Airline',
       'IATA_CODE_Reporting_Airline',
       ...
       'Div4WheelsOff', 'Div4TailNum', 'Div5Airport', 'Div5AirportID',
       'Div5AirportSeqID', 'Div5WheelsOn', 'Div5TotalGTime',
       'Div5LongestGTime', 'Div5WheelsOff', 'Div5TailNum'],
      dtype='object', length=110)


# plotly.graph_objects

## Scatter Plot

PLOT: The relationship between departure time (`DepTime`) and flight distance (`Distance`)

In [24]:
# Build the trace on its own first: one red marker per sampled flight,
# Distance on the x-axis and DepTime on the y-axis.
distance_vs_dep_time = go.Scatter(
    x=air_50['Distance'],
    y=air_50['DepTime'],
    mode='markers',
    marker={'color': 'red'},
)
# Wrap the trace in a figure, then set the chart title and both axis titles.
fig = go.Figure(data=[distance_vs_dep_time])
fig.update_layout(
    title='Distance & Departure Time',
    xaxis_title='Distance',
    yaxis_title='Departure Time',
)
# Render the figure.
fig.show()

## Line Plot

PLOT: Average arrival delay per month, drawn as a time series

In [25]:
# Mean ArrDelay per Month for the sampled flights.
# as_index=False keeps Month as an ordinary column, so the result is a flat
# two-column frame (Month, ArrDelay).
delay_line = air_50.groupby('Month', as_index=False)['ArrDelay'].mean()
delay_line

Unnamed: 0,Month,ArrDelay
0,1,2.333333
1,2,-6.666667
2,3,18.2
3,4,2.5
4,5,1.5
5,6,-12.5
6,7,11.0
7,8,14.0
8,9,3.0
9,10,8.333333


In [26]:
# Start from an empty figure and add a single line trace:
# Month on the x-axis, average ArrDelay on the y-axis.
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=delay_line['Month'],
        y=delay_line['ArrDelay'],
        mode='lines',
    )
)
# Set the chart title and both axis titles.
fig.update_layout(
    title='Month & Average Flight Delay Time',
    xaxis_title='Month',
    yaxis_title='Ave Delay Time',
)
# Render the figure.
fig.show()

# plotly.express

## Bar Chart

PLOT: Total number of flights to each destination state (all airlines combined)

In [27]:
# Sum the Flights column within each destination state. The grouping key is
# DestState only, so the totals combine every airline. reset_index() turns
# the grouped result back into a flat two-column frame (DestState, Flights).
flights_bar = (
    air_50
    .groupby('DestState')['Flights']
    .sum()
    .reset_index()
)
flights_bar

Unnamed: 0,DestState,Flights
0,AZ,1.0
1,CA,7.0
2,CO,1.0
3,CT,1.0
4,FL,2.0
5,GA,2.0
6,HI,1.0
7,IL,5.0
8,IN,1.0
9,KY,2.0


In [28]:
# One bar per destination state, with bar height equal to the summed Flights.
fig = px.bar(
    data_frame=flights_bar,
    x='DestState',
    y='Flights',
    title='Total number of flights to the destination States',
)
# Render the figure.
fig.show()

## Bubble Chart

PLOT: Number of flights per reporting airline

In [29]:
# Total Flights per reporting airline, returned as a flat two-column frame
# (Reporting_Airline, Flights).
flights_bubble = (
    air_50
    .groupby('Reporting_Airline')['Flights']
    .agg('sum')
    .reset_index()
)
flights_bubble

Unnamed: 0,Reporting_Airline,Flights
0,9E,1.0
1,AA,6.0
2,B6,2.0
3,CO,2.0
4,DL,10.0
5,FL,1.0
6,HP,2.0
7,KH,1.0
8,MQ,4.0
9,NW,2.0


In [31]:
# A bubble chart is a scatter plot whose marker size encodes a value: here the
# Flights total drives both the y position and the bubble size.
bubble_options = {
    'x': 'Reporting_Airline',
    'y': 'Flights',
    'size': 'Flights',
    'hover_name': 'Reporting_Airline',
    'title': 'The volume of flights by Airlines',
    'size_max': 60,
}
fig = px.scatter(flights_bubble, **bubble_options)
# Render the figure.
fig.show()

## Histogram

PLOT: Distribution of arrival delay

In [None]:
# Replace missing ArrDelay values with 0 and keep the result under the same
# name, so the cells that follow see the filled column.
# NOTE(review): zero-filled rows will be counted at ArrDelay == 0 in the
# histogram that follows - confirm that is the intended treatment.
air_50 = air_50.assign(ArrDelay=air_50['ArrDelay'].fillna(0))

In [None]:
# Histogram of ArrDelay across the sampled flights, requesting up to 40 bins.
# A title is set so the figure stands on its own, like the other charts in
# this notebook.
fig = px.histogram(
    air_50,
    x='ArrDelay',
    nbins=40,
    title='Distribution of arrival delay',
)
# Render the figure.
fig.show()

## Pie Chart

PLOT: Share of each distance group, with every slice sized by the sum of the `Month` values (month numbers) of its flights

In [32]:
# One slice per DistanceGroup; slice size is the sum of the Month column over
# the rows in that group.
# NOTE(review): Month holds month numbers, so later months weigh more than
# earlier ones - confirm this weighting is intended rather than a flight count.
pie_options = {
    'values': 'Month',
    'names': 'DistanceGroup',
    'color_discrete_sequence': px.colors.sequential.RdBu,
    'title': 'Distance Group Proportion by Month',
}
fig = px.pie(air_50, **pie_options)
# Render the figure.
fig.show()

## Sunburst Charts

PLOT: Hierarchical view by month, then destination state, with each sector sized by the number of flights

In [None]:
# Use the sunburst function to create a figure: the inner ring is Month, the
# outer ring is DestStateName, and each sector is sized by the summed Flights.
# A title is set so the figure stands on its own, like the other charts in
# this notebook.
fig = px.sunburst(
    air_50,
    path=['Month', 'DestStateName'],
    values='Flights',
    title='Number of flights by Month and Destination State',
)
# Render the figure.
fig.show()

# End