In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Read data

from js import fetch
import io

URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/airline_data.csv'
resp = await fetch(URL)
text = io.BytesIO((await resp.arrayBuffer()).to_py())

airline_data =  pd.read_csv(text,
                            encoding = "ISO-8859-1",
                            dtype={'Div1Airport': str, 'Div1TailNum': str, 
                                   'Div2Airport': str, 'Div2TailNum': str})

print('Data downloaded and read into a dataframe!')

# Preview the first 5 lines of the Loaded data
airline_data.head()

# Shape of the data
airline_data.shape

# Randomly sample 500 data points. Setting the random state to be 42 so that we get same result.
data = airline_data.sample(n=500, random_state=42)

# Get the shape of the trimmed data
data.shape

In [None]:
# Plotting APIs used in this notebook:
#   plotly.graph_objects -> scatter plot, line plot
#   plotly.express       -> bar chart, bubble chart, histogram, pie chart, sunburst chart

# Question: how does departure time change with respect to flight distance?
# Build the figure from a single markers-only Scatter trace
# (one red marker per row of `data`).
fig = go.Figure(
    data=go.Scatter(
        x=data['Distance'],
        y=data['DepTime'],
        mode='markers',
        marker={'color': 'red'},
    )
)
# Title the plot and label both axes.
fig.update_layout(
    title='Distance vs Departure Time',
    xaxis_title='Distance',
    yaxis_title='DepTime',
)
fig.show()

In [None]:
# Average monthly arrival delay, to see how it changes over the year.

# Group the sample by Month and take the mean of ArrDelay within each group.
line_data = data.groupby('Month')['ArrDelay'].mean().reset_index()
# Only a cell's final expression is rendered automatically, so show the
# aggregated table explicitly before plotting.
display(line_data)

# Lines-only trace: the colour is set through `line`, the property that styles
# the drawn stroke, rather than through `marker`, which describes point
# symbols that are not drawn in this mode.
fig = go.Figure(
    data=go.Scatter(
        x=line_data['Month'],
        y=line_data['ArrDelay'],
        mode='lines',
        line=dict(color='green'),
    )
)
fig.update_layout(title='Average Delay time vs month', xaxis_title='Month', yaxis_title='Delay Time')
fig.show()

In [None]:
# Plotly.express

# Bar chart: total number of flights per destination state.
bar_data = data.groupby(['DestState'])['Flights'].sum().reset_index()
# Only a cell's final expression is rendered automatically, so show the
# aggregated table explicitly before plotting.
display(bar_data)

# The data is grouped by DestState only, so the title describes exactly that
# (there is no per-airline split in this chart).
fig = px.bar(
    bar_data,
    x='DestState',
    y='Flights',
    title='Total number of flights to the destination state',
)
fig.show()

In [1]:
# Bubble chart: number of flights per reporting airline.
bub_data = data.groupby("Reporting_Airline")['Flights'].sum().reset_index()
# Only a cell's final expression is rendered automatically, so show the
# aggregated table explicitly before plotting.
display(bub_data)

# A scatter whose marker size follows the flight count; size_max caps the
# largest bubble, and the airline code is used as the hover title.
fig = px.scatter(
    bub_data,
    x="Reporting_Airline",
    y="Flights",
    title="Reporting airline vs Number of Flights",
    size="Flights",
    size_max=60,
    hover_name="Reporting_Airline",
)
fig.show()

NameError: name 'data' is not defined

In [None]:
# Histogram of arrival delays

# Missing arrival delays are replaced with 0 before plotting. The filled
# column is written back into the shared `data` frame, so the change is
# visible to every cell executed after this one.
data['ArrDelay'] = data['ArrDelay'].fillna(value=0)

fig = px.histogram(data_frame=data, x='ArrDelay')
fig.show()

In [None]:
# Pie chart built with px.pie from the sampled frame:
#   names  -> 'DistanceGroup' supplies the sector labels
#   values -> 'Month' supplies the values associated with each sector
# NOTE(review): sector sizes are driven by the Month numbers themselves, so
# rows from later months presumably weigh more -- confirm this is intended
# rather than sizing by 'Flights'.
fig = px.pie(
    data_frame=data,
    names='DistanceGroup',
    values='Month',
    title='Distance group proportion by month',
)
fig.show()

In [None]:
# Sunburst chart: the hierarchy runs Month -> DestStateName, and the
# 'Flights' column provides the sector values.
fig = px.sunburst(
    data_frame=data,
    path=['Month', 'DestStateName'],
    values='Flights',
)
fig.show()