### Creating interactive plots using Bokeh

In [1]:
import pandas as pd
import numpy as np

In [2]:
#Load the flight-level dataset; later cells read its UNIQUE_CARRIER, DAY_OF_WEEK,
#DEP_HOUR and ARR_DELAY columns
df = pd.read_csv('Full_airline_data.csv')

In [4]:
#Carrier codes of the 10 airlines that appear on the most rows of df
carrier_counts = df['UNIQUE_CARRIER'].value_counts()
top10_carrier = carrier_counts.head(10).index.tolist()

#Lookup table that translates the carrier codes into full airline names
carrier_key = pd.read_csv('L_UNIQUE_CARRIERS.csv')

In [5]:
#Visualize the positive arrival delays (ARR_DELAY) for the top 10 airlines

#Keep only the flights of the 10 biggest carriers and the columns needed below
df_sub = df.loc[df['UNIQUE_CARRIER'].isin(top10_carrier),
                ['UNIQUE_CARRIER','DEP_HOUR','ARR_DELAY']].copy()

#Since we are considering only flight delays, early landings count as zero delay
#(ARR_DELAY < 0 means the flight landed before its CRS_ARR_TIME).
#clip() is vectorised and, like the per-row lambda it replaces, leaves NaN untouched.
df_sub['ARR_DELAY'] = df_sub['ARR_DELAY'].clip(lower=0)

#Attach the airline names from carrier_key, then drop the two code columns
df_sub = (df_sub
          .merge(carrier_key, how='left', left_on='UNIQUE_CARRIER', right_on='Code')
          .drop(['UNIQUE_CARRIER','Code'], axis=1))

In [6]:
#Averaged carrier delays: one row per carrier,
#columns 0-23 = departure hour, column 24 = daily average
N_HOURS = 24

#Mean delay (daily average). groupby sorts by 'Description', which fixes the row order.
df_mean = df_sub[['ARR_DELAY','Description']].groupby('Description').mean().reset_index()

#np.zeros instead of the raw np.ndarray constructor, which hands back uninitialised memory
arr_delay = np.zeros((len(df_mean), N_HOURS + 1))
arr_delay[:,N_HOURS] = df_mean['ARR_DELAY'].values

#Delays grouped by departure hour, pivoted to one column per hour.
#reindex() pins the rows to df_mean's carrier order and the columns to hours 0-23,
#so the assignment below cannot silently misalign or fail on a missing hour;
#carrier/hour combinations without flights are shown as zero delay.
#NOTE(review): assumes DEP_HOUR takes the values 0-23 - any other value is dropped here; confirm
df_hourly = (df_sub.groupby(['Description','DEP_HOUR'])['ARR_DELAY']
                   .mean()
                   .unstack()
                   .reindex(index=df_mean['Description'], columns=list(range(N_HOURS)))
                   .fillna(0))
arr_delay[:,0:N_HOURS] = df_hourly.values

#The bars are drawn later as rect glyphs centred on y=0 with y_range starting at 0,
#so only the upper half is visible; doubling the height makes the visible part equal the delay
arr_delay = arr_delay*2.0

In [7]:
###  Interaction in Bokeh charts using widgets

from bokeh.models import  Callback, ColumnDataSource, Rect,CustomJS, LabelSet
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.layouts import row, column,widgetbox
from bokeh.models import CustomJS, Slider, Select
import numpy as np
from math import pi


#data
carrier = df_mean['Description'].tolist()
#Option labels '0'..'24'; only needed by the alternative Select widget further down
ages_gen = [str(hour) for hour in range(25)]

#Column of arr_delay that holds the daily average (columns 0-23 are the departure hours)
DAILY_COL = 24

#source for callback:
#  x / y       - carrier name and the y centre (0) of each bar
#  height      - the column currently displayed (starts on the daily average)
#  height0..24 - one pre-computed column per slider position, swapped in by the JS callback
bar_data = dict(x=carrier, y=np.zeros(len(carrier)), height=arr_delay[:,DAILY_COL])
bar_data.update({'height%d' % col: arr_delay[:,col] for col in range(DAILY_COL + 1)})
source = ColumnDataSource(data=bar_data)

#Label text: 'title' is what is shown, h1/h2 are the two alternatives the callback picks from
source2 = ColumnDataSource(data=dict(title=["Daily Average"],h1 = ["Hourly"],h2 = ["Daily Average"]))


#Javascript callback for the Departure Hour slider.
#It copies the pre-computed 'height<value>' column into 'height' and switches the label
#between "Hourly" and "Daily Average" (slider value 24).
#NOTE(review): cb_obj.get(...), source.trigger('change') and Slider(callback=...) are the
#Bokeh API this notebook was written against - confirm the pinned Bokeh version before upgrading
Callback_Hour = CustomJS(args={'source1': source,'source2':source2}, code="""
        var f = cb_obj.get('value');
        var data1 = source1.get('data');
        var data2 = source2.get('data');

        data1['height'] = data1['height'+f.toString()];
        source1.trigger('change');

        data2['title'] = (f==24) ? data2['h2'] : data2['h1'];
        source2.trigger('change');
    """)

#Figure for the bar chart
p1 = figure(title="Average flight delay",
            x_range=carrier, y_range=[0, 30],
            plot_width=600, plot_height = 350,
            outline_line_color= None,
            toolbar_location='above')
p1.background_fill_color = '#e3e0db'
p1.xaxis.major_label_orientation = pi/4
p1.xaxis.axis_label = 'Airline Carrier'
p1.yaxis.axis_label = 'Average Delay'


#Create Barchart using rect glyphs (centred on y=0; only the part inside y_range is visible)
p1.rect(x ='x', y ='y', width =.8, height = 'height', source = source, color="#720017", alpha=0.95, name = "Airline Delay")
#Create label
labels = LabelSet(x=0, y=0, text='title', level='glyph',x_offset=35, y_offset=170, source=source2, render_mode='canvas')
p1.add_layout(labels)

#Create the slider: positions 0-23 are departure hours, 24 is the daily average
hour_slider = Slider(start=0, end=DAILY_COL, value=DAILY_COL, step=1,title="Departure Hour", callback=Callback_Hour)
#Alternately:
#hour_slider = Select(title="Departure Hour:", value=ages_gen[24], options= ages_gen,  callback = Callback_Hour)

layout = column(p1,widgetbox(hour_slider))
output_file("slider.html", title="slider.py example")

show(layout)

In [8]:
#Create the javascript code and script tag to embed in the blog
from bokeh.resources import CDN
from bokeh.embed import autoload_static

#The URL is where the generated .js file will be served from; the printed tag points at it
js, tag = autoload_static(layout, CDN, "https://xcitech.github.io/assets/bokeh_js/interact_barchart_select.js")

#Context manager so the file is flushed and closed even if the write raises
with open('interact_barchart_select.js', 'w') as f:
    f.write(js)
print(tag)


<script
    src="https://xcitech.github.io/assets/bokeh_js/interact_barchart_select.js"
    id="c5bda6c6-c641-40fc-a717-00c1664383a3"
    data-bokeh-model-id="240d4c27-2526-439b-bbc2-4076fdecc10c"
    data-bokeh-doc-id="05cacb08-3476-4ae1-8f9c-9ebe42f821e5"
></script>


## Interactive Heat Maps with Bokeh

In [3]:
heatmap_cols = ['UNIQUE_CARRIER','DAY_OF_WEEK','DEP_HOUR','ARR_DELAY']

#Remove datapoints which have the erroneous DAY_OF_WEEK code 9.
#A boolean mask does this in one pass and .copy() gives an independent frame,
#so the column assignment below never writes into a view of df.
df_sub = df.loc[df['DAY_OF_WEEK'] != 9, heatmap_cols].copy()

#Since we are considering only flight delays, early landings count as zero delay
#(ARR_DELAY < 0 means the flight landed before its CRS_ARR_TIME).
#clip() is vectorised and leaves NaN untouched, like the per-row lambda it replaces.
df_sub['ARR_DELAY'] = df_sub['ARR_DELAY'].clip(lower=0)

In [4]:
#Get the 10 biggest Airline Carriers (by number of rows) as a two-column frame:
#'IATA' = carrier code, 'count' = number of flights.
#Naming the index and the values explicitly avoids relying on the column names that
#value_counts()/reset_index() happen to produce, which differ between pandas versions.
top10_carrier = (df_sub['UNIQUE_CARRIER']
                 .value_counts()
                 .head(10)
                 .rename_axis('IATA')
                 .reset_index(name='count'))

In [5]:
#Full airline names keyed by carrier code; 'Code' is renamed so it can be joined on 'IATA'
carrier_key = pd.read_csv('L_UNIQUE_CARRIERS.csv').rename(index=str, columns={"Code": "IATA"})

#Left join: every top-10 carrier row is kept and gains carrier_key's other columns
top10_carrier = top10_carrier.merge(carrier_key, how='left', on='IATA')

In [6]:
#Bokeh code

In [7]:
from bokeh.charts import HeatMap
from bokeh.io import output_file, show
from bokeh.palettes import OrRd9
from bokeh.models import Range1d, LinearColorMapper, ColorBar
from bokeh.models.widgets import Panel, Tabs

In [8]:
def create_heatmap(df_sub, desc='Average Flight Delays', max_delay=30):
    """Build a day-of-week x departure-hour heat map of the mean arrival delay.

    Parameters
    ----------
    df_sub : pandas.DataFrame
        Flight rows with 'DAY_OF_WEEK', 'DEP_HOUR' and 'ARR_DELAY' columns.
        Other columns are ignored.
    desc : str
        Chart title.
    max_delay : number
        Mean delays above this value are capped to it; it is also the upper
        end of the colour bar. Defaults to 30, the value previously hard-coded.

    Returns
    -------
    The bokeh.charts HeatMap chart, with a colour bar added on its right.
    """
    day_names = {1: 'Monday', 2: 'Tuesday', 3: 'Wednesday', 4: 'Thursday',
                 5: 'Friday', 6: 'Saturday', 7: 'Sunday'}

    #Average ARR_DELAY only: selecting the column before mean() keeps non-numeric
    #columns such as UNIQUE_CARRIER out of the aggregation
    dayHour = (df_sub.groupby(by=['DAY_OF_WEEK','DEP_HOUR'])['ARR_DELAY']
                     .mean()
                     .reset_index())

    #One dict-based replace instead of seven chained in-place replaces on a column
    #selection; values that are not 1-7 are left as they are
    dayHour['DAY_OF_WEEK'] = dayHour['DAY_OF_WEEK'].replace(day_names)

    #Cap the mean delays so a few extreme cells do not dominate the colour scale
    dayHour['ARR_DELAY'] = dayHour['ARR_DELAY'].clip(upper=max_delay)

    #Two calibration rows at hour 24 carrying the minimum (0) and maximum delay,
    #presumably to pin the chart's colour scale to [0, max_delay]; the x_range set
    #below (0-23) leaves hour 24 outside the visible axis
    df_cali = pd.DataFrame([['Monday',24, 0], ['Sunday',24, max_delay]],
                           columns=('DAY_OF_WEEK','DEP_HOUR','ARR_DELAY'))
    #pd.concat replaces DataFrame.append, which newer pandas no longer provides
    dayHour = pd.concat([dayHour, df_cali])

    hm = HeatMap(dayHour, y='DAY_OF_WEEK', x='DEP_HOUR', values='ARR_DELAY', stat=None,
             width=600, plot_height=330, palette=OrRd9, ylabel='Day of the Week', xlabel='Departure Hour',
             legend = False,
             title= desc, toolbar_location='above')
    hm.x_range = Range1d(0, 23)

    #Add the Colorbar. It uses a reversed copy of the OrRd9 palette, mapped over [0, max_delay]
    Invert_OrRd9 = OrRd9[::-1]
    color_mapper = LinearColorMapper(palette=Invert_OrRd9, low=0, high=max_delay)

    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, location=(0,0))
    hm.add_layout(color_bar, 'right')

    return hm

In [9]:
def create_tab(df_sub,k):
    """Return a Panel holding the heat map of the k-th carrier in top10_carrier.

    k is 1-based: k=1 selects the row labelled 0 of top10_carrier. The panel
    title is the carrier's 'IATA' code and the chart title its 'Description'.
    """
    row_label = k-1
    iata_code = top10_carrier['IATA'][row_label]
    airline_name = top10_carrier['Description'][row_label]

    #Restrict the flights to this carrier before building its heat map
    carrier_flights = df_sub[df_sub['UNIQUE_CARRIER']==iata_code]
    return Panel(child=create_heatmap(carrier_flights, desc=airline_name), title=iata_code)

In [10]:
#Build the 11 heatmap tabs: one for all flights plus one per top-10 airline
hm = create_heatmap(df_sub)
tab0 = Panel(child=hm, title="Overall")

#create_tab takes the 1-based position of the carrier in top10_carrier
(tab1, tab2, tab3, tab4, tab5,
 tab6, tab7, tab8, tab9, tab10) = [create_tab(df_sub,rank) for rank in range(1, 11)]

In [11]:
#Write the tabbed heat maps to heatmap.html and open the page in the browser
output_file("heatmap.html")
ordered_tabs = [tab0,
                tab1, tab2, tab3, tab4, tab5,
                tab6, tab7, tab8, tab9, tab10]
tabs = Tabs(tabs=ordered_tabs)
show(tabs)

In [12]:
#Create the javascript code and script tag to embed in the blog
from bokeh.resources import CDN
from bokeh.embed import autoload_static

#The URL is where the generated .js file will be served from; the printed tag points at it
js, tag = autoload_static(tabs, CDN, "https://xcitech.github.io/assets/bokeh_js/heatmap1.js")

#Context manager so the file is flushed and closed even if the write raises
with open('heatmap1.js', 'w') as f:
    f.write(js)
print(tag)


<script
    src="https://xcitech.github.io/assets/bokeh_js/heatmap1.js"
    id="7e7ddf43-63b4-4b79-a06d-214aac9b7cca"
    data-bokeh-model-id="efdbc2ca-c9c1-4014-895d-a6ed8f7f0346"
    data-bokeh-doc-id="70a0312c-6e6d-48d2-9133-ef5dbc9c9f40"
></script>


## Create Bokeh charts for predicted weather-discounted arrival delays

In [2]:
#Read the predicted airline data csv file, loading only the columns used below
predicted_cols = ['UNIQUE_CARRIER','DEP_HOUR','ARR_DELAY','predicted_delay']
df = pd.read_csv('predicted_first.csv', usecols=predicted_cols)[predicted_cols]

#NOTE: no clipping is applied in this cell, so negative ARR_DELAY values
#(early landings) stay in the data and enter the averages computed from df.
#Only the column is renamed; the row index is left as read (it was previously
#converted to strings, which nothing in this section reads).
df = df.rename(columns={"UNIQUE_CARRIER": "IATA"})

In [3]:
#Get the 10 biggest Airline Carriers (by number of rows) as a two-column frame:
#'IATA' = carrier code, 'count' = number of flights.
#Naming the index and the values explicitly gives the same two columns regardless of
#how the installed pandas names the value_counts() result.
top10_carrier = (df['IATA']
                 .value_counts()
                 .head(10)
                 .rename_axis('IATA')
                 .reset_index(name='count'))

#Consider data from only the top 10 Airlines
df = df[df['IATA'].isin(top10_carrier['IATA'])].copy()

#Load the L_UNIQUE_CARRIERS.csv for the full names of the IATA codes of the Airlines
carrier_key = pd.read_csv('L_UNIQUE_CARRIERS.csv').rename(columns={"Code": "IATA"})

In [4]:
#Departure-hour buckets, one array column each, in the order column 0..3.
#Column 4 (the last one) holds the daily average over all hours.
#NOTE(review): hour 0 is in none of the buckets and hour 23 sits in the first one - confirm intended
hour_buckets = [
    [23,1,2,3,4,5,6,7,8,9,10],
    [11,12,13,14,15],
    [16,17,18,19],
    [20,21,22],
]
daily_col = len(hour_buckets)

#Mean delay (daily average) per carrier, plus the airline names from carrier_key
df_mean = df[['IATA','ARR_DELAY','predicted_delay']].groupby('IATA').mean().reset_index()
df_mean = pd.merge(df_mean,carrier_key,how='left',on='IATA')

#One row per carrier; np.zeros instead of the raw np.ndarray constructor,
#which hands back uninitialised memory
arr_delay = np.zeros((len(df_mean), daily_col + 1))
predicted_delay = np.zeros((len(df_mean), daily_col + 1))
arr_delay[:,daily_col] = df_mean['ARR_DELAY'].values
predicted_delay[:,daily_col] = df_mean['predicted_delay'].values

#Mean delays per carrier within each departure-hour bucket.
#reindex() pins the rows to df_mean's carrier order, so a carrier without flights
#in a bucket yields NaN for that cell instead of a shape mismatch.
for bucket_col, hours in enumerate(hour_buckets):
    df_hourly = (df[df['DEP_HOUR'].isin(hours)]
                 .groupby('IATA')[['ARR_DELAY','predicted_delay']]
                 .mean()
                 .reindex(df_mean['IATA']))
    arr_delay[:,bucket_col] = df_hourly['ARR_DELAY'].values
    predicted_delay[:,bucket_col] = df_hourly['predicted_delay'].values

#The bars are drawn later as rect glyphs centred on y=0 with y_range starting at 0,
#so only the upper half is visible; doubling the height makes the visible part equal the delay
arr_delay = arr_delay*2.0
predicted_delay = predicted_delay*2.0

In [5]:
from bokeh.models import  Callback, ColumnDataSource, Rect,CustomJS, LabelSet
from bokeh.plotting import figure, output_file, show, gridplot
from bokeh.layouts import row, column,widgetbox
from bokeh.models import CustomJS, Slider, Select
import numpy as np
from math import pi

#data
carrier = df_mean['Description'].tolist()
#Option labels '0'..'24'; only referenced by the commented-out Select alternative below
ages_gen = [str(hour) for hour in range(25)]


def _delay_source(delays):
    """Return the ColumnDataSource for one bar series.

    delays is a 2-D array with one row per entry of `carrier`. Every column c is
    stored as 'height<c>'; 'height' (the column actually drawn) starts on the
    last column, and 'x' / 'y' hold the carrier names and the y centre (0) of each bar.
    """
    n_cols = delays.shape[1]
    data = dict(x=carrier, y=np.zeros(len(carrier)), height=delays[:,n_cols - 1])
    data.update({'height%d' % col: delays[:,col] for col in range(n_cols)})
    return ColumnDataSource(data=data)


#sources for callback: actual delays, predicted delays, and the label text
source1 = _delay_source(arr_delay)
source2 = _delay_source(predicted_delay)

#'title' is the label currently shown; h0..h4 are the texts for slider positions 0..4
source3 = ColumnDataSource(data=dict(title=["Daily Average"],
                                     h0 = ["Morning"],
                                     h1 = ["Afternoon"],
                                     h2 = ["Evening"],
                                     h3 = ["Night"],
                                     h4 = ["Daily Average"]))


#Javascript callback for the slider: copies the 'height<value>' column of both bar
#sources into 'height' and the matching 'h<value>' text into the label
Callback_Hour = CustomJS(args={'source1': source1,'source2':source2,'source3':source3}, code="""
        var f = cb_obj.get('value');
        var data1 = source1.get('data');
        var data2 = source2.get('data');
        var data3 = source3.get('data');
        
        data1['height'] = data1['height'+f.toString()];
        source1.trigger('change');
        
        data2['height'] = data2['height'+f.toString()];
        source2.trigger('change');
        
        data3['title'] = data3['h'+f.toString()];
        source3.trigger('change');
        
    """)

#Figure for the overlaid bar chart
p1 = figure(title="Average flight delay",
            x_range=carrier, y_range=[0, 30],
            plot_width=600, plot_height = 350,
            outline_line_color= None,
            toolbar_location='above')
p1.background_fill_color = '#e3e0db'
p1.xaxis.major_label_orientation = pi/4
p1.xaxis.axis_label = 'Airline Carrier'
p1.yaxis.axis_label = 'Average Delay'

#Create Barchart using rect glyphs (actual delays, drawn first and semi-transparent)
p1.rect(x ='x', y ='y', width =.8, height = 'height', source = source1, color="#F47C3A", alpha=0.35, name = "Airline Delay", legend = "Actual Delays")

#Create Barchart using rect glyphs for weather corrected delays (drawn on top)
p1.rect(x ='x', y ='y', width =.8, height = 'height', source = source2, color="#720017", alpha=0.95, name = "Airline Delay", legend = "Weather accounted Delays")


#Create label
labels = LabelSet(x=0, y=0, text='title', level='glyph',x_offset=35, y_offset=170, source=source3, render_mode='canvas')
p1.add_layout(labels)

#Create the slider: positions 0-3 select a departure-hour bucket, 4 the daily average
hour_slider = Slider(start=0, end=4, value=4, step=1,title="Departure Hour", callback=Callback_Hour)
#Alternately:
#hour_slider = Select(title="Departure Hour:", value=ages_gen[24], options= ages_gen,  callback = Callback_Hour)

layout = column(p1,widgetbox(hour_slider))
#NOTE(review): same output file name as the hourly slider chart earlier in this notebook,
#so running both overwrites the earlier preview page - confirm this is acceptable
output_file("slider.html", title="slider.py example")

show(layout)

In [6]:
#Create the javascript code and script tag to embed in the blog
from bokeh.resources import CDN
from bokeh.embed import autoload_static

#The URL is where the generated .js file will be served from; the printed tag points at it
js, tag = autoload_static(layout, CDN, "https://xcitech.github.io/assets/bokeh_js/interact_barchart2.js")

#Context manager so the file is flushed and closed even if the write raises
with open('interact_barchart2.js', 'w') as f:
    f.write(js)
print(tag)


<script
    src="https://xcitech.github.io/assets/bokeh_js/interact_barchart2.js"
    id="9c176959-b0dc-40c6-94f6-c2e1bf976718"
    data-bokeh-model-id="0b5e5e96-1592-49f2-8699-471b9a23259f"
    data-bokeh-doc-id="0f42d7dc-ed11-441b-8dd4-5e3cf3688c30"
></script>
