# Interactive Visualizations with Python

### Madpy Meetup
January 9, 2020

### About Me

- Zach Griffith
- Software Developer for the Space Science and Engineering Center at UW-Madison
- zdgriffith on GitHub

## Outline

- Bokeh
    - why?
    - The Bokeh Model
    - Plot Tools
    - Layouts
    - Syncing Plot Elements
    - Widgets
    - Callbacks
    - Bokeh Server
- Higher Level Packages built on Bokeh
    - Holoviews
    - Geoviews
    - Panel
- Alternatives (Plotly, Altair)
- Summary
- References
- Resources

# Part 1:  The Bokeh Model and Basic Plotting

# Why Bokeh

- Write entirely in Python - No JavaScript! <span style="font-size:small;">most of the time...</span>

- Built to work well with PyData Tools (e.g. Pandas)

- Supports variety of output formats
    - Jupyter Notebook
    - Standalone HTML
    - Web Applications via Bokeh Server

In [1]:
from bokeh.io import output_file, output_notebook

#output_file('output.html')
output_notebook()

# The Bokeh Model

In [2]:
import dot_graphs

dot = dot_graphs.bokeh_model()

<center>
    <img src="images/bokeh_model.svg"
         align="center"
         width="100%">
</center>
All low-level Bokeh Python objects are serialized to JSON:

In [3]:
from bokeh.models import Line
line = Line(x=1, y=2, line_color='orange', line_width=4)
line.to_json(include_defaults=False)

{'id': '1002',
 'line_color': 'orange',
 'line_width': 4,
 'x': {'value': 1},
 'y': {'value': 2}}

# Mapping Visual Attributes to Data Columns

In [4]:
from bokeh.models import ColumnDataSource

source = ColumnDataSource({
    'x': [...], # Columns can be any sequence
    'y': [...], # (e.g. list, numpy array, pandas series...)
})

Intialize with a dictionary, Pandas DataFrame, or Pandas GroupBy

In [5]:
from bokeh.models import Plot
from bokeh.plotting import show
import numpy as np

x = np.arange(0, 6*np.pi, 0.2)
source = ColumnDataSource({
    'x': x,
    'y': np.sin(x)
})

# plotting with the bokeh.models API
p = Plot(height=200)
line = Line(x='x', y='y')
p.add_glyph(source, line)
show(p)

In [6]:
from bokeh.plotting import figure
# plotting with the bokeh.plotting API

p = figure(height=200) # Create Plot instance with some defaults
p.line(source=source, x='x', y='y')
show(p)

# Glyphs

In [7]:
p = figure(height=200)
p.step(source=source, x='x', y='y')
show(p)

In [8]:
p = figure(height=200)
p.square(source=source, x='x', y='y')
show(p)

In [9]:
p = figure(height=200)
p.vbar(source=source, x='x', top='y', width=0.2)
show(p)

In [10]:
p = figure(height=200)
p.annular_wedge(source=source, x='x', y='y', inner_radius=0.1, outer_radius=0.25, start_angle=0.2, end_angle=5)
show(p)

See full list at:  https://docs.bokeh.org/en/latest/docs/reference/models/glyphs.html

# Part 2:  Adding Interactions

In [11]:
import pandas as pd
df_nba = pd.read_csv('Data/nba_players_2019.csv')
df_nba.columns

Index(['Rk', 'Player', 'Pos', 'Age', 'Tm', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG%',
       '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%',
       'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'MP.1',
       'PER', 'TS%', '3PAr', 'FTr', 'ORB%', 'DRB%', 'TRB%', 'AST%', 'STL%',
       'BLK%', 'TOV%', 'USG%', 'OWS', 'DWS', 'WS', 'WS/48', 'OBPM', 'DBPM',
       'BPM', 'VORP'],
      dtype='object')

In [12]:
from bokeh.io import curdoc
source_nba = ColumnDataSource(df_nba)
p_nba = figure(title='2019-20 NBA Players')
p_nba.xaxis.axis_label = 'Minutes Per Game'
p_nba.yaxis.axis_label = 'Points Per Game'
p_nba.circle(source=source_nba, x='MP', y='PTS', alpha=0.5)
show(p_nba)

# Widgets

In [13]:
from bokeh.models.widgets import Button, CheckboxGroup, Slider

button = Button(label='Woo MadPy', button_type='success')
show(button)

In [14]:

checkbox_group = CheckboxGroup(
        labels=["Python", "JavaScript", "Stuff"], active=[0, 1])
show(checkbox_group)

In [15]:
slider = Slider(start=0, end=11, value=11, step=0.1, title='MadPy Volume')
show(slider)

In [16]:
slider.to_json(include_defaults=False)

{'callback': None,
 'end': 11,
 'id': '1982',
 'start': 0,
 'step': 0.1,
 'title': 'MadPy Volume',
 'value': 11}

See full list of widgets at:  https://docs.bokeh.org/en/latest/docs/user_guide/interaction/widgets.html

# Organizing Multiple Visual Elements

Plots and Widgets are Layout objects which can be placed using functions from the layouts module

In [17]:
from bokeh.layouts import widgetbox, row, column, layout
from bokeh.models.widgets import Select

values = list(source_nba.data)
x_select = Select(title='X-Axis', options=values, value='MP')
y_select = Select(title='Y-Axis', options=values, value='PTS')

# Creates Column of widgets with predefined styling
widget_box = widgetbox([x_select, y_select])

p_nba = figure(title='2019-20 NBA Players')
p_nba.xaxis.axis_label = 'Minutes Per Game'
p_nba.yaxis.axis_label = 'Points Per Game'
p_nba.circle(source=source_nba, x='MP', y='PTS', alpha=0.5)

# Create a Row of Layout objects horizontally
lay = row(widget_box, p_nba)

show(lay)

In [18]:
# Create a Column of Layout objects vertically
lay = column(widget_box, p_nba)
show(lay)

In [50]:
df_wnba = pd.read_csv('Data/wnba_players_2019.csv')
source_wnba = ColumnDataSource(df_wnba)

p_wnba = figure(title='2019 WNBA Players')
p_wnba.xaxis.axis_label = 'Minutes Per Game'
p_wnba.yaxis.axis_label = 'Points Per Game'
circle = p_wnba.circle(source=source_wnba, x='MP', y='PTS', alpha=0.5)

In [20]:
# Creates grid layout using a Column with nested Rows
lay = layout([widget_box], [p_nba, p_wnba])
show(lay)

# Multi-Tab Layouts

In [21]:
from bokeh.models.layouts import Panel, Tabs

tab1 = Panel(child=row([widget_box, p_nba]), title='NBA')
tab2 = Panel(child=row([widget_box, p_wnba]), title='WNBA')
tabs = Tabs(tabs=[tab1, tab2])
show(tabs)

# Linking Behavior

In [22]:
from bokeh.models.layouts import Panel, Tabs
df2 = pd.read_csv('Data/wnba_players_2019.csv')
wnba = ColumnDataSource(df2)

p_nba = figure(title='2019-20 NBA Players')
p_nba.xaxis.axis_label = 'Minutes Per Game'
p_nba.yaxis.axis_label = 'Points Per Game'
p_nba.circle(source=source_nba, x='MP', y='PTS', alpha=0.5)

p_wnba = figure(title='2019 WNBA Players')
p_wnba.xaxis.axis_label = 'Minutes Per Game'
p_wnba.yaxis.axis_label = 'Points Per Game'
p_wnba.circle(source=source_wnba, x='MP', y='PTS', alpha=0.5)

# Explicitly link Plot properties to connect interactivity
p_wnba.x_range = p_nba.x_range
p_wnba.y_range = p_nba.y_range

tab_nba = Panel(child=row([widget_box, p_nba]), title='NBA')
tab_wnba = Panel(child=row([widget_box, p_wnba]), title='WNBA')
tabs = Tabs(tabs=[tab_nba, tab_wnba])
show(tabs)

# Configuring Widgets

Encode connections between widget values and figure elements/data via:

- Python Event Handlers 
    - must have a persistent Python process 
- JavaScript Event Handlers
    - necessary for standalone HTML documents

# Python Event Handlers

User-defined functions that need to be attached to Bokeh model objects (e.g. Widgets, DataSources)

In [32]:
df = {'nba': df_nba, 'wnba': df_wnba}
leagues = ['nba', 'wnba']
def basketball_viz(doc):
    figs = {}
    sources = {}
    for league in leagues:
        sources[league] = ColumnDataSource({
            'x': df[league]['MP'], 'y': df[league]['PTS']
        })
        figs[league] = figure(title=f'2019 {league.upper()} Players')
        figs[league].circle(source=sources[league], x='x', y='y', alpha=0.5)
    figs['nba'].x_range = figs['wnba'].x_range
    figs['nba'].y_range = figs['wnba'].y_range

    values = list(df['nba'].columns)
    x_select = Select(title='X-Axis', options=values, value='MP')
    y_select = Select(title='Y-Axis', options=values, value='PTS')

    # Event handler function 
    def update_axis(attr, old, new):
        for league in leagues:
            sources[league].data.update(x=df[league][x_select.value], y=df[league][y_select.value])
            figs[league].xaxis.axis_label = x_select.value
            figs[league].yaxis.axis_label = y_select.value

    # Add event handler for change of value
    x_select.on_change("value", update_axis)
    y_select.on_change("value", update_axis)
    widget_box = widgetbox([x_select, y_select])

    tabs = []
    for league in leagues:
        tabs.append(Panel(child=row([widget_box, figs[league]]), title=league.upper()))
    tab_layout = Tabs(tabs=tabs)
    doc.add_root(tab_layout)
show(basketball_viz)

# JavaScript Callbacks

In [53]:
from bokeh.models.callbacks import CustomJS

df = {'nba': df_nba, 'wnba': df_wnba}
leagues = ['nba', 'wnba']
def basketball_viz(doc):
    alpha_slider = Slider(title='Circle Opacity', start=0, end=1, value=0.5, step=0.01)
    
    figs = {}
    sources = {}
    circle_callbacks = {}
    for league in leagues:
        sources[league] = ColumnDataSource({
            'x': df[league]['MP'], 'y': df[league]['PTS']
        })
        figs[league] = figure(title=f'2019 {league.upper()} Players')
        circle = figs[league].circle(source=sources[league], x='x', y='y', alpha=0.5)
        
        alpha_slider.js_on_change(
            'value',
            CustomJS(
                args=dict(circle=circle),
                code="circle.glyph.fill_alpha = cb_obj.value;" # models passed as args are magically available
            )
        )
        
    figs['nba'].x_range = figs['wnba'].x_range
    figs['nba'].y_range = figs['wnba'].y_range

    values = list(df['nba'].columns)
    x_select = Select(title='X-Axis', options=values, value='MP')
    y_select = Select(title='Y-Axis', options=values, value='PTS')

    # Event handler function 
    def update_axis(attr, old, new):
        for league in leagues:
            sources[league].data.update(x=df[league][x_select.value], y=df[league][y_select.value])
            figs[league].xaxis.axis_label = x_select.value
            figs[league].yaxis.axis_label = y_select.value
            
    # Add event handler for change of value
    x_select.on_change("value", update_axis)
    y_select.on_change("value", update_axis)
    widget_box = widgetbox([x_select, y_select, alpha_slider])

    tabs = []
    for league in leagues:
        tabs.append(Panel(child=row([widget_box, figs[league]]), title=league.upper()))
    tab_layout = Tabs(tabs=tabs)
    doc.add_root(tab_layout)
show(basketball_viz)

# Customizing Plot Tools

In [37]:
df = {'nba': df_nba, 'wnba': df_wnba}
leagues = ['nba', 'wnba']
def basketball_viz(doc):
    figs = {}
    sources = {}
    
    # Define tooltips
    TOOLTIPS = [
        ("Player", "@Player"),
        ("Team", "@Team"),
    ]
    
    for league in leagues:
        sources[league] = ColumnDataSource({
            'x': df[league]['MP'], 'y': df[league]['PTS'],
            'Player': df[league]['Player'], 'Team': df[league]['Tm'],
        })
        
        figs[league] = figure(
            title=f'2019 {league.upper()} Players',
            tooltips=TOOLTIPS # add tooltips to figure initialization
        )
        
        figs[league].circle(source=sources[league], x='x', y='y', alpha=0.5)
    figs['nba'].x_range = figs['wnba'].x_range
    figs['nba'].y_range = figs['wnba'].y_range

    values = [stat for stat in df['nba'].columns if stat in df['wnba'].columns]
    x_select = Select(title='X-Axis', options=values, value='MP')
    y_select = Select(title='Y-Axis', options=values, value='PTS')

    # Event handler function 
    def update_axis(attr, old, new):
        for league in leagues:
            sources[league].data.update(x=df[league][x_select.value], y=df[league][y_select.value])
            figs[league].xaxis.axis_label = x_select.value
            figs[league].yaxis.axis_label = y_select.value

    # Add event handler for change of value
    x_select.on_change("value", update_axis)
    y_select.on_change("value", update_axis)
    widget_box = widgetbox([x_select, y_select])

    tabs = []
    for league in leagues:
        tabs.append(Panel(child=row([widget_box, figs[league]]), title=league.upper()))
    tab_layout = Tabs(tabs=tabs)
    doc.add_root(tab_layout)
show(basketball_viz)

# Bokeh Server

Connect front-end UI events (JavaScript) to running Python code



In [26]:
dot = dot_graphs.bokeh_server()

<center>
    <img src="images/bokeh_serve.svg"
         align="center"
         width="100%">
</center>

In [27]:
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, curdoc

In [28]:
df1 = pd.read_csv('~/Downloads/EcoTotem_Capital_City_trail_Bike_counts.csv', usecols=['Count_Date', 'Count'])
df2 = pd.read_csv('~/Downloads/EcoTotem_Southwest_Path_Bike_Counts.csv', usecols=['Count_Date', 'Count'])
df = df1.merge(df2, on='Count_Date', how='outer', suffixes=('_cap', '_sw'))
df['Count_Date'] = pd.to_datetime(df['Count_Date'])
df['Count_Date_end'] = df['Count_Date'] + pd.Timedelta('1 hour')
df['Hour'] = df['Count_Date'].dt.hour
for sfx in ['_cap', '_sw']:
    df['Count_cumsum'+sfx] = np.cumsum(df['Count'+sfx])

In [29]:
source = ColumnDataSource(df)
source.data

{'index': array([    0,     1,     2, ..., 39698, 39699, 39700]),
 'Count_Date': array(['2015-06-23T11:00:00.000000000', '2015-06-23T12:00:00.000000000',
        '2015-06-23T13:00:00.000000000', ...,
        '2015-06-23T08:00:00.000000000', '2015-06-23T09:00:00.000000000',
        '2015-06-23T10:00:00.000000000'], dtype='datetime64[ns]'),
 'Count_cap': array([ 92., 191., 156., ...,  nan,  nan,  nan]),
 'Count_sw': array([121., 126., 144., ..., 203., 124.,  77.]),
 'Count_Date_end': array(['2015-06-23T12:00:00.000000000', '2015-06-23T13:00:00.000000000',
        '2015-06-23T14:00:00.000000000', ...,
        '2015-06-23T09:00:00.000000000', '2015-06-23T10:00:00.000000000',
        '2015-06-23T11:00:00.000000000'], dtype='datetime64[ns]'),
 'Hour': array([11, 12, 13, ...,  8,  9, 10]),
 'Count_cumsum_cap': array([ 92., 283., 439., ...,  nan,  nan,  nan]),
 'Count_cumsum_sw': array([1.210000e+02, 2.470000e+02, 3.910000e+02, ..., 1.705680e+06,
        1.705804e+06, 1.705881e+06])}

In [30]:
p = figure(x_axis_type='datetime')
p.line(source=source, x='Count_Date', y='Count_cumsum_cap', legend_label='Cumulative Rider Counts (Capitol City Trail)',
       color='red')
p.line(source=source, x='Count_Date', y='Count_cumsum_sw', legend_label='Cumulative Rider Counts (SW Trail)',
       color='blue')

p2 = figure(x_axis_type='datetime')
p2.quad(source=source, bottom=0, top='Count_cap', left='Count_Date', right='Count_Date_end', color='red', fill_alpha=0.8,
        alpha=0.2, legend_label='Rider Counts (Capitol City Trail)', line_color=None)
p2.quad(source=source, bottom=0, top='Count_cap', left='Count_Date', right='Count_Date_end', color='blue', fill_alpha=0.8,
        alpha=0.2, legend_label='Rider Counts (SW Trail)', line_color=None)
p2.legend.location = 'top_left'
p2.legend.click_policy="hide"
show(column(p, p2))

In [31]:
from bokeh.models.widgets import DateRangeSlider
from bokeh.layouts import column, row, WidgetBox

def modify_doc(doc):
    df_hour = df.groupby(df['Hour']).mean()
    source = ColumnDataSource({
        'Hour_left': df_hour['Hour'],
        'Hour_right': df_hour['Hour'] + 1,
        'Count': df_hour['Count'],
    })

    def update_data(attr, old, new):
        mask = df['Count_Date'].dt.date >= date_select.value_as_date[0]
        mask = mask & (df['Count_Date'].dt.date <= date_select.value_as_date[1])
        df_hour = df[mask].groupby(df[mask]['Hour']).mean()
        source.data.update(
            Hour_left=df_hour['Hour'],
            Hour_right=df_hour['Hour'] + 1,
            Count=df_hour['Count'],
        )

    start = df['Count_Date'].dt.date[0]
    end = list(df['Count_Date'].dt.date)[-1]
    date_select = DateRangeSlider(start=start, end=end, step=1, value=[start, end])
    date_select.on_change('value', update_data)
    widgets = WidgetBox(date_select)

    p = figure(y_range=(0, 400))
    p.quad(source=source, bottom=0, left='Hour_left', right='Hour_right', top='Count', fill_alpha=0.6)
    lay = column(widgets, p, sizing_mode='scale_width')
    doc.add_root(lay)
show(modify_doc)

ERROR:tornado.application:Uncaught exception GET /autoload.js?bokeh-autoload-element=3079&bokeh-absolute-url=http://localhost:56832&resources=none (::1)
HTTPServerRequest(protocol='http', host='localhost:56832', method='GET', uri='/autoload.js?bokeh-autoload-element=3079&bokeh-absolute-url=http://localhost:56832&resources=none', version='HTTP/1.1', remote_ip='::1')
Traceback (most recent call last):
  File "/Users/zgriffith/miniconda3/envs/int-viz/lib/python3.8/site-packages/pandas/core/indexes/base.py", line 2897, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 107, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 131, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1607, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1614, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'nba'

During handling 