# Visualization with Bokeh
A demo of the Bokeh visualization library for Python.

Victor Kitov, <v.v.kitov@yandex.ru>, <https://victorkitov.github.io>

To install Bokeh use `conda install bokeh` or `pip install bokeh`

### Intro
* Matplotlib outputs a PNG image (fast, but static)
* Bokeh outputs a vector image (slower, but interactive - can zoom in/out, make data selections)
    * can be made faster by subsampling data (Bokeh can do this)

In [1]:
# Populate the interactive namespace from numpy and matplotlib and render matplotlib figures inline
%pylab inline
# Display floating point results with 6 digits after the decimal point ('%.6f')
%precision 6

Populating the interactive namespace from numpy and matplotlib


'%.6f'

In [2]:
import numpy as np
from numpy import *
# Make printed numpy arrays easier to read in the notebook:
#   edgeitems - show more elements at the beginning and the end of numpy arrays
#   linewidth - reserve a wide area for text output
np.set_printoptions(edgeitems=10, linewidth=140)

In [3]:
import sklearn as skl
import pandas as pd
from pdb import set_trace as bp

In [4]:
import bokeh
from bokeh.models import BasicTicker, ColorBar, ColumnDataSource, LinearColorMapper, PrintfTickFormatter, FactorRange, Range1d, ColorBar
from bokeh.transform import transform, linear_cmap, factor_cmap, factor_mark, jitter
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.colors import RGB # RGB(r,g,b) - convenient way to define a color in Bokeh
from bokeh.palettes import Spectral6, Viridis256
from bokeh.io import show, output_file
from bokeh.core.properties import value
from bokeh.util.hex import hexbin
from bokeh.layouts import column,row,gridplot

In [5]:
# Choose where Bokeh sends its output; the alternative below is kept for reference but disabled.
# output_file(filename)  # bokeh output to static HTML file, opened in new tab
output_notebook()        # bokeh output directly to notebook

In [6]:
from functools import partial  # allows calling function with prespecified parameters
# Rebind `figure` to a partial with a default size; keyword arguments passed explicitly
# in a later call (e.g. plot_width=500) still override these defaults.
figure = partial(figure, plot_width=350, plot_height=350) # now all figures will have default 350x350 size (if not redefined)

# Linear plot

In [7]:
# Minimal example: create a figure, add a line through the given (x, y) points, display it
p = figure()
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)
show(p)

### Constrain view area

In [9]:
p = figure()
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)
# Limit how far the view may be panned/zoomed: nothing below 0, no upper limit (None)
p.x_range.bounds=(0, None)
p.y_range.bounds=(0, None)
show(p)

### Init view area

In [10]:
p = figure()
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)
# Explicit ranges define the initially visible area: x in [0, 6], y in [2, 6]
p.x_range=Range1d(0,6)
# bounds additionally restrict panning/zooming of the y axis to values >= 0
p.y_range=Range1d(2,6,bounds=(0, None))
show(p)

### Specify tools in the toolbar

In [11]:
# `tools` is a comma-separated list of the tool names to put into the toolbar
p = figure(tools="pan,box_zoom,reset,crosshair,wheel_zoom,box_select,lasso_select,save")
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)
show(p)

### Remove toolbar

In [12]:
# toolbar_location=None hides the toolbar
p = figure(toolbar_location=None)
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)
show(p)

### Redefine figure size

In [13]:
# Explicit size for this figure only; it overrides the 350x350 default bound into `figure` earlier
p = figure(plot_width=500, plot_height=200)
p.line([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], line_width=2)
show(p)

### Step plot vs linear plot

In [14]:
# Sample y = x^2 at 10 evenly spaced points on [0, 2]
x=linspace(0,2,10)
y2=x**2

p = figure()
# The same samples drawn twice: connected by straight segments and as a step function
p.line(x, y2, legend="y=x^2 linear plot", line_color="blue")
p.step(x, y2, legend="y=x^2 step plot", line_color="green",mode="center")

p.legend.location = "top_left"
show(p)

### Control line color

In [15]:
x = linspace(0, 2, 10)
y1, y2, y3 = x, x**2, x**3

p = figure()
# One line per curve, each with its own legend entry and line color
for series_y, series_label, series_color in [(y1, "y=x", "red"),
                                             (y2, "y=x^2", "green"),
                                             (y3, "y=x^3", "blue")]:
    p.line(x, series_y, legend=series_label, line_color=series_color)

p.legend.location = "top_left"
show(p)

### Control line type

In [16]:
x = linspace(0, 2, 10)
y1, y2, y3 = x, x**2, x**3

p = figure()
# One line per curve, distinguished by dash pattern instead of color
for series_y, series_label, series_dash in [(y1, "y=x", 'dashed'),
                                            (y2, "y=x^2", 'dotted'),
                                            (y3, "y=x^3", 'dotdash')]:
    p.line(x, series_y, legend=series_label, line_dash=series_dash)

p.legend.location = "top_left"
show(p)

### Add markers to plot

In [17]:
x = linspace(0, 2, 10)
y1, y2, y3 = x, x**2, x**3

p = figure()
# Every curve is drawn as a colored line and then overlaid with
# white-filled markers of its own shape (circle / triangle / square)
for series_y, series_label, series_color, add_markers in [
        (y1, "y=x", "red", p.circle),
        (y2, "y=x^2", "green", p.triangle),
        (y3, "y=x^3", "blue", p.square)]:
    p.line(x, series_y, legend=series_label, line_color=series_color)
    add_markers(x, series_y, fill_color="white", size=8)

p.legend.location = "top_left"
show(p)

### Multiple plots

In [18]:
x=linspace(0,2,10)
y1=x
y2=x**2
y3=x**3

# Build three independent figures (one curve each, title centered).
# Nothing is displayed here; the figures are shown by the layout cells that follow.
s1 = figure(title="y=x")
s1.title.align='center'
s1.line(x, y1, line_color="red")

s2 = figure(title="y=x^2")
s2.title.align='center'
s2.line(x, y2, line_color="green")

s3 = figure(title="y=x^3")
s3.title.align='center'
s3.line(x, y3, line_color="blue")

In [19]:
# Stack the three figures vertically
show(column(s1, s2, s3))

In [20]:
# Place the three figures side by side
show(row(s1, s2, s3))

In [21]:
# gridplot takes a list of rows; here a single row with three figures and no toolbar
p = gridplot([[s1, s2, s3]], toolbar_location=None)
show(p)

In [22]:
# Two rows of two cells; None leaves the corresponding grid cell empty
p = gridplot([[s1, s2], [None,s3]], toolbar_location=None)
show(p)

In [23]:
# A flat list of figures is arranged into rows of `ncols` figures each
p = gridplot([s1, s2, s3], ncols=2)
show(p)

### Synchronous movement

In [24]:
x=linspace(0,2,10)
y1=x
y2=x**2
y3=x**3

s1 = figure(title="y=x")
s1.title.align='center'
s1.line(x, y1, line_color="red")

# s2 and s3 are given s1's range objects, so all three plots share the same x and y ranges:
# panning or zooming any one of them moves the others as well
s2 = figure(title="y=x^2", x_range=s1.x_range, y_range=s1.y_range,)
s2.title.align='center'
s2.line(x, y2, line_color="green")

s3 = figure(title="y=x^3", x_range=s1.x_range, y_range=s1.y_range,)
s3.title.align='center'
s3.line(x, y3, line_color="blue")

p = gridplot([[s1, s2, s3]], toolbar_location=None)
show(p)

### Synchronous selection (try selecting points on one plot)

In [25]:
# 300 samples of sin and cos over two periods
N = 300
x = np.linspace(0, 4*np.pi, N)
y0 = np.sin(x)
y1 = np.cos(x)

# send bokeh output to the notebook
output_notebook()

# create a column data source for the plots to share;
# both plots read from this one source, which is what links their selections
source = ColumnDataSource(data=dict(x=x, y0=y0, y1=y1))

# toolbar tools, including the box and lasso selection tools
TOOLS = "pan,wheel_zoom,box_zoom,reset,save,box_select,lasso_select"

# create a new plot and add a renderer (columns are referenced by name)
left = figure(tools=TOOLS, title=None)
left.circle('x', 'y0', source=source)

# create another new plot and add a renderer
right = figure(tools=TOOLS, title=None)
right.circle('x', 'y1', source=source)

# put the subplots in a gridplot
p = gridplot([[left, right]])

# show the results
show(p)

In [26]:
from bokeh.sampledata.stocks import AAPL  # on first use need to call 'bokeh.sampledata.download()'

# prepare some data
aapl = np.array(AAPL['adj_close'])
aapl_dates = np.array(AAPL['date'], dtype=np.datetime64)

# uniform weights summing to 1, i.e. a plain moving average
window_size = 30
window = np.ones(window_size)/float(window_size)
# mode 'same' keeps the output as long as the input; np.convolve pads with zeros at the
# boundaries, so the averages near both ends of the series are biased towards zero
aapl_avg = np.convolve(aapl, window, 'same') # make rolling average with window size=30


# create a new plot with a datetime axis type
p = figure(plot_width=800, plot_height=350, x_axis_type="datetime")

# add renderers: raw closing prices as faint points, the rolling average as a line
p.circle(aapl_dates, aapl, size=4, color='darkgrey', alpha=0.2, legend='close')
p.line(aapl_dates, aapl_avg, color='navy', legend='avg')

# customize by setting attributes
p.title.text = "AAPL One-Month Average"
p.legend.location = "top_left"
p.grid.grid_line_alpha = 0           # fully transparent grid lines
p.xaxis.axis_label = 'Date'
p.yaxis.axis_label = 'Price'
p.ygrid.band_fill_color = "olive"    # shaded bands along the y grid
p.ygrid.band_fill_alpha = 0.1

# show the results
show(p)

# Bar plot

### Vertical bar plot

In [27]:
p = figure(plot_width=400, plot_height=400)
# Bars centered at x = 1, 2, 3, each 0.5 wide, extending from `bottom` up to `top`
p.vbar(x=[1, 2, 3], width=0.5, bottom=0,
       top=[1.2, 2.5, 3.7], color="firebrick")

show(p)

### Horizontal bar plot

In [28]:
p = figure(plot_width=400, plot_height=400)
# Bars centered at y = 1, 2, 3, each 0.5 high, extending from `left` to `right`
p.hbar(y=[1, 2, 3], height=0.5, left=0,
       right=[1.2, 2.5, 3.7], color="navy")

show(p)

# Scatter plot

In [29]:
# Import make_blobs from the public sklearn.datasets package: the
# sklearn.datasets.samples_generator module path was deprecated in scikit-learn 0.22
# and removed in 0.24, while this import works across versions.
from sklearn.datasets import make_blobs

In [30]:
# 99 two-dimensional points generated around 3 centers (cluster_std=0.3);
# random_state is fixed so the dataset is reproducible. X holds the points, Y the cluster labels.
X, Y = make_blobs(n_samples=99, centers=3, n_features=2, cluster_std=0.3, random_state=0)
X.shape, Y.shape

((99, 2), (99,))

### Visualizing classification

#### Uniform scatter plot

In [31]:
p = figure(title="Blobs dataset", plot_width=350, plot_height=350)
# All points share one marker, size and color; class labels are not shown
p.scatter(X[:,0], X[:,1], marker="circle", size=8, color='blue', alpha=0.5, line_color=None)
show(p)

#### Show y with color

In [32]:
p = figure(title="Blobs dataset", plot_width=350, plot_height=350)
# Map each class label to a color and pass one color per point
class2color={0:'red',1:'green',2:'blue'}
p.scatter(X[:,0], X[:,1], marker="circle", size=8, color=[class2color[y] for y in Y], alpha=0.5)
show(p)

#### Show y with marker

In [33]:
p = figure(title="Blobs dataset", plot_width=350, plot_height=350)
# Map each class label to a marker shape and pass one marker name per point
class2marker={0:'circle',1:'square',2:'triangle'}
p.scatter(X[:,0], X[:,1], marker=[class2marker[y] for y in Y], size=8, alpha=0.5)
show(p)

#### Show y with opacity

In [34]:
p = figure(title="Blobs dataset", plot_width=350, plot_height=350)
# Map each class label to an opacity and pass one alpha value per point
class2opacity={0:1,1:0.6,2:0.2}
p.scatter(X[:,0], X[:,1], size=8, alpha=[class2opacity[y] for y in Y])
show(p)

# Visualizing regression

In [35]:
# Synthetic regression data: N two-dimensional Gaussian points scaled by 3,
# with the target equal to the squared distance of each point from the origin
N = 1000
X = 3 * randn(N, 2)
Y = np.sum(X**2, axis=1)

In [36]:
# Use the field name of the column source: colors are computed from column 'Y',
# mapped linearly onto the palette between the smallest and the largest target value
mapper = linear_cmap(field_name='Y', palette=Viridis256, low=min(Y), high=max(Y))
source = ColumnDataSource(dict(X1=X[:,0],X2=X[:,1],Y=Y))

p = figure(match_aspect=True, plot_width=300, plot_height=300, title="Linear Color Map Based on Y")

p.circle(x='X1', y='X2', line_color=None, color=mapper, fill_alpha=0.6, size=5, source=source)

# The color bar reuses the color mapper stored under 'transform' in the linear_cmap result
color_bar = ColorBar(color_mapper=mapper['transform'], width=10,  location=(0,0))

p.add_layout(color_bar, 'right')

show(p)

### Using factor_mark, factor_cmap to map class to marker and color

In [37]:
from bokeh.sampledata.iris import flowers

# Category values of the 'species' column and the marker assigned to each (same order)
SPECIES = ['setosa', 'versicolor', 'virginica']
MARKERS = ['hex', 'circle_x', 'triangle']

p = figure(title = "Iris Morphology")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Sepal Width'

# factor_mark / factor_cmap pick the marker and the color of every point from its 'species' value
p.scatter("petal_length", "sepal_width", source=flowers, legend="species", fill_alpha=0.4, size=12,
          marker=factor_mark('species', MARKERS, SPECIES),
          color=factor_cmap('species', 'Category10_3', SPECIES))

show(p)

### Plot image

In [38]:
# create an array of RGBA data
N = 20
# Intensity ramp floor(255 * k / N) for k = 0..N-1
ramp = (255 * np.arange(N) // N).astype(np.uint8)

img = np.empty((N, N, 4), dtype=np.uint8)
img[:, :, 0] = ramp[:, np.newaxis]   # red grows with the first (row) index
img[:, :, 1] = ramp[np.newaxis, :]   # green grows with the second (column) index
img[:, :, 2] = 0                     # no blue
img[:, :, 3] = 255                   # fully opaque

p = figure(plot_width=400, plot_height=400, x_range=(0, 10), y_range=(0, 10))

# Draw the image anchored at (0, 0) and stretched to 10 x 10 data units, i.e. over the whole view
p.image_rgba(image=[img], x=[0], y=[0], dw=[10], dh=[10])

show(p)

# Hexbin plot

In [40]:
# 50000 points from a 2D standard normal distribution
n = 50000
x = np.random.standard_normal(n)
y = np.random.standard_normal(n)

# Bin the points into hexagonal tiles of size 0.1; the result provides
# the tile coordinates (q, r) and the number of points per tile (counts)
bins = hexbin(x, y, 0.1)

p = figure(match_aspect=True, background_fill_color='#440154')
p.grid.visible = False

# One hexagon per bin, colored by its count; the tile size must match the one used for binning
p.hex_tile(q="q", r="r", size=0.1, line_color=None, source=bins,
           fill_color=linear_cmap('counts', 'Viridis256', 0, max(bins.counts)))

show(p)

# Categorical data

### Bar plot

In [39]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# Passing a list of strings as x_range makes the x axis categorical
p = figure(x_range=fruits, plot_height=250, title="Fruit Counts",
           toolbar_location=None)

p.vbar(x=fruits, top=counts, width=0.9)  # x axis tied to categories

p.xgrid.grid_line_color = None   # no vertical grid lines
p.y_range.start = 0              # bars start at the bottom of the plot area

show(p)

### Bar plot - display in sorted order

In [40]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# sorting the bars means sorting the range factors
# Pair every fruit with its own count and sort the pairs by count. This avoids searching the
# list for each element (fruits.index) and stays correct position-by-position; the sort is
# stable, so fruits with equal counts keep their original relative order.
sorted_fruits = [name for name, _ in sorted(zip(fruits, counts), key=lambda pair: pair[1])]

# The order of the categories in x_range defines the order of the bars on the axis
p = figure(x_range=sorted_fruits, plot_height=350, title="Fruit Counts",
           toolbar_location=None, tools="")

# The data itself is passed unsorted: each bar is placed by its category name
p.vbar(x=fruits, top=counts, width=0.9)

p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)

### Add color to barplot

In [41]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

# One color per bar: a ramp from black (0, 0, 0) to pure red (255, 0, 0).
# linspace yields floats, so each value is cast to int before being used as an RGB channel.
data=dict(fruits=fruits, counts=counts,
          color=[RGB(int(red), 0, 0) for red in linspace(0, 255, len(fruits))])
source = ColumnDataSource(data)

p = figure(x_range=fruits, y_range=(0,9), plot_height=250, title="Fruit Counts",
           toolbar_location=None, tools="")

# x, top and color are column names looked up in the data source; pass the ColumnDataSource
# built above rather than the raw dict, so that `source` is not created for nothing
p.vbar(x='fruits', top='counts', width=0.9, color='color', legend="fruits", source=source)

p.xgrid.grid_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)

### Stacked bar plot

In [42]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ["2015", "2016", "2017"]
colors = ["#c9d9d3", "#718dbf", "#e84d60"]

# One column per year, aligned with the 'fruits' column
data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 4, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

p = figure(x_range=fruits, plot_height=250, title="Fruit Counts by Year",
           toolbar_location=None, tools="")

# Stack the year columns on top of each other for every fruit, one color per year;
# value(x) makes the legend use the literal year string rather than a column of that name
p.vbar_stack(years, x='fruits', width=0.9, color=colors, source=ColumnDataSource(data),
             legend=[value(x) for x in years])

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

show(p)

### Show information on hover

In [43]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
years = ["2015", "2016", "2017"]
colors = ["#c9d9d3", "#718dbf", "#e84d60"]

data = {'fruits' : fruits,
        '2015'   : [2, 1, 4, 3, 2, 4],
        '2016'   : [5, 3, 4, 2, 4, 6],
        '2017'   : [3, 2, 4, 4, 5, 3]}

# Enable the hover tool. In the tooltip template, $name is the name of the hovered renderer
# (the stacked year), @fruits is the value of the 'fruits' column and @$name is the value
# of the column called like that renderer.
p = figure(x_range=fruits, plot_height=250, title="Fruit Counts by Year",
           toolbar_location=None, tools="hover", tooltips="$name @fruits: @$name")

p.vbar_stack(years, x='fruits', width=0.9, color=colors, source=data,
             legend=[value(x) for x in years])

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.axis.minor_tick_line_color = None
p.outline_line_color = None
p.legend.location = "top_left"
p.legend.orientation = "horizontal"

show(p)

### Grouped bar plot

In [41]:
# (quarter, month) pairs: a two-level categorical axis with months grouped by quarter
factors = [
    ("Q1", "jan"), ("Q1", "feb"), ("Q1", "mar"),
    ("Q2", "apr"), ("Q2", "may"), ("Q2", "jun"),
    ("Q3", "jul"), ("Q3", "aug"), ("Q3", "sep"),
    ("Q4", "oct"), ("Q4", "nov"), ("Q4", "dec"),

]

p = figure(x_range=FactorRange(*factors), plot_height=250,
           toolbar_location=None, tools="")

# NOTE: despite its name, `x` holds the bar heights (one per month); the bar positions are `factors`
x = [ 10, 12, 16, 9, 10, 8, 12, 13, 14, 14, 12, 16 ]
p.vbar(x=factors, top=x, width=0.9, alpha=0.5)

# A line drawn over the bars, with one point per quarter addressed by the top-level factor only
p.line(x=["Q1", "Q2", "Q3", "Q4"], y=[12, 9, 13, 14], color="red", line_width=2)

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)

### Stacked&grouped bar plot

In [45]:
# (quarter, month) pairs: a two-level categorical axis with months grouped by quarter
factors = [
    ("Q1", "jan"), ("Q1", "feb"), ("Q1", "mar"),
    ("Q2", "apr"), ("Q2", "may"), ("Q2", "jun"),
    ("Q3", "jul"), ("Q3", "aug"), ("Q3", "sep"),
    ("Q4", "oct"), ("Q4", "nov"), ("Q4", "dec"),

]

regions = ['east', 'west']

# One column per region, aligned with the factors in column 'x'
source = ColumnDataSource(data=dict(
    x=factors,
    east=[ 5, 5, 6, 5, 5, 4, 5, 6, 7, 8, 6, 9 ],
    west=[ 5, 7, 9, 4, 5, 4, 7, 7, 7, 6, 6, 7 ],
))

p = figure(x_range=FactorRange(*factors), plot_height=250,
           toolbar_location=None, tools="")

# For every month, stack the 'east' and 'west' values on top of each other
p.vbar_stack(regions, x='x', width=0.9, alpha=0.5, color=["blue", "red"], source=source,
             legend=[value(x) for x in regions])

p.y_range.start = 0
p.y_range.end = 18   # fixed upper end of the y axis, above the tallest stack (9 + 7 = 16)
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None
p.legend.location = "top_center"
p.legend.orientation = "horizontal"

show(p)


### Display events on numeric scale (github commits grouped by weekday and time)

In [46]:
from bokeh.sampledata.commits import data
# Categories of the y axis, in the order given to y_range
DAYS = ['Sun', 'Sat', 'Fri', 'Thu', 'Wed', 'Tue', 'Mon']

source = ColumnDataSource(data)

# Categorical y axis (weekday) against a datetime x axis (time of day)
p = figure(plot_width=800, plot_height=300, y_range=DAYS, x_axis_type='datetime',
           title="Commits by Time of Day (US/Central) 2012—2016")

# Without jitter all commits of one weekday are drawn on the same horizontal line
p.circle(x='time', y='day',  source=source, alpha=0.3)

p.xaxis[0].formatter.days = ['%Hh']   # tick label format of the datetime axis: hour followed by 'h'
p.x_range.range_padding = 0
p.ygrid.grid_line_color = None

show(p)

### Add jitter to see actual counts of events better

In [47]:
from bokeh.sampledata.commits import data
# Categories of the y axis, in the order given to y_range
DAYS = ['Sun', 'Sat', 'Fri', 'Thu', 'Wed', 'Tue', 'Mon']

source = ColumnDataSource(data)

p = figure(plot_width=800, plot_height=300, y_range=DAYS, x_axis_type='datetime',
           title="Commits by Time of Day (US/Central) 2012—2016")

# jitter adds a random vertical offset (within a band of width 0.6 around each weekday)
# so that overlapping points are spread out and their density becomes visible
p.circle(x='time', y=jitter('day', width=0.6, range=p.y_range),  source=source, alpha=0.3)

p.xaxis[0].formatter.days = ['%Hh']   # tick label format of the datetime axis: hour followed by 'h'
p.x_range.range_padding = 0
p.ygrid.grid_line_color = None

show(p)

### Heatmap (continuous variable in the space of 2 categorical variables)

In [48]:
from bokeh.sampledata.unemployment1948 import data


# Years become string categories and the index; the 'Annual' column is dropped,
# so the remaining columns are labelled 'Month'
data.Year = data.Year.astype(str)
data = data.set_index('Year')
data.drop('Annual', axis=1, inplace=True)
data.columns.name = 'Month'

# reshape to 1D array of rates with a month and year for each row.
df = pd.DataFrame(data.stack(), columns=['rate']).reset_index()

source = ColumnDataSource(df)

# this is the colormap from the original NYTimes plot
colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]
# map the rate linearly onto the palette, from the smallest to the largest observed rate
mapper = LinearColorMapper(palette=colors, low=df.rate.min(), high=df.rate.max())

# both axes are categorical: years on x, months on y (in reversed column order)
p = figure(plot_width=900, plot_height=400, title="US Unemployment 1948—2016",
           x_range=list(data.index), y_range=list(reversed(data.columns)),
           toolbar_location=None, tools="", x_axis_location="above")

# one 1x1 rectangle per (Year, Month) cell, filled according to its rate
p.rect(x="Year", y="Month", width=1, height=1, source=source,
       line_color=None, fill_color=transform('rate', mapper))

# color bar for the same mapper, with one tick requested per palette color, labelled in percent
color_bar = ColorBar(color_mapper=mapper, location=(0, 0),
                     ticker=BasicTicker(desired_num_ticks=len(colors)),
                     formatter=PrintfTickFormatter(format="%d%%"))

p.add_layout(color_bar, 'right')

p.axis.axis_line_color = None
p.axis.major_tick_line_color = None
p.axis.major_label_text_font_size = "5pt"
p.axis.major_label_standoff = 0
p.xaxis.major_label_orientation = 1.0

show(p)

# Colormaps

### `linear_cmap` maps continuous values to colors

In [49]:
x = [1,2,3,4,5,7,8,9,10]
y = [1,2,3,4,5,7,8,9,10]

# Use the field name of the column source: colors are computed from column 'y',
# mapped linearly onto the palette between min(y) and max(y)
mapper = linear_cmap(field_name='y', palette=Viridis256, low=min(y), high=max(y))

source = ColumnDataSource(dict(x=x,y=y))

p = figure(plot_width=300, plot_height=300, title="Linear Color Map Based on Y")

# The same mapping is used for both the outline and the fill of every circle
p.circle(x='x', y='y', line_color=mapper, color=mapper, fill_alpha=1, size=12, source=source)

# The color bar reuses the color mapper stored under 'transform' in the linear_cmap result
color_bar = ColorBar(color_mapper=mapper['transform'], width=8,  location=(0,0))

p.add_layout(color_bar, 'right')

show(p)

### `factor_cmap` maps discrete categories to colors

In [50]:
fruits = ['Apples', 'Pears', 'Nectarines', 'Plums', 'Grapes', 'Strawberries']
counts = [5, 3, 4, 2, 4, 6]

source = ColumnDataSource(data=dict(fruits=fruits, counts=counts))

p = figure(x_range=fruits, plot_height=250, toolbar_location=None, title="Fruit Counts")
# factor_cmap assigns each category in `factors` the palette color at the same position,
# so every fruit gets its own fill color (Spectral6 has one color per fruit here)
p.vbar(x='fruits', top='counts', width=0.9, source=source, legend="fruits",
       line_color='white', fill_color=factor_cmap('fruits', palette=Spectral6, factors=fruits))

p.xgrid.grid_line_color = None
p.y_range.start = 0
p.y_range.end = 9    # leave room above the bars for the horizontal legend
p.legend.orientation = "horizontal"
p.legend.location = "top_center"

show(p)

### Additional features
* Bokeh integrates with pandas (e.g. it can work with dataframe indices and with dataframes grouped by some features)
* Bokeh can visualize geospatial data on a map
* Bokeh can visualize graph data (nodes, arrows)