# Data visualization in Python using Bokeh library

* Ref: [https://www.javatpoint.com/data-visualization-in-python-using-bokeh-library](https://www.javatpoint.com/data-visualization-in-python-using-bokeh-library)

### Create scatter circle markers

In [52]:
from bokeh.plotting import figure
from bokeh.plotting import output_notebook
from bokeh.plotting import show

# render Bokeh output inline in the notebook
output_notebook()

# create a 500x500 figure
# NOTE: `width`/`height` are used instead of `plot_width`/`plot_height`;
# the latter were deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
plot1 = figure(
    width=500,
    height=500,
    title="Scatter markers"
)

# add a circle renderer: five fully opaque green markers of size 12
plot1.circle(
    [1, 2, 3, 4, 5],
    [2, 1, 6, 8, 0],
    size=12,
    color="green",
    alpha=1
)

# show the results
show(plot1)

### Create a single line

In [53]:
from bokeh.plotting import figure
from bokeh.plotting import output_notebook
from bokeh.plotting import show

# render Bokeh output inline in the notebook
output_notebook()

# create a 500x500 figure
# NOTE: `width`/`height` are used instead of `plot_width`/`plot_height`;
# the latter were deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
plot2 = figure(
    width=500,
    height=500,
    title="Line plot"
)

# add a line renderer: one red polyline, 4 units wide, through the five points
plot2.line(
    [1, 2, 3, 4, 5],
    [2, 1, 6, 8, 0],
    line_width=4,
    color="red"
)

# show the results
show(plot2)

### Create a bar chart

In [54]:
from bokeh.plotting import figure
from bokeh.plotting import output_file
from bokeh.plotting import show

# This cell's import list brings in `output_file` but not `output_notebook`,
# so import it here; otherwise the call below only works when an earlier cell
# has already been run in the same kernel.
from bokeh.plotting import output_notebook

# render inline in the notebook; to save a standalone HTML page instead, use:
# output_file("jtp.html")
output_notebook()

# vertical bar chart: one green bar per x position, rising to the matching `top`
plot3 = figure(title="Bokeh vertical bar graph")
x = [1, 2, 3, 4, 5, 6, 7, 8]
top = [5, 1, 4, 3, 2, 7, 6, 8]
width = 0.7

plot3.vbar(
    x,
    top=top,
    width=width,
    color="green"
)

show(plot3)

# horizontal bar chart: one green bar per position, extending to the matching `right`
plot4 = figure(title="Bokeh horizontal bar graph")

# NOTE: despite its name, `x` holds the positions of the bars along the y-axis
x = list(range(1, 9))
right = [5, 1, 4, 3, 2, 7, 6, 8]
height = 0.7

plot4.hbar(y=x, height=height, right=right, color="green")

show(plot4)

### Create box plot

In [55]:
import numpy as np
import pandas as pd

from bokeh.plotting import figure
from bokeh.plotting import show

# generate synthetic scores for six different categories
cats1 = list("xyzprq")  # ['x', 'y', 'z', 'p', 'r', 'q']
y1 = np.random.randn(2000)  # 2000 draws from the standard normal distribution
g1 = np.random.choice(cats1, 2000)  # 2000 category labels sampled from cats1

# shift each category's scores by k // 2, i.e. by 0, 0, 1, 1, 2, 2 in cats1 order
# (the original referenced an undefined name `cats_1` here and in the line above)
for k, j in enumerate(cats1):
    y1[g1 == j] += k // 2
df = pd.DataFrame(dict(score=y1, group=g1))
display(df.head())

# per-category quartiles of the score: 25th, 50th (median) and 75th percentiles
df_group = df.groupby("group")
df_q1, df_q2, df_q3 = (df_group.quantile(q=level) for level in (0.25, 0.5, 0.75))

# inter-quartile range, and the fences lying 1.5 * IQR beyond the outer quartiles
df_iqr = df_q3 - df_q1
fence_reach = 1.5 * df_iqr
df_upper = df_q3 + fence_reach
df_lower = df_q1 - fence_reach

# find the outliers for each category
def outliers(df):
    """Return the scores of one group that lie outside that group's fences.

    `df` is a single group's frame; its `name` attribute is read as the group
    label and used to look up the limits in the module-level `df_upper` and
    `df_lower` frames. A score is kept when it is strictly above the upper
    limit or strictly below the lower limit.
    """
    group_label = df.name
    upper_limit = df_upper.loc[group_label]["score"]
    lower_limit = df_lower.loc[group_label]["score"]
    beyond_fences = (df.score > upper_limit) | (df.score < lower_limit)
    return df[beyond_fences]["score"]

# pd.Series made up of the outlier scores of every group
out = df_group.apply(outliers)
out = out.dropna()

# prepare the outlier data for plotting: one (x, y) coordinate pair per outlier,
# where x is the group label taken from level 0 of the index
if len(out) > 0:
    out_x = out.index.get_level_values(0).tolist()
    out_y = list(out.values)

    
# categorical x-axis in cats1 order, light grey background, no tools and no toolbar
plot5 = figure(
    x_range=cats1,
    background_fill_color="#efefef",
    toolbar_location=None,
    tools=""
)

# Clamp each whisker end to the observed data: where a fence lies beyond the
# group's actual maximum (or minimum), the stem stops at that extreme instead.
df_q_min = df_group.quantile(q=0.0)
df_q_max = df_group.quantile(q=1.0)
df_upper["score"] = np.minimum(df_q_max["score"].values, df_upper["score"].values)
df_lower["score"] = np.maximum(df_q_min["score"].values, df_lower["score"].values)

# The quartile/fence frames come from a groupby, whose rows are ordered by sorted
# group key (p, q, r, x, y, z), NOT in cats1 order (x, y, z, p, r, q). Bokeh pairs
# coordinates by position, so the x coordinates must be taken from the statistics'
# own index; passing cats1 would draw every box on the wrong category.
groups = list(df_q2.index)

# stems: from each quartile out to the corresponding whisker end
plot5.segment(
    x0=groups,
    y0=df_upper.score,
    x1=groups,
    y1=df_q3.score,
    line_color="black"
)

plot5.segment(
    x0=groups,
    y0=df_lower.score,
    x1=groups,
    y1=df_q1.score,
    line_color="black"
)

# boxes: upper half (median to Q3) and lower half (Q1 to median)
plot5.vbar(
    x=groups,
    width=0.7,
    top=df_q2.score,
    bottom=df_q3.score,
    fill_color="#E08E79",
    line_color="black"
)

plot5.vbar(
    x=groups,
    width=0.7,
    top=df_q1.score,
    bottom=df_q2.score,
    fill_color="#3B8686",
    line_color="black"
)

# whiskers (almost-0 height rects simpler than segments)
plot5.rect(
    x=groups,
    y=df_lower.score,
    width=0.2,
    height=0.01,
    line_color="black"
)

plot5.rect(
    x=groups,
    y=df_upper.score,
    width=0.2,
    height=0.01,
    line_color="black"
)

# overlay the outliers as small orange circles, if there are any
if not out.empty:
    plot5.circle(x=out_x, y=out_y, size=6, color="#F38630", fill_alpha=0.6)

# grid and axis styling: no vertical grid lines, thick white horizontal ones,
# and larger category labels on the x-axis
plot5.grid.grid_line_width = 2
plot5.xgrid.grid_line_color = None
plot5.ygrid.grid_line_color = "white"
plot5.xaxis.major_label_text_font_size = "16px"

show(plot5)

Unnamed: 0,score,group
0,0.14479,x
1,0.176394,x
2,-0.171607,x
3,-0.465428,y
4,2.011693,p


### Create scatter plot

In [56]:
from bokeh.plotting import figure
from bokeh.plotting import output_notebook, output_file
from bokeh.plotting import show

# render inline in the notebook; to save a standalone HTML page instead, use:
# output_file("jtp.html")
output_notebook()

# coordinates of the 20 points to be plotted
x1 = [
    1.4, 5.1, 5.9, 2.3, 5.6,
    8.6, 4.5, 2.1, 3.1, 4.3,
    5.5, 4.4, 6.9, 2.1, 4,
    5.2, 6.3, 7.2, 7.9, 2,
]
y1 = [
    3.4, 2.1, 5.7, 8.5, 4.3,
    4.2, 5.7, 6.5, 8.9, 9.1,
    1.5, 2.1, 6.8, 1, 6,
    5.2, 4.5, 7.4, 7.5, 6.3,
]

# build the figure and add the points with the default scatter marker
plot6 = figure(title="Bokeh scatter graph")
plot6.scatter(x=x1, y=y1)

# display the model
show(plot6)