In [1]:
import charts
from bkcharts import Bar, output_file, show
from bokeh.io import output_notebook
from bokeh.plotting import figure
from bokeh.layouts import layout
from bkcharts import Donut
from bkcharts.utils import df_from_json
from bokeh.models.widgets import Div

import pandas as pd
from bokeh.layouts import widgetbox

import numpy as np
from bokeh.palettes import RdGy, PuBu

# Load the two datasets through the project-local `charts` helper module.
# Later cells look names up with `regions.get(...)` and iterate `ages` row by
# row, reading fields by index (row[0], row[1]).
# NOTE(review): the exact container types returned by these helpers are not
# visible from this notebook -- confirm against charts.py.
regions = charts.loadRegionData()
ages = charts.loadChartData()
# Direct bokeh output to the notebook so the show(...) calls below render inline.
output_notebook()

In [4]:
""" Data Processing """
""" The number of Countries that data > 0 in each area"""

# result0: region -> number of rows in `ages` that map to that region.
# Rows whose first field has no truthy entry in `regions` are skipped.
result0 = {}
for row in ages:
    region = regions.get(row[0])
    if region:
        result0[region] = result0.get(region, 0) + 1

# result1: the same tally, restricted to rows whose second field is > 0.
# The value test is evaluated first, so the region lookup only happens for
# rows that pass it (same order as before).
result1 = {}
for row in ages:
    region = regions.get(row[0]) if row[1] > 0 else None
    if region:
        result1[region] = result1.get(region, 0) + 1

In [5]:
""" CHART ONE: STACK BAR CHART """

# Build three parallel lists for the stacked bar chart: each region contributes
# two entries (one per stack segment), so index k of `tag`, `barName` and
# `billionaires` together describe one segment.
tag = []
barName = []
billionaires = []

for key in result0.keys():
    regionTotal = result0[key]
    # result1 only has an entry for regions with at least one row whose value
    # is > 0; treat a missing region as zero instead of raising KeyError.
    existsCount = result1.get(key, 0)

    # Keep each tag next to the value it actually describes. Previously the
    # "non-exists" tag was paired with the result1 count (rows with value > 0)
    # and "exists" with the remainder, so the two segments were mislabelled.
    tag.append("non-exists")
    barName.append(key)
    billionaires.append(regionTotal - existsCount)

    tag.append("exists")
    barName.append(key)
    billionaires.append(existsCount)

In [13]:
# Introductory HTML blurb shown above the charts (text kept verbatim).
introText = """The dataset I selected for the homework is <b>Average age of dollar billionaires (years)</b> and 
gaminder_regions.csv file. By using those two files, I could see the distribution of billionaires for each region 
and also see the change of each country's average age of billionaires. This data visualization would require some data
processing job first and then, use bokeh for plotting. The charts I tried contain stacked bar chart, pie chart, scatter
plot and line plot."""

div = Div(text=introText, width=800, height=100)

# Wrap the Div in a widget box and render it.
show(widgetbox(div))

In [7]:
# Column-oriented input for the chart: one entry per stacked segment, built
# from the parallel lists prepared in the "CHART ONE" cell.
data = {
    'tag': tag,
    'barName': barName,
    'billionaires': billionaires,
}

# One bar per `barName`, split into stacked segments by `tag`; segment heights
# are the summed `billionaires` values.
bar = Bar(
    data,
    label='barName',
    values='billionaires',
    stack='tag',
    agg='sum',
    color=['#ff8080', '#cc0000'],
    legend='top_right',
    title="The number of countries which have billionaires and not",
    plot_width=600,
    plot_height=500,
)

"""Reference-Stacked Bar Chart: http://bokeh.pydata.org/en/0.12.0/docs/reference/charts.html"""

show(bar, notebook_handle=True)

In [10]:
""" CHART TWO: DONUT CHART """
"""Reference-Donut Chart: http://bokeh.pydata.org/en/0.12.0/docs/reference/charts.html"""

# Hard-coded per-region tallies as (name, total, 'ex', 'no-ex').
# NOTE(review): 'ex' / 'no-ex' presumably mean "has billionaires" / "has none",
# mirroring result1 and result0 - result1 -- confirm; the numbers are not
# derived from those dicts here.
regionTallies = [
    ('East Asia & Pacific', 46, 11, 35),
    ('Sub-Saharan Africa', 50, 1, 49),
    ('Middle East & North Africa', 21, 5, 16),
    ('South Asia', 8, 1, 7),
    ('America', 51, 8, 43),
    ('Europe & Central Asia', 65, 18, 47),
]

# Nested payload handed to df_from_json (same structure and values as before).
donutData = {
    'count': 241,
    'object': 'list',
    'data': [
        {'name': regionName, 'numbers': {'total': regionTotal, 'ex': exCount, 'no-ex': noExCount}}
        for regionName, regionTotal, exCount, noExCount in regionTallies
    ],
}

# Sort by the 'total' column (descending), then melt to one row per
# (region, 'ex' | 'no-ex') pair. 'medal' / 'medal_count' are just the column
# labels the Donut call below refers to.
df = pd.melt(
    df_from_json(donutData).sort_values(by="total", ascending=False),
    id_vars=['name'],
    value_vars=['ex', 'no-ex'],
    var_name='medal',
    value_name='medal_count',
)

# Reference: Donut chart for medals - Bokeh
bluePalette = ['#084594', '#2171b5', '#4292c6', '#6baed6',
               '#9ecae1', '#c6dbef', '#deebf7', '#f7fbff']
d = Donut(
    df,
    label=['name', 'medal'],
    values='medal_count',
    title='Pie Chart for the number of countries which have billionaires',
    color=bluePalette,
    text_font_size='7pt',
    hover_text='country_count',
    plot_width=600,
    plot_height=600,
)

show(d, notebook_handle=True)

In [17]:
""" Chart Three: Scatter Plot """
""" Reference: https://bokeh.pydata.org/en/latest/docs/gallery/color_scatter.html"""
# Marker coordinates and sizes come from the project helper.
# NOTE(review): what x encodes (the axis label suggests a per-region position)
# is decided inside charts.scatterData -- confirm there.
x, y, radius = charts.scatterData(ages, regions)

# One identical fill colour per point.
colors = ["#E6B8FF" for point in x]

# Comma-separated toolbar tool names. The value is unchanged, including the
# run of spaces before "redo" and the trailing comma.
TOOLS = ("hover, crosshair, pan, wheel_zoom, zoom_in, zoom_out, box_zoom, undo,"
         "       redo, reset, tap, save, box_select, poly_select, lasso_select,")

p_scatter = figure(
    title="Scatter Plot for the average age of billionaires for each country",
    tools=TOOLS,
    plot_width=800,
    plot_height=400,
)

p_scatter.yaxis.axis_label = 'Average Age'
p_scatter.xaxis.axis_label = 'regions: East Asia & Pacific, Sub-Saharan Africa, Middle East & North Africa, South Asia, America, Europe & Central Asia'

# Outline-free, nearly opaque markers sized by `radius`.
p_scatter.scatter(
    x, y,
    radius=radius,
    line_color=None,
    fill_alpha=0.9,
    fill_color=colors,
)

show(p_scatter)

In [14]:
""" Chart Four: Line Plot"""
"""Reference Line: https://bokeh.pydata.org/en/latest/docs/gallery/stocks.html"""

p1 = figure(x_axis_type="linear", title="Average age of billionaires (Click legend for hiding lines)",
            plot_width=1000)
p1.grid.grid_line_alpha = 0.3
p1.xaxis.axis_label = 'Date'
p1.yaxis.axis_label = 'Average Age'

# One series per key; each series is plotted against the four years below.
# NOTE(review): assumes every line[key] holds exactly four values (2004-2007)
# -- confirm against charts.lineData.
line = charts.lineData(ages)

# Flatten every palette stored in RdGy into a single pool of colours.
lineColor = []
for colorlist in RdGy.values():
    lineColor += colorlist

years = [2004, 2005, 2006, 2007]
for i, key in enumerate(line.keys()):
    # Cycle through the pool so that having more series than colours reuses
    # colours instead of raising IndexError.
    p1.line(years, line[key], color=lineColor[i % len(lineColor)], legend=key)

p1.legend.location = "top_left"
# Clicking a legend entry toggles the visibility of its line.
p1.legend.click_policy = "hide"

show(p1)