In [1]:
import numpy as np
import pandas as pd

In [2]:
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, FactorRange
from bokeh.io import show, output_file, output_notebook
from bokeh.palettes import Spectral6, brewer, d3
from bokeh.transform import factor_cmap, linear_cmap

In [3]:
output_notebook()

In [4]:
import geoviews as gv
import holoviews as hv

In [5]:
hv.extension('bokeh')

In [6]:
import os

In [7]:
DATA_DIR = r'C:\Projects\Amazon\Applications\SA_Capetown'
DATA_FILE = os.path.join(DATA_DIR, 'SA_Capetown_Model_v3.csv')
print(DATA_FILE)

C:\Projects\Amazon\Applications\SA_Capetown\SA_Capetown_Model_v3.csv


In [8]:
df = pd.read_csv(DATA_FILE)
df.head()

Unnamed: 0,Employee_ID,Office,VoT,Coeff_Cost_Drive,Coeff_Cost_Transit,Coeff_Cost_Bike,Coeff_Cost_Walk,Commute_Distance_TOTAL,Commute_Distance_Oneway,Distance_Drive,...,Distance_Walk,Time_Walk,Cost_Walk,Utility_Walk,Prob_Walk,Prob_Total,Prediction,X,Y,Mode
0,sample1,CPT13_pt,13.04,-0.00115,-0.00115,-0.006902,-0.001725,16.22,8.11,16.22,...,18.65,373.06,0.0,-13.49,0.0,1.0,1.0,18.496877,-33.965254,DRIVE
1,sample10,CPT13_pt,13.04,-0.00115,-0.00115,-0.006902,-0.001725,2.16,1.08,2.16,...,2.48,49.68,0.0,-1.36,0.35,1.0,1.0,18.414839,-33.924405,DRIVE
2,sample100,CPT13_pt,13.04,-0.00115,-0.00115,-0.006902,-0.001725,17.98,8.99,17.98,...,20.68,413.54,0.0,-15.01,0.0,1.0,1.0,18.508354,-33.974077,DRIVE
3,sample1000,CPT10,13.04,-0.00115,-0.00115,-0.006902,-0.001725,17.72,8.86,17.72,...,20.38,407.56,0.0,-14.78,0.0,1.0,1.0,18.492304,-33.850579,DRIVE
4,sample1001,CPT10,13.04,-0.00115,-0.00115,-0.006902,-0.001725,20.9,10.45,20.9,...,24.04,480.7,0.0,-17.53,0.0,1.0,1.0,18.579903,-33.952412,DRIVE


### <span class="burk"><span class="girk">Individual Plots</span></span>

#### Total Mode Share

In [8]:
df_total_mode_share = df.groupby('Mode', as_index=False)['Employee_ID'].count().rename(columns={'Employee_ID':'Travelers'})
df_total_mode_share['Share'] = df_total_mode_share.Travelers / np.sum(df_total_mode_share.Travelers)
df_total_mode_share

Unnamed: 0,Mode,Travelers,Share
0,DRIVE,4706,0.760996
1,TRANSIT,1137,0.183862
2,WALK,341,0.055142


In [17]:
p = figure(x_range=df_total_mode_share.Mode, plot_height=400, title="Mode Share")
p.vbar(x='Mode', top='Share', width=0.5, source=ColumnDataSource(df_total_mode_share), legend="Mode", 
       fill_color=factor_cmap('Mode', palette=Spectral6, factors=df_total_mode_share.Mode))
show(p)

#### Mode Share by Offices

In [31]:
offices, modes = df.Office.unique(), df.Mode.unique()

In [32]:
index_office_mode = pd.MultiIndex.from_product((offices, modes))
index_office_mode

MultiIndex(levels=[['CPT10', 'CPT11_pt', 'CPT12_pt', 'CPT13_pt', 'CPT2'], ['DRIVE', 'TRANSIT', 'WALK']],
           codes=[[3, 3, 3, 0, 0, 0, 4, 4, 4, 2, 2, 2, 1, 1, 1], [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]])

In [44]:
df_modes_by_offices = df.groupby(['Office', 'Mode'])['Employee_ID'].count()
df_modes_by_offices = pd.DataFrame(df_modes_by_offices.reindex(index_office_mode)).rename(columns={'Employee_ID':'Travelers'})
df_modes_by_offices['Share'] = df_modes_by_offices.groupby(level=0).apply(lambda x: 100 * x / float(x.sum()))
df_modes_by_offices = df_modes_by_offices.reset_index().rename(columns={'level_0': 'Office', 'level_1': 'Mode'})
df_modes_by_offices

Unnamed: 0,Office,Mode,Travelers,Share
0,CPT13_pt,DRIVE,521,95.772059
1,CPT13_pt,TRANSIT,18,3.308824
2,CPT13_pt,WALK,5,0.919118
3,CPT10,DRIVE,1930,94.561489
4,CPT10,TRANSIT,51,2.498775
5,CPT10,WALK,60,2.939735
6,CPT2,DRIVE,1894,59.61599
7,CPT2,TRANSIT,1058,33.301857
8,CPT2,WALK,225,7.082153
9,CPT12_pt,DRIVE,143,91.082803


In [45]:
ds_modes_by_offices = hv.Dataset(df_modes_by_offices, kdims=['Office', 'Mode'], vdims=['Travelers', 'Share'])
ds_modes_by_offices

:Dataset   [Office,Mode]   (Travelers,Share)

In [61]:
curves = ds_modes_by_offices.to(hv.Bars, kdims=['Mode'], vdims=['Share'])
curves.opts(color=hv.dim('Mode'))
curves

In [50]:
ds_modes_by_offices_gb = ds_modes_by_offices.groupby('Office')
ds_modes_by_offices_gb.data[('CPT10',)].data

Unnamed: 0,Office,Mode,Travelers,Share
3,CPT10,DRIVE,1930,94.561489
4,CPT10,TRANSIT,51,2.498775
5,CPT10,WALK,60,2.939735


In [37]:
curve = hv.Bars(df_modes_by_offices, kdims=['level_0', 'level_1'], vdims=['Share'])
curve.opts(width=600)

In [None]:
kdims = [hv.Dimension(('level_0', 'Office')),
         hv.Dimension(('level_1', 'Mode'))]
holomap = hv.HoloMap(df_modes_by_offices, kdims=kdims)
holomap.opts(opts.Curve(width=600))

In [31]:
p = figure(x_range=df_modes_by_offices.Mode, plot_height=400, title="Mode Share")
p.vbar(x='Mode', top='Share', width=0.5, source=ColumnDataSource(df_total_mode_share), legend="Mode", 
       fill_color=factor_cmap('Mode', palette=Spectral6, factors=df_total_mode_share.Mode))
show(p)

Unnamed: 0,Unnamed: 1,Travelers,Share
CPT13_pt,DRIVE,521,95.772059
CPT13_pt,TRANSIT,18,3.308824
CPT13_pt,WALK,5,0.919118
CPT10,DRIVE,1930,94.561489
CPT10,TRANSIT,51,2.498775
CPT10,WALK,60,2.939735
CPT2,DRIVE,1894,59.61599
CPT2,TRANSIT,1058,33.301857
CPT2,WALK,225,7.082153
CPT12_pt,DRIVE,143,91.082803


In [73]:
cds = ColumnDataSource(df_modes_by_offices)
p = figure(x_range=FactorRange(*(index_office_mode.to_list())), plot_height=400, title="Mode Share by Office", tools="hover")
p.vbar(x='index', top='Share', width=0.5, source=cds, line_color="white",
       fill_color=factor_cmap('index', palette=d3['Category10'][3], factors=df_modes_by_offices.index.levels[1], start=1, end=2))
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
show(p)

#### Commute Distance Distribution

In [80]:
bins = np.arange(0, 62, 2)
hist, edges = np.histogram(df.Commute_Distance_Oneway, density=True, bins=bins)

In [84]:
p = figure(plot_width=800, title='One-Way Commute Distance', background_fill_color="#fafafa")
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:],
       fill_color="navy", line_color="white", alpha=0.5)

p.y_range.start = 0
p.xaxis.axis_label = 'Distance (Miles)'
p.yaxis.axis_label = 'Proportion'
p.grid.grid_line_color="white"
show(p)

#### Mode Choices by Distance

In [131]:
bins = np.arange(0, 62, 2)
shares_by_distance = {}
for m in df.Mode.unique():
    share = pd.cut(df.loc[df.Mode == m, 'Commute_Distance_Oneway'], bins=bins).value_counts().sort_index().values
    share = share / np.sum(share)
    shares_by_distance[m] = share

In [139]:
from scipy import interpolate
colors = d3['Category20'][3]
p = figure(plot_width=800, title='Mode Share by Commute Distance', background_fill_color="#fafafa")
for (m, s), color in zip(shares_by_distance.items(), colors):
    xp, fp = bins[:-1], s
    xs = np.linspace(0, 50, 1000)
    f = interpolate.interp1d(xp, fp, kind='cubic')
    ys = f(xs)
    p.line(xs, ys, line_color=color, line_width=4, alpha=0.7, legend=m)
p.legend.background_fill_color = "#fefefe"
p.y_range.start = 0
p.x_range.start, p.x_range.end = 0, 50
p.xaxis.axis_label = 'Distance (Miles)'
show(p)

ModuleNotFoundError: No module named 'scipy'

#### Hexbin of Employee Distribution

In [27]:
from geoviews import opts

In [None]:
opts.HexTiles()

In [25]:
h = gv.HexTiles(df, ['X', 'Y'])


In [26]:
h.opts.info()

:HexTiles   [X,Y]   (Employee_ID,Office,VoT,Coeff_Cost_Drive,Coeff_Cost_Transit,Coeff_Cost_Bike,Coeff_Cost_Walk,Commute_Distance_TOTAL,Commute_Distance_Oneway,Distance_Drive,Time_Drive,Cost_Drive,Utility_Drive,Prob_Drive,Distance_Transit,Time_Transit,Time_Walk_AccEgr,Wait_Transit,Cost_Transit,Utility_Transit,Prob_Transit,Distance_Bike,Time_Bike,Cost_Bike,Utility_Bike,Prob_Bike,Distance_Walk,Time_Walk,Cost_Walk,Utility_Walk,Prob_Walk,Prob_Total,Prediction,Mode)


In [21]:
p.kdims, p.vdims

([Dimension('X'), Dimension('Y')], [])

In [28]:
overlay = gv.WMTS('https://mt1.google.com/vt/lyrs=m&x={X}&y={Y}&z={Z}') * gv.HexTiles(df, ['X', 'Y'],)

In [30]:
overlay.opts(gv.opts.HexTiles(width=800, height=600, alpha=0.5, tools=['hover'], colorbar=True), gv.opts.WMTS(width=800))

In [23]:
gv.WMTS('https://mt1.google.com/vt/lyrs=m&x={X}&y={Y}&z={Z}').opts(width=800, height=600)

### Simple Dashboard

In [11]:
import panel as pn
pn.extension()

In [12]:
import param

In [17]:
unique_offices, unique_modes = df.Office.unique(), df.Mode.unique()
index_office_mode = pd.MultiIndex.from_product((unique_offices, unique_modes))

In [28]:
class ViewModebyOffice(param.Parameterized):

    office = param.Selector(objects=unique_offices)
    
    @param.depends('office') 
    def plot(self):
        df_modes_by_offices = df.groupby(['Office', 'Mode'])['Employee_ID'].count()
        df_modes_by_offices = pd.DataFrame(df_modes_by_offices.reindex(index_office_mode)).rename(columns={'Employee_ID':'Travelers'})
        df_modes_by_offices['Share'] = df_modes_by_offices.groupby(level=0).apply(lambda x: 100 * x / float(x.sum()))
        df_modes_by_offices = df_modes_by_offices.reset_index().rename(columns={'level_0': 'Office', 'level_1': 'Mode'})
        df_select = df_modes_by_offices.loc[df_modes_by_offices.Office == self.office]
        elem = hv.Bars(df_select, kdims=['Mode'], vdims=['Share'])
        return elem.opts(width=600, height=400, show_grid=True, ylim=(0, 100), tools=['hover'])


In [29]:
viewer_modebyoffice = ViewModebyOffice()
panel_modebyoffice = pn.Row(viewer_modebyoffice.param, viewer_modebyoffice.plot)

In [30]:
import param

class Sine(param.Parameterized):
    amplitude = param.Number(default=1, bounds=(0, None), softbounds=(0,5))
    frequency = param.Number(default=2, bounds=(0, 10))
    n = param.Integer(default=200, bounds=(1, 200))

    def view(self):
        return sine(self.frequency, self.amplitude, self.n)
    
sine_obj = Sine()

In [36]:
panel_modebyoffice.servable()
# panel_modebyoffice.app('localhost:8888',port=5433)

### <span class="girk">Scratch</span>

In [4]:
class DriveParameter(param.Parameterized):
    average_drive_speed = param.Number(45,bounds=(0,80), precedence=-1)
    auto_operating_cost = param.Number(30, bounds=(0, 100), precedence=1)
    
    @param.depends('average_drive_speed', 'auto_operating_cost')
    def prob(self, x):
        return self.average_drive_speed * x + self.auto_operating_cost

In [5]:
drive_predictor = DriveParameter()

In [6]:
pn.Row(drive_predictor.param)

In [9]:
print(drive_predictor.prob(100))

4589.8


In [27]:
hv.help(hv.Bars)

Bars

Online example: http://holoviews.org/reference/elements/bokeh/Bars.html

[1;35m-------------
Style Options
-------------[0m

	alpha, bar_width, cmap, color, fill_alpha, fill_color, hover_alpha, hover_color, hover_fill_alpha, hover_fill_color, hover_line_alpha, hover_line_color, line_alpha, line_cap, line_color, line_dash, line_join, line_width, muted_alpha, muted_color, muted_fill_alpha, muted_fill_color, muted_line_alpha, muted_line_color, nonselection_alpha, nonselection_color, nonselection_fill_alpha, nonselection_fill_color, nonselection_line_alpha, nonselection_line_color, selection_alpha, selection_color, selection_fill_alpha, selection_fill_color, selection_line_alpha, selection_line_color, width

(Consult bokeh's documentation for more information.)

[1;35m------------
Plot Options
------------[0m

The plot options are the parameters of the plotting class:

[1;32mParameters of 'BarPlot'
[0m
[1;31mParameters changed from their default values are marked in red.[0m


In [11]:
number_lines = 10000
number_pts = 3

xs = [np.random.randint(0, 100+1, number_pts) for _ in range(number_lines)]
ys = [np.random.randint(0, 100+1, number_pts) for _ in range(number_lines)]


p = figure(plot_width=800, plot_height=800)

p.multi_line(xs, ys, line_width=1)

show(p)

In [13]:
number_pts = 300_000

xs = np.random.randint(0, 10000+1, number_pts)
ys = np.random.randint(0, 10000+1, number_pts)


p = figure(plot_width=800, plot_height=800)

p.circle(xs, ys, size=2, color="navy", alpha=0.5)

show(p)


In [8]:
np.random.randint(0, 100+1, 3)

array([31, 14, 15])