In [118]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, FactorRange, Legend
import bokeh.palettes as palettes

### Part 2

##### Data prep

In [119]:
# Load the raw incident table from a CSV file in the notebook's working directory.
data = pd.read_csv('crime.csv')

In [120]:
# Parse the Date and Time columns into datetimes, then derive the hour and
# minute of each incident from the parsed Time column.
data = data.assign(
    Date=pd.to_datetime(data.Date),
    Time=pd.to_datetime(data.Time),
)
data = data.assign(
    Hour=data.Time.dt.hour,
    Minutes=data.Time.dt.minute,
)

# Crime categories this analysis is restricted to. The order matters later:
# it drives both the legend order and the colour assigned to each category.
focuscrimes = [
    'WEAPON LAWS',
    'PROSTITUTION',
    'DRIVING UNDER THE INFLUENCE',
    'ROBBERY',
    'BURGLARY',
    'ASSAULT',
    'DRUNKENNESS',
    'DRUG/NARCOTIC',
    'TRESPASS',
    'LARCENY/THEFT',
    'VANDALISM',
    'VEHICLE THEFT',
    'STOLEN PROPERTY',
    'DISORDERLY CONDUCT',
]

# Keep only incidents dated before 2018 that belong to one of the focus crimes.
before_2018 = data.Date.dt.year < 2018
is_focus_crime = data.Category.isin(focuscrimes)
data = data[before_2018 & is_focus_crime]

In [121]:
# Preview the first rows of the filtered table, including the derived
# Hour and Minutes columns.
data.head()

Unnamed: 0,PdId,IncidntNum,Incident Code,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,...,Fix It Zones as of 2018-02-07 2 2,"CBD, BID and GBD Boundaries as of 2017 2 2","Areas of Vulnerability, 2016 2 2",Central Market/Tenderloin Boundary 2 2,Central Market/Tenderloin Boundary Polygon - Updated 2 2,HSOC Zones as of 2018-06-05 2 2,OWED Public Spaces 2 2,Neighborhoods 2,Hour,Minutes
0,4133422003074,41334220,3074,ROBBERY,"ROBBERY, BODILY FORCE",Monday,2004-11-22,2023-03-14 17:50:00,INGLESIDE,NONE,...,,,,,,,,,17,50
1,5118535807021,51185358,7021,VEHICLE THEFT,STOLEN AUTOMOBILE,Tuesday,2005-10-18,2023-03-14 20:00:00,PARK,NONE,...,,,,,,,,,20,0
2,4018830907021,40188309,7021,VEHICLE THEFT,STOLEN AUTOMOBILE,Sunday,2004-02-15,2023-03-14 02:00:00,SOUTHERN,NONE,...,,,,,,,,,2,0
4,10108108004134,101081080,4134,ASSAULT,BATTERY,Sunday,2010-11-21,2023-03-14 17:00:00,SOUTHERN,NONE,...,,,2.0,,,,,32.0,17,0
5,13027069804134,130270698,4134,ASSAULT,BATTERY,Tuesday,2013-04-02,2023-03-14 15:50:00,TARAVAL,NONE,...,,,1.0,,,,,44.0,15,50


In [122]:
# Count incidents per (Hour, Category) and pivot so that rows are hours and
# columns are crime categories.
crime_counts = data.groupby(['Hour', 'Category']).size().unstack()

# Divide every column by its own total: each value becomes the fraction of
# that category's incidents that occurred in the given hour.
hourlyData = crime_counts.div(crime_counts.sum(), axis='columns')
hourlyData

Category,ASSAULT,BURGLARY,DISORDERLY CONDUCT,DRIVING UNDER THE INFLUENCE,DRUG/NARCOTIC,DRUNKENNESS,LARCENY/THEFT,PROSTITUTION,ROBBERY,STOLEN PROPERTY,TRESPASS,VANDALISM,VEHICLE THEFT,WEAPON LAWS
Hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,0.054414,0.038057,0.052551,0.126799,0.03168,0.084436,0.039986,0.081566,0.05444,0.047708,0.029442,0.056748,0.037633,0.053292
1,0.049486,0.02457,0.038626,0.116367,0.019269,0.079992,0.024724,0.063271,0.055919,0.03386,0.021037,0.039765,0.025698,0.039957
2,0.044484,0.026503,0.034662,0.115468,0.015616,0.067797,0.016162,0.046861,0.058577,0.028321,0.023339,0.037612,0.020367,0.034907
3,0.021801,0.02711,0.021956,0.051439,0.010614,0.026664,0.010096,0.032821,0.035345,0.021978,0.019217,0.025185,0.0126,0.021523
4,0.013582,0.024087,0.01606,0.019065,0.007735,0.013642,0.006551,0.028202,0.022933,0.019834,0.013918,0.016787,0.009739,0.0151
5,0.01075,0.021962,0.04015,0.010612,0.004048,0.004651,0.00659,0.034887,0.019769,0.015188,0.027836,0.013964,0.009755,0.008138
6,0.014655,0.02212,0.101545,0.01241,0.011345,0.008268,0.010399,0.031362,0.018927,0.017243,0.061399,0.016126,0.014452,0.011864
7,0.021593,0.034809,0.091889,0.008273,0.025569,0.016846,0.016397,0.016228,0.016942,0.02448,0.066592,0.021834,0.024888,0.020395
8,0.03325,0.053894,0.068408,0.008273,0.033605,0.015812,0.027631,0.009117,0.018964,0.031359,0.061881,0.031653,0.035573,0.026131
9,0.036983,0.047094,0.051738,0.009892,0.039604,0.017879,0.032791,0.009421,0.02282,0.033414,0.055083,0.029616,0.034875,0.033044


In [123]:
# The x-axis is a categorical FactorRange whose factors are hour strings
# ('0' ... '23'). Bokeh matches categorical data to factors by string value;
# numeric x values are instead used as raw coordinates on the axis, which
# would draw every bar half a category away from its tick label. Convert the
# integer 'Hour' index to strings so each bar is centred on its own factor.
source = ColumnDataSource(hourlyData.rename(index=str))

# Factors for the x-axis, as a plain list of strings in chronological order.
hourList = [str(hour) for hour in range(24)]

# Render Bokeh plots inline in the notebook.
output_notebook()

In [124]:
# Clear Bokeh's current document before building the figure, so that re-running
# the plotting cells does not build on whatever the document already holds.
from bokeh.io import curdoc
curdoc().clear()

In [125]:
# Figure with one categorical x position per hour of the day.
# `width`/`height` are used instead of `plot_width`/`plot_height`: the latter
# were deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
p = figure(width=800, height=400, x_range=FactorRange(factors=hourList),
           x_axis_label='Hour of the day', y_axis_label='Relative frequency', title='Crimes per hour')

In [126]:
# One Viridis colour per focus crime. Sizing the palette from the list keeps
# the two in step if categories are added or removed.
palette = palettes.viridis(len(focuscrimes))

In [127]:
# Draw one bar series per focus crime, all reading from the shared source.
# Every series starts muted (nearly transparent), and clicking its legend
# entry toggles it, so the reader chooses which crimes to compare.
bar = {
    crime: p.vbar(
        x='Hour',
        top=crime,
        source=source,
        width=0.8,
        legend_label=crime,
        muted=True,
        muted_alpha=0.04,
        color=palette[position],
    )
    for position, crime in enumerate(focuscrimes)
}

# Legend behaviour and placement: click to mute/unmute, entries stacked
# vertically, legend placed to the left of the plot area.
p.legend.click_policy = "mute"
p.legend.orientation = "vertical"
p.add_layout(p.legend[0], 'left')


In [128]:
# Display the interactive bar chart.
show(p)