In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
# Show up to 100 columns when displaying wide frames. The fully qualified
# option name is used because abbreviated patterns are resolved by regex and
# fail when they match more than one registered option.
pd.set_option('display.max_columns', 100)

In [2]:
# Plotly functions
def plot_with_colorbar(loc_count, size, color, text, zoom = 10.1, access_token = None):
    """Build a Mapbox scatter figure of count locations with a colorbar.

    Parameters
    ----------
    loc_count : pandas.DataFrame
        One row per plotted point. Must contain 'latitude', 'longitude',
        'bike_label', 'ped_label' and the columns named by `size` and `color`.
    size : str
        Column whose values, divided by 3, are used as marker sizes.
    color : str
        Column mapped to marker color. Its value, rounded to 2 decimals, is
        also appended to the generated hover label.
    text : str
        Column used as hover text. Pass 'name' to use the label generated
        here (bike label, pedestrian label and the `color` value joined
        with '<br>').
    zoom : float, optional
        Initial Mapbox zoom level.
    access_token : str, optional
        Mapbox access token. When omitted, the module-level
        `mapbox_access_token` is looked up at call time.

    Returns
    -------
    dict
        {'data': [go.Scattermapbox], 'layout': go.Layout}, ready to be passed
        to a plotly plotting call.
    """
    # Work on a copy so that the generated 'name' column is not written back
    # into the caller's DataFrame.
    frame = loc_count.copy()
    frame['name'] = [
        bike_lbl + '<br>' + ped_lbl + '<br>' + color.title() + ': ' + str(round(value, 2))
        for bike_lbl, ped_lbl, value in zip(frame['bike_label'],
                                            frame['ped_label'],
                                            frame[color])
    ]

    # Fall back to the notebook-level token only when none was supplied.
    token = mapbox_access_token if access_token is None else access_token

    lat = frame['latitude']
    lon = frame['longitude']

    data = [
        go.Scattermapbox(
            lat=lat,
            lon=lon,
            mode='markers',
            marker=dict(
                size=frame[size]/3,
                color=frame[color],
                colorbar=dict(thickness=20)
            ),
            text=frame[text],
        )
    ]
    layout = go.Layout(
        autosize=True,
        hovermode='closest',
        mapbox=dict(
            accesstoken=token,
            bearing=0,
            # Center the map on the midpoint of the bounding box of all points.
            center=dict(
                lat=(lat.max() + lat.min())/2,
                lon=(lon.max() + lon.min())/2
            ),
            pitch=0,
            zoom=zoom
        ),
    )
    return dict(data=data, layout=layout)

In [3]:
# One-time Plotly credential setup. The API key is read from the environment
# instead of being pasted into the notebook; a key that was ever committed in
# plain text should be regenerated.
# import plotly
# plotly.tools.set_credentials_file(username='zihaoxu', api_key=os.environ['PLOTLY_API_KEY'])

# Mapbox access token (user specific). Set the MAPBOX_ACCESS_TOKEN environment
# variable to use your own token; the literal below is kept only as a
# backward-compatible default so the notebook still runs unchanged.
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoienhxbDIwMTUiLCJhIjoiY2pkdGh3MWVwMmJlOTJ3cXBya2UyZXVkayJ9.JzftwW6qNPESBlTldyzGSg',
)

In [4]:
# Project root. Set the BIKESGV_ROOT environment variable to run the notebook
# on another machine; the default reproduces the original author's layout.
bikesgv_root = os.environ.get('BIKESGV_ROOT', '/Users/zihaoxu/R_repos/BikeSGV').rstrip('/')

# Both paths keep their trailing slash because file names are appended to
# them by plain string concatenation.
mst_path = bikesgv_root + '/datasets/mst/'
fig_path = bikesgv_root + '/export/02_summary_stats_ped/'

In [5]:
# Load the cleaned pedestrian counts and preview the first two rows.
ped_csv = mst_path + 'clean_ped.csv'
ped = pd.read_csv(ped_csv)
ped.head(2)

Unnamed: 0,intervalid,location_id,street1,street2,street3,latitude,longitude,date,weekday,period_begin,period_end,interval_begin,ns,sn,ew,we,specialneeds,skate,child,count_method,raining,unusual,bikewaytype,bikewaytype_options,road_class,speed_limit,landuse,pk_dataset_id,city,createdatetime,total_count,female,cane,dog,on_street
0,17560.0,1025.0,Durfee Ave.,Santa Anita Ave.,Peck Rd.,34.035943,-118.039993,12-03-2016,Saturday,11:00 AM,1:00 AM,11:00 AM,0,0,4,7,0,1,2.0,manual_in_house,no,none,none,none,minorArterial,40.0,neighborhoodRetail,133.0,SEM,2017-07-17 15:08:00,11,0.0,0.0,0.0,0.0
1,17561.0,1025.0,Durfee Ave.,Santa Anita Ave.,Peck Rd.,34.035943,-118.039993,12-03-2016,Saturday,11:00 AM,1:00 AM,11:15 AM,0,0,0,0,0,0,0.0,manual_in_house,no,none,none,none,minorArterial,40.0,neighborhoodRetail,133.0,SEM,2017-07-17 15:08:00,0,0.0,0.0,0.0,0.0


In [6]:
# Load the cleaned bicycle counts and preview the first two rows.
bike_csv = mst_path + 'clean_bike.csv'
bike = pd.read_csv(bike_csv)
bike.head(2)

Unnamed: 0,intervalid,location_id,street1,street2,street3,latitude,longitude,date,weekday,period_begin,period_end,interval_begin,ns,sn,ew,we,female,sidewalk,wrongway,count_method,count_bikesonsidewalks,raining,unusual,bikewaytype,bikewaytype_options,road_class,speed_limit,landuse,pk_dataset_id,city,createdatetime,total_count,no_helmet,child
0,19203.0,1025.0,Durfee Ave.,Santa Anita Ave.,Peck Rd.,34.035943,-118.039993,12-03-2016,Saturday,11:00 AM,1:00 AM,11:00 AM,0,0,0,1,0,0,1.0,manual_in_house,1,no,none,none,none,minorArterial,40.0,neighborhoodRetail,133.0,SEM,2017-07-17 15:08:00,1,1,0
1,19204.0,1025.0,Durfee Ave.,Santa Anita Ave.,Peck Rd.,34.035943,-118.039993,12-03-2016,Saturday,11:00 AM,1:00 AM,11:15 AM,0,0,1,0,0,0,0.0,manual_in_house,1,no,none,none,none,minorArterial,40.0,neighborhoodRetail,133.0,SEM,2017-07-17 15:08:00,1,0,0


In [7]:
# Total pedestrian count per location. `total_count` is selected before
# aggregating so the sum is not also computed over every other column of
# `ped` (street names, dates, coordinates, ...).
ped_loc_count = (
    ped.groupby('location_id')['total_count']
    .sum()
    .reset_index(name='ped_sum')
)
# Human-readable label used in the map hover text.
ped_loc_count['ped_label'] = 'Ped. Count: ' + ped_loc_count['ped_sum'].astype(str)
print(ped_loc_count.shape)
ped_loc_count.head()

(106, 3)


Unnamed: 0,location_id,ped_sum,ped_label
0,1025.0,115,Ped. Count: 115
1,1030.0,133,Ped. Count: 133
2,1245.0,208,Ped. Count: 208
3,1368.0,262,Ped. Count: 262
4,1375.0,117,Ped. Count: 117


In [8]:
# Total bike count per (city, location, coordinates). `total_count` is
# selected before aggregating so the sum is not also computed over every
# other column of `bike`.
bike_loc_count = (
    bike.groupby(['city', 'location_id', 'longitude', 'latitude'])['total_count']
    .sum()
    .reset_index(name='bike_sum')
)
# Human-readable label used in the map hover text.
bike_loc_count['bike_label'] = 'Bike Count: ' + bike_loc_count['bike_sum'].astype(str)
print(bike_loc_count.shape)
bike_loc_count.head()

(112, 6)


Unnamed: 0,city,location_id,longitude,latitude,bike_sum,bike_label
0,Altadena,1551.0,-118.113507,34.169051,13,Bike Count: 13
1,Altadena,1554.0,-118.098393,34.169834,20,Bike Count: 20
2,Altadena,1569.0,-118.111557,34.169035,14,Bike Count: 14
3,Altadena,1593.0,-118.159145,34.189121,29,Bike Count: 29
4,Altadena,1622.0,-118.168809,34.185666,73,Bike Count: 73


In [9]:
# Rider attributes that are reported as a share of all bikes counted at a location.
interesting_cols = ['female', 'sidewalk', 'wrongway', 'no_helmet', 'child']

# Sum only the columns that are needed; summing the whole frame would also
# try to aggregate the text columns of `bike`.
bike_totals = (
    bike.groupby('location_id')[interesting_cols + ['total_count']]
    .sum()
    .reset_index()
)

# Convert each attribute count into a ratio of the location's total count.
bike_info = bike_totals[['location_id']].copy()
for col in interesting_cols:
    bike_info[col] = bike_totals[col] / bike_totals['total_count']

# Locations with a zero total produce 0/0 = NaN; report those ratios as 0.
bike_info = bike_info.fillna(0)
bike_info.head()

Unnamed: 0,location_id,female,sidewalk,wrongway,no_helmet,child
0,1025.0,0.0,0.409091,0.090909,0.681818,0.090909
1,1030.0,0.044776,0.716418,0.029851,0.761194,0.059701
2,1245.0,0.09434,0.490566,0.0,0.867925,0.188679
3,1368.0,0.117647,0.5,0.088235,0.617647,0.0
4,1373.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Combine pedestrian totals, bike totals and bike attribute ratios. Both joins
# are inner joins on location_id, so only locations present in every table
# are kept.
loc_count = (
    ped_loc_count
    .merge(bike_loc_count, on='location_id', how='inner')
    .merge(bike_info, on='location_id', how='inner')
)

# Share of bikes among all counted bikes and pedestrians at each location.
combined_total = loc_count['ped_sum'] + loc_count['bike_sum']
loc_count['bike_ratio'] = loc_count['bike_sum'] / combined_total
print(loc_count.shape)
loc_count.head(2)

(111, 14)


Unnamed: 0,location_id,ped_sum,ped_label,city,longitude,latitude,bike_sum,bike_label,female,sidewalk,wrongway,no_helmet,child,bike_ratio
0,1025.0,115,Ped. Count: 115,SEM,-118.039993,34.035943,22,Bike Count: 22,0.0,0.409091,0.090909,0.681818,0.090909,0.160584
1,1030.0,133,Ped. Count: 133,SEM,-118.032415,34.043525,43,Bike Count: 43,0.044776,0.716418,0.029851,0.761194,0.059701,0.244318


In [11]:
# ??go.Scattermapbox

In [12]:
# Marker size follows the bike count; marker color follows the bike ratio.
fig = plot_with_colorbar(loc_count, 'bike_sum', 'bike_ratio', 'name')
plot_name = 'Bike Counts Colored with Bike Ratio'
py.iplot(fig, filename = plot_name)

In [13]:
# One map per rider attribute: marker size is the bike count, color is the
# attribute's ratio at that location.
for col in interesting_cols:
    plot_name = 'Bike Counts Colored with ' + col.title() + ' Ratio'
    fig = plot_with_colorbar(loc_count, 'bike_sum', col, 'name')
    py.iplot(fig, filename = plot_name)

## View output: https://plot.ly/~zihaoxu/