In [None]:
# load all the packages?
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
#pd.set_option('notebook_repr_html', True)
#pd.set_option('display.max_rows', 50)
from collections import OrderedDict
from bokeh.plotting import figure, show, output_notebook, ColumnDataSource
from bokeh.models import HoverTool

In [None]:
# Load the raw trip and station tables.
# Only one trips file is read at the moment; the other files are kept below,
# disabled, together with the concat that would combine them.
#trips1 = pd.read_csv('~/TDI/divvy/useful_data/Divvy_Trips_2014-Q1Q2.csv')#.astype(str)
#trips3 = pd.read_csv('~/TDI/divvy/useful_data/Divvy_Trips_2014-Q3-0809.csv')#.astype(str)
#trips4 = pd.read_csv('~/TDI/divvy/useful_data/Divvy_Trips_2014-Q4.csv')#.astype(str)
#trips5 = pd.read_csv('~/TDI/divvy/useful_data/Divvy_Trips_2013.csv').astype(str)
#trips = pd.concat([trips1, trips2, trips3, trips4])
trips2_path = '~/TDI/divvy/useful_data/Divvy_Trips_2014-Q3-07.csv'
stations_path = '~/TDI/divvy/useful_data/Divvy_Stations_2014-Q3Q4.csv'

trips2 = pd.read_csv(trips2_path)
# Index the station table by its 'id' column so rows can be looked up by station id.
stations = pd.read_csv(stations_path).set_index('id')

# Bike Sharing System in Chicago

## 1. Explore ~2.5 million trips in 2014
## 2. Investigate the net flow of bikes for all the stations
## Q: What are the best locations for new bike stations to gain more business?
## Q: What is the most cost-effective way to redistribute bikes to balance the flow?

In [None]:
def convert_date(old_date):
    """Parse a 'month/day/year hour:minute' string into a datetime.

    Parameters
    ----------
    old_date : str
        Timestamp text laid out as '%m/%d/%Y %H:%M'.

    Returns
    -------
    datetime.datetime
        The parsed timestamp. strptime raises ValueError when the text does
        not match the layout.
    """
    return datetime.strptime(old_date, '%m/%d/%Y %H:%M')

# Parse the trip timestamps into datetime columns.
# pd.to_datetime is vectorized (no Python-level call per row) and returns an
# already-parsed datetime column unchanged, so this cell is safe to re-run.
# Mapping a strptime-based parser over the column failed on a second run
# because strptime only accepts strings.
TRIP_TIME_FORMAT = '%m/%d/%Y %H:%M'
trips2['starttime'] = pd.to_datetime(trips2['starttime'], format=TRIP_TIME_FORMAT)
trips2['stoptime'] = pd.to_datetime(trips2['stoptime'], format=TRIP_TIME_FORMAT)

In [None]:
def is_weekday(a_date):
    """Return True when a_date falls on Monday through Friday."""
    # weekday() numbers the days Monday=0 ... Sunday=6.
    return a_date.weekday() < 5

def is_weekend(a_date):
    """Return True when a_date falls on Saturday or Sunday."""
    # weekday() numbers the days Monday=0 ... Sunday=6.
    return a_date.weekday() >= 5

def is_am_rh(a_date):
    """Return True when a_date is in the morning rush window, 07:00-09:59."""
    return a_date.hour in (7, 8, 9)

def is_pm_rh(a_date):
    """Return True when a_date is in the evening rush window, 16:00-18:59."""
    return a_date.hour in (16, 17, 18)
    
def is_b2pm(a_date):
    """Return True when a_date is before 2 PM (14:00).

    Parameters
    ----------
    a_date : datetime-like
        Any object exposing an integer ``hour`` attribute (0-23).

    Returns
    -------
    bool
        True for times from 00:00 through 13:59, False from 14:00 onward.
    """
    # Strictly less than 14: `hour <= 14` would also admit 14:00-14:59,
    # which is after 2 PM.
    return a_date.hour < 14

def is_a6pm(a_date):
    """Return True when a_date is at or after 6 PM (18:00).

    Parameters
    ----------
    a_date : datetime-like
        Any object exposing an integer ``hour`` attribute (0-23).

    Returns
    -------
    bool
        True for times from 18:00 through 23:59, False before 18:00.
    """
    # `hour > 18` would skip 18:00-18:59, even though those times are
    # already past 6 PM.
    return a_date.hour >= 18

In [None]:
# Split the trips by the day type of their start time.
starts_on_weekday = trips2.starttime.map(is_weekday)
starts_on_weekend = trips2.starttime.map(is_weekend)
trips_weekday = trips2[starts_on_weekday]
trips_weekend = trips2[starts_on_weekend]

# Weekday trips that start inside the morning / evening rush windows.
weekday_starts = trips_weekday.starttime
trips_weekday_am_rh = trips_weekday[weekday_starts.map(is_am_rh)]
trips_weekday_pm_rh = trips_weekday[weekday_starts.map(is_pm_rh)]

# Weekend trips that start early in the day / in the evening.
weekend_starts = trips_weekend.starttime
trips_weekend_b2pm = trips_weekend[weekend_starts.map(is_b2pm)]
trips_weekend_a6pm = trips_weekend[weekend_starts.map(is_a6pm)]

In [None]:
# Pick the segment that the net-flow computation and the map below operate on.
# The plot title ("Net Flow Weekend Before 2PM") and the output filename
# ("weekend_b2pm.html") further down are hard-coded for this segment, so
# update them as well when switching to another segment here.
trips = trips_weekend_b2pm

In [None]:
# Count departures and arrivals per station for the selected trips.
flow_out = trips.groupby(['from_station_id']).size()
flow_in = trips.groupby(['to_station_id']).size()

# Net flow = arrivals - departures.
# A plain `flow_in - flow_out` aligns on station id and yields NaN for any
# station that appears on only one side (only departures or only arrivals),
# which would silently drop that station from the analysis. Treat the missing
# side as zero trips instead; the result then has no NaN and can stay integer.
net_flow = flow_in.sub(flow_out, fill_value=0).astype(int)
net_flow.index.name = 'station_id'

In [None]:
# Split the stations into net receivers (more arrivals than departures) and
# net senders (more departures than arrivals); balanced stations (net flow of
# exactly 0) belong to neither group.
# Plain boolean indexing replaces `.ix`, which was removed in pandas 1.0.
net_in = net_flow[net_flow > 0]
net_out = net_flow[net_flow < 0]

# Receivers first, then senders; the station attributes and the colour list
# built later follow this same order.
new_net_flow = pd.concat([net_in, net_out])

In [None]:
# Look up the station attributes for the net-receiver and net-sender stations.
# `.ix` was removed in pandas 1.0. `reindex` selects rows by station id and,
# like the old `.ix[list_of_labels]`, yields NaN rows (rather than raising)
# for ids that are absent from the station table.
# NOTE(review): reindex requires the station index to be unique — confirm that
# the 'id' column of the stations file has no duplicates.
net_in_stations = stations.reindex(net_in.index)
net_out_stations = stations.reindex(net_out.index)

net_in_lon = net_in_stations['longitude']
net_in_lat = net_in_stations['latitude']
net_in_cap = net_in_stations['dpcapacity']
net_in_name = net_in_stations['name']

net_out_lon = net_out_stations['longitude']
net_out_lat = net_out_stations['latitude']
net_out_cap = net_out_stations['dpcapacity']
net_out_name = net_out_stations['name']

# Receivers first, then senders — the same order as new_net_flow.
new_lon = pd.concat([net_in_lon, net_out_lon])
new_lat = pd.concat([net_in_lat, net_out_lat])
new_cap = pd.concat([net_in_cap, net_out_cap])
new_name = pd.concat([net_in_name, net_out_name])

## NEXT STEPS:

I. Superimpose a map of Chicago in order to label the landmarks near bike stations.

II. Segment the data using the following criteria and generate separate plots:
1. Weekdays vs Weekends
2. Mornings vs Afternoons
3. Members vs Non-members (approx. Locals vs Tourists)

III. Design a webpage to present the plots and the findings

IV. Figure out the most cost-effective way to redistribute bikes to balance the net flow

In [None]:
from __future__ import print_function

# NOTE(review): this cell targets an older Bokeh API (bokeh.browserlib,
# BoxSelectionOverlay, Document.add) — confirm it matches the installed version.
from bokeh.browserlib import view
from bokeh.document import Document
from bokeh.embed import file_html
# Circle is used for the station markers below; without this import the cell
# fails with a NameError.
from bokeh.models.glyphs import Circle
from bokeh.models import (
    GMapPlot, Range1d, ColumnDataSource, LinearAxis,
    PanTool, WheelZoomTool, BoxSelectTool,
    BoxSelectionOverlay, GMapOptions,
    NumeralTickFormatter, PrintfTickFormatter)
from bokeh.resources import INLINE

x_range = Range1d()
y_range = Range1d()

# Centre the map on Millennium Park: 41.8827° N, 87.6227° W.

# JSON style string taken from: https://snazzymaps.com/style/1/pale-dawn
map_options = GMapOptions(lat=41.8827, lng=-87.6227, map_type="roadmap", zoom=13, styles="""
[{"featureType":"administrative","elementType":"all","stylers":[{"visibility":"on"},{"lightness":33}]},{"featureType":"landscape","elementType":"all","stylers":[{"color":"#f2e5d4"}]},{"featureType":"poi.park","elementType":"geometry","stylers":[{"color":"#c5dac6"}]},{"featureType":"poi.park","elementType":"labels","stylers":[{"visibility":"on"},{"lightness":20}]},{"featureType":"road","elementType":"all","stylers":[{"lightness":20}]},{"featureType":"road.highway","elementType":"geometry","stylers":[{"color":"#c5c6c6"}]},{"featureType":"road.arterial","elementType":"geometry","stylers":[{"color":"#e4d7c6"}]},{"featureType":"road.local","elementType":"geometry","stylers":[{"color":"#fbfaf7"}]},{"featureType":"water","elementType":"all","stylers":[{"visibility":"on"},{"color":"#acbcc9"}]}]
""")

plot = GMapPlot(
    x_range=x_range, y_range=y_range,
    map_options=map_options,
    title = "Net Flow Weekend Before 2PM"
)

# One row per station. All columns are ordered net-inflow stations first, then
# net-outflow stations, which is what the colour list below relies on.
source = ColumnDataSource(
    data = dict(
        lon = new_lon,
        lat = new_lat,
        # Marker size grows with the square root of the absolute net flow.
        radius = np.sqrt(abs(new_net_flow)),
        # Red for net-inflow stations, blue for net-outflow stations.
        color = ['#FF0000']*len(net_in) + ['#0000FF']*len(net_out),
        name = new_name,
        capacity = new_cap,
        flow = new_net_flow,
    )
)

circle = Circle(x="lon", y="lat", size="radius", fill_color="color", line_color=None, fill_alpha=0.5)
plot.add_glyph(source, circle)
#plot.scatter('x', 'y', radius='radius', fill_color='color', fill_alpha=0.5, line_color=None, source=source)

pan = PanTool()
wheel_zoom = WheelZoomTool()
box_select = BoxSelectTool()

plot.add_tools(pan, wheel_zoom, box_select)

# The glyph plots longitude on x and latitude on y, so label the axes to match
# (the labels were previously swapped).
xaxis = LinearAxis(axis_label="lon", major_tick_in=0, formatter=NumeralTickFormatter(format="0.000"))
plot.add_layout(xaxis, 'below')

yaxis = LinearAxis(axis_label="lat", major_tick_in=0, formatter=PrintfTickFormatter(format="%.3f"))
plot.add_layout(yaxis, 'left')

overlay = BoxSelectionOverlay(tool=box_select)
plot.add_layout(overlay)

doc = Document()
doc.add(plot)

# Write the plot to a standalone HTML file (resources inlined) and open it.
if __name__ == "__main__":
    filename = "weekend_b2pm.html"
    with open(filename, "w") as f:
        f.write(file_html(doc, INLINE, "Google Maps Example"))
    print("Wrote %s" % filename)
    view(filename)