## Import statements and load data

In [39]:
import datetime as dt
import os

import cartopy.crs as ccrs
import matplotlib.cm as cm
import numpy as np
import pandas as pd
import requests

from cartopy.io.img_tiles import MapboxTiles
from matplotlib import animation
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
from matplotlib.patches import Rectangle
from polyline.codec import PolylineCodec

%matplotlib inline

In [3]:
# Path of the trip CSV that is streamed in chunks further down
INPUT_FILE = '../data/trip_data_1_full.csv'

# strftime-style timestamp pattern
DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

# How many time slices one day (1440 minutes) is divided into
NFRAMES = 24

# Upper bound on trip duration in seconds; longer trips are dropped
TRIP_TIME_CUTOFF = 1500

# Bounding box used for location-based filtering
LAT_MIN = 40.76041
LAT_MAX = 40.76507
LON_MIN = -73.98164
LON_MAX = -73.97653

# Same box in [lon_min, lon_max, lat_min, lat_max] order, as filter_df indexes it
RECT = [LON_MIN, LON_MAX, LAT_MIN, LAT_MAX]

def filter_df(df, rect, dir='pickup'):
    ''' Keeps the rows of a dataframe whose point lies inside a bounding box

    df:   dataframe with '<dir>_longitude' and '<dir>_latitude' columns
    rect: [lon_min, lon_max, lat_min, lat_max]; all four bounds are inclusive
    dir:  column prefix selecting which pair of coordinate columns is tested
    '''
    lon = df['{}_longitude'.format(dir)]
    lat = df['{}_latitude'.format(dir)]

    inside = (lon >= rect[0]) & (lon <= rect[1])
    inside = inside & (lat >= rect[2]) & (lat <= rect[3])
    return df[inside]

# Load and filter data without reading entire csv at once: every 10000-row
# chunk is filtered on its own and only the surviving rows are concatenated.
data_iterator = pd.read_csv(INPUT_FILE, iterator=True, chunksize=10000)
td = pd.concat((filter_df(df, RECT) for df in data_iterator), ignore_index=True)

# Parenthesised single-argument form prints the same text on Python 2 and 3
print('Data size: ' + str(len(td)))

Data size: 188392


In [4]:
# Cut off long tail
td = td[td['trip_time_in_secs'] < TRIP_TIME_CUTOFF]
# Builtin int instead of the np.int alias, which NumPy 1.24 removed
trip_time_full = np.asarray(td['trip_time_in_secs'], dtype=int)

# Plot trip time histogram
plt.hist(trip_time_full, bins=100)
plt.xlabel('Trip time in seconds')
plt.ylabel('Number of trips')
plt.show()

<matplotlib.figure.Figure at 0x7f5123c42710>

In [5]:
# Work on an independent copy: td comes out of a boolean filter, and adding
# columns to such a slice can trigger pandas' SettingWithCopyWarning.
td = td.copy()

# Convert to correct datetime
td['pickup_datetime'] = pd.to_datetime(td['pickup_datetime'])
td['dropoff_datetime'] = pd.to_datetime(td['dropoff_datetime'])

# Create datetime index
tdi = td.set_index(pd.DatetimeIndex(td['pickup_datetime']))

# Computes time intervals: start times spread over the 1440 minutes of a day.
# Floor division keeps every argument an integer on both Python 2 and 3.
minutes_per_frame = 1440 // NFRAMES
starts = [dt.time(m // 60, m % 60) for m in range(0, 1440, minutes_per_frame)]
# Each slice ends where the next one starts; the last one wraps to the first
ends = starts[1:] + [starts[0]]

# Plots trips against time
# NOTE: between_time includes both endpoints by default, so a pickup that
# falls exactly on a slice boundary is counted in two adjacent slices.
trips = [tdi.between_time(s, e) for s, e in zip(starts, ends)]
# A real list (not a lazy map object) so the counts can be plotted on Python 3
plt.plot(starts, [len(t) for t in trips])
plt.xticks(rotation='vertical')
plt.show()

<matplotlib.figure.Figure at 0x7f514c0f3950>

## Plot a map color-coded by trip time

In [34]:
# Add a map using a mapbox template
# The access token can be supplied through the MAPBOX_ACCESS_TOKEN environment
# variable so that it does not have to live in the notebook.
# NOTE(review): the literal below is kept only as a backward-compatible
# fallback; consider rotating it and dropping it from the source.
mapbox_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoiY2hlbnlhbmciLCJhIjoiNjAwYTI1MDExNDdmYzVkZWY0M2NkNjQyMDJkZjhkOTMifQ.G7P8sFypgRqFW2QNaEZocQ')
mapbox_mapid = 'chenyang.e39eae3e'
mapbox_tiles = MapboxTiles(mapbox_token, mapbox_mapid)
# [lon_min, lon_max, lat_min, lat_max], the order in which draw_frame hands it to ax.set_extent
manhattan_rect = [-74.025, -73.92, 40.70, 40.85]

def draw_frame(frame_no=None):
    ''' Draws the dropoff locations of one time slice onto the current figure.

    The current figure is cleared and redrawn from scratch: map tiles, the
    pickup bounding box and one dot per trip coloured by its trip time.

    frame_no: index into the module-level `trips` / `starts` lists, or None
              to plot every row of `td` under the title time 'All'.
    '''
    # Create new axes, using the tiles' projection for the underlying map.
    plt.clf()
    ax = plt.axes(projection=mapbox_tiles.crs)

    # Load data for current frame
    data = td if frame_no is None else trips[frame_no]
    # Builtin float/int replace the np.float/np.int aliases removed in NumPy 1.24
    dropoff_lat = np.asarray(data['dropoff_latitude'], dtype=float)
    dropoff_lon = np.asarray(data['dropoff_longitude'], dtype=float)
    trip_time = np.asarray(data['trip_time_in_secs'], dtype=int)

    # Add the tiles at zoom level 13.
    ax.add_image(mapbox_tiles, 13)

    # Specify a region of interest, in this case, Manhattan.
    ax.set_extent(manhattan_rect, ccrs.PlateCarree())

    # Plot the origin area
    ax.add_patch(Rectangle((LON_MIN, LAT_MIN), LON_MAX-LON_MIN, LAT_MAX-LAT_MIN,
                 facecolor="blue", alpha=0.3, lw=0,
                 transform=ccrs.PlateCarree()))

    # Plot destinations, color coded by travel times
    sc = ax.scatter(dropoff_lon, dropoff_lat,
                    marker='.', s=15, lw=0,
                    c=trip_time, cmap=cm.RdYlGn_r, vmin=0, vmax=TRIP_TIME_CUTOFF,
                    transform=ccrs.PlateCarree())

    # Scale and labels
    cur_time = starts[frame_no] if frame_no is not None else 'All'
    plt.title('Dropoff Locations at Time: {}'.format(cur_time))
    plt.colorbar(sc).set_label('Trip time in seconds')

In [None]:
# Render and save animation
plt.clf()
fig = plt.figure(figsize=(16, 16))
# draw_frame is the per-frame callback; frames=NFRAMES feeds it 0 .. NFRAMES-1
anim = animation.FuncAnimation(fig=fig, func=draw_frame, frames=NFRAMES)
# Written as a GIF through the imagemagick writer at 4 frames per second
anim.save('animation.gif', fps=4, writer='imagemagick')

In [35]:
# Plot all trips: frame_no=None makes draw_frame use the whole of td
fig = plt.figure(figsize=(16, 16))
draw_frame(frame_no=None)

<matplotlib.figure.Figure at 0x7f51235c50d0>

In [76]:
# Testing OSRM
# Base URL of the OSRM routing server that get_route queries (loopback address)
OSRM_SERVER = 'http://127.0.0.1:5000'

def decode(geom):
    ''' Turns an encoded polyline string into a list of coordinate pairs.

    Every decoded value is divided by 10 to compensate for the precision
    the polyline library works with (the geometry presumably carries 6
    decimal places, one more than the library assumes -- TODO confirm).
    '''
    pairs = []
    for first, second in PolylineCodec().decode(geom):
        pairs.append((first / 10, second / 10))
    return pairs

def get_route(row, timeout=30):
    ''' Asks the OSRM server for the route of one trip.

    row:     mapping with pickup_latitude, pickup_longitude,
             dropoff_latitude and dropoff_longitude entries
    timeout: seconds to wait for the server before requests gives up

    Returns the route geometry of the response, run through decode().
    Raises KeyError when the response has no 'route_geometry' entry (the
    response is printed first); errors raised by requests propagate as is.
    '''
    plat, plon = row['pickup_latitude'], row['pickup_longitude']
    dlat, dlon = row['dropoff_latitude'], row['dropoff_longitude']

    url = '{}/viaroute?loc={},{}&loc={},{}'.format(OSRM_SERVER, plat, plon, dlat, dlon)
    # Without a timeout a stalled server would block the notebook indefinitely
    res = requests.get(url, timeout=timeout).json()
    if 'route_geometry' not in res:
        # Show the server's answer so the KeyError below can be diagnosed
        print(res)
    return decode(res['route_geometry'])

def make_segments(x, y):
    '''Pairs up consecutive points into line segments for LineCollection.

    x, y: equally long sequences of coordinates
    Returns an array shaped (number of segments, 2 points, 2 coordinates),
    where segment i runs from point i to point i + 1.
    '''
    # One row per point, with an extra middle axis so that the start and end
    # point of a segment can be joined along it
    coords = np.transpose(np.array([x, y])).reshape(-1, 1, 2)
    seg_starts = coords[:-1]
    seg_ends = coords[1:]
    return np.concatenate((seg_starts, seg_ends), axis=1)


plt.figure(figsize=(15, 15))
# Colour values are fractions of TRIP_TIME_CUTOFF, so they are normalised on [0, 1]
norm = plt.Normalize(0.0, 1.0)
ax = plt.axes(projection=mapbox_tiles.crs)

# NOTE(review): xlim takes the latitude half of manhattan_rect and ylim the
# longitude half, matching x/y below where x is the first component of each
# route point -- confirm that plotting latitude on the x axis is intended.
plt.xlim(manhattan_rect[2:])
plt.ylim(manhattan_rect[:2])

# Specify a region of interest, in this case, Manhattan.
#ax.set_extent(manhattan_rect, mapbox_tiles.crs)

# Add the tiles at zoom level 13.
#ax.add_image(mapbox_tiles, 13)

# Draw the routed path of every trip in time slice 5
for _, row in trips[5].iterrows():
    try:
        x, y = zip(*get_route(row))
    except Exception:
        # Best effort: a trip whose route cannot be fetched or unpacked is
        # skipped, while KeyboardInterrupt is still allowed to stop the loop
        continue
    # Colour runs from 0 at the start of the route up to the trip time as a
    # fraction of the cutoff at its end
    z = np.linspace(0.0, row['trip_time_in_secs'] / float(TRIP_TIME_CUTOFF), len(x))
    lc = LineCollection(make_segments(x, y), array=z, cmap=cm.RdYlGn_r, norm=norm)
    lc.set_transform(mapbox_tiles.crs)
    ax.add_collection(lc)

plt.show()


<matplotlib.figure.Figure at 0x7f51208c1d50>