In [1]:
import numpy as np # linear algebra
import pandas as pd 

In [2]:
import warnings

# Suppress every warning category for the rest of the kernel session so
# library deprecation chatter does not clutter the cell outputs.
warnings.simplefilter('ignore')

In [4]:
from pathlib import Path
from subprocess import check_output  # retained: other cells may still rely on this name

# Verify the cleaned data file is present before trying to load it.
# A pure-Python check works on every platform and avoids spawning `ls`.
data_path = Path("CleanData/data_clean.csv")
if not data_path.is_file():
    raise FileNotFoundError(f"Expected cleaned taxi data at {data_path}")
print(data_path)

CleanData/data_clean.csv



In [5]:
#Load Chicago Taxi trip data
%time
# We'll load some important columns only
df = pd.read_csv('CleanData/data_clean.csv',
                 usecols=['Trip Start Timestamp', 'Trip End Timestamp', 'Fare', 'Pickup Latitude', 'Pickup Longitude',
                          'Dropoff Latitude', 'Dropoff Longitude'])

CPU times: user 9 µs, sys: 16 µs, total: 25 µs
Wall time: 16.2 µs


In [6]:
# Preview the first rows to sanity-check the loaded columns and their values.
df.head()

Unnamed: 0,Trip Start Timestamp,Trip End Timestamp,Fare,Pickup Latitude,Pickup Longitude,Dropoff Latitude,Dropoff Longitude
0,12/31/2016 12:00:00 AM,12/31/2016 12:15:00 AM,14.5,41.899602,-87.633308,41.922686,-87.649489
1,12/31/2016 12:00:00 AM,12/31/2016 12:00:00 AM,7.5,41.922686,-87.649489,41.901207,-87.676356
2,01/01/2017 12:00:00 AM,01/01/2017 12:00:00 AM,3.25,41.901207,-87.676356,41.986712,-87.663416
3,12/31/2016 12:00:00 AM,12/31/2016 12:15:00 AM,9.25,41.975171,-87.687516,41.986712,-87.663416
4,12/31/2016 12:00:00 AM,12/31/2016 12:15:00 AM,8.75,41.965812,-87.655879,41.975171,-87.687516


In [7]:
# (rows, columns) of the loaded frame; the sampling and plotting cells depend on the row count.
df.shape

(816, 7)

In [8]:
from bokeh.plotting import figure, output_notebook, show # bokeh plotting library
# Route bokeh output to the notebook so that `show(...)` renders plots inline
# in the cell output.
output_notebook()

In [9]:
# Print the (latitude, longitude) minimum and then maximum for pickups and for
# dropoffs; these extents are what the plotting ranges have to cover.
for lat_col, lon_col in (('Pickup Latitude', 'Pickup Longitude'),
                         ('Dropoff Latitude', 'Dropoff Longitude')):
    lat, lon = df[lat_col], df[lon_col]
    print(lat.min(), lon.min())
    print(lat.max(), lon.max())

41.740205756 -87.913624596
42.009622881 -87.596183344
41.689729914000004 -87.80602000200001
42.009622881 -87.57278198700001


In [20]:
# Plot bounds for Chicago. x is longitude and y is latitude, matching the
# (longitude, latitude) column order the datashader cells pass to `cvs.points`.
# Each range is (min, max) and is wide enough to cover the pickup/dropoff
# extents printed above.
x_range = (-87.92, -87.57)  # longitude
y_range = (41.68, 42.01)    # latitude
# `CHI` is the short alias used by the export cell further down.
Chicago = CHI = (x_range, y_range)

In [21]:
# Canvas size in pixels; the height is derived from the width with a 1.2 aspect ratio.
plot_width = 750
plot_height = int(plot_width // 1.2)

def base_plot(tools='pan, wheel_zoom, reset', plot_width=plot_width, plot_height=plot_height, **plot_args):
    """Create a bare bokeh figure restricted to the global ``x_range`` / ``y_range``.

    The figure has no outline, zero minimum borders on every side and no grid
    lines.

    Parameters
    ----------
    tools : str
        Comma-separated bokeh tool names forwarded to ``figure``.
    plot_width, plot_height : int
        Figure size in pixels; default to the notebook-level constants.
    **plot_args
        Any further keyword arguments forwarded unchanged to ``figure``.

    Returns
    -------
    The configured bokeh figure.
    """
    zero_borders = dict.fromkeys(
        ('min_border', 'min_border_left', 'min_border_right',
         'min_border_top', 'min_border_bottom'),
        0,
    )
    fig = figure(
        tools=tools,
        plot_width=plot_width,
        plot_height=plot_height,
        x_range=x_range,
        y_range=y_range,
        outline_line_color=None,
        **zero_borders,
        **plot_args,
    )

    # Hide both grids.
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = None
    return fig

# Glyph styling shared by the scatter plots: small filled blue markers without an outline.
options = {'line_color': None, 'fill_color': 'blue', 'size': 5}

In [22]:
%%time
# let's plot 10k sample pickup
samples = df.sample(n=10000)
p = base_plot()

p.circle(x=samples['Pickup Latitude'], y=samples['Pickup Longitude'], **options)
show(p)

ValueError: Cannot take a larger sample than population when 'replace=False'

In [13]:
import datashader as ds
from datashader import transfer_functions as tr_fns
from datashader.colors import Greys9
# The last two entries of the Greys9 palette, in reversed order.
Greys9_r = list(Greys9[:-3:-1])

In [None]:
%%time
cvs = ds.Canvas(plot_width=plot_width, plot_height=plot_height, x_range=x_range, y_range=y_range)
agg = cvs.points(df, 'Dropoff Longitude', 'Dropoff Latitude')
img = tr_fns.shade(agg, cmap=["white", 'darkblue'], how='linear')

img

In [None]:
from datashader.bokeh_ext import InteractiveImage
from functools import partial
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from IPython.core.display import HTML, display

# Background colour shared by the exported images and the interactive plots.
background = "black"

# `export(img, name)` writes into the "export" directory using that background.
export = partial(export_image, background=background, export_path="export")

# Colormap selector; reversal is switched on only for the black background.
cm = partial(colormap_select, reverse=background == "black")

def create_image(x_range, y_range, w=plot_width, h=plot_height):
    """Render the dropoff points of ``df`` for the given view.

    Takes the ``(x_range, y_range, w, h)`` arguments so it can be handed to
    ``InteractiveImage`` as its image callback.

    Parameters
    ----------
    x_range, y_range : tuple
        Data-space bounds of the view along x and y.
    w, h : int
        Canvas size in pixels; default to the notebook-level plot size.

    Returns
    -------
    The aggregate shaded with the ``Hot`` colormap (``eq_hist``) and passed
    through ``dynspread``.
    """
    canvas = ds.Canvas(x_range=x_range, y_range=y_range, plot_width=w, plot_height=h)
    counts = canvas.points(df, 'Dropoff Longitude', 'Dropoff Latitude')
    shaded = tr_fns.shade(counts, cmap=Hot, how='eq_hist')
    return tr_fns.dynspread(shaded, threshold=0.5, max_px=4)

p = base_plot(background_fill_color=background)
# Unpack the `Chicago` bounds tuple (x_range, y_range) for the static export,
# then show the same rendering interactively.
export(create_image(*Chicago), "CHI_HOT")
InteractiveImage(p, create_image)

In [None]:
from functools import partial

def create_image90(x_range, y_range, w=plot_width, h=plot_height):
    """Render only the busiest dropoff pixels for the given view.

    Pixels whose dropoff count is not above the 90th percentile of the
    aggregate are masked out before shading.

    Parameters
    ----------
    x_range, y_range : tuple
        Data-space bounds of the view along x (longitude) and y (latitude).
    w, h : int
        Canvas size in pixels; default to the notebook-level plot size.

    Returns
    -------
    The masked aggregate shaded with the ``inferno`` colormap (``eq_hist``)
    and passed through ``dynspread``.
    """
    cvs = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    # Use the column names actually loaded into `df`. No passenger-count
    # column is read, so count the points per pixel instead.
    agg = cvs.points(df, 'Dropoff Longitude', 'Dropoff Latitude', ds.count())
    img = tr_fns.shade(agg.where(agg > np.percentile(agg, 90)), cmap=inferno, how='eq_hist')
    return tr_fns.dynspread(img, threshold=0.3, max_px=4)
    
p = base_plot()
# Export the 90th-percentile rendering itself (not the plain `create_image`)
# over the `Chicago` bounds tuple (x_range, y_range).
export(create_image90(*Chicago), "CHI_90th")
InteractiveImage(p, create_image90)

In [None]:
def merged_images(x_range, y_range, w=plot_width, h=plot_height, how='log'):
    """Render where pickups and dropoffs dominate, in two colour families.

    Pixels with more dropoffs than pickups are shaded in blues from the
    dropoff counts; pixels with more pickups than dropoffs are shaded in reds
    from the pickup counts. The two layers are stacked into one image.

    Parameters
    ----------
    x_range, y_range : tuple
        Data-space bounds of the view along x (longitude) and y (latitude).
    w, h : int
        Canvas size in pixels; default to the notebook-level plot size.
    how : str
        Interpolation method forwarded to ``tr_fns.shade`` for both layers.

    Returns
    -------
    The stacked image passed through ``dynspread``.
    """
    cvs = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    # Use the column names actually loaded into `df`. No passenger-count
    # column is read, so count the points per pixel instead.
    picks = cvs.points(df, 'Pickup Longitude', 'Pickup Latitude', ds.count())
    drops = cvs.points(df, 'Dropoff Longitude', 'Dropoff Latitude', ds.count())
    more_drops = tr_fns.shade(drops.where(drops > picks), cmap=["darkblue", 'cornflowerblue'], how=how)
    # The pickup-dominated layer must be shaded from the pickup counts.
    more_picks = tr_fns.shade(picks.where(picks > drops), cmap=["darkred", 'orangered'], how=how)
    img = tr_fns.stack(more_picks, more_drops)
    return tr_fns.dynspread(img, threshold=0.3, max_px=4)

p = base_plot(background_fill_color=background)
# Export over the `Chicago` bounds tuple (x_range, y_range), then show the
# same rendering interactively.
export(merged_images(*Chicago), "CHI_pickups_vs_drops")
InteractiveImage(p, merged_images)