In [1]:
import numpy as np # linear algebra
import pandas as pd

In [2]:
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the plotting libraries.
warnings.filterwarnings('ignore')

In [3]:
from subprocess import check_output
# Confirm the cleaned CSV is where the notebook expects it; check_output raises
# CalledProcessError if `ls` exits non-zero.
ls_output = check_output(["ls", "CleanData/data_clean.csv"])
print(ls_output.decode("utf8"))

CleanData/data_clean.csv



In [4]:
#Load Chicago Taxi trip data
%time
# We'll load some important columns only
df = pd.read_csv('CleanData/data_clean.csv',
                 usecols=['trip_start_timestamp', 'trip_end_timestamp', 'fare', 'pickup_longitude','pickup_latitude',
                          'dropoff_longitude','dropoff_latitude', ])

CPU times: user 6 µs, sys: 4 µs, total: 10 µs
Wall time: 16.2 µs


In [5]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,trip_start_timestamp,trip_end_timestamp,fare,pickup_latitude,pickup_longitude,dropoff_latitude,dropoff_longitude
0,12/31/2016 1:30,12/31/2016 1:45,6.25,41.901207,-87.676356,41.901207,-87.676356
1,12/31/2016 22:15,12/31/2016 22:30,6.75,41.922761,-87.699155,41.922761,-87.699155
2,12/31/2016 19:45,12/31/2016 20:00,4.75,41.965812,-87.655879,41.965812,-87.655879
3,12/31/2016 23:15,12/31/2016 23:30,8.0,41.885281,-87.657233,41.893216,-87.637844
4,12/31/2016 0:15,12/31/2016 0:30,8.25,41.899602,-87.633308,41.899602,-87.633308


In [6]:
# Number of distinct pickup latitudes present in the data.
len(pd.unique(df['pickup_latitude']))

174

In [7]:
# (rows, columns) of the loaded frame.
df.shape

(28561, 7)

In [8]:
from bokeh.plotting import figure, output_notebook, show # bokeh plotting library
# Render Bokeh figures inline, in the output cells of this notebook.
output_notebook()

In [9]:
# Bounding box of the coordinates: for pickups, then dropoffs, print
# "min_longitude min_latitude" followed by "max_longitude max_latitude".
for kind in ('pickup', 'dropoff'):
    lon = df[kind + '_longitude']
    lat = df[kind + '_latitude']
    print(lon.min(), lat.min())
    print(lon.max(), lat.max())

-87.91362459999998 41.6738199
-87.5514282 42.00962288
-87.91362459999998 41.6738199
-87.5349029 42.00962288


In [10]:
# Viewport used by every map below, as (min, max) pairs.
x_range = (-87.92, -87.55)  # longitude bounds
y_range = (41.68, 42.00)    # latitude bounds
Chicago = (x_range, y_range)

In [11]:
# Figure size shared by the Bokeh plots and the datashader canvases.
plot_width = 750
# Height follows from the width at a 1.2 width-to-height ratio, floored to an int.
plot_height = int(plot_width // 1.2)

def base_plot(tools='pan, wheel_zoom, reset', plot_width=plot_width, plot_height=plot_height, **plot_args):
    """Create an empty, borderless Bokeh figure framed on the notebook viewport.

    Parameters
    ----------
    tools : str
        Comma-separated Bokeh tool names to attach to the figure.
    plot_width, plot_height : int
        Figure size passed to ``figure``; default to the notebook-level values.
    **plot_args
        Extra keyword arguments forwarded unchanged to ``figure``.

    Returns
    -------
    The new figure, with its axes set to the global ``x_range`` / ``y_range``,
    no outline, zero minimum borders and both grids hidden.
    """
    # All five border settings are zero so the plot area fills the whole figure.
    borders = dict.fromkeys(
        ('min_border', 'min_border_left', 'min_border_right',
         'min_border_top', 'min_border_bottom'),
        0,
    )
    fig = figure(tools=tools, plot_width=plot_width, plot_height=plot_height,
                 x_range=x_range, y_range=y_range, outline_line_color=None,
                 **borders, **plot_args)

    # Hide the grid lines on both axes.
    for grid in (fig.xgrid, fig.ygrid):
        grid.grid_line_color = None
    return fig

# Glyph styling shared by the scatter plots: borderless blue markers of size 5.
options = {'line_color': None, 'fill_color': 'blue', 'size': 5}

In [12]:
%%time
# let's plot 10k sample pickup
samples = df.sample(n=10000)
p = base_plot()

p.circle(x=samples['pickup_longitude'], y=samples['pickup_latitude'], **options)
show(p)

CPU times: user 72.8 ms, sys: 6.79 ms, total: 79.6 ms
Wall time: 79 ms


In [13]:
%%time
# Again, let's plot 10k sample dropoff
samples = df.sample(n=10000)
p = base_plot()

p.circle(x=samples['dropoff_longitude'], y=samples['dropoff_latitude'], **options)
show(p)

CPU times: user 67.3 ms, sys: 4.33 ms, total: 71.6 ms
Wall time: 71.1 ms


In [14]:
import datashader as ds
from datashader import transfer_functions as tr_fns
from datashader.colors import Greys9
# Reverse the Greys9 palette and keep only its first two entries.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
Greys9_r = list(reversed(Greys9))[:2]

In [15]:
%%time
cvs = ds.Canvas(plot_width=plot_width, plot_height=plot_height, x_range=x_range, y_range=y_range)
agg = cvs.points(df, 'dropoff_longitude', 'dropoff_latitude')
img = tr_fns.shade(agg, cmap=["white",'darkblue'], how='linear')

img

CPU times: user 307 ms, sys: 19.8 ms, total: 327 ms
Wall time: 336 ms


In [16]:
# Count trips per distinct dropoff location.
# reset_index turns the grouped count straight into a frame, so every count stays
# attached to its own (longitude, latitude) key instead of relying on
# `groups.keys()` and `.count()` iterating in the same order.
df_group = df.groupby(['dropoff_longitude','dropoff_latitude'])
# count() tallies the non-null trip_start_timestamp values in each group.
temp_df = df_group['trip_start_timestamp'].count().reset_index(name='count')
temp_df.head(10)

Unnamed: 0,dropoff_longitude,dropoff_latitude,count
0,-87.913625,41.980264,129
1,-87.90304,41.979071,199
2,-87.877305,41.982775,4
3,-87.818042,41.988967,1
4,-87.813781,42.007613,7
5,-87.80602,41.946511,14
6,-87.804532,41.985015,41
7,-87.798032,41.929297,4
8,-87.771167,41.97883,36
9,-87.769615,41.792592,89


In [17]:
# Peek at the per-location trip counts.
temp_df['count'].head()

0    129
1    199
2      4
3      1
4      7
Name: count, dtype: int64

In [None]:
from datashader.bokeh_ext import InteractiveImage
from functools import partial
from datashader.utils import export_image
from datashader.colors import colormap_select, Greys9, Hot, viridis, inferno
from IPython.core.display import HTML, display

# Background colour shared by the plots and exported images below.
background = "black"
# export_image pre-bound to write under the "export" path with that background.
export = partial(export_image, export_path="export", background=background)
# colormap_select pre-bound to reverse the colormap when the background is black.
cm = partial(colormap_select, reverse=(background=="black"))

def create_image(x_range, y_range, w=plot_width, h=plot_height):
    """Render the per-location dropoff counts in ``temp_df`` as a Hot-coloured image.

    Parameters
    ----------
    x_range, y_range : tuple
        (min, max) bounds of the region to rasterise.
    w, h : int
        Canvas size; default to the notebook-level plot size.

    Returns
    -------
    The shaded image after ``dynspread`` has been applied.
    """
    cvs = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    # temp_df holds one row per location with its trip total in 'count', so the
    # per-pixel value must be the SUM of that column. ds.count('count') would only
    # count rows (i.e. distinct locations) and discard the trip totals entirely.
    agg = cvs.points(temp_df, 'dropoff_longitude', 'dropoff_latitude', ds.sum('count'))
    img = tr_fns.shade(agg, cmap=Hot, how='eq_hist')
    # NOTE(review): threshold=1 with max_px=1000 permits very heavy spreading —
    # presumably so the sparse locations show up as visible blobs; confirm.
    return tr_fns.dynspread(img, threshold=1, max_px=1000)

# Figure with the dark background, to host the interactive datashader image.
p = base_plot(background_fill_color=background)
# Save a static render of the full Chicago viewport under the name "Chicago_HOT".
export(create_image(*Chicago), "Chicago_HOT")
# InteractiveImage takes create_image as its callback (presumably re-invoked on pan/zoom — confirm).
InteractiveImage(p, create_image)

In [None]:
from functools import partial

def create_image90(x_range, y_range, w=plot_width, h=plot_height):
    """Render only the dropoff pixels whose count exceeds the 90th percentile.

    Parameters
    ----------
    x_range, y_range : tuple
        (min, max) bounds of the region to rasterise.
    w, h : int
        Canvas size; default to the notebook-level plot size.

    Returns
    -------
    The inferno-shaded image after ``dynspread`` has been applied.
    """
    canvas = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    counts = canvas.points(df, 'dropoff_longitude', 'dropoff_latitude')
    # The percentile is taken over all pixels of the aggregate.
    cutoff = np.percentile(counts, 90)
    busiest = counts.where(counts > cutoff)
    shaded = tr_fns.shade(busiest, cmap=inferno, how='eq_hist')
    return tr_fns.dynspread(shaded, threshold=0.3, max_px=4)
    
p = base_plot()
# Export the 90th-percentile rendering itself: the file is named "Chicago_90th",
# so it must come from create_image90, not from the unfiltered create_image.
export(create_image90(*Chicago), "Chicago_90th")
InteractiveImage(p, create_image90)

In [None]:
def merged_images(x_range, y_range, w=plot_width, h=plot_height, how='log'):
    """Colour each pixel by whether pickups or dropoffs dominate there.

    Pixels with more dropoffs than pickups are shaded in blues by dropoff count;
    pixels with more pickups than dropoffs are shaded in reds by pickup count.

    Parameters
    ----------
    x_range, y_range : tuple
        (min, max) bounds of the region to rasterise.
    w, h : int
        Canvas size; default to the notebook-level plot size.
    how : str
        Interpolation method forwarded to ``shade``.

    Returns
    -------
    The stacked image after ``dynspread`` has been applied.
    """
    cvs = ds.Canvas(plot_width=w, plot_height=h, x_range=x_range, y_range=y_range)
    picks = cvs.points(df, 'pickup_longitude', 'pickup_latitude')
    drops = cvs.points(df, 'dropoff_longitude', 'dropoff_latitude')
    if picks.dims != drops.dims:
        # Some datashader versions name the aggregate's dims after the source
        # columns; differently named dims would make `drops > picks` broadcast
        # instead of comparing pixel by pixel. Give both the same names.
        # Assumes dims are ordered (rows, columns) — TODO confirm.
        picks = picks.rename(dict(zip(picks.dims, ('y', 'x'))))
        drops = drops.rename(dict(zip(drops.dims, ('y', 'x'))))
    more_drops = tr_fns.shade(drops.where(drops > picks), cmap=["darkblue", 'cornflowerblue'], how=how)
    # Pickup-dominated pixels are shaded by the PICKUP counts (previously the
    # dropoff counts were used here, so the red layer showed the wrong magnitudes).
    more_picks = tr_fns.shade(picks.where(picks > drops), cmap=["darkred", 'orangered'], how=how)
    img = tr_fns.stack(more_picks, more_drops)
    return tr_fns.dynspread(img, threshold=0.3, max_px=4)

# Figure with the dark background, to host the pickup-vs-dropoff image.
p = base_plot(background_fill_color=background)
# Save a static render of the full Chicago viewport under the name "Chicago_pickups_vs_drops".
export(merged_images(*Chicago), "Chicago_pickups_vs_drops")
# InteractiveImage takes merged_images as its callback (presumably re-invoked on pan/zoom — confirm).
InteractiveImage(p, merged_images)

In [None]:
# Sanity check: x_range should be a 2-element (min, max) pair.
len(x_range)

In [None]:
# Display the x-axis bounds currently in use.
x_range