# Geo analytics AAA time series - Intentionally Blank

In [1]:
import json

import pandas as pd
import numpy as np

import folium
import h3
import matplotlib.pyplot as plt
import geopandas as gpd
import branca.colormap as cm
import shapely
from shapely.geometry import shape
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'iframe_connected'

In [2]:
# Load the prepared trips and parse both WKT centroid columns into geometries,
# then read the census tract border shapes.
taxi_df = pd.read_parquet('data/prepared/taxi_data_prepared.gzip')
for centroid_column in ('pickup_centroid_location', 'dropoff_centroid_location'):
    taxi_df[centroid_column] = gpd.GeoSeries.from_wkt(taxi_df[centroid_column])
census_tract_borders = gpd.read_file('data/chicago_census_tract_borders.zip')

In [3]:
# Keep only the tract identifier and its border geometry.
columns_to_discard = census_tract_borders.columns.difference(['GEOID', 'geometry'])
census_tract_borders = census_tract_borders.drop(columns=columns_to_discard)

In [4]:
# Restrict the borders to tracts that occur as a pickup or dropoff tract in the trips.
pickup_tract_ids = taxi_df['pickup_census_tract'].unique()
dropoff_tract_ids = taxi_df['dropoff_census_tract'].unique()
unique_census_tract_id = np.append(pickup_tract_ids, dropoff_tract_ids).astype('str')
has_trips = census_tract_borders['GEOID'].isin(unique_census_tract_id)
census_tract_borders = census_tract_borders[has_trips].reset_index(drop=True)
census_tract_borders

Unnamed: 0,GEOID,geometry
0,17031221000,"POLYGON ((-87.71699 41.92460, -87.71675 41.924..."
1,17031221100,"POLYGON ((-87.71374 41.92464, -87.71366 41.924..."
2,17031242800,"POLYGON ((-87.69670 41.89565, -87.69646 41.895..."
3,17031242900,"POLYGON ((-87.68684 41.89574, -87.68635 41.895..."
4,17031243000,"POLYGON ((-87.68195 41.89583, -87.68146 41.895..."
...,...,...
686,17031191000,"POLYGON ((-87.74630 41.92424, -87.74606 41.924..."
687,17031560400,"POLYGON ((-87.74322 41.80762, -87.74316 41.807..."
688,17031550100,"POLYGON ((-87.54958 41.65143, -87.54958 41.651..."
689,17031283200,"POLYGON ((-87.66172 41.87211, -87.66120 41.872..."


In [5]:
def plotByFeatureTime(dataframe, location='pickup', feature='all', aggregation='sum', time_interval='hour', start_time = True):
    """ Aggregate trips per census tract and time bucket and attach the tract borders.

    Parameters
    ----------

    dataframe :  (pandas.DataFrame)
        The trip data. Must contain the pickup/dropoff census tract and centroid columns as well as
        'trip_start_timestamp' and 'trip_end_timestamp' (datetime columns).
        Rows with a missing value in any column are ignored.
    location : (str)
        The location to group by. Can be either 'pickup' or 'dropoff'. Default is 'pickup'.
    feature : (str)
        The feature column to aggregate. If 'all', all features are aggregated. Default is 'all'
    aggregation : (str)
        The aggregation function applied to the feature columns. Can be either 'mean', 'median', 'sum',
        'count', 'min', 'max'. Default is 'sum'. The trip_count column is always a count.
    time_interval : (str)
        The time bucket to group by. Can be either 'hour', 'month' or 'weekday'. Default is 'hour'.
    start_time : (bool)
        If True the bucket is taken from 'trip_start_timestamp', otherwise from 'trip_end_timestamp'.
        Default is True.

    Returns
    ----------

    dataframe_grouped : (geopandas.GeoDataFrame)
        One row per census tract (column 'GEOID') and time bucket, sorted by the time bucket.
        Contains always a geometry column and trip_count column.

    Raises
    ----------

    ValueError
        If location or time_interval is not one of the supported values, or if feature is not a
        column that is available for aggregation.

    Notes
    ----------

    The tract borders are read from the notebook-level variable census_tract_borders.
    """
    if location not in ('pickup', 'dropoff'):
        raise ValueError("Location must be either 'pickup' or 'dropoff'.")
    if time_interval not in ('hour', 'month', 'weekday'):
        raise ValueError("Time interval must be either 'hour', 'month' or 'weekday'.")

    tract_column = location + '_census_tract'
    centroid_column = location + '_centroid_location'
    unused_location = 'dropoff' if location == 'pickup' else 'pickup'
    if start_time:
        timestamp_column, unused_timestamp = 'trip_start_timestamp', 'trip_end_timestamp'
    else:
        timestamp_column, unused_timestamp = 'trip_end_timestamp', 'trip_start_timestamp'

    # Incomplete rows are removed on the full frame so trip counts do not depend on `feature`.
    dataframe_grouped = dataframe.dropna().drop(
        columns=[unused_location + '_census_tract', unused_location + '_centroid_location', unused_timestamp]
    )

    if feature != 'all':
        if feature not in dataframe_grouped.columns:
            raise ValueError(f"Unknown feature column: {feature!r}.")
        # dict.fromkeys removes duplicates while keeping the order.
        selected_columns = list(dict.fromkeys([tract_column, centroid_column, timestamp_column, feature]))
        dataframe_grouped = dataframe_grouped[selected_columns]

    # The supported interval names match the pandas `.dt` accessor attributes.
    time_bucket = getattr(dataframe_grouped[timestamp_column].dt, time_interval)
    # The timestamp column is reused as the counter: counting it per group yields the trip count.
    dataframe_grouped = dataframe_grouped.assign(**{time_interval: time_bucket}).rename(
        columns={timestamp_column: 'trip_count'}
    )

    def aggregate_column(column):
        # Count the trips, apply the requested aggregation to every other column.
        if column.name == 'trip_count':
            return column.agg('count')
        return column.agg(aggregation)

    dataframe_grouped = (
        dataframe_grouped
        .groupby([tract_column, centroid_column, time_interval])
        .agg(aggregate_column)
        .reset_index()
    )
    dataframe_grouped["trip_count"] = dataframe_grouped["trip_count"].astype("int32")
    dataframe_grouped = dataframe_grouped.rename(columns={tract_column: 'GEOID'})
    # The border GEOIDs are matched as strings.
    dataframe_grouped['GEOID'] = dataframe_grouped['GEOID'].astype('str')
    dataframe_grouped = dataframe_grouped.merge(census_tract_borders, on='GEOID', how='left')
    dataframe_grouped = gpd.GeoDataFrame(dataframe_grouped)
    return dataframe_grouped.sort_values(by=[time_interval])

In [6]:
# Pickup tracts, bucketed by hour of the trip start.
interactive_Plot_Pickup_hourly = plotByFeatureTime(
    taxi_df,
    location='pickup',
    feature='all',
    aggregation='sum',
    time_interval='hour',
    start_time=True,
)

In [7]:
# Dropoff tracts, bucketed by hour of the trip start.
interactive_Plot_Dropoff_hourly = plotByFeatureTime(
    taxi_df,
    location='dropoff',
    feature='all',
    aggregation='sum',
    time_interval='hour',
    start_time=True,
)

In [None]:
viridis = px.colors.sequential.Viridis
# Options shared by the pickup and the dropoff map. The colour stops are placed at very
# small fractions of the colour range.
hourly_map_options = dict(
    featureidkey="properties.GEOID",
    color='trip_count',
    animation_frame='hour',
    mapbox_style='open-street-map',
    zoom=9.5,
    color_continuous_scale=[
        [0, viridis[0]],
        [1./1000000, viridis[2]],
        [1./10000, viridis[4]],
        [1./100, viridis[7]],
        [1., viridis[9]],
    ],
    opacity=0.5,
    range_color=[0, 300000],
    center={'lat': 41.84, 'lon': -87.723177},
    labels={'trip_count':'trip_count'},
    width=800,
    height=800,
)
hourly_maps = [
    (interactive_Plot_Pickup_hourly, 'Trip count by pickup location for each hour of day - Census tract borders'),
    (interactive_Plot_Dropoff_hourly, 'Trip count by dropoff location for each hour of day - Census tract borders'),
]
for hourly_frame, hourly_title in hourly_maps:
    fig = px.choropleth_mapbox(
        hourly_frame,
        # One border feature per tract is enough for the map layer.
        geojson=hourly_frame[['GEOID', 'geometry']].drop_duplicates(subset=['GEOID']),
        locations=hourly_frame['GEOID'],
        **hourly_map_options,
    )
    fig.update_layout(title_text=hourly_title, title_x=0.5)
    fig.show()

In [9]:
# Pickup tracts, bucketed by month of the trip start.
interactive_Plot_Pickup_monthly = plotByFeatureTime(
    taxi_df,
    location='pickup',
    feature='all',
    aggregation='sum',
    time_interval='month',
    start_time=True,
)

In [10]:
# Dropoff tracts, bucketed by month of the trip start.
interactive_Plot_Dropoff_monthly = plotByFeatureTime(
    taxi_df,
    location='dropoff',
    feature='all',
    aggregation='sum',
    time_interval='month',
    start_time=True,
)

In [11]:
viridis = px.colors.sequential.Viridis
# Options shared by the pickup and the dropoff map. The colour stops are placed at very
# small fractions of the colour range.
monthly_map_options = dict(
    featureidkey="properties.GEOID",
    color='trip_count',
    animation_frame='month',
    mapbox_style='open-street-map',
    zoom=9.5,
    color_continuous_scale=[
        [0, viridis[0]],
        [1./1000000, viridis[2]],
        [1./10000, viridis[4]],
        [1./100, viridis[7]],
        [1., viridis[9]],
    ],
    opacity=0.5,
    range_color=[0, 300000],
    center={'lat': 41.84, 'lon': -87.723177},
    labels={'trip_count':'trip_count'},
    width=800,
    height=800,
)
monthly_maps = [
    (interactive_Plot_Pickup_monthly, 'Trip count by pickup location for each month of year - Census tract borders'),
    (interactive_Plot_Dropoff_monthly, 'Trip count by dropoff location for each month of year - Census tract borders'),
]
for monthly_frame, monthly_title in monthly_maps:
    fig = px.choropleth_mapbox(
        monthly_frame,
        # One border feature per tract is enough for the map layer.
        geojson=monthly_frame[['GEOID', 'geometry']].drop_duplicates(subset=['GEOID']),
        locations=monthly_frame['GEOID'],
        **monthly_map_options,
    )
    fig.update_layout(title_text=monthly_title, title_x=0.5)
    fig.show()

In [12]:
# Pickup tracts, bucketed by weekday of the trip start.
interactive_Plot_Pickup_weekday = plotByFeatureTime(
    taxi_df,
    location='pickup',
    feature='all',
    aggregation='sum',
    time_interval='weekday',
    start_time=True,
)

In [13]:
# Dropoff tracts, bucketed by weekday of the trip start.
interactive_Plot_Dropoff_weekday = plotByFeatureTime(
    taxi_df,
    location='dropoff',
    feature='all',
    aggregation='sum',
    time_interval='weekday',
    start_time=True,
)

In [14]:
viridis = px.colors.sequential.Viridis
# Options shared by the pickup and the dropoff map. The colour stops are placed at very
# small fractions of the colour range.
weekday_map_options = dict(
    featureidkey="properties.GEOID",
    color='trip_count',
    animation_frame='weekday',
    mapbox_style='open-street-map',
    zoom=9.5,
    color_continuous_scale=[
        [0, viridis[0]],
        [1./1000000, viridis[2]],
        [1./10000, viridis[4]],
        [1./100, viridis[7]],
        [1., viridis[9]],
    ],
    opacity=0.5,
    range_color=[0, 300000],
    center={'lat': 41.84, 'lon': -87.723177},
    labels={'trip_count':'trip_count'},
    width=800,
    height=800,
)
# The dropoff map must be fed the dropoff aggregate; it previously re-plotted the pickup data.
weekday_maps = [
    (interactive_Plot_Pickup_weekday, 'Trip count by pickup location for each weekday - Census tract borders'),
    (interactive_Plot_Dropoff_weekday, 'Trip count by dropoff location for each weekday - Census tract borders'),
]
for weekday_frame, weekday_title in weekday_maps:
    fig = px.choropleth_mapbox(
        weekday_frame,
        # One border feature per tract is enough for the map layer.
        geojson=weekday_frame[['GEOID', 'geometry']].drop_duplicates(subset=['GEOID']),
        locations=weekday_frame['GEOID'],
        **weekday_map_options,
    )
    fig.update_layout(title_text=weekday_title, title_x=0.5)
    fig.show()

In [15]:
def plotH3_HexagonMapInteractive(dataframe, location='pickup', feature='all', aggregation='sum', hexRes = 10, time_interval='hour'):
    """ Aggregate trips per H3 hexagon and time bucket for an animated map.

    The trips are first aggregated per census tract with plotByFeatureTime. Every tract is then
    assigned to the hexagon that contains its centroid, and the tract rows are aggregated per
    hexagon. Hexagons that cover a tract border but contain no centroid are added as rows
    without data.

    Parameters
    ----------

    dataframe :  (pandas.DataFrame)
        The trip data, as expected by plotByFeatureTime.
    location : (str)
        The location to group by. Can be either 'pickup' or 'dropoff'. Default is 'pickup'.
    feature : (str)
        The feature to aggregate. If 'all', all features are aggregated. Default is 'all'
    aggregation : (str)
        The aggregation function applied to the feature columns. Can be either 'mean', 'median', 'sum',
        'count', 'min', 'max'. Default is 'sum'. The trip_count column is always summed.
    hexRes : (int)
        H3 hexagon resolution size. Default is 10.
    time_interval : (str)
        The time bucket to group by. Can be either 'hour', 'month' or 'weekday'. Default is 'hour'.

    Returns
    ----------

    taxi_df_geo_grouped : (geopandas.GeoDataFrame)
        One row per hexagon (column 'h3_index') and time bucket, sorted by the time bucket.
        Contains always a geometry column and trip_count column. Hexagons without data have
        missing values in every column except h3_index and geometry.
    """
    taxi_df_geo = plotByFeatureTime(dataframe, location=location, feature=feature, aggregation=aggregation, time_interval=time_interval)
    centroid_column = location + '_centroid_location'

    # Assign every (tract, time bucket) row to the hexagon containing the tract centroid.
    taxi_df_geo['h3_index'] = [
        h3.geo_to_h3(centroid.y, centroid.x, hexRes) for centroid in taxi_df_geo[centroid_column]
    ]

    # Collect the hexagons covering each tract border. Every tract is polyfilled once (not once
    # per time bucket) and the indexes are kept unique so the outer merge below cannot
    # multiply the data rows.
    polyfilled_indexes = set()
    for geometry in taxi_df_geo.drop_duplicates(subset='GEOID')['geometry']:
        if geometry is None or geometry.is_empty:
            # Tract without a known border: nothing to fill.
            continue
        border_geojson = json.loads(shapely.to_geojson(geometry))
        # True: the coordinates are in GeoJSON (lng, lat) order.
        polyfilled_indexes.update(h3.polyfill(border_geojson, hexRes, True))

    taxi_df_geo = taxi_df_geo.drop(columns=['geometry', centroid_column, 'GEOID'])

    grouped = taxi_df_geo.groupby(['h3_index', time_interval])
    taxi_df_geo_grouped = grouped.agg(aggregation)
    # Per-tract counts have to be added up regardless of the requested aggregation.
    taxi_df_geo_grouped['trip_count'] = grouped['trip_count'].sum()
    taxi_df_geo_grouped = taxi_df_geo_grouped.reset_index()

    df_h3_polyfilled = pd.DataFrame({'h3_index': pd.Series(sorted(polyfilled_indexes), dtype='object')})
    taxi_df_geo_grouped = taxi_df_geo_grouped.merge(df_h3_polyfilled, on='h3_index', how='outer')

    # Build each hexagon outline once and map it onto all rows of that hexagon.
    hexagon_by_index = {
        index: shape({"type": "Polygon",
                      "coordinates": [h3.h3_to_geo_boundary(index, geo_json=True)]})
        for index in taxi_df_geo_grouped['h3_index'].unique()
    }
    taxi_df_geo_grouped['geometry'] = taxi_df_geo_grouped['h3_index'].map(hexagon_by_index)
    taxi_df_geo_grouped = gpd.GeoDataFrame(taxi_df_geo_grouped, crs='EPSG:4326', geometry='geometry')
    return taxi_df_geo_grouped.sort_values(by=[time_interval])


In [16]:
# Pickup hexagons at H3 resolution 8, bucketed by hour.
interactive_Plot_Pickup_hexRes8_hour = plotH3_HexagonMapInteractive(
    taxi_df,
    location='pickup',
    feature='all',
    aggregation='sum',
    time_interval='hour',
    hexRes=8,
)

In [17]:
# Dropoff hexagons at H3 resolution 8, bucketed by hour.
interactive_Plot_Dropoff_hexRes8_hour = plotH3_HexagonMapInteractive(
    taxi_df,
    location='dropoff',
    feature='all',
    aggregation='sum',
    time_interval='hour',
    hexRes=8,
)

In [18]:
# Defined here so the cell does not depend on an earlier plotting cell having been run.
viridis = px.colors.sequential.Viridis
# Options shared by the pickup and the dropoff map. The colour stops are placed at very
# small fractions of the colour range.
hex_hourly_map_options = dict(
    featureidkey="properties.h3_index",
    color='trip_count',
    animation_frame='hour',
    mapbox_style='open-street-map',
    zoom=9.5,
    color_continuous_scale=[
        [0, viridis[0]],
        [1./1000000, viridis[2]],
        [1./10000, viridis[4]],
        [1./100, viridis[7]],
        [1., viridis[9]],
    ],
    opacity=0.5,
    range_color=[0, 300000],
    center={'lat': 41.84, 'lon': -87.723177},
    labels={'trip_count':'trip_count'},
    width=800,
    height=800,
)
hex_hourly_maps = [
    (interactive_Plot_Pickup_hexRes8_hour, 'Trip count by pickup location for each hour of day - Hexagon resolution 8 borders'),
    (interactive_Plot_Dropoff_hexRes8_hour, 'Trip count by dropoff location for each hour of day - Hexagon resolution 8 borders'),
]
for hex_hourly_frame, hex_hourly_title in hex_hourly_maps:
    fig = px.choropleth_mapbox(
        hex_hourly_frame,
        # One outline feature per hexagon is enough for the map layer.
        geojson=hex_hourly_frame[['h3_index', 'geometry']].drop_duplicates(subset=['h3_index']),
        locations=hex_hourly_frame['h3_index'],
        **hex_hourly_map_options,
    )
    fig.update_layout(title_text=hex_hourly_title, title_x=0.5)
    fig.show()

In [19]:
# Pickup hexagons at H3 resolution 8, bucketed by month.
interactive_Plot_Pickup_hexRes8_month = plotH3_HexagonMapInteractive(
    taxi_df,
    location='pickup',
    feature='all',
    aggregation='sum',
    time_interval='month',
    hexRes=8,
)

In [20]:
# Dropoff hexagons at H3 resolution 8, bucketed by month.
interactive_Plot_Dropoff_hexRes8_month = plotH3_HexagonMapInteractive(
    taxi_df,
    location='dropoff',
    feature='all',
    aggregation='sum',
    time_interval='month',
    hexRes=8,
)

In [21]:
# Defined here so the cell does not depend on an earlier plotting cell having been run.
viridis = px.colors.sequential.Viridis
# Options shared by the pickup and the dropoff map. The colour stops are placed at very
# small fractions of the colour range.
hex_monthly_map_options = dict(
    featureidkey="properties.h3_index",
    color='trip_count',
    animation_frame='month',
    mapbox_style='open-street-map',
    zoom=9.5,
    color_continuous_scale=[
        [0, viridis[0]],
        [1./1000000, viridis[2]],
        [1./10000, viridis[4]],
        [1./100, viridis[7]],
        [1., viridis[9]],
    ],
    opacity=0.5,
    range_color=[0, 300000],
    center={'lat': 41.84, 'lon': -87.723177},
    labels={'trip_count':'trip_count'},
    width=800,
    height=800,
)
hex_monthly_maps = [
    (interactive_Plot_Pickup_hexRes8_month, 'Trip count by pickup location for each month of year - Hexagon resolution 8 borders'),
    (interactive_Plot_Dropoff_hexRes8_month, 'Trip count by dropoff location for each month of year - Hexagon resolution 8 borders'),
]
for hex_monthly_frame, hex_monthly_title in hex_monthly_maps:
    fig = px.choropleth_mapbox(
        hex_monthly_frame,
        # One outline feature per hexagon is enough for the map layer.
        geojson=hex_monthly_frame[['h3_index', 'geometry']].drop_duplicates(subset=['h3_index']),
        locations=hex_monthly_frame['h3_index'],
        **hex_monthly_map_options,
    )
    fig.update_layout(title_text=hex_monthly_title, title_x=0.5)
    fig.show()

In [22]:
# Pickup hexagons at H3 resolution 8, bucketed by weekday.
interactive_Plot_Pickup_hexRes8_weekday = plotH3_HexagonMapInteractive(
    taxi_df,
    location='pickup',
    feature='all',
    aggregation='sum',
    time_interval='weekday',
    hexRes=8,
)

In [23]:
# Dropoff hexagons at H3 resolution 8, bucketed by weekday.
interactive_Plot_Dropoff_hexRes8_weekday = plotH3_HexagonMapInteractive(
    taxi_df,
    location='dropoff',
    feature='all',
    aggregation='sum',
    time_interval='weekday',
    hexRes=8,
)

In [24]:
# Defined here so the cell does not depend on an earlier plotting cell having been run.
viridis = px.colors.sequential.Viridis
# Options shared by the pickup and the dropoff map. The colour stops are placed at very
# small fractions of the colour range.
hex_weekday_map_options = dict(
    featureidkey="properties.h3_index",
    color='trip_count',
    animation_frame='weekday',
    mapbox_style='open-street-map',
    zoom=9.5,
    color_continuous_scale=[
        [0, viridis[0]],
        [1./1000000, viridis[2]],
        [1./10000, viridis[4]],
        [1./100, viridis[7]],
        [1., viridis[9]],
    ],
    opacity=0.5,
    range_color=[0, 300000],
    center={'lat': 41.84, 'lon': -87.723177},
    labels={'trip_count':'trip_count'},
    width=800,
    height=800,
)
hex_weekday_maps = [
    (interactive_Plot_Pickup_hexRes8_weekday, 'Trip count by pickup location for each weekday - Hexagon resolution 8 borders'),
    (interactive_Plot_Dropoff_hexRes8_weekday, 'Trip count by dropoff location for each weekday - Hexagon resolution 8 borders'),
]
for hex_weekday_frame, hex_weekday_title in hex_weekday_maps:
    fig = px.choropleth_mapbox(
        hex_weekday_frame,
        # One outline feature per hexagon is enough for the map layer.
        geojson=hex_weekday_frame[['h3_index', 'geometry']].drop_duplicates(subset=['h3_index']),
        locations=hex_weekday_frame['h3_index'],
        **hex_weekday_map_options,
    )
    fig.update_layout(title_text=hex_weekday_title, title_x=0.5)
    fig.show()