# Geo analytics AAA - Intentionally Blank

Census tract borders (City of Chicago Data Portal, 2010 boundaries): https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Census-Tracts-2010/5jrd-6zik
Census tract shapefiles (U.S. Census Bureau, 2016): https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Census+Tracts

In [None]:
import json

import pandas as pd
import numpy as np

import h3
import geopandas as gpd
import shapely
from shapely.geometry import shape
import plotly.express as px


In [None]:
# Census tract polygons used as the geographic base layer.
census_tract_borders = gpd.read_file('data/chicago_census_tract_borders.zip')

# Prepared taxi trips; the centroid columns are stored as WKT text and are
# parsed into geometry objects here.
taxi_df = pd.read_parquet('data/prepared/taxi_data_prepared.gzip')
taxi_df['dropoff_centroid_location'] = gpd.GeoSeries.from_wkt(taxi_df['dropoff_centroid_location'])
taxi_df['pickup_centroid_location'] = gpd.GeoSeries.from_wkt(taxi_df['pickup_centroid_location'])

In [None]:
# Keep only the tract identifier and its polygon; every other column is dropped.
census_tract_borders = census_tract_borders.drop(
    columns=census_tract_borders.columns.difference(['GEOID', 'geometry'])
)

In [None]:
# Collect every tract id that occurs as a pickup or a dropoff (as strings, to
# compare against GEOID) and keep only the border polygons of those tracts.
unique_census_tract_id = np.concatenate(
    [taxi_df[side + '_census_tract'].unique() for side in ('pickup', 'dropoff')]
).astype('str')
census_tract_borders = census_tract_borders.loc[
    census_tract_borders['GEOID'].isin(unique_census_tract_id)
].reset_index(drop=True)
census_tract_borders

In [None]:
# Create docstring for the following function
def plotByFeatureStatic(dataframe, location='pickup', feature='all', aggregation='sum', missingCensusTract=False):
    """ Aggregate trip features per census tract and attach the tract geometry.

    Despite its name the function does not draw anything itself; it returns a
    GeoDataFrame that can be plotted by the caller. The tract polygons are
    taken from the module-level ``census_tract_borders`` GeoDataFrame.

    Parameters
    ----------

    dataframe :  (pandas.DataFrame)
        The trip data. Must contain the ``<location>_census_tract`` and
        ``<location>_centroid_location`` columns.
    location : (str)
        Which end of the trip to group by. Can be either 'pickup' or 'dropoff'. Default is 'pickup'.
    feature : (str)
        The feature to aggregate. If 'all', every column except the census tract,
        centroid and timestamp columns is aggregated. Default is 'all'.
    aggregation : (str)
        The aggregation function to use. Can be either 'mean', 'median', 'sum', 'count', 'min', 'max'. Default is 'sum'.
    missingCensusTract : (bool)
        If True, census tracts with no data are included in the result (their
        values are filled with 0). Default is False.

    Returns
    ----------

    dataframe_grouped : (geopandas.GeoDataFrame)
        One row per census tract / centroid pair with the aggregated feature
        column(s). Always contains a GEOID, a geometry and a trip_count column.

    Raises
    ----------

    ValueError
        If ``location`` is neither 'pickup' nor 'dropoff'.
    KeyError
        If ``feature`` is not a column of ``dataframe``.
    """
    if location not in ('pickup', 'dropoff'):
        raise ValueError("Location must be either 'pickup' or 'dropoff'.")

    tract_column = location + '_census_tract'
    centroid_column = location + '_centroid_location'

    if feature == 'all':
        features = dataframe.columns.difference(['pickup_census_tract', 'dropoff_census_tract', 'pickup_centroid_location', 'dropoff_centroid_location', 'trip_start_timestamp', 'trip_end_timestamp']).tolist()
    else:
        if feature not in dataframe.columns:
            raise KeyError(feature)
        features = [feature]

    # Copy only the columns that are needed instead of the whole input frame;
    # the boolean mask keeps the original column order.
    keep = features + [tract_column, centroid_column]
    dataframe_grouped = dataframe.loc[:, dataframe.columns.isin(keep)].copy()

    # Constant helper column: counting it yields the number of rows (trips) per
    # group, independent of missing values in any feature column.
    dataframe_grouped['trip_count'] = 1

    def _aggregate(column):
        # trip_count is always counted; all other columns use `aggregation`.
        if column.name == 'trip_count':
            return column.agg('count')
        return column.agg(aggregation)

    dataframe_grouped = dataframe_grouped.groupby([tract_column, centroid_column]).agg(_aggregate).reset_index()
    dataframe_grouped = dataframe_grouped.rename(columns={tract_column: 'GEOID'})
    # GEOID is compared as a string against census_tract_borders['GEOID'].
    dataframe_grouped['GEOID'] = dataframe_grouped['GEOID'].astype('str')

    # A right join keeps every tract of census_tract_borders, a left join only
    # the tracts that occur in the aggregated data.
    join_type = 'right' if missingCensusTract else 'left'
    dataframe_grouped = dataframe_grouped.merge(census_tract_borders, on='GEOID', how=join_type)
    dataframe_grouped = gpd.GeoDataFrame(dataframe_grouped)

    if missingCensusTract:
        # Tracts without trips have no centroid from the trip data, so the
        # centroid column is recomputed from the tract polygons for all rows
        # (in an equal-area projection, then converted back to EPSG:4326).
        dataframe_grouped[centroid_column] = dataframe_grouped['geometry'].to_crs('+proj=cea').centroid.to_crs('EPSG:4326')
        dataframe_grouped = dataframe_grouped.fillna(0)
    return dataframe_grouped


In [None]:
# Aggregate all features per pickup census tract (including tracts without
# trips) and show the trip count on an interactive map.
geo_df_trip_seconds = plotByFeatureStatic(
    taxi_df,
    location='pickup',
    feature='all',
    aggregation='sum',
    missingCensusTract=True,
)
geo_df_trip_seconds.explore(
    column='trip_count',
    tooltip=True,
    vmin=0,
    cmap='viridis',
)


In [None]:
# Display the per-tract aggregation returned by plotByFeatureStatic.
geo_df_trip_seconds

In [None]:
# Create docstring for the following function
def plotH3_HexagonMap(dataframe, location='pickup', feature='all', aggregation='sum', missingCensusTract=False, hexRes = 9):
    """ Aggregate trip features per H3 hexagon and attach the hexagon geometry.

    The data is first aggregated per census tract with ``plotByFeatureStatic``.
    Each tract is then assigned to the H3 cell that contains its centroid, the
    tract values are aggregated per cell, and the result is joined onto all H3
    cells that fill the tract polygons. Cells that do not contain a tract
    centroid get the value 0.

    Parameters
    ----------

    dataframe :  (pandas.DataFrame)
        The trip data, passed on to ``plotByFeatureStatic``.
    location : (str)
        Which end of the trip to group by. Can be either 'pickup' or 'dropoff'. Default is 'pickup'.
    feature : (str)
        The feature to aggregate. If 'all', all features are aggregated. Default is 'all'
    aggregation : (str)
        The aggregation function to use. Can be either 'mean', 'median', 'sum', 'count', 'min', 'max'. Default is 'sum'.
        It is applied both per census tract and per H3 cell.
    missingCensusTract : (bool)
        If True, census tracts with no data are included in the result. Default is False.
    hexRes : (int)
        H3 hexagon resolution. Default is 9.

    Returns
    ----------

    taxi_df_geo_grouped : (geopandas.GeoDataFrame)
        One row per H3 cell (EPSG:4326) with an h3_index column, the hexagon
        geometry and the aggregated columns.
    """
    centroid_column = location + '_centroid_location'
    taxi_df_geo = plotByFeatureStatic(dataframe, location=location, feature=feature, aggregation=aggregation, missingCensusTract=missingCensusTract)

    # H3 cell of every tract centroid (h3 expects latitude first, i.e. y then x).
    taxi_df_geo['h3_index'] = taxi_df_geo.apply(
        lambda row: h3.geo_to_h3(row[centroid_column].y, row[centroid_column].x, hexRes),
        axis=1,
    )

    # Fill every tract polygon with H3 cells and build one hexagon per cell.
    geometries = []
    indexes = []
    for geometry in taxi_df_geo['geometry']:
        tract_geojson = json.loads(shapely.to_geojson(geometry))
        for h3_cell in h3.polyfill(tract_geojson, hexRes, True):
            geometries.append(shape({"type": "Polygon",
                    "coordinates": [h3.h3_to_geo_boundary(h3_cell, geo_json=True)],
                    "properties": ""
                    }))
            indexes.append(h3_cell)

    # Drop the tract geometry and the centroid of the *selected* location before
    # aggregating (a hard-coded pickup column would fail for location='dropoff').
    taxi_df_geo = taxi_df_geo.drop(columns=['geometry', centroid_column])
    df_h3_polyfilled = pd.DataFrame({'h3_index': indexes, 'geometry': geometries})
    taxi_df_geo_grouped = taxi_df_geo.groupby('h3_index').agg(aggregation).reset_index()
    # Right join: keep every polyfilled hexagon, also those without data.
    taxi_df_geo_grouped = taxi_df_geo_grouped.merge(df_h3_polyfilled, on='h3_index', how='right')
    taxi_df_geo_grouped = taxi_df_geo_grouped.fillna(0)
    taxi_df_geo_grouped = gpd.GeoDataFrame(taxi_df_geo_grouped, crs='EPSG:4326')
    return taxi_df_geo_grouped


In [None]:
# Summed trip duration per H3 hexagon (resolution 8) for pickups, shown on an
# interactive map.
(
    plotH3_HexagonMap(
        taxi_df,
        location='pickup',
        feature='trip_seconds',
        aggregation='sum',
        missingCensusTract=True,
        hexRes=8,
    )
    .explore(column='trip_seconds', tooltip=True, vmin=0, cmap='viridis')
)

In [None]:
# h3 index to geometry
# NOTE(review): `taxi_df_geo` is not assigned at top level in any visible cell
# (it only appears as a local variable inside plotH3_HexagonMap) - confirm
# where it is supposed to come from before running this cell.

# h3 index to geometry: build the hexagon outline for every row with an h3 index
taxi_df_geo['geometry'] = taxi_df_geo.loc[taxi_df_geo['h3_index'].notna()].apply(
    lambda row: shape({
        "type": "Polygon",
        "coordinates": [h3.h3_to_geo_boundary(row["h3_index"], geo_json=True)],
        "properties": "",
    }),
    axis=1,
)

# Sum the trip measures per hexagon and show the trip count on a map.
taxi_df_geo_grouped = (
    taxi_df_geo
    .groupby(['h3_index', 'geometry'])
    .agg(dict.fromkeys(['trip_seconds', 'trip_miles', 'trip_total', 'trip_count'], 'sum'))
    .reset_index()
)
taxi_df_geo_grouped = gpd.GeoDataFrame(taxi_df_geo_grouped)
taxi_df_geo_grouped.explore(column='trip_count', tooltip=True, vmin=0, cmap='viridis')

In [None]:
# Display taxi_df_geo.
# NOTE(review): no visible cell assigns `taxi_df_geo` at top level - confirm it is defined before this cell runs.
taxi_df_geo

In [None]:
# Hour component of the trip start timestamp; used below as grouping key and animation frame.
taxi_df['hour'] = taxi_df['trip_start_timestamp'].dt.hour

In [None]:
#Group taxi_df by district and trip_start_timestamp
# Aggregate the trips per start hour and pickup census tract: sum the trip
# measures and count the dropoff tract entries as the number of trips.
taxi_df_groupedDistricts = (
    taxi_df
    .groupby(['hour', 'pickup_census_tract'])
    .agg(
        trip_seconds=('trip_seconds', 'sum'),
        trip_miles=('trip_miles', 'sum'),
        trip_total=('trip_total', 'sum'),
        trip_count=('dropoff_census_tract', 'count'),
    )
    .reset_index()
)
# Tract ids as plain integer strings.
taxi_df_groupedDistricts["pickup_census_tract"] = (
    taxi_df_groupedDistricts["pickup_census_tract"].astype("int64").astype("str")
)
# Log-scaled trip count.
taxi_df_groupedDistricts['color'] = np.log10(taxi_df_groupedDistricts['trip_count'])
taxi_df_groupedDistricts

In [None]:
#Group taxi_df by district and trip_start_timestamp
# Per start hour and pickup census tract: summed trip measures plus the number
# of dropoff tract entries, exposed as trip_count.
taxi_df_groupedDistricts = (
    taxi_df
    .groupby(['hour', 'pickup_census_tract'])
    .agg({'trip_seconds': 'sum', 'trip_miles': 'sum', 'trip_total': 'sum', 'dropoff_census_tract': 'count'})
    .reset_index()
    .rename(columns={'dropoff_census_tract': 'trip_count'})
)
# Convert the tract ids to integer strings.
tract_ids = taxi_df_groupedDistricts["pickup_census_tract"]
taxi_df_groupedDistricts["pickup_census_tract"] = tract_ids.astype("int64").astype("str")
del tract_ids
# Log-scaled trip count.
taxi_df_groupedDistricts['color'] = np.log10(taxi_df_groupedDistricts['trip_count'])
taxi_df_groupedDistricts

In [None]:
# Animated choropleth of the trip count per pickup census tract, one frame per
# hour. The colour stops are placed at strongly unequal positions of the
# colour range so that small counts remain distinguishable.
viridis = px.colors.sequential.Viridis
fig = px.choropleth_mapbox(
    taxi_df_groupedDistricts,
    geojson=census_tract_borders,
    locations=taxi_df_groupedDistricts['pickup_census_tract'],
    featureidkey="properties.GEOID",
    color='trip_count',
    animation_frame='hour',
    mapbox_style='open-street-map',
    zoom=10,
    color_continuous_scale=[
        [position, viridis[colour_index]]
        for position, colour_index in ((0, 0), (1e-6, 2), (1e-4, 4), (1e-2, 7), (1.0, 9))
    ],
    opacity=0.5,
    range_color=[0, 300000],
    center={'lat': 41.881832, 'lon': -87.623177},
    labels={'trip_count': 'trip_count'},
    width=800,
    height=800,
)
fig.show()