# shst_02_add_lion_characteristic

In this notebook, we add street characteristics from the LION dataset to the SharedStreets segments and nodes. The notebook produces two CSV files: one containing the street characteristics of each SharedStreets segment and one containing the street characteristics of each SharedStreets node.

## 1. Calculate street characteristics for Segment and Short Segment

In [3]:
import pandas as pd
import numpy as np
import geopandas as gpd

In [4]:
# Prerequisite: run the 'lion_01_lookup_table_between_LION_Shst.ipynb' notebook first
# to get 'shst_lion_lookuptable.csv', the sharedstreets <-> LION lookup table loaded here.
df_shst_lion_lookup = pd.read_csv(
    filepath_or_buffer='../data/cleaned_data/shst_lion_lookuptable.csv'
)

In [5]:
# keep only the two columns that link a sharedstreets geometry id to a LION segment id
df_shst_lion_lookup = df_shst_lion_lookup.loc[:, ['shstGeometryId', 'pp_segment_id']]

In [6]:
def get_lion_segment_id(geometryid, lookup=None):
    '''
    Return the LION segment ids matched to one sharedstreets geometry id.
    
    ------
    Input :
        geometryid (string): Sharedstreet geometryid
        lookup (DataFrame, optional): lookup table with the columns
            'shstGeometryId' and 'pp_segment_id'. When omitted, the
            notebook-level `df_shst_lion_lookup` table is used, so existing
            one-argument calls behave as before.
    
    -----
    Output :
        list_segment_id (list): lion segment ids whose 'shstGeometryId' equals
            `geometryid`, in table order; an empty list when nothing matches
    
    '''
    if lookup is None:
        # fall back to the table loaded earlier in the notebook
        lookup = df_shst_lion_lookup
    is_match = lookup['shstGeometryId'] == geometryid
    # single .loc[rows, column] selection instead of chained indexing
    list_segment_id = lookup.loc[is_match, 'pp_segment_id'].tolist()
    return list_segment_id

In [7]:
# Prerequisite: run the 'shst_00_processing_sharedstreets_geometry' notebook first
# to get 'shst_segment.shp', the sharedstreets segment layer loaded here.
gdf_segment = gpd.read_file(
    '../data/sharedstreets_geometry/segment/shst_segment.shp'
)

In [8]:
# start the working table with a single column: the sharedstreets segment ids
df_segment = gdf_segment['id'].to_frame()

In [9]:
# get list of lion segment ids from shstgeometry ID
# The lookup table is grouped once into {shstGeometryId: [pp_segment_id, ...]} instead of
# being scanned with a full boolean mask for every single segment, which produces the
# same lists (same order) at a fraction of the cost.
lion_ids_by_geometry = (
    df_shst_lion_lookup
    .groupby('shstGeometryId', sort=False)['pp_segment_id']
    .agg(list)
    .to_dict()
)
# ids without any match get an empty list, as a per-id scan of the lookup table would return
df_segment['lion_segment_list'] = df_segment['id'].apply(lambda x: lion_ids_by_geometry.get(x, []))

In [10]:
# turn each list entry into its own row: one row per (sharedstreets id, lion segment id) pair
df_segment_unstack = df_segment.explode(column='lion_segment_list')

In [11]:
# the exploded column now holds a single lion segment id per row: rename it,
# mark rows without a match with -1 and store the ids as integers
df_segment_unstack = (
    df_segment_unstack
    .rename(columns={'lion_segment_list': 'lion_segment_id'})
    .assign(lion_segment_id=lambda frame: frame['lion_segment_id'].fillna(-1).astype(int))
)

In [12]:
# Prerequisite: run the 'lion_00_data_wrangling_&_filtration.ipynb' notebook first
# to get 'lion_filtered.shp', the filtered LION layer loaded here.
gdf_lion_filtered = gpd.read_file(
    '../data/cleaned_data/lion_filtered/lion_filtered.shp'
)

In [13]:
# keep the LION segment id plus the attribute columns of interest in a separate frame
gdf_lion_simplified = gdf_lion_filtered[[
    'segment_id', 'segment_ty', 'rw_type', 'traf_dir', 'loc_status',
    'curve_flag', 'radius', 'street_wid',
    'bike_lane', 'bike_trafd',
    'number_tra', 'number_par', 'number_tot',
    'posted_spe', 'snow_prior',
]]

In [14]:
# expand the abbreviated LION column names into readable ones
gdf_lion_simplified = gdf_lion_simplified.rename(
    columns={
        'segment_ty': 'segment_type',
        'rw_type': 'roadway_type',
        'traf_dir': 'traffic_direction',
        'street_wid': 'street_width',
        'bike_trafd': 'bike_traffic_direction',
        'number_tra': 'number_travel',
        'number_par': 'number_park',
        'number_tot': 'number_total',
        'posted_spe': 'posted_speed',
    }
)

In [15]:
# cast the LION segment id to int so it can be matched against 'lion_segment_id'
gdf_lion_simplified = gdf_lion_simplified.assign(
    segment_id=gdf_lion_simplified['segment_id'].astype(int)
)

In [16]:
# index the LION attributes by segment id, ready for an index-based merge
gdf_lion_simplified = gdf_lion_simplified.set_index(keys='segment_id')

In [17]:
# attach the LION attributes to every (sharedstreets id, lion segment id) row;
# a left join keeps the rows whose lion segment id has no LION record (e.g. the -1 placeholder)
df_segment_unstack = pd.merge(
    df_segment_unstack,
    gdf_lion_simplified,
    how='left',
    left_on='lion_segment_id',
    right_index=True,
)

In [18]:
# the join key is no longer needed once the LION attributes are attached
df_segment_unstack = df_segment_unstack.drop(columns=['lion_segment_id'])

In [19]:
# use -1 as the placeholder for every missing attribute value
df_segment_unstack = df_segment_unstack.fillna(value=-1)

In [20]:
# keep the sharedstreets id and the street attributes that go into the output file
df_segment_unstack = df_segment_unstack.loc[:, [
    'id',
    'roadway_type',
    'street_width',
    'posted_speed',
    'number_total',
    'number_travel',
    'number_park',
]]

In [21]:
# collapse to one row per sharedstreets id: for every attribute keep the value that
# occurs most often among the rows of that id (first entry of value_counts)
df_segment_lion = (
    df_segment_unstack
    .groupby('id', as_index=False)
    .agg(lambda values: values.value_counts().index[0])
)

In [22]:
# lookup table from the integer roadway-type code to its street-type label
dict_roadway_type = {
    -1: 'Unknown',  # -1 is the placeholder this notebook uses for missing values
    **dict(enumerate(
        (
            'Street',                       # 1
            'Highway',                      # 2
            'Bridge',                       # 3
            'Tunnel',                       # 4
            'Boardwalk',                    # 5
            'Path/Trail',                   # 6
            'Step Street',                  # 7
            'Driveway',                     # 8
            'Ramp',                         # 9
            'Alley',                        # 10
            'Unknown',                      # 11
            'Non-Physical Street Segment',  # 12
            'U-Turn',                       # 13
            'Ferry Route',                  # 14
        ),
        start=1,
    )),
}

In [23]:
# preview the aggregated per-segment table (roadway_type is still a numeric code here)
df_segment_lion.head(n=5)

Unnamed: 0,id,roadway_type,street_width,posted_speed,number_total,number_travel,number_park
0,0000b4f516894dfb309654e1a12bc7b1,1,30.0,25,3,1,2
1,00010fd3ee560483c21bb98e414741c7,1,34.0,25,2,2,-1
2,000115ffe0b626b4c1310827d7b28822,1,30.0,25,3,1,2
3,000182e6b337ab6b7c6053a7499de445,1,36.0,25,4,2,2
4,0001f6598a4739e7244d278eb317cb39,1,28.0,25,2,1,1


In [24]:
# inspect column dtypes and non-null counts of the aggregated per-segment table
df_segment_lion.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 92795 entries, 0 to 92794
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             92795 non-null  object 
 1   roadway_type   92795 non-null  object 
 2   street_width   92795 non-null  float64
 3   posted_speed   92795 non-null  object 
 4   number_total   92795 non-null  object 
 5   number_travel  92795 non-null  object 
 6   number_park    92795 non-null  object 
dtypes: float64(1), object(6)
memory usage: 5.7+ MB


In [25]:
# replace the numeric roadway-type code with its label from dict_roadway_type
# NOTE: this cell is not re-runnable as-is -- a second run would call int() on the labels
df_segment_lion['roadway_type'] = df_segment_lion['roadway_type'].map(
    lambda code: dict_roadway_type[int(code)]
)

In [27]:
# write the per-segment street characteristics to disk (row index omitted)
df_segment_lion.to_csv(
    path_or_buf='../data/cleaned_data/shared_street_with_attribute.csv',
    index=False,
)

## 2. Calculate street characteristics for Node

In [30]:
# Prerequisite: run the 'shst_00_processing_sharedstreets_geometry' notebook first
# to get 'shst_segment.shp'; the sharedstreets segment layer is loaded again for the node section.
gdf_segment = gpd.read_file(
    '../data/sharedstreets_geometry/segment/shst_segment.shp'
)

In [31]:
# preview the segment layer, including the from/to intersection id columns used below
gdf_segment.head(n=5)

Unnamed: 0,id,fromInters,toIntersec,forwardRef,backRefere,roadClass,length,geometry
0,db6792075ebbddc84479fda26174ca30,374b01a56e64379b8d7198962eaede90,2922a5babc5f921116a9fed4131a5bb1,48b7ab8e4cbafb2c1893cd682ded6704,a8475c8bd67f9e0ec8ce6a404aae41c1,Residential,299.421958,"LINESTRING (-73.91694 40.64668, -73.91625 40.6..."
1,42ccdc2b9ebc38f98c22bb0035045628,37db438d57f16f92e5ba91f1ad1793bb,374b01a56e64379b8d7198962eaede90,febaf06db79d8a16588d1c387a62fdb2,9db38906c3d8ae5df463e297be4e2b9b,Residential,256.577253,"LINESTRING (-73.91765 40.64623, -73.91732 40.6..."
2,84afb6627019b793945a7aab1feefe77,374b01a56e64379b8d7198962eaede90,5b6e4972c82ad4eb6d24c17b94b33b59,3f53ec240fc39c6b6810243b5b6fc830,fbbb71d35b794421e030d3ec9e1dcede,Residential,264.356932,"LINESTRING (-73.91694 40.64668, -73.91662 40.6..."
3,cce2402bd9841cb406b283d10a814940,5b6e4972c82ad4eb6d24c17b94b33b59,a3e9299f85dbecb22e829acc84c10e0e,3087b5f676af0e0026291f350b24859f,2b5b359caf490894be2d52ac51ebd0aa,Residential,256.824988,"LINESTRING (-73.91621 40.64715, -73.91550 40.6..."
4,9fed2268f9da5e9d263fdec0cb322aaa,5b6e4972c82ad4eb6d24c17b94b33b59,4701018d0dd721de27e208b19f0b20b6,4e2b3b2e438d9d7d504c1f0c76a5e4b0,564d3c4d11cb8340bfa4cc2b1ac2e6ff,Residential,378.440368,"LINESTRING (-73.91621 40.64715, -73.91611 40.6..."


In [29]:
# preview the per-segment characteristics that will be propagated to the nodes
df_segment_lion.head(n=5)

Unnamed: 0,id,roadway_type,street_width,posted_speed,number_total,number_travel,number_park
0,0000b4f516894dfb309654e1a12bc7b1,Street,30.0,25,3,1,2
1,00010fd3ee560483c21bb98e414741c7,Street,34.0,25,2,2,-1
2,000115ffe0b626b4c1310827d7b28822,Street,30.0,25,3,1,2
3,000182e6b337ab6b7c6053a7499de445,Street,36.0,25,4,2,2
4,0001f6598a4739e7244d278eb317cb39,Street,28.0,25,2,1,1


In [33]:
# pair each segment id with its 'from' node id and, separately, with its 'to' node id
df_from_node, df_to_node = (
    gdf_segment[[endpoint_column, 'id']]
    for endpoint_column in ('fromInters', 'toIntersec')
)

In [34]:
# give both tables the same schema: (node_id, segment_id)
df_from_node = df_from_node.rename(columns={'id': 'segment_id', 'fromInters': 'node_id'})
df_to_node = df_to_node.rename(columns={'id': 'segment_id', 'toIntersec': 'node_id'})

In [35]:
# stack the 'from' and 'to' tables into one (node_id, segment_id) table with a fresh index
df_node = pd.concat(objs=(df_from_node, df_to_node), ignore_index=True)

In [38]:
# remove repeated (node_id, segment_id) rows, keeping the first occurrence
df_node = df_node.drop_duplicates(keep='first')

In [42]:
# attach to every (node, segment) row the characteristics of that segment from 'df_segment_lion'
df_node = pd.merge(
    df_node,
    df_segment_lion,
    how='inner',
    left_on='segment_id',
    right_on='id',
)

In [45]:
# the two segment-id columns were only needed for the join
df_node = df_node.drop(columns=['segment_id', 'id'])

In [47]:
# preview: one row per (node, adjacent segment), so a node_id can appear several times
df_node.head(n=5)

Unnamed: 0,node_id,roadway_type,street_width,posted_speed,number_total,number_travel,number_park
0,374b01a56e64379b8d7198962eaede90,Street,40.0,25,4,2,2
1,2922a5babc5f921116a9fed4131a5bb1,Street,40.0,25,4,2,2
2,37db438d57f16f92e5ba91f1ad1793bb,Street,44.0,25,4,2,2
3,374b01a56e64379b8d7198962eaede90,Street,44.0,25,4,2,2
4,374b01a56e64379b8d7198962eaede90,Street,44.0,25,4,2,2


In [48]:
# collapse to one row per node_id: for every attribute keep the value that occurs
# most often among the rows of that node (first entry of value_counts)
df_node_lion = (
    df_node
    .groupby('node_id', as_index=False)
    .agg(lambda values: values.value_counts().index[0])
)

In [50]:
# preview the aggregated per-node table
df_node_lion.head(n=5)

Unnamed: 0,node_id,roadway_type,street_width,posted_speed,number_total,number_travel,number_park
0,0002db54439476ed731ba50bf7dcdb76,Street,30.0,25,3,2,-1
1,0005ae85e017c72c69cbdcd38f986f04,Street,26.0,25,3,2,1
2,0005f85368f314bfb0c82b84d0208b9f,Street,55.0,25,3,1,2
3,00069e55a187497605fe946926934e4b,Street,34.0,25,3,3,-1
4,0008525ffca74a2e2af21a2acc91458e,Street,50.0,20,3,1,2


In [51]:
# write the per-node street characteristics to disk (row index omitted)
df_node_lion.to_csv(
    path_or_buf='../data/cleaned_data/shared_node_with_attribute.csv',
    index=False,
)