The drive and walk/bike centroid connectors are created separately because they follow different rules, but in many cases they connect to the same network street nodes. When that happens, we currently keep the drive and walk/bike connector links as separate records, even though they have the same A/B nodes and the same shape. They can be differentiated by the drive_access, bike_access, and walk_access fields. On reflection, it is probably better to consolidate such connectors into a single record.

This notebook is meant to be run after Ranch to consolidate the connectors. The resulting connectors are then loaded and written out to the model network in notebooks such as CreateVersion01.ipynb.

In [1]:
import pandas as pd
import geopandas as gpd
import os

In [2]:
# Input directory holding the centroid pickles read by this notebook.
# The default is the original local path; set METCOUNCIL_CENTROID_DATA_DIR to
# run the notebook on another machine without editing this cell.
centroid_data_dir = os.environ.get(
    "METCOUNCIL_CENTROID_DATA_DIR",
    r"D:\metcouncil_network_rebuild\data\interim",
)

# Output directory for the consolidated centroid files written at the end.
# Override with METCOUNCIL_CONSOLIDATED_CENTROID_DATA_DIR when needed.
consolidated_centroid_data_dir = os.environ.get(
    "METCOUNCIL_CONSOLIDATED_CENTROID_DATA_DIR",
    r"D:\metcouncil_network_rebuild\data\processed\version_00\standard_networks",
)

In [3]:
# Read the three centroid inputs (nodes, connector links, connector shapes)
# from the interim data directory.
taz_node_gdf, taz_cc_link_gdf, taz_cc_shape_gdf = (
    pd.read_pickle(os.path.join(centroid_data_dir, pickle_name))
    for pickle_name in ("centroid_node.pickle", "cc_link.pickle", "cc_shape.pickle")
)

In [4]:
# Report (rows, columns) of each input table before consolidation.
for input_table in (taz_node_gdf, taz_cc_link_gdf, taz_cc_shape_gdf):
    print(input_table.shape)

(3030, 8)
(47642, 14)
(23819, 4)


In [5]:
# Remember the CRS of the links before they are aggregated; it is passed back
# in when the links and shapes are rebuilt as GeoDataFrames further down.
crs = taz_cc_link_gdf.crs

# Process

## nodes

In [6]:
# Preview the first three centroid node records.
taz_node_gdf.iloc[:3]

Unnamed: 0,OBJECTID,N,X,Y,taz_id,model_node,model_node_id,geometry
0,1,1,462454.755934,5027410.0,1,1,1,POINT (-93.47971 45.39920)
1,2,2,468423.045357,5028244.0,2,2,2,POINT (-93.40351 45.40700)
2,3,3,466398.768445,5026811.0,3,3,3,POINT (-93.42928 45.39401)


In [7]:
# Every centroid node record must carry its own distinct taz_id.
assert taz_node_gdf["taz_id"].nunique() == len(taz_node_gdf)

## links

In [8]:
# Preview the first three connector link records.
taz_cc_link_gdf.iloc[:3]

Unnamed: 0,A,B,drive_access,walk_access,bike_access,shstGeometryId,id,u,v,fromIntersectionId,toIntersectionId,county,roadway,geometry
0,225259.0,1460.0,1,0,0,be68d5105155224803a916f1b7673a60,be68d5105155224803a916f1b7673a60,980049814.0,,e3809600ae9687b10115883a13a3bf4c,,,taz,"LINESTRING (-93.28633 44.93045, -93.28632 44.9..."
1,218458.0,1460.0,1,0,0,bd96aa983487595e203ce16ab2a453a5,bd96aa983487595e203ce16ab2a453a5,814491420.0,,8b73508201be5b4e066bf0d3fcac1048,,,taz,"LINESTRING (-93.28504 44.93045, -93.28632 44.9..."
2,225258.0,1460.0,1,0,0,a068b76ce96e7d6dc4b6a5856cda2d3a,a068b76ce96e7d6dc4b6a5856cda2d3a,980049813.0,,d02eb23520161ef8cb557f486f8380b9,,Hennepin,taz,"LINESTRING (-93.28632 44.92864, -93.28632 44.9..."


In [9]:
# Number of connector records per (A, B) node pair, largest count first.
# Pairs with a count above 1 are the ones stored as separate records.
records_per_pair = taz_cc_link_gdf.groupby(["A", "B"])["drive_access"].count()
records_per_pair.sort_values(ascending=False)

A         B       
1874.0    125480.0    2
308049.0  1926.0      2
56800.0   54.0        2
1293.0    228928.0    2
          28735.0     2
                     ..
2166.0    141570.0    1
          142103.0    1
          142104.0    1
          352699.0    1
415324.0  2575.0      1
Name: drive_access, Length: 41812, dtype: int64

In [10]:
# Inspect one (A, B) pair that currently has two records.
is_example_pair = taz_cc_link_gdf["A"].eq(1874) & taz_cc_link_gdf["B"].eq(125480)
taz_cc_link_gdf.loc[is_example_pair]

Unnamed: 0,A,B,drive_access,walk_access,bike_access,shstGeometryId,id,u,v,fromIntersectionId,toIntersectionId,county,roadway,geometry
13201,1874.0,125480.0,1,0,0,e1c3ebad0eca043c4ddff68a64b782da,e1c3ebad0eca043c4ddff68a64b782da,,187878022.0,,acb78640dc99c5bca1fab8d42ad03534,Ramsey,taz,"LINESTRING (-93.17836 45.00820, -93.18188 45.0..."
39067,1874.0,125480.0,0,1,1,e1c3ebad0eca043c4ddff68a64b782da,e1c3ebad0eca043c4ddff68a64b782da,,187878022.0,,acb78640dc99c5bca1fab8d42ad03534,Ramsey,taz,"LINESTRING (-93.17836 45.00820, -93.18188 45.0..."


In [11]:
# Collapse records that share the same (A, B) node pair into one connector.
# The three access flags are aggregated with max, so a flag ends up 1 when any
# record of the pair allows that mode. Every other attribute is aggregated with
# 'first' (pandas skips missing values by default for this aggregation).
access_flag_cols = ['drive_access', 'walk_access', 'bike_access']
carried_over_cols = [
    'shstGeometryId',
    'id',
    'u',
    'v',
    'fromIntersectionId',
    'toIntersectionId',
    'county',
    'roadway',
    'geometry',
]
link_agg_rules = {col: 'max' for col in access_flag_cols}
link_agg_rules.update({col: 'first' for col in carried_over_cols})

consolidated_links = taz_cc_link_gdf.groupby(['A', 'B']).agg(link_agg_rules).reset_index()

# Rebuild the GeoDataFrame from the aggregated table, re-applying the CRS
# captured from the original links.
taz_cc_link_gdf = gpd.GeoDataFrame(consolidated_links, geometry='geometry', crs=crs)

In [12]:
# Show column dtypes and non-null counts of the consolidated links.
taz_cc_link_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 41812 entries, 0 to 41811
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   A                   41812 non-null  float64 
 1   B                   41812 non-null  float64 
 2   drive_access        41812 non-null  int64   
 3   walk_access         41812 non-null  int64   
 4   bike_access         41812 non-null  int64   
 5   shstGeometryId      41812 non-null  object  
 6   id                  41812 non-null  object  
 7   u                   20906 non-null  float64 
 8   v                   20906 non-null  float64 
 9   fromIntersectionId  20906 non-null  object  
 10  toIntersectionId    20906 non-null  object  
 11  county              41808 non-null  object  
 12  roadway             41812 non-null  object  
 13  geometry            41812 non-null  geometry
dtypes: float64(4), geometry(1), int64(3), object(6)
memory usage: 4.5+ MB


In [13]:
# After consolidation each (A, B) pair must appear in exactly one link record.
n_link_pairs = taz_cc_link_gdf.groupby(["A", "B"]).ngroups
assert n_link_pairs == len(taz_cc_link_gdf)

## shapes

In [14]:
# Preview the first three connector shape records.
taz_cc_shape_gdf.iloc[:3]

Unnamed: 0,id,geometry,fromIntersectionId,county
0,be68d5105155224803a916f1b7673a60,"LINESTRING (-93.28633 44.93045, -93.28632 44.9...",e3809600ae9687b10115883a13a3bf4c,
1,bd96aa983487595e203ce16ab2a453a5,"LINESTRING (-93.28504 44.93045, -93.28632 44.9...",8b73508201be5b4e066bf0d3fcac1048,
2,a068b76ce96e7d6dc4b6a5856cda2d3a,"LINESTRING (-93.28632 44.92864, -93.28632 44.9...",d02eb23520161ef8cb557f486f8380b9,Hennepin


In [15]:
# Number of shape records per id, largest count first.
records_per_shape_id = taz_cc_shape_gdf.groupby(["id"]).size()
records_per_shape_id.sort_values(ascending=False)

id
26da46413bcf0fd055980bd57e3539ae    2
71330b11196dc3877941d32bfea6b232    2
7132801f56755db5cc4048b5e246334b    2
712d1ba2bc628fef08c17e3b1e6b7fb5    2
3cb932714ade81e7a5f23191c45cbebf    2
                                   ..
5a5a562318dd1156e44df85b0585e0e5    1
5a5a09e7c8af04b447a180a92d97abe9    1
5a547a8ff784950df055f2a914a8318d    1
5a5444aabbec428d5cce2a7a74916630    1
fffbaa5695b4e8716fecbbb23a9fc720    1
Length: 20904, dtype: int64

In [16]:
# Reduce the shapes to one record per id. Each attribute is aggregated with
# 'first' (pandas skips missing values by default for this aggregation), so a
# value missing in one duplicate can be filled from another.
shape_agg_rules = dict.fromkeys(['fromIntersectionId', 'county', 'geometry'], 'first')
deduplicated_shapes = taz_cc_shape_gdf.groupby(['id']).agg(shape_agg_rules).reset_index()

# Rebuild the GeoDataFrame from the aggregated table.
# NOTE(review): this reuses the CRS captured from the links; it assumes the
# shapes share that CRS -- confirm.
taz_cc_shape_gdf = gpd.GeoDataFrame(deduplicated_shapes, geometry='geometry', crs=crs)

In [17]:
# Look up one shape id in the de-duplicated shapes.
taz_cc_shape_gdf.loc[taz_cc_shape_gdf["id"] == 'be68d5105155224803a916f1b7673a60']

Unnamed: 0,id,fromIntersectionId,county,geometry
15619,be68d5105155224803a916f1b7673a60,e3809600ae9687b10115883a13a3bf4c,Hennepin,"LINESTRING (-93.28633 44.93045, -93.28632 44.9..."


In [18]:
# Shape ids must be unique after de-duplication.
assert len(taz_cc_shape_gdf) == taz_cc_shape_gdf["id"].nunique()

In [19]:
# Report (rows, columns) of each table after consolidation.
for output_table in (taz_node_gdf, taz_cc_link_gdf, taz_cc_shape_gdf):
    print(output_table.shape)

(3030, 8)
(41812, 14)
(20904, 4)


# Write out

In [20]:
# Persist the nodes and the consolidated links and shapes as pickles, using
# the same file names as the inputs but in the consolidated output directory.

# Create the output directory when it is missing so the writes below do not
# fail on a machine where it has not been set up yet.
os.makedirs(consolidated_centroid_data_dir, exist_ok=True)

taz_node_gdf.to_pickle(os.path.join(consolidated_centroid_data_dir, "centroid_node.pickle"))
taz_cc_link_gdf.to_pickle(os.path.join(consolidated_centroid_data_dir, "cc_link.pickle"))
taz_cc_shape_gdf.to_pickle(os.path.join(consolidated_centroid_data_dir, "cc_shape.pickle"))

In [21]:
# Export nodes, links and shapes as GeoJSON, plus a geometry-free JSON file
# holding the link attributes as a list of records.
node_geojson_path = os.path.join(consolidated_centroid_data_dir, "centroid_node.geojson")
taz_node_gdf.to_file(node_geojson_path, driver="GeoJSON")

link_json_path = os.path.join(consolidated_centroid_data_dir, "cc_link.json")
with open(link_json_path, 'w') as link_json_file:
    link_attributes = pd.DataFrame(taz_cc_link_gdf.drop(columns=['geometry']))
    link_json_file.write(link_attributes.to_json(orient='records'))

for geo_table, geojson_name in (
    (taz_cc_link_gdf, "cc_link.geojson"),
    (taz_cc_shape_gdf, "cc_shape.geojson"),
):
    geo_table.to_file(os.path.join(consolidated_centroid_data_dir, geojson_name), driver="GeoJSON")