In [2]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
from pyproj import CRS
import pathlib
from pathlib import Path
from shapely import wkt
from tqdm import tqdm
# set the working directory
BASE_DIR = Path.cwd()
# define the exported folder path
# Check if folder exists
folder_path = pathlib.Path(BASE_DIR.parent.joinpath("Exported_Files","census_tract","agg_network"))
folder_path.mkdir(parents=True, exist_ok=True)
# print(BASE_DIR)

In [3]:
# Let's read the SF-Champ network and get the basic stats

In [4]:
# Road network comparision: YR 2010 vs YR 2016

In [5]:
dfrdnetwrk2010 = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","SFChamp_2010_CT_PCS.geojson"), crs = "EPSG:3857")
dfrdnetwrk2010taz = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","TAZ","SFChamp_2010_TAZ_PCS.geojson"), crs = "EPSG:3857")

dfrdnetwrk2016 = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","SFChamp_2016_CT_PCS.geojson"), crs = "EPSG:3857")
dfrdnetwrk2016taz = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","TAZ","SFChamp_2016_TAZ_PCS.geojson"), crs = "EPSG:3857")

print(f"Total number of road segments at CT level - YR2010:{len(dfrdnetwrk2010)}, YR2016:{len(dfrdnetwrk2016)} ")
print(f"Total number of road segments at TAZ level- YR2010:{len(dfrdnetwrk2010taz)}, YR2016:{len(dfrdnetwrk2016taz)} ")

Total number of road segments at CT level - YR2010:33395, YR2016:33462 
Total number of road segments at TAZ level- YR2010:42082, YR2016:42168 


In [6]:
print(dfrdnetwrk2010["FT"].value_counts(normalize=True))

11.0    0.606169
4.0     0.159425
7.0     0.089474
12.0    0.083875
15.0    0.021770
9.0     0.020153
13.0    0.006498
5.0     0.005959
2.0     0.004132
3.0     0.001797
1.0     0.000749
Name: FT, dtype: float64


In [7]:
# also get the distribution by Facility Type
# FT = 1:Fwy-Fwy Connector, 2:Freeway, 3:Expressway, 4:Collector, 5:Ramp, 6:Centroid Connector, 7:Major Arterial, 8:Not used,
# 9:Alley, 10:Metered Ramp, 11:Local, 12:Minor Arterial,13:Bike-Only!, 14:Not used, 15:Super Arterial,
# Segregate the road network into three categories
# 1. Category 1 = contains FT = [1, 2, 3, 5]
# 2. Category 2 = contains FT = [4, 7,12,13,15]
# 3. Category 3 = contains FT = [9,11]

# modify the dataframe to create a new column "CATEGORY" using "FacilityType"
def label_df_by_road_category(_df,fld):
    _df["category"]=0
    _df.loc[_df[fld].isin([1, 2, 3, 5,13]),'category']=1
    _df.loc[_df[fld].isin([4,7,12,15]),'category']=2
    _df.loc[_df[fld].isin([9,11 ]),'category']=3
    return _df

In [8]:
dfrdntwrk2010_cat =  label_df_by_road_category(dfrdnetwrk2010.copy(),"FT")
print("For Census Tract - YR:2010")
print(dfrdntwrk2010_cat["category"].value_counts(ascending=True))

dfrdntwrk2016_cat =  label_df_by_road_category(dfrdnetwrk2016.copy(),"FT")
print("For Census Tract - YR:2016")
print(dfrdntwrk2016_cat["category"].value_counts(ascending=True))

For Census Tract - YR:2010
1      639
2    11840
3    20916
Name: category, dtype: int64
For Census Tract - YR:2016
1      695
2    11851
3    20916
Name: category, dtype: int64


In [9]:
dfrdntwrk2016taz_cat =  label_df_by_road_category(dfrdnetwrk2010taz.copy(),"FT")
print("For Census Tract - YR:2010")
print(dfrdntwrk2016taz_cat["category"].value_counts(ascending=True))

dfrdntwrk2016taz_cat =  label_df_by_road_category(dfrdnetwrk2016taz.copy(),"FT")
print("For Census Tract - YR:2016")
print(dfrdntwrk2016taz_cat["category"].value_counts(ascending=True))

For Census Tract - YR:2010
1      858
2    15024
3    26200
Name: category, dtype: int64
For Census Tract - YR:2016
1      932
2    15036
3    26200
Name: category, dtype: int64


In [None]:
# Road Crashes
# Collision severity types:
# Fatal:1, Severe_Injury:2, Visible_Injury:3, Other Injury: 4, PDO: 5

In [14]:
dfroadcrash2010 = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","SFCrash_2010_CT_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2010 = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","NN_SFCrash_SFChamp_2010_CT_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2010 = dfnnroadcrash2010.loc[dfnnroadcrash2010["D2NL"]<10.00,:]
dfroadcrash2010taz = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","TAZ","SFCrash_2010_TAZ_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2010taz = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","TAZ","NN_SFCrash_2010_TAZ_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2010taz = dfnnroadcrash2010taz.loc[dfnnroadcrash2010taz["D2NL"]<10.00,:]

dfroadcrash2016 = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","SFCrash_2016_CT_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2016 = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","NN_SFCrash_SFChamp_2016_CT_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2016 = dfnnroadcrash2016.loc[dfnnroadcrash2016["D2NL"]<10.00,:]
dfroadcrash2016taz = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","TAZ","SFCrash_2016_TAZ_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2016taz = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","TAZ","NN_SFCrash_2016_TAZ_PCS.geojson"), crs = "EPSG:3857")
dfnnroadcrash2016taz = dfnnroadcrash2016taz.loc[dfnnroadcrash2016taz["D2NL"]<10.00,:]

print(f"Total Crashes - Census Tract: YR2010: {dfroadcrash2010['Total_Crash'].sum()} vs YR 2016: {dfroadcrash2016['Total_Crash'].sum()}")
print(f"Total Crashes (Nearest Neightbour) - CT: YR2010: {dfnnroadcrash2010['Total_Crash'].sum()} vs YR 2016: {dfnnroadcrash2016['Total_Crash'].sum()}")

print(f"Total Crashes - TAZ: YR2010: {dfroadcrash2010taz['Total_Crash'].sum()} vs YR 2016: {dfroadcrash2016taz['Total_Crash'].sum()}")
print(f"Total Crashes (Nearest Neightbour) - TAZ: YR2010: {dfnnroadcrash2010taz['Total_Crash'].sum()} vs YR 2016: {dfnnroadcrash2016taz['Total_Crash'].sum()}")

Total Crashes - Census Tract: YR2010: 6340 vs YR 2016: 7179
Total Crashes (Nearest Neightbour) - CT: YR2010: 5817 vs YR 2016: 6388
Total Crashes - TAZ: YR2010: 6288 vs YR 2016: 7020
Total Crashes (Nearest Neightbour) - TAZ: YR2010: 5780 vs YR 2016: 6296


In [36]:
# by crash types
# YR 2010
pd.concat([dfroadcrash2010['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (CT)")["YR 2010 (CT)"],
           dfnnroadcrash2010['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN CT)")["YR 2010 (NN CT)"],
           dfroadcrash2010taz['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (TAZ)")["YR 2010 (TAZ)"],
           dfnnroadcrash2010taz['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN_TAZ)")["YR 2010 (NN_TAZ)"],
           ],axis=1)

Unnamed: 0,YR 2010 (CT),YR 2010 (NN CT),YR 2010 (TAZ),YR 2010 (NN_TAZ)
0,2680,2371,2662,2364
1,2305,2159,2284,2140
2,1132,1079,1121,1070
3,194,183,192,181
4,29,25,29,25


In [39]:
# Crash YR 2010 in percentage
pd.concat([dfroadcrash2010['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2010 (CT)")["YR 2010 (CT)"],
           dfnnroadcrash2010['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN CT)")["YR 2010 (NN CT)"],
           dfroadcrash2010taz['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2010 (TAZ)")["YR 2010 (TAZ)"],
           dfnnroadcrash2010taz['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN_TAZ)")["YR 2010 (NN_TAZ)"],
           ],axis=1)

Unnamed: 0,YR 2010 (CT),YR 2010 (NN CT),YR 2010 (TAZ),YR 2010 (NN_TAZ)
0,0.422713,0.407598,0.423346,0.408997
1,0.363565,0.371154,0.363232,0.370242
2,0.178549,0.185491,0.178276,0.185121
3,0.030599,0.03146,0.030534,0.031315
4,0.004574,0.004298,0.004612,0.004325


In [37]:
# YR 2016
pd.concat([dfroadcrash2016['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (CT)")["YR 2010 (CT)"],
           dfnnroadcrash2016['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN CT)")["YR 2010 (NN CT)"],
           dfroadcrash2016taz['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (TAZ)")["YR 2010 (TAZ)"],
           dfnnroadcrash2016taz['COLLISION_SEVERITY'].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN_TAZ)")["YR 2010 (NN_TAZ)"],
           ],axis=1)

Unnamed: 0,YR 2016 (CT),YR 2016 (NN CT),YR 2016 (TAZ),YR 2016 (NN_TAZ)
0,0.443655,0.422668,0.440883,0.423761
1,0.365232,0.377113,0.366952,0.376429
2,0.148628,0.154665,0.149003,0.153907
3,0.037749,0.040701,0.038462,0.041137
4,0.004736,0.004853,0.004701,0.004765


In [38]:
# Crash YR 2016 in percentage
pd.concat([dfroadcrash2016['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2016 (CT)")["YR 2016 (CT)"],
           dfnnroadcrash2016['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN CT)")["YR 2016 (NN CT)"],
           dfroadcrash2016taz['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2016 (TAZ)")["YR 2016 (TAZ)"],
           dfnnroadcrash2016taz['COLLISION_SEVERITY'].value_counts(normalize=True).rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN_TAZ)")["YR 2016 (NN_TAZ)"],
           ],axis=1)

Unnamed: 0,YR 2016 (CT),YR 2016 (NN CT),YR 2016 (TAZ),YR 2016 (NN_TAZ)
0,0.443655,0.422668,0.440883,0.423761
1,0.365232,0.377113,0.366952,0.376429
2,0.148628,0.154665,0.149003,0.153907
3,0.037749,0.040701,0.038462,0.041137
4,0.004736,0.004853,0.004701,0.004765


In [None]:
# Road Crashes in each category based on FT

In [46]:
dfnnroadcrash2010_cat = label_df_by_road_category(dfnnroadcrash2010,"FT")
dfnnroadcrash2010taz_cat = label_df_by_road_category(dfnnroadcrash2010taz,"FT")

dfnnroadcrash2016_cat = label_df_by_road_category(dfnnroadcrash2016,"FT")
dfnnroadcrash2016taz_cat = label_df_by_road_category(dfnnroadcrash2016taz,"FT")

In [47]:
pd.concat([dfnnroadcrash2010_cat["category"].value_counts().rename_axis("TotalCrashes").reset_index(name="YR 2010 (NN CT)")["YR 2010 (NN CT)"],
           dfnnroadcrash2016_cat["category"].value_counts().rename_axis("TotalCrashes").reset_index(name="YR 2016 (NN CT)")["YR 2016 (NN CT)"]
           ],axis=1)

Unnamed: 0,YR 2010 (NN CT),YR 2016 (NN CT)
0,3751,3897
1,1264,1542
2,802,949


In [49]:
pd.concat([dfnnroadcrash2010_cat.loc[dfnnroadcrash2010_cat["category"]==1,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN CT)")["YR 2010 (NN CT)"],
           dfnnroadcrash2016_cat.loc[dfnnroadcrash2016_cat["category"]==1,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN CT)")["YR 2016 (NN CT)"]
           ],axis=1)

Unnamed: 0,YR 2010 (NN CT),YR 2016 (NN CT)
0,506,566
1,202,293
2,80,74
3,11,12
4,3,4


In [51]:
pd.concat([dfnnroadcrash2010_cat.loc[dfnnroadcrash2010_cat["category"]==2,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN CT)")["YR 2010 (NN CT)"],
           dfnnroadcrash2016_cat.loc[dfnnroadcrash2016_cat["category"]==2,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN CT)")["YR 2016 (NN CT)"]
           ],axis=1)

Unnamed: 0,YR 2010 (NN CT),YR 2016 (NN CT)
0,1553,1607
1,1268,1406
2,772,677
3,139,188
4,19,19


In [52]:
pd.concat([dfnnroadcrash2010_cat.loc[dfnnroadcrash2010_cat["category"]==3,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN CT)")["YR 2010 (NN CT)"],
           dfnnroadcrash2016_cat.loc[dfnnroadcrash2016_cat["category"]==3,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN CT)")["YR 2016 (NN CT)"]
           ],axis=1)

Unnamed: 0,YR 2010 (NN CT),YR 2016 (NN CT)
0,597,728
1,404,509
2,227,237
3,33,60
4,3,8


In [58]:
pd.concat([dfnnroadcrash2010taz_cat['category'].value_counts().rename_axis("TotalCrashes").reset_index(name="YR 2010 (NN_TAZ)")["YR 2010 (NN_TAZ)"],
           dfnnroadcrash2016taz_cat['category'].value_counts().rename_axis("TotalCrashes").reset_index(name="YR 2016 (NN_TAZ)")["YR 2016 (NN_TAZ)"],
           ],axis=1)

Unnamed: 0,YR 2010 (NN_TAZ),YR 2016 (NN_TAZ)
0,3732,3876
1,1260,1538
2,788,882


In [55]:
pd.concat([dfnnroadcrash2010taz_cat.loc[dfnnroadcrash2010taz_cat["category"]==1,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN TAZ)")["YR 2010 (NN TAZ)"],
           dfnnroadcrash2016taz_cat.loc[dfnnroadcrash2016taz_cat["category"]==1,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN TAZ)")["YR 2016 (NN TAZ)"]
           ],axis=1)

Unnamed: 0,YR 2010 (NN TAZ),YR 2016 (NN TAZ)
0,504,539
1,192,264
2,79,65
3,10,11
4,3,3


In [56]:
pd.concat([dfnnroadcrash2010taz_cat.loc[dfnnroadcrash2010taz_cat["category"]==2,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN TAZ)")["YR 2010 (NN TAZ)"],
           dfnnroadcrash2016taz_cat.loc[dfnnroadcrash2016taz_cat["category"]==2,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN TAZ)")["YR 2016 (NN TAZ)"]
           ],axis=1)

Unnamed: 0,YR 2010 (NN TAZ),YR 2016 (NN TAZ)
0,1546,1599
1,1265,1402
2,764,668
3,138,188
4,19,19


In [57]:
pd.concat([dfnnroadcrash2010taz_cat.loc[dfnnroadcrash2010taz_cat["category"]==3,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2010 (NN TAZ)")["YR 2010 (NN TAZ)"],
           dfnnroadcrash2016taz_cat.loc[dfnnroadcrash2016taz_cat["category"]==3,"COLLISION_SEVERITY"].value_counts().rename_axis("Collision_Severity").reset_index(name="YR 2016 (NN TAZ)")["YR 2016 (NN TAZ)"]
           ],axis=1)

Unnamed: 0,YR 2010 (NN TAZ),YR 2016 (NN TAZ)
0,595,727
1,402,507
2,227,236
3,33,60
4,3,8


In [None]:
# San Francisco Polygon Tract information

In [36]:
dfsfct = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","SF_CensusTract_PCS.geojson"), crs = "EPSG:3857")
print(f"Number of Census Tracts: {len(dfsfct['tractce10'])}")

Number of Census Tracts: 197


In [46]:
# San Francisco TAZ information
dfsftaz = gpd.read_file(BASE_DIR.parent.joinpath(folder_path,"Feb162022","TAZ","SF_TAZ_rectified_PCS.geojson"), crs = "EPSG:3857")
print(f"Number of TAZ: {len(dfsftaz['TAZ'])}")

Number of TAZ: 978
