In [1]:
import pandas as pd
import geopandas as geopd
import areal_interpolation as areal
import numpy as np
import argparse

In [46]:
# Input locations for the LODES origin-destination table, the SVI table, the
# census tract geometries and the target polygon layer.
# NOTE(review): the first three are absolute, machine-specific paths; consider
# deriving them from a configurable data directory.
LODES_path = "/home/data/census/nyc/LODES/ny_od_main_JT01_2019.csv"
SVI_path = "/home/data/social_vulnerability_index/SVI2020_US.csv"
census_geo_path = "/home/data/census/nyc/geo/tracts.geojson"
polygon_path = "https://data.cityofnewyork.us/api/geospatial/r8nu-ymqj?method=export&format=GeoJSON"

In [15]:
def prep_SVI(SVI):
    """Drops unneeded columns and replaces missing values with the median for SVI scores

    Parameters
    ----------
    SVI : DataFrame containing at least "FIPS" and the five "SPL_THEME*" score
        columns. It is not modified.

    Returns
    -------
    A new DataFrame with only "FIPS" and the five score columns, where every
    negative score (missing values are coded -999) has been replaced by the
    median of the non-missing values in the same column.
    """

    score_columns = ["SPL_THEMES", "SPL_THEME1", "SPL_THEME2", "SPL_THEME3", "SPL_THEME4"]

    # Work on an explicit copy so the caller's frame is untouched and pandas
    # does not warn about assigning into a slice.
    SVI = SVI[["FIPS"] + score_columns].copy()

    # Set missing values (coded -999) to median - could be improved by setting to regional medians but not a big deal because there aren't that many
    for column in score_columns:
        is_missing = SVI[column] < 0
        # The median is taken over valid scores only; including the -999
        # sentinels would pull the fill value downwards.
        valid_median = SVI.loc[~is_missing, column].median()
        SVI[column] = SVI[column].mask(is_missing, valid_median)

    return SVI

In [4]:
def prep_geo(census_geo, polygons, crs):
    """Reprojects the census tracts and the polygons to a shared crs and runs the tract area calculation

    Both layers are converted with their own ``to_crs(crs)``. The reprojected
    tracts are then passed through ``areal.calculate_census_areas`` (the step
    that adds the area value needed later) before being returned.

    Returns a ``(census_geo, polygons)`` tuple of the processed layers.
    """

    projected_tracts, projected_polygons = (
        layer.to_crs(crs) for layer in (census_geo, polygons)
    )

    return areal.calculate_census_areas(projected_tracts), projected_polygons

In [5]:
def aggregate_LODES_at_tract(LODES):
    """Groups LODES data by tract rather than block so it can be merged with SVI data

    Parameters
    ----------
    LODES : DataFrame with block-level "w_geocode" (work) and "h_geocode"
        (home) codes and an "S000" job count. It is not modified.

    Returns
    -------
    DataFrame with one row per (w_GEOID, h_GEOID) pair and a "total_jobs"
    column holding the summed "S000" of the blocks in that pair.
    """

    def tract_id(block_codes, name):
        # The tract GEOID is the first 11 characters of the 15-character block
        # code. When the codes were parsed as integers the leading zero of
        # state codes below 10 is lost, so pad back to 15 before slicing.
        return (block_codes
                .astype(str)
                .str.zfill(15)
                .str.slice(start = 0, stop = 11)
                .rename(name))

    # Group on derived key Series instead of writing new columns into the
    # caller's frame.
    work_tract = tract_id(LODES["w_geocode"], "w_GEOID")
    home_tract = tract_id(LODES["h_geocode"], "h_GEOID")

    return LODES.groupby([work_tract, home_tract]).agg(total_jobs = ("S000", "sum")).reset_index()

In [6]:
def polygons_to_home_tracts(polygons, tracts, LODES_tract, crs, polygon_id):
    """Interpolates tract-to-tract job counts onto polygons, keyed by home tract

    Parameters
    ----------
    polygons : target polygon layer, passed to ``areal.calculate_areal_weights``.
    tracts : census tract layer, passed to ``areal.calculate_areal_weights``.
    LODES_tract : DataFrame with "w_GEOID", "h_GEOID" and "total_jobs" columns
        (the output of aggregate_LODES_at_tract()).
    crs : not used by this function; kept so existing calls keep working.
    polygon_id : name of the column that identifies each polygon.

    Returns
    -------
    DataFrame with columns polygon_id, "h_GEOID", "intersection_weight",
    "total_jobs" and "jobs_interpolated" (intersection_weight * total_jobs).
    """

    # Use the layers handed in by the caller rather than notebook globals.
    # The result is expected to carry polygon_id, "GEOID" and
    # "intersection_weight" columns, which are used below.
    intersection_weights = areal.calculate_areal_weights(polygons, tracts, polygon_id)

    # Attach each work tract's polygon weights to its origin-destination rows
    LODES_weights = LODES_tract.merge(intersection_weights, left_on = "w_GEOID", right_on = "GEOID")

    LODES_weights["jobs_interpolated"] = LODES_weights["intersection_weight"] * LODES_weights["total_jobs"]

    # Groups home tracts by polygon, accounting for areal weights defined at the work tract level
    # NOTE(review): because the weight and job count are part of the grouping
    # key and the aggregate is a mean, rows from different work tracts that
    # share the same (polygon, home tract, weight, jobs) collapse into one row
    # instead of being summed - confirm this is intended.
    polygon_home_tracts = (LODES_weights
                           .groupby([polygon_id, "h_GEOID", "intersection_weight", "total_jobs"])
                           .agg(jobs_interpolated = ("jobs_interpolated", "mean"))
                           .reset_index())

    return polygon_home_tracts

In [39]:
def polygon_SVI_weights(polygon_home_tracts, SVI, polygon_id):
    """Joins the polygons_to_home_tracts() output to SVI and derives per-polygon job weights

    Each home-tract row is matched to its SVI record ("h_GEOID" against
    "FIPS"). The interpolated jobs are totalled per polygon, and every row
    receives a "jobs_weight" equal to its share of that total.
    """

    scored = polygon_home_tracts.merge(SVI, left_on = "h_GEOID", right_on = "FIPS")

    jobs_per_polygon = (scored
                        .groupby(polygon_id)["jobs_interpolated"]
                        .sum()
                        .rename("job_weight_denom")
                        .reset_index())

    weighted = jobs_per_polygon.merge(scored)

    # Shares sum to 1 inside each polygon, so a weighted mean is just a sum of products
    weighted["jobs_weight"] = weighted["jobs_interpolated"].div(weighted["job_weight_denom"])

    return weighted

In [40]:
def aggregate_SVI_to_polygons(polygons_SVI, polygon_id = None):
    """Uses weights to calculate weighted means for each SVI measure and output final dataset

    Parameters
    ----------
    polygons_SVI : DataFrame with the polygon id column, a "jobs_weight"
        column and the five "SPL_THEME*" score columns (the output of
        polygon_SVI_weights()). It is not modified.
    polygon_id : name of the polygon id column. When omitted, the
        module-level ``polygon_id`` is used, which is what this function
        relied on before the parameter existed.

    Returns
    -------
    DataFrame with one row per polygon and the columns polygon_id, SVI_total,
    SVI_SES, SVI_household, SVI_race and SVI_housing_transport, each the sum of
    score * jobs_weight within the polygon.

    Raises
    ------
    NameError if polygon_id is omitted and no module-level ``polygon_id`` exists.
    """

    if polygon_id is None:
        # Backward compatibility with callers that only set the global.
        try:
            polygon_id = globals()["polygon_id"]
        except KeyError:
            raise NameError("polygon_id was not passed and no module-level "
                            "polygon_id is defined") from None

    output_names = {"SPL_THEMES": "SVI_total",
                    "SPL_THEME1": "SVI_SES",
                    "SPL_THEME2": "SVI_household",
                    "SPL_THEME3": "SVI_race",
                    "SPL_THEME4": "SVI_housing_transport"}

    # Build the weighted scores in a separate frame so the caller's frame does
    # not gain helper columns.
    weighted = polygons_SVI[[polygon_id]].copy()
    for theme, output_name in output_names.items():
        weighted[output_name] = polygons_SVI[theme] * polygons_SVI["jobs_weight"]

    out = weighted.groupby(polygon_id).sum().reset_index()

    return out

In [50]:
def main():
    """Command-line entry point: computes job-weighted SVI scores per polygon and writes them to CSV

    Reads the LODES origin-destination table, the SVI table, the census tract
    geometries and the target polygons from the paths given on the command
    line, runs the prep, interpolation and aggregation steps, and writes the
    result to --output_path.
    """
    # aggregate_SVI_to_polygons() resolves `polygon_id` at module level, so the
    # command-line value is published there (see the assignment below).
    global polygon_id

    parser = argparse.ArgumentParser("Process stops")
    parser.add_argument("--LODES_path", required=True)
    parser.add_argument("--SVI_path", required=True)
    parser.add_argument("--census_geo_path", required=True)
    parser.add_argument("--polygon_path", required=True)
    parser.add_argument("--crs", required=True)
    parser.add_argument("--output_path", required=True)
    # Optional so existing invocations keep working; "school_dist" is the id
    # column used by the interactive cells of this notebook.
    parser.add_argument("--polygon_id", default="school_dist")

    opts = parser.parse_args()
    LODES_path = opts.LODES_path
    SVI_path = opts.SVI_path
    census_geo_path = opts.census_geo_path
    polygon_path = opts.polygon_path
    crs = opts.crs  # honour the requested crs instead of overriding it
    output_path = opts.output_path
    polygon_id = opts.polygon_id

    # Geocodes and FIPS are identifiers, not numbers: read them as strings so
    # leading zeros survive.
    LODES = pd.read_csv(LODES_path, dtype = {"w_geocode" : "str", "h_geocode" : "str"})
    SVI = pd.read_csv(SVI_path, dtype = {"FIPS" : "str"})
    census_geo = geopd.read_file(census_geo_path)
    polygons = geopd.read_file(polygon_path)

    SVI = prep_SVI(SVI)
    LODES_tract = aggregate_LODES_at_tract(LODES)
    census_geo, polygons = prep_geo(census_geo, polygons, crs)

    polygon_home_tracts = polygons_to_home_tracts(polygons, census_geo, LODES_tract, crs, polygon_id)
    polygons_SVI = polygon_SVI_weights(polygon_home_tracts, SVI, polygon_id)
    out = aggregate_SVI_to_polygons(polygons_SVI)

    out.to_csv(output_path)
    

In [49]:
# Script entry point: main() reads all of its inputs from command-line options.
# NOTE(review): inside a notebook kernel __name__ is presumably "__main__" as
# well, in which case running this cell makes main() parse the kernel's own
# arguments - confirm before executing it interactively.
if __name__ == "__main__":
    main()

In [19]:
# python3 -m analysis.src.features.jobs_vulnerability --LODES_path "/home/data/census/nyc/LODES/ny_od_main_JT01_2019.csv" --SVI_path "/home/data/social_vulnerability_index/SVI2020_US.csv" --census_geo_path "/home/data/census/nyc/geo/tracts.geojson" --polygon_path "/home/data/osm/nyc/walksheds/transit_walkshed.geojson" --crs "EPSG:2263" --output_path "test.csv"  

In [20]:
# Tract-level origin-destination job counts used by the interactive checks below.
# NOTE(review): in the visible cells `LODES` is only assigned inside main(), so
# this cell relies on a frame that is already loaded in the kernel.
LODES_tract = aggregate_LODES_at_tract(LODES)

In [21]:
# Interpolate the tract-level job counts onto school districts.
# NOTE(review): `school_dis` and `census_geo` are not assigned at module level
# in the visible cells; they must already be loaded in the kernel.
polygon_home_tracts = polygons_to_home_tracts(polygons = school_dis,
                                tracts = census_geo,
                                LODES_tract = LODES_tract,
                                crs = "EPSG:2263",
                                polygon_id = "school_dist")

In [33]:
# Name of the polygon identifier column; aggregate_SVI_to_polygons() looks this
# name up at module level.
polygon_id = "school_dist"


Unnamed: 0,school_dist,SVI_total,SVI_SES,SVI_household,SVI_race,SVI_housing_transport
0,1,9.098975,2.908573,2.444598,0.73119,3.01415
1,10,9.611482,3.035185,2.810698,0.773123,2.9922
2,11,9.651711,3.067408,2.829901,0.790071,2.964153
3,12,9.812808,3.15634,2.848144,0.799891,3.008917
4,13,8.900863,2.79799,2.508631,0.732967,2.861141
5,14,9.160417,2.984758,2.500163,0.693868,2.981973
6,15,8.95791,2.831287,2.515359,0.70531,2.905881
7,16,9.098867,2.897542,2.54179,0.778218,2.881296
8,17,9.024528,2.855954,2.525491,0.745769,2.897217
9,18,8.850462,2.766681,2.556201,0.75469,2.772483


In [160]:
# Spot check: every home tract that sends workers to a single work tract.
LODES_tract[LODES_tract["w_GEOID"] == "36119005500"]

Unnamed: 0,w_GEOID,h_GEOID,total_jobs
2204895,36119005500,36001001400,1
2204896,36119005500,36001013200,1
2204897,36119005500,36001013602,1
2204898,36119005500,36001014201,2
2204899,36119005500,36001014202,1
...,...,...,...
2206204,36119005500,36119015000,3
2206205,36119005500,36119983000,1
2206206,36119005500,36121970600,1
2206207,36119005500,36121970700,2


In [176]:
# Unweighted mean of SPL_THEMES per school district over the joined rows -
# presumably a baseline to compare against the job-weighted scores.
# NOTE(review): `LODES_weights` is only a local inside polygons_to_home_tracts()
# in the visible cells, so this cell depends on leftover kernel state.
LODES_weights.merge(SVI, left_on = "h_GEOID", right_on = "FIPS").groupby("school_dist").agg(mean = ("SPL_THEMES", "mean"))

Unnamed: 0_level_0,mean
school_dist,Unnamed: 1_level_1
1,8.76824
10,9.245302
11,9.111703
12,9.344145
13,8.622873
14,8.843714
15,8.769234
16,8.981353
17,8.842419
18,8.743035


In [153]:
# NOTE(review): `average_SVI_to_polygons` is not defined in the visible cells -
# presumably an earlier version of the aggregation step; restore its definition
# or remove this cell so the notebook runs on a fresh kernel.
average_SVI_to_polygons(LODES_weights, SVI, "school_dist")

Unnamed: 0,school_dist,numerator,denominator,SVI_estimate
0,1,220130.2,24188.28,9.100698
1,10,564683.9,58484.96,9.655199
2,11,522352.8,53936.0,9.684678
3,12,139994.4,14267.05,9.812427
4,13,1010214.0,113486.7,8.9016
5,14,609027.0,66625.42,9.141061
6,15,750753.5,83828.37,8.955841
7,16,97080.76,10666.42,9.101529
8,17,230203.1,25469.73,9.038302
9,18,256075.5,28882.91,8.865987
