In [1]:
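# NORI: composite tract-level index built from health and economic-mobility predictions
#
# Loads the health and economic-mobility model predictions, normalizes them,
# combines them into the NORI score, assigns quartile-based priority tiers, and
# writes the result to a CSV file and to a new layer of the NYC tracts GeoPackage.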

In [29]:
# package imports
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
from sklearn.preprocessing import MinMaxScaler

# input / output locations, all relative to the project root
path_nyc_tracts = 'data/processed/nyc_tracts.gpkg'
path_model_features = 'data/processed/master_features.csv'
path_health_predictions = 'data/processed/predictions_health.csv'
path_mobility_predictions = 'data/processed/predictions_econ_mobility.csv'
path_output_nori = 'data/processed/tract_nori.csv'
output_gpkg_layer = 'nori'  # name of the layer written into the tracts GeoPackage


# The paths above are relative, so the working directory must be the project
# root. The root is taken to be the nearest directory, starting at the current
# one and moving upwards, that contains a "data" folder.
start_dir = Path(os.getcwd())
project_root = next(
    (candidate for candidate in (start_dir, *start_dir.parents)
     if (candidate / "data").exists()),
    None,
)
if project_root is None:
    # Fail loudly instead of searching forever: the parent of the filesystem
    # root is the root itself, so an unbounded upward walk would never stop.
    raise FileNotFoundError(
        f'could not find a "data" directory in {start_dir} or any of its parents'
    )
os.chdir(project_root)  # make the relative paths above resolve correctly

In [30]:
# read the predictions written by the health and the mobility models
df_health_preds, df_mobility_preds = (
    pd.read_csv(csv_path)
    for csv_path in (path_health_predictions, path_mobility_predictions)
)

# read the 'tracts' layer of the NYC GeoPackage; NORI is added to this file
# as a separate layer further down
gdf_nyc_tracts = gpd.read_file(path_nyc_tracts, layer='tracts')

In [31]:
# Combine both sets of predictions into a single frame keyed by GEOID.
#
# The two prediction frames use the same column names, so each one gets a
# suffix before merging. The suffixes are applied inside the chain rather than
# by reassigning df_health_preds / df_mobility_preds: that keeps this cell
# safe to re-run (reassigning would add the suffix a second time and break the
# merge keys).
#
# how='left' keeps every row of the mobility predictions; rows without a
# matching health prediction get NaN in the *_health columns.
df_nori = (
    df_mobility_preds.add_suffix('_mobility')
    .merge(
        df_health_preds.add_suffix('_health'),
        how='left',
        left_on='GEOID_mobility',
        right_on='GEOID_health',
    )
    # only one GEOID col needed: drop one, rename the other
    .drop(columns=['GEOID_health'])
    .rename(columns={'GEOID_mobility': 'GEOID'})
)

In [32]:
# preview the first rows of the merged health + mobility predictions
df_nori.head()

Unnamed: 0,GEOID,actual_mobility,predicted_mobility,residual_mobility,actual_health,predicted_health,residual_health
0,36085024402,0.595816,0.584794,0.011023,0.284,0.281582,0.002418
1,36085027705,0.597892,0.560722,0.03717,0.281,0.276005,0.004995
2,36085012806,0.525586,0.520427,0.005159,0.328,0.317195,0.010805
3,36047024400,0.509934,0.476076,0.033858,0.242,0.259259,-0.017259
4,36047023000,0.426163,0.412003,0.01416,0.29,0.295527,-0.005527


### 2. Calculate NORI

**A. Normalize Predictions**

Although both health and mobility predictions share the same units, we apply the MinMaxScaler so that each is rescaled to the same [0, 1] range. Otherwise, the variable with the wider spread of values would dominate the composite NORI index. Rescaling puts both variables on a common footing so that they contribute comparably.

In [33]:
# min-max scale each prediction column into its own '<column>_norm' column;
# the same scaler object is refit for every column
scaler = MinMaxScaler()

for source_col in ('predicted_health', 'predicted_mobility'):
    df_nori[f'{source_col}_norm'] = scaler.fit_transform(df_nori[[source_col]])

Currently, our model predictions point in two different directions:

**RISK:** High health outcome value means worse health outcome (i.e. higher prevalence of obesity)

**ADVANTAGE:** High mobility outcome value means better long-run economic mobility (i.e. income rank of children from low-income families) 

We need both scores to point in the same conceptual direction so that when combined, they either measure a collective RISK, or a collective ADVANTAGE. Since NORI is a measure of prioritization for policy intervention, we would like a high NORI value to indicate high need. For this reason, we will invert our mobility score so it becomes "mobility risk".

In [34]:
# invert normalized mobility (1 - value) so that larger numbers mean higher risk
df_nori['predicted_mobility_risk_norm'] = df_nori['predicted_mobility_norm'].rsub(1)

In [35]:
# preview the normalized columns and the inverted mobility risk
df_nori.head()

Unnamed: 0,GEOID,actual_mobility,predicted_mobility,residual_mobility,actual_health,predicted_health,residual_health,predicted_health_norm,predicted_mobility_norm,predicted_mobility_risk_norm
0,36085024402,0.595816,0.584794,0.011023,0.284,0.281582,0.002418,0.3939,0.815935,0.184065
1,36085027705,0.597892,0.560722,0.03717,0.281,0.276005,0.004995,0.378351,0.760169,0.239831
2,36085012806,0.525586,0.520427,0.005159,0.328,0.317195,0.010805,0.493186,0.666817,0.333183
3,36047024400,0.509934,0.476076,0.033858,0.242,0.259259,-0.017259,0.331664,0.56407,0.43593
4,36047023000,0.426163,0.412003,0.01416,0.29,0.295527,-0.005527,0.432777,0.415632,0.584368


**B. Composite Index**

For now, we simply take the unweighted mean of the two normalized risk scores. This is a simple and straightforward approach; other composite methods (e.g. weighted contributions) will be used later.

In [36]:
# NORI = unweighted average of normalized health risk and mobility risk;
# a missing value in either component yields a missing NORI
df_nori['NORI'] = (
    df_nori['predicted_health_norm']
    .add(df_nori['predicted_mobility_risk_norm'])
    .div(2)
)

**C. Simple Priority Tiers**

In [37]:
# bucket NORI into quantile-based tiers, one tier per label, ordered from the
# lowest scores ('Low') to the highest ('Critical')
priority_labels = ['Low', 'Moderate', 'High', 'Critical']
df_nori['priority'] = pd.qcut(
    x=df_nori['NORI'],
    q=len(priority_labels),
    labels=priority_labels,
)

In [38]:
# preview the NORI score and the priority tier assigned to each row
df_nori.head()

Unnamed: 0,GEOID,actual_mobility,predicted_mobility,residual_mobility,actual_health,predicted_health,residual_health,predicted_health_norm,predicted_mobility_norm,predicted_mobility_risk_norm,NORI,priority
0,36085024402,0.595816,0.584794,0.011023,0.284,0.281582,0.002418,0.3939,0.815935,0.184065,0.288983,Low
1,36085027705,0.597892,0.560722,0.03717,0.281,0.276005,0.004995,0.378351,0.760169,0.239831,0.309091,Moderate
2,36085012806,0.525586,0.520427,0.005159,0.328,0.317195,0.010805,0.493186,0.666817,0.333183,0.413184,High
3,36047024400,0.509934,0.476076,0.033858,0.242,0.259259,-0.017259,0.331664,0.56407,0.43593,0.383797,Moderate
4,36047023000,0.426163,0.412003,0.01416,0.29,0.295527,-0.005527,0.432777,0.415632,0.584368,0.508572,Critical


### 3. Output Data

In [39]:
# output NORI index as csv: every column of df_nori is written (raw predictions,
# normalized scores, NORI and priority); the row index is left out
df_nori.to_csv(path_output_nori, index=False)

In [41]:
# add NORI predictions to nyc tracts gpkg as new layer
nori_layer_cols = [
    'GEOID',
    'predicted_health_norm',
    'predicted_mobility_norm',
    'predicted_mobility_risk_norm',
    'NORI',
    'priority',
]

# GEOID is cast to an explicit 64-bit integer on both sides of the join:
# tract GEOIDs such as 36085024402 do not fit in 32 bits, and a bare `int`
# resolves to a 32-bit integer on some platforms (Windows with NumPy < 2).
# astype() on the column selection returns a new frame, so nothing is assigned
# onto a slice of df_nori.
df_nori = df_nori[nori_layer_cols].astype({
    'GEOID': 'int64',
    # store the tier as plain strings rather than a pandas categorical
    # NOTE(review): categorical columns may not be supported by every
    # geopandas file-writing engine -- confirm whether this cast is needed
    'priority': object,
})

# keep only the key and geometry from the tracts layer, then attach the NORI
# columns; how='left' keeps every tract, with NaN where no NORI row matches
gdf_nyc_tracts = (
    gdf_nyc_tracts[['GEOID', 'geometry']]
    .assign(GEOID=lambda tracts: tracts['GEOID'].astype('int64'))
    .merge(df_nori, how='left', on='GEOID')
)

# written into the same GeoPackage the tracts were read from, under its own layer name
gdf_nyc_tracts.to_file(path_nyc_tracts, layer=output_gpkg_layer)