In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.linear_model import ElasticNet, ElasticNetCV, LassoCV, Lasso
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
from sklearn.preprocessing import StandardScaler, RobustScaler, scale
import seaborn as sns 

In [2]:
tracts_deserts_path = 'sf_desert_tracts.csv'
place_abbr = 'sf'
save_data_name = place_abbr+'_zcta_health.csv'
save_transf_data_name = place_abbr+'_zcta_health_logt.csv'
save_results_name = place_abbr+'_zcta_lasso_results.csv'

# Aggregation/Cleaning

In [3]:
tracts_deserts = pd.read_csv(tracts_deserts_path, dtype={'GEOID':'str'})

In [4]:
tracts_deserts

Unnamed: 0,food_closest_travel_times,physical_closest_dist,transport_closest_dist,education_closest_travel_times,worship_closest_travel_times,GEOID
0,55.082780,0.024852,0.000259,0.000000,116.293950,06075010100
1,104.510900,0.106618,0.000181,109.609030,63.006050,06075010200
2,97.889350,0.000234,0.210090,16.621742,74.510390,06075010300
3,125.605340,0.046652,0.046652,66.053140,50.311584,06075010400
4,121.480840,0.000211,0.000211,87.040130,159.234540,06075010500
...,...,...,...,...,...,...
192,84.441340,0.285294,0.015445,133.434310,55.752556,06075980401
193,250.735890,0.000360,0.000360,54.229710,145.571400,06075980501
194,280.894650,0.279868,0.372384,159.898420,96.200000,06075980600
195,62.344883,0.118217,0.155635,99.950930,99.950930,06075980900


In [5]:
zip_to_tract = pd.read_csv("zcta_to_tract10.csv", dtype=str)
zip_to_tract = zip_to_tract[['GEOID', 'ZCTA5']]

In [6]:
tracts_zcta_deserts = pd.merge(tracts_deserts, zip_to_tract, on='GEOID').groupby('ZCTA5', as_index=False).median()

In [7]:
zcta_health = pd.read_csv("PLACES__ZCTA_Data__GIS_Friendly_Format___2021_release.csv", dtype={'ZCTA5':'str', 'GEOID':'str'})
zcta_health = pd.merge(zcta_health, tracts_zcta_deserts, on='ZCTA5')
zcta_health.to_csv(save_data_name, index=False)

In [8]:
desert_measures = ['food_closest_travel_times', 'physical_closest_dist', 'transport_closest_dist', 'education_closest_travel_times', 'worship_closest_travel_times']

zcta_health_logt = zcta_health.copy()
zcta_health_logt[desert_measures] = zcta_health_logt[desert_measures].apply(lambda x: np.log(x+1))
zcta_health_logt.to_csv(save_transf_data_name, index=False)

# Analysis

In [9]:
name_mapping = {'access2': 'Health insurance access', 'arthritis': 'Arthritis prevalence', 'binge': 'Binge drinking prevalence',
               'bphigh': 'High blood pressure prevalence', 'bpmed': 'Medium blood pressure prevalence', 'cancer': 'Cancer prevalence',
               'casthma': 'Asthma prevalence', 'cervical': 'Cervical cancer screenings', 'chd': 'Coronary heart disease prevalence',
               'checkup': 'Routine checkups', 'cholscreen': 'Cholesterol screenings', 'colon_screen': 'Colon cancer screenings',
               'copd': 'COPD prevalence', 'corem': 'Core men\'s health', 'corew': 'Core women\'s health', 'csmoking': 'Smoking prevalence',
               'dental': 'Dental checkups', 'depression': 'Depression prevalence', 'diabetes': 'Diabetes prevalence', 'ghlth': 'General poor health prevalence',
               'highchol': 'High cholesterol prevalence', 'kidney': 'Chronic kidney disease', 'lpa': 'No physical activity', 'mammouse': 'Mammograms',
               'mhlth': 'Poor mental health prevalence', 'obesity': 'Obesity prevalence', 'phlth': 'Poor physical health', 'sleep': 'Poor sleep prevalence',
               'stroke': 'Stroke prevalence', 'teethlost': 'Teeth loss prevalence'}

results_nan = pd.DataFrame({'Health condition': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'Food': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')],
                       'Physical health': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'Public transport': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')],
                        'Education': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'Houses of worship': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')],
                       'RSquared': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')], 'MSE': [np.nan for c in zcta_health.columns if c.endswith('CrudePrev')]})

desert_measures = ['food_closest_travel_times', 'physical_closest_dist', 'transport_closest_dist', 'education_closest_travel_times', 'worship_closest_travel_times']

In [10]:
zcta_health

Unnamed: 0,ZCTA5,TotalPopulation,ACCESS2_CrudePrev,ACCESS2_Crude95CI,ARTHRITIS_CrudePrev,ARTHRITIS_Crude95CI,BINGE_CrudePrev,BINGE_Crude95CI,BPHIGH_CrudePrev,BPHIGH_Crude95CI,...,STROKE_CrudePrev,STROKE_Crude95CI,TEETHLOST_CrudePrev,TEETHLOST_Crude95CI,Geolocation,food_closest_travel_times,physical_closest_dist,transport_closest_dist,education_closest_travel_times,worship_closest_travel_times
0,94108,13768,12.7,"(11.7, 13.7)",20.1,"(19.7, 20.6)",15.8,"(15.6, 16.0)",30.0,"(29.4, 30.5)",...,3.7,"( 3.5, 4.0)",15.7,"(12.4, 18.9)",POINT (-122.4085753 37.79200744),47.9299,0.108287,0.000245,37.29907,40.42915
1,94110,69333,12.0,"(11.4, 12.8)",14.5,"(14.3, 14.6)",23.2,"(23.0, 23.4)",20.6,"(20.4, 20.9)",...,1.9,"( 1.8, 1.9)",6.8,"( 6.2, 7.5)",POINT (-122.41533 37.74996907),58.330529,0.178051,0.119997,38.645458,41.4608
2,94134,40798,16.0,"(15.3, 16.7)",18.2,"(18.0, 18.5)",14.8,"(14.7, 15.0)",28.5,"(28.2, 28.8)",...,3.1,"( 3.0, 3.2)",11.6,"(10.6, 12.6)",POINT (-122.4117932 37.71934056),194.395235,0.198447,0.104342,41.504357,74.5136
3,94122,56023,8.7,"( 8.1, 9.2)",16.1,"(15.9, 16.3)",18.5,"(18.3, 18.7)",23.2,"(22.9, 23.5)",...,2.1,"( 2.1, 2.2)",6.9,"( 5.9, 7.9)",POINT (-122.4851275 37.75879608),76.755385,0.278228,0.094322,44.899738,39.102986
4,94109,55984,9.5,"( 9.0, 10.0)",16.5,"(16.3, 16.7)",21.2,"(21.0, 21.4)",23.1,"(22.9, 23.3)",...,2.3,"( 2.2, 2.3)",6.9,"( 6.1, 7.9)",POINT (-122.4220985 37.7942243),44.989017,0.14662,0.06518,44.378593,48.890266
5,94115,33021,8.0,"( 7.5, 8.5)",17.4,"(17.2, 17.7)",21.4,"(21.2, 21.6)",23.8,"(23.5, 24.0)",...,2.4,"( 2.3, 2.5)",8.7,"( 7.3, 10.2)",POINT (-122.4372532 37.78596963),42.553627,0.156236,0.104649,33.29422,35.068043
6,94129,3183,6.5,"( 5.3, 8.5)",9.9,"( 9.4, 10.6)",30.0,"(28.7, 31.3)",13.4,"(12.7, 14.3)",...,0.9,"( 0.9, 1.1)",4.1,"( 2.5, 7.7)",POINT (-122.4667497 37.79737203),240.17874,0.059068,0.08922,177.91356,81.030556
7,94104,406,11.9,"( 9.2, 14.6)",21.4,"(20.4, 22.6)",14.8,"(14.3, 15.3)",31.9,"(30.6, 33.4)",...,3.5,"( 3.1, 4.0)",11.3,"( 6.8, 16.6)",POINT (-122.4021298 37.7914094),49.65654,0.170669,0.000219,37.29907,49.65654
8,94133,26237,12.5,"(11.4, 13.6)",20.6,"(20.2, 21.1)",16.3,"(16.0, 16.5)",30.1,"(29.6, 30.6)",...,3.6,"( 3.4, 3.8)",13.0,"(10.3, 15.7)",POINT (-122.4108828 37.80279141),62.58229,0.099149,0.046652,61.164684,50.311584
9,94114,31124,6.1,"( 5.8, 6.6)",15.5,"(15.2, 15.7)",25.2,"(24.9, 25.5)",20.6,"(20.3, 21.0)",...,1.7,"( 1.6, 1.7)",4.1,"( 3.6, 5.0)",POINT (-122.4320803 37.75873612),86.04483,0.209229,0.110116,48.119775,53.364368


In [11]:
alphas = []
l1_ratios = []
i=0
results = results_nan.copy()

plt.figure(figsize=(20, 15))
plt.subplots_adjust(hspace=0.5)

for c in zcta_health.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = np.log(zcta_health[desert_measures].to_numpy(copy=True)+1)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = zcta_health[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        data_centered = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.25, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        enet_cv = ElasticNetCV(l1_ratio = [.1, .5, .7, .9, .95, .99, 1], 
                                     cv = 10, normalize=True).fit(X_train,y_train)
        
        alpha = enet_cv.alpha_
        l1 = enet_cv.l1_ratio_
        alphas.append(alpha)
        l1_ratios.append(l1)
        
        regr = ElasticNet(alpha=alpha, l1_ratio = l1, normalize=True)  # Could try others, or other parameters?
        regr.fit(X_train, y_train.reshape(-1, 1))
        
        predictions = regr.predict(X_test)
        y_train_pred = regr.predict(X_train)
        mse_test = mean_squared_error(y_test, predictions)
        
        results.iat[i, 0] = name
        results.iat[i, 1] = regr.coef_[1]
        results.iat[i, 2] = regr.coef_[3]
        results.iat[i, 3] = regr.coef_[4]
        results.iat[i, 4] = regr.coef_[0]
        results.iat[i, 5] = regr.coef_[2]
        results.iat[i, 6] = regr.score(X_test, y_test)
        results.iat[i, 7] = mse_test
            
        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4})
results_round.sort_values(by='RSquared', ascending=False).reset_index(drop=True)

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Mammograms,-0.2517,0.2352,0.0,0.0,0.339288,0.1148,4.237889
1,Obesity prevalence,-0.0,0.0,0.0,0.0,-0.0,-0.0104,4.153225
2,Poor sleep prevalence,0.0,0.0,0.0,0.0,-0.0,-0.0146,3.927596
3,Cancer prevalence,0.0,-0.4948,-0.2005,-0.0,0.0,-0.0193,1.206981
4,Core women's health,-0.0,-0.0,-0.0,-0.0,0.0,-0.0401,17.727882
5,Health insurance access,0.0,0.0,0.0,0.0,-0.0,-0.0854,11.644286
6,Smoking prevalence,0.0,0.0,0.0,0.0,-0.0,-0.113,4.403314
7,Poor mental health prevalence,-0.0,0.0134,0.036,0.034,-0.0,-0.1379,1.686471
8,Dental checkups,-0.0,-0.0,-0.0,-0.0,0.0,-0.1385,58.554743
9,Core men's health,-0.0,-0.0,-0.0,-0.0,0.0,-0.1453,18.663529


<Figure size 1440x1080 with 0 Axes>

In [12]:
alphas = []
l1_ratios = []
i=0
results = results_nan.copy()

plt.figure(figsize=(20, 15))
plt.subplots_adjust(hspace=0.5)

for c in zcta_health.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = zcta_health[desert_measures].to_numpy(copy=True)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = zcta_health[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        data_centered = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.25, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        enet_cv = ElasticNetCV(l1_ratio = [.1, .5, .7, .9, .95, .99, 1], 
                                     cv = 10, normalize=True).fit(X_train,y_train)
        
        alpha = enet_cv.alpha_
        l1 = enet_cv.l1_ratio_
        alphas.append(alpha)
        l1_ratios.append(l1)
        
        regr = ElasticNet(alpha=alpha, l1_ratio = l1, normalize=True)  # Could try others, or other parameters?
        regr.fit(X_train, y_train.reshape(-1, 1))
        
        predictions = regr.predict(X_test)
        y_train_pred = regr.predict(X_train)
        mse_test = mean_squared_error(y_test, predictions)
        
        results.iat[i, 0] = name
        results.iat[i, 1] = regr.coef_[1]
        results.iat[i, 2] = regr.coef_[3]
        results.iat[i, 3] = regr.coef_[4]
        results.iat[i, 4] = regr.coef_[0]
        results.iat[i, 5] = regr.coef_[2]
        results.iat[i, 6] = regr.score(X_test, y_test)
        results.iat[i, 7] = mse_test
            
        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4})
results_round.sort_values(by='RSquared', ascending=False).reset_index(drop=True)

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Mammograms,-0.1983,0.1945,0.0557,0.0125,0.243115,0.0619,4.491207
1,Obesity prevalence,-0.0,0.0,0.0,0.0,-0.0,-0.0104,4.153225
2,Poor sleep prevalence,0.0,0.0,0.0,0.0,-0.0,-0.0146,3.927596
3,Cancer prevalence,0.1516,-0.2951,-0.267,-0.1762,0.016029,-0.0358,1.22649
4,Core women's health,-0.0,0.0,-0.0,-0.0,0.0,-0.0401,17.727882
5,Health insurance access,0.0,0.0,0.0,0.0,-0.0,-0.0854,11.644286
6,Poor mental health prevalence,-0.0,0.0,0.0,0.1004,-0.0,-0.0951,1.622925
7,Smoking prevalence,0.0,0.0,0.0,0.0,-0.0,-0.113,4.403314
8,Dental checkups,-0.0,-0.0,-0.0,-0.0,0.0,-0.1385,58.554743
9,Arthritis prevalence,0.2425,-1.2302,-0.4712,-0.0,-0.0,-0.1436,14.954678


<Figure size 1440x1080 with 0 Axes>

In [13]:
alphas = []
l1_ratios = []
i=0
results = results_nan.copy()

plt.figure(figsize=(20, 15))
plt.subplots_adjust(hspace=0.5)

for c in zcta_health.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = zcta_health[desert_measures].to_numpy(copy=True)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = zcta_health[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        y = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        lasso_cv = LassoCV(cv = 5, normalize=True).fit(X_train,y_train)
        
        alpha = lasso_cv.alpha_
        alphas.append(alpha)
        
        regr = Lasso(alpha=alpha, normalize=True)  # Could try others, or other parameters?
        regr.fit(X_train, y_train.reshape(-1, 1))
        
        predictions = regr.predict(X_test)
        y_train_pred = regr.predict(X_train)
        mse_test = mean_squared_error(y_test, predictions)
        
        results.iat[i, 0] = name
        results.iat[i, 1] = regr.coef_[1]
        results.iat[i, 2] = regr.coef_[3]
        results.iat[i, 3] = regr.coef_[4]
        results.iat[i, 4] = regr.coef_[0]
        results.iat[i, 5] = regr.coef_[2]
        results.iat[i, 6] = regr.score(X_test, y_test)
        results.iat[i, 7] = mse_test

        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4})
results_round.sort_values(by='RSquared', ascending=False).reset_index(drop=True)

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Mammograms,-0.0641,0.4299,0.0,0.0,0.4035,0.2553,3.587825
1,Medium blood pressure prevalence,0.5107,-4.2644,-2.0108,-0.0,0.0,0.0571,44.989972
2,Routine checkups,0.2758,-1.3134,-1.1271,-0.0,0.150669,0.0138,5.954354
3,Arthritis prevalence,0.0,-1.5566,-0.4056,-0.0,-0.0,-0.0071,13.489827
4,Cholesterol screenings,0.0,-0.0,-0.0,-0.0,0.0,-0.0161,2.127191
5,Asthma prevalence,-0.0,0.0,0.0,0.0357,-0.0,-0.0186,0.529647
6,Cancer prevalence,0.136,-0.4216,-0.5011,-0.0726,0.039362,-0.0304,1.16498
7,Poor mental health prevalence,-0.0,0.0,0.0,0.1135,-0.0,-0.0339,2.175985
8,Binge drinking prevalence,-0.3613,1.7371,0.0,-0.0,-0.0,-0.045,17.492355
9,Cervical cancer screenings,-0.0,0.0,-0.0,-0.0,0.0,-0.0758,24.411852


<Figure size 1440x1080 with 0 Axes>

In [14]:
alphas = []
l1_ratios = []
i=0
results = results_nan.copy()

plt.figure(figsize=(20, 15))
plt.subplots_adjust(hspace=0.5)

for c in zcta_health.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = np.log(zcta_health[desert_measures].to_numpy(copy=True)+1)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = zcta_health[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        y = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        lasso_cv = LassoCV(cv = 5, normalize=True).fit(X_train,y_train)
        
        alpha = lasso_cv.alpha_
        alphas.append(alpha)
        
        regr = Lasso(alpha=alpha, normalize=True)  # Could try others, or other parameters?
        regr.fit(X_train, y_train.reshape(-1, 1))
        
        y_test_pred = regr.predict(X_test)
        y_train_pred = regr.predict(X_train)
        mse_test = mean_squared_error(y_test, y_test_pred)
        
        results.iat[i, 0] = name
        results.iat[i, 1] = regr.coef_[1]
        results.iat[i, 2] = regr.coef_[3]
        results.iat[i, 3] = regr.coef_[4]
        results.iat[i, 4] = regr.coef_[0]
        results.iat[i, 5] = regr.coef_[2]
        results.iat[i, 6] = regr.score(X_test, y_test)
        results.iat[i, 7] = mse_test
            
        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4}).sort_values(by='RSquared', ascending=False).reset_index(drop=True)
results_round

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Mammograms,-0.2085,0.3758,0.0,0.0,0.5345119,0.3224,3.264633
1,Cancer prevalence,0.0,-0.4521,-0.354,-0.0,0.0,0.0681,1.0536
2,Asthma prevalence,-0.0,0.0,0.0,0.0,-0.0,-0.0085,0.524444
3,Poor mental health prevalence,-0.0,0.0,0.0,0.0,-0.0,-0.0132,2.132469
4,Cholesterol screenings,0.0,-0.0,-0.0,-0.0,0.0,-0.0161,2.127191
5,Cervical cancer screenings,-0.0,0.0,-0.0,-0.0,0.0,-0.0758,24.411852
6,Obesity prevalence,-0.0,0.0,0.0,0.0,-0.0,-0.0793,6.883704
7,Poor sleep prevalence,0.0,0.0,0.0,0.0,-0.0,-0.0962,8.887191
8,Core women's health,-0.0,0.0,-0.0,-0.0,0.0,-0.114,19.453457
9,Binge drinking prevalence,-0.4096,1.4853,0.0,-0.0,-0.0,-0.1221,18.783829


<Figure size 1440x1080 with 0 Axes>

In [15]:
alphas = []
l1_ratios = []
i=0
results = results_nan.copy()

plt.figure(figsize=(20, 15))
plt.subplots_adjust(hspace=0.5)

for c in zcta_health.columns:
    if c.endswith('CrudePrev'):
        name = name_mapping[c[:-10].lower()]
        
        x = np.log(zcta_health[desert_measures].to_numpy(copy=True)+1)
        scaler = StandardScaler()
        xscale = scaler.fit_transform(x)
        y = zcta_health[c].to_numpy(copy=True)
        xscale = xscale[~np.isnan(y)]
        y = y[~np.isnan(y)]
        #create function to center data
        center_function = lambda x: x - x.mean()

        #apply function to original NumPy array
        y = center_function(y)
        
        X_train, X_test, y_train, y_test = train_test_split(xscale, 
                                                    y, 
                                                    test_size=0.3, 
                                                    random_state=42)
        
        #l1 ratio is from suggested values in ElasticNetCV documentation
        lasso_cv = LassoCV(cv = 5, normalize=True).fit(X_train,y_train)
        
        alpha = lasso_cv.alpha_
        alphas.append(alpha)
        
        regr = Lasso(alpha=alpha, normalize=True)  # Could try others, or other parameters?
        regr.fit(xscale, y.reshape(-1, 1))
        
        y_pred = regr.predict(xscale)
        mse = mean_squared_error(y, y_pred)
        
        results.iat[i, 0] = name
        results.iat[i, 1] = regr.coef_[1]
        results.iat[i, 2] = regr.coef_[3]
        results.iat[i, 3] = regr.coef_[4]
        results.iat[i, 4] = regr.coef_[0]
        results.iat[i, 5] = regr.coef_[2]
        results.iat[i, 6] = regr.score(xscale, y)
        results.iat[i, 7] = mse
            
        i += 1

results_round = results.round({'Food': 4, 'Physical health':4, 'Public transport':4, 'Education':4,
             'House of worship':4, 'RSquared':4}).sort_values(by='RSquared', ascending=False).reset_index(drop=True)
results_round.to_csv(save_results_name)
results_round

Unnamed: 0,Health condition,Food,Physical health,Public transport,Education,Houses of worship,RSquared,MSE
0,Medium blood pressure prevalence,0.0,-3.5583,-1.8331,-0.0,-0.703299,0.5774,24.641919
1,Routine checkups,0.0,-1.2722,-0.892,0.0,-0.0,0.5465,3.993514
2,Cancer prevalence,-0.0,-0.32,-0.3241,-0.0,-0.0,0.4823,0.695676
3,Arthritis prevalence,0.0,-0.7643,-0.8124,-0.0,-0.552997,0.4484,6.087268
4,High cholesterol prevalence,0.0,-1.3693,-0.8551,-0.0,-0.412408,0.4181,11.080476
5,Coronary heart disease prevalence,0.0,-0.1282,-0.2317,-0.0,-0.412195,0.4074,1.085401
6,Mammograms,-0.204,0.3611,0.0,0.0,0.735508,0.3833,2.02861
7,Stroke prevalence,0.0,-0.066,-0.1296,-0.0,-0.224264,0.3271,0.431311
8,Chronic kidney disease,0.0,-0.0664,-0.0832,-0.0,-0.131,0.3077,0.238622
9,High blood pressure prevalence,0.0,-0.5618,-0.6493,-0.0,-0.609425,0.2546,18.466767


<Figure size 1440x1080 with 0 Axes>