In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn import metrics
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import GridSearchCV


sns.set(style="white")

import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('./df_foodaccess_allmortality_SVI.csv')

## <font color = darkorchid> Feature Selection </font>

In [3]:
# Create feature list with percentage metrics
features_proportional_share = ['lablack10share', 'lablack20share', 
                               'lahisp10share', 'lahisp20share', 
                               'lahunv10share', 'lahunv20share', 
                               'lasnap10share', 'lasnap20share',
                               'lawhite10share', 'lawhite20share',
                               'lalowi10share', 'lalowi20share', 
                               'lapop10share', 'lapop20share',
                               #'GroupQuartersFlag', 
                               #'PovertyRate',
                               #'LILATracts_1And10', 'LILATracts_1And20',
                               #'LILATracts_Vehicle', 'LILATracts_halfAnd10', 
                               #'LowIncomeTracts',
                               #'LA1and10', 'LA1and20', 'LAhalfand10',
                              ]

In [4]:
# Log transformation
for feature in features_proportional_share:
    df[f'{feature} Log10'] = np.log10(df[feature])
    
# Create new Log10 feature list
features_proportional_share_Log10 = [f'{x} Log10' for x in features_proportional_share]

# Convert -inf to 0for log_feature in features_proportional_share_Log10:
for log_feature in features_proportional_share_Log10:
    df.loc[df[log_feature] == -np.inf, log_feature] = np.log(0.0001)

In [5]:
# Set age adjusted rates as targets
target_list = ['Age Adjusted Rate Endocrine',
               'Age Adjusted Rate Obesity', 'Age Adjusted Rate Metabolic', 'Age Adjusted Rate Diabetes', 
               'Age Adjusted Rate Circulatory',
               'Age Adjusted Rate Pulmonary', 'Age Adjusted Rate Arteries', 'Age Adjusted Rate Hypertension',
               'Age Adjusted Rate External',
               'Age Adjusted Rate Assault', 'Age Adjusted Rate Intentional', 'Age Adjusted Rate Transport',
               'Age Adjusted Rate Mental',
               'Age Adjusted Rate Psychoactive',
               'Age Adjusted Rate Neoplasms', 
               'Age Adjusted Rate MalignantDigestive', 'Age Adjusted Rate MalignantBreast', 'Age Adjusted Rate MalignantFemale', 'Age Adjusted Rate MalignantMale', 'Age Adjusted Rate MalignantEndocrine',
               'Age Adjusted Rate All']

In [6]:
# Define function to consolidate r2 & MAE
def make_scores_df(r2_train, r2_test, mae_train, mae_test, model):
    r2 = pd.DataFrame(data = np.array([[np.mean(r2_train)],[np.mean(r2_test)]]), 
                 index = ['R^2 Train', 'R^2 Test'], columns = [model])

    mae = pd.DataFrame(data = np.array([[np.mean(mae_train)],[np.mean(mae_test)]]), 
                 index = ['MAE Train', 'MAE Test'], columns = [model])

    combined_scores = pd.merge(r2, mae, on = model, how = 'outer',
                               left_index = True, right_index = True)
    
    return combined_scores

## <font color = darkorchid> Linear Regression </font>

In [7]:
scores_lin_reg = pd.DataFrame()

for t in target_list:
    # Drop if target value is not available
    df_linear = df.dropna(subset = [t])
    
    # Bootstrap observations
    df_linear_sample = df_linear.sample(n = df_linear.shape[0], replace = True)

    df_linear = pd.concat([df_linear, df_linear_sample])

    # Set features to percentage metrics
    X = df_linear[features_proportional_share_Log10]
    y = df_linear[t]
    
    # Train / Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    
    # StandardScale features
    #ss = StandardScaler()
    #X_train = ss.fit_transform(X_train)
    #X_test = ss.transform(X_test)
    
    ### Linear Regression
    
    # Fit OLS Linear Regression
    lin_reg = linear_model.LinearRegression(n_jobs = -1)
    lin_reg.fit(X_train, y_train)
    
    # Evaluate baseline train scores (r2)
    lin_reg_cv_score = cross_val_score(lin_reg, X_train, y_train , cv=3, scoring= 'r2', n_jobs = -1)
    
    # Evaluate baseline test scores (r2)
    lin_reg_cv_score_test = cross_val_score(lin_reg, X_test, y_test , cv=3, scoring= 'r2', n_jobs = -1)
    
    # Evaluate baseline train scores (MAE)
    lin_reg_cv_score_mae = cross_val_score(lin_reg, X_train, y_train , 
                                           cv=3, scoring= 'neg_mean_absolute_error', n_jobs = -1)
    
    
    # Evaluate baseline test scores (MAE)
    lin_reg_cv_score_mae_test = cross_val_score(lin_reg, X_test, y_test , 
                                                cv=3, scoring= 'neg_mean_absolute_error', n_jobs = -1)
    
    # Create dataframe of scores
    
    if scores_lin_reg.shape[0] == 0:
        scores_lin_reg = make_scores_df(lin_reg_cv_score, lin_reg_cv_score_test, 
                                        lin_reg_cv_score_mae, lin_reg_cv_score_mae_test,
                                        f'Log Linear Regression - {t}')
    else:
        scores_lin_reg_temp = make_scores_df(lin_reg_cv_score, lin_reg_cv_score_test, 
                                             lin_reg_cv_score_mae, lin_reg_cv_score_mae_test,
                                             f'Log Linear Regression - {t}')
        
        scores_lin_reg = pd.concat([scores_lin_reg, scores_lin_reg_temp], axis = 1)

scores_lin_reg.T

Unnamed: 0,MAE Test,MAE Train,R^2 Test,R^2 Train
Log Linear Regression - Age Adjusted Rate Endocrine,-11.9703,-11.225318,0.2006464,0.143241
Log Linear Regression - Age Adjusted Rate Obesity,-19678710000.0,-0.861631,-2.52596e+22,-0.131764
Log Linear Regression - Age Adjusted Rate Metabolic,-2.453518,-2.548962,0.06107496,0.065478
Log Linear Regression - Age Adjusted Rate Diabetes,-9.222473,-8.81582,0.091938,0.213337
Log Linear Regression - Age Adjusted Rate Circulatory,-45.63179,-45.629989,0.1139644,0.169662
Log Linear Regression - Age Adjusted Rate Pulmonary,-3.350013,-2.237147,-0.08621794,0.031992
Log Linear Regression - Age Adjusted Rate Arteries,-5.973244,-5.475674,-0.1906051,0.040363
Log Linear Regression - Age Adjusted Rate Hypertension,-10.27322,-9.740101,0.05816369,0.176077
Log Linear Regression - Age Adjusted Rate External,-20.90251,-20.215868,0.1608626,0.151708
Log Linear Regression - Age Adjusted Rate Assault,-4.19366,-4.296603,-2.057082,-0.149568


## <font color = darkorchid> Ridge Regression </font>

In [8]:
scores_ridge = pd.DataFrame()

for t in target_list:
    # Drop if target value is not available
    df_ridge = df.dropna(subset = [t])
    
    # Bootstrap observations
    df_ridge_sample = df_ridge.sample(n = df_ridge.shape[0], replace = True)

    df_ridge = pd.concat([df_ridge, df_ridge_sample])

    # Set features to percentage metrics
    X = df_ridge[features_proportional_share_Log10]
    y = df_ridge[t]
    
    # Train / Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    
    # StandardScale features
    ss = StandardScaler()
    X_train = ss.fit_transform(X_train)
    X_test = ss.transform(X_test)
    
    ### Ridge Regression CV
    
    # Define log space for possible alphas
    ridge_alphas = np.logspace(0, 5, 250)

    # Instantiate and fit ridge regression
    ridge = linear_model.RidgeCV(alphas=ridge_alphas, cv=3)
    ridge.fit(X_train, y_train)
    
    # Evaluate baseline train scores (MAE)
    ridge_cv_score_mae = cross_val_score(ridge, X_train, y_train , cv=3, 
                                         scoring= 'neg_mean_absolute_error', n_jobs = -1, verbose = 1)
    
    # Evaluate baseline test scores (MAE)
    ridge_cv_score_mae_test = cross_val_score(ridge, X_test, y_test , cv=3, 
                                              scoring= 'neg_mean_absolute_error', n_jobs = -1, verbose = 1)
    
    # Create dataframe of scores
    
    if scores_ridge.shape[0] == 0:
        scores_ridge = make_scores_df(ridge.score(X_train, y_train), ridge.score(X_test, y_test), 
                                      np.mean(ridge_cv_score_mae), np.mean(ridge_cv_score_mae_test),
                                      f'Log Ridge - {t}')
    else:
        scores_ridge_temp = make_scores_df(ridge.score(X_train, y_train), ridge.score(X_test, y_test), 
                                           np.mean(ridge_cv_score_mae), np.mean(ridge_cv_score_mae_test),
                                           f'Log Ridge - {t}')
        
        scores_ridge = pd.concat([scores_ridge, scores_ridge_temp], axis = 1)

scores_ridge.T

[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.5min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.7s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.6s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.6s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.7s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.5s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.1s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.7s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   10.2s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.1s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.3s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.4s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    9.1s finished
[Paral

Unnamed: 0,MAE Test,MAE Train,R^2 Test,R^2 Train
Log Ridge - Age Adjusted Rate Endocrine,-11.323795,-11.33523,0.210922,0.196516
Log Ridge - Age Adjusted Rate Obesity,-0.744592,-0.787173,0.21289,0.187625
Log Ridge - Age Adjusted Rate Metabolic,-2.796444,-2.560464,0.129126,0.116712
Log Ridge - Age Adjusted Rate Diabetes,-8.69587,-9.006685,0.300766,0.271409
Log Ridge - Age Adjusted Rate Circulatory,-47.654106,-44.612916,0.158279,0.200038
Log Ridge - Age Adjusted Rate Pulmonary,-2.591769,-2.29532,0.085266,0.0968
Log Ridge - Age Adjusted Rate Arteries,-5.546729,-4.903017,0.030057,0.09542
Log Ridge - Age Adjusted Rate Hypertension,-11.121742,-10.022309,0.201636,0.191771
Log Ridge - Age Adjusted Rate External,-20.272203,-19.735345,0.125756,0.169313
Log Ridge - Age Adjusted Rate Assault,-3.271761,-3.855001,0.1784,0.120626


## <font color = darkorchid> Lasso Regression </font>

In [9]:
scores_lasso = pd.DataFrame()

for t in target_list:
    # Drop if target value is not available
    df_lasso = df.dropna(subset = [t])
    
    # Bootstrap observations
    df_lasso_sample = df_lasso.sample(n = df_lasso.shape[0], replace = True)

    df_lasso = pd.concat([df_lasso, df_lasso_sample])

    # Set features to percentage metrics
    X = df_lasso[features_proportional_share_Log10]
    y = df_lasso[t]
    
    # Train / Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    
    # StandardScale features
    #ss = StandardScaler()
    #X_train = ss.fit_transform(X_train)
    #X_test = ss.transform(X_test)
    
    ### Lasso Regression CV

    # Instantiate and fit lasso regression
    lasso = linear_model.LassoCV(eps = .001, n_alphas = 500, max_iter = 5000)
    lasso.fit(X_train, y_train)
    
    # Evaluate baseline train scores (MAE)
    lasso_cv_score_mae = cross_val_score(lasso, X_train, y_train , cv=3, verbose = 1,
                                         scoring= 'neg_mean_absolute_error', n_jobs = -1)
    
    # Evaluate baseline test scores (MAE)
    lasso_cv_score_mae_test = cross_val_score(lasso, X_test, y_test , cv=3, verbose = 1,
                                              scoring= 'neg_mean_absolute_error', n_jobs = -1)
    
    # Create dataframe of scores
    
    if scores_lasso.shape[0] == 0:
        scores_lasso = make_scores_df(lasso.score(X_train, y_train), lasso.score(X_test, y_test), 
                                      np.mean(lasso_cv_score_mae), np.mean(lasso_cv_score_mae_test),
                                      f'Log Lasso - {t}' )
    else:
        scores_lasso_temp = make_scores_df(lasso.score(X_train, y_train), lasso.score(X_test, y_test), 
                                           np.mean(lasso_cv_score_mae), np.mean(lasso_cv_score_mae_test),
                                           f'Log Lasso - {t}' )
        
        scores_lasso = pd.concat([scores_lasso, scores_lasso_temp], axis = 1)

scores_lasso.T

[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.7s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.4s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.3s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.1s finished
[Paral

Unnamed: 0,MAE Test,MAE Train,R^2 Test,R^2 Train
Log Lasso - Age Adjusted Rate Endocrine,-10.981724,-11.254541,0.254058,0.191407
Log Lasso - Age Adjusted Rate Obesity,-0.991579,-0.846487,0.207106,0.211911
Log Lasso - Age Adjusted Rate Metabolic,-2.749544,-2.715866,0.17806,0.10436
Log Lasso - Age Adjusted Rate Diabetes,-8.620157,-8.6553,0.193075,0.277622
Log Lasso - Age Adjusted Rate Circulatory,-43.732278,-45.000088,0.167859,0.196876
Log Lasso - Age Adjusted Rate Pulmonary,-2.463155,-2.033632,0.046698,0.044266
Log Lasso - Age Adjusted Rate Arteries,-4.730114,-5.651906,0.077286,0.045861
Log Lasso - Age Adjusted Rate Hypertension,-10.469795,-10.450502,0.195334,0.210033
Log Lasso - Age Adjusted Rate External,-19.496031,-20.153525,0.170264,0.171222
Log Lasso - Age Adjusted Rate Assault,-3.71598,-3.26511,-0.249337,0.183769


## <font color = darkorchid> Elastic Net Regression </font>

In [10]:
scores_enet = pd.DataFrame()

for t in target_list:
    # Drop if target value is not available
    df_enet = df.dropna(subset = [t])
    
    # Bootstrap observations
    df_enet_sample = df_enet.sample(n = df_enet.shape[0], replace = True)

    df_enet = pd.concat([df_enet, df_enet_sample])

    # Set features to percentage metrics
    X = df_enet[features_proportional_share_Log10]
    y = df_enet[t]
    
    # Train / Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    
    # StandardScale features
    #ss = StandardScaler()
    #X_train = ss.fit_transform(X_train)
    #X_test = ss.transform(X_test)
    
    ### Elastic Net CV

    # Instantiate and fit elastic net 
    l1_ratios = np.linspace(0.01, 1.0, 200)

    enet = linear_model.ElasticNetCV(l1_ratio=l1_ratios, n_alphas=200, cv=3, tol = 0.001, 
                                 max_iter = 2000, eps = 0.001)
    enet.fit(X_train, y_train)
    
    # Evaluate baseline train scores (MAE)
    enet_cv_score_mae = cross_val_score(enet, X_train, y_train , cv=3, 
                                        scoring = 'neg_mean_absolute_error', verbose = 1)
    
    # Evaluate baseline test scores (MAE)
    enet_cv_score_mae_test = cross_val_score(enet, X_test, y_test , cv=3, 
                                             scoring = 'neg_mean_absolute_error', verbose = 1)
    
    # Create dataframe of scores
    
    if scores_enet.shape[0] == 0:
        scores_enet = make_scores_df(enet.score(X_train, y_train), enet.score(X_test, y_test), 
                                     np.mean(enet_cv_score_mae), np.mean(enet_cv_score_mae_test),
                                     f'Log Elastic Net - {t}' )
    else:
        scores_enet_temp = make_scores_df(enet.score(X_train, y_train), enet.score(X_test, y_test), 
                                          np.mean(enet_cv_score_mae), np.mean(enet_cv_score_mae_test),
                                          f'Log Elastic Net - {t}' )
        
        scores_enet = pd.concat([scores_enet, scores_enet_temp], axis = 1)

scores_enet.T

[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.0min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   18.0s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  3.0min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.4min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   37.3s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.4min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.2min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   58.7s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.7min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   27.3s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.1min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  1.8min finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   32.5s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:   25.5s finished
[Paral

Unnamed: 0,MAE Test,MAE Train,R^2 Test,R^2 Train
Log Elastic Net - Age Adjusted Rate Endocrine,-11.17853,-11.367771,0.203549,0.187275
Log Elastic Net - Age Adjusted Rate Obesity,-0.651968,-0.85501,-0.179484,0.147833
Log Elastic Net - Age Adjusted Rate Metabolic,-2.734745,-2.52205,0.059851,0.119228
Log Elastic Net - Age Adjusted Rate Diabetes,-9.187708,-9.239442,0.316778,0.217104
Log Elastic Net - Age Adjusted Rate Circulatory,-45.835493,-46.046816,0.176305,0.177996
Log Elastic Net - Age Adjusted Rate Pulmonary,-2.413313,-2.524669,0.130116,0.193213
Log Elastic Net - Age Adjusted Rate Arteries,-5.979737,-5.318093,0.082957,0.161488
Log Elastic Net - Age Adjusted Rate Hypertension,-10.694734,-10.01596,0.257277,0.196581
Log Elastic Net - Age Adjusted Rate External,-19.887796,-19.640866,0.200684,0.184521
Log Elastic Net - Age Adjusted Rate Assault,-3.665302,-3.333411,0.039422,0.147304


## <font color = darkorchid> Bayesian Ridge </font>

In [11]:
scores_bayridge = pd.DataFrame()

for t in target_list:

    # Drop if target value is not available
    df_bayridge = df.dropna(subset = [t])

    # Bootstrap observations
    df_bayridge_sample = df_bayridge.sample(n = df_bayridge.shape[0], replace = True)

    df_bayridge = pd.concat([df_bayridge, df_bayridge_sample])

    # Set features to percentage metrics
    X = df_bayridge[features_proportional_share_Log10]
    y = df_bayridge[t]

    # Train / Test Split
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # StandardScale features
    #ss = StandardScaler()
    #X_train = ss.fit_transform(X_train)
    #X_test = ss.transform(X_test)
    
    # Instantiate Bayesian Ridge Regression
    bayridge = linear_model.BayesianRidge(n_iter = 2000, tol = 0.001, 
                                          alpha_1 = 1e-06, alpha_2 = 1e-06, 
                                          lambda_1 = 1e-06, lambda_2 = 1e-06)

    bayridge.fit(X_train, y_train)
    

    # Evaluate baseline train scores (r2)
    bayridge_cv_score = cross_val_score(bayridge, X_train, y_train , cv=3, 
                                            scoring = 'r2', verbose = 1)

    # Evaluate baseline test scores (r2)
    bayridge_cv_score_test = cross_val_score(bayridge, X_test, y_test , cv=3, 
                                            scoring = 'r2', verbose = 1)

    # Evaluate baseline train scores (MAE)
    bayridge_cv_score_mae = cross_val_score(bayridge, X_train, y_train , cv=3, 
                                            scoring = 'neg_mean_absolute_error', verbose = 1, n_jobs = -1)

    # Evaluate baseline test scores (MAE)
    bayridge_cv_score_mae_test = cross_val_score(bayridge, X_test, y_test , cv=3, 
                                                 scoring = 'neg_mean_absolute_error', n_jobs = -1)

    # Create dataframe of scores
    
    if scores_bayridge.shape[0] == 0:
        scores_bayridge = make_scores_df(bayridge_cv_score, bayridge_cv_score_test, 
                                         bayridge_cv_score_mae, bayridge_cv_score_mae_test,
                                         f'Log Bayesian Ridge - {t}' )
    else:
        scores_bayridge_temp = make_scores_df(bayridge_cv_score, bayridge_cv_score_test, 
                                              bayridge_cv_score_mae, bayridge_cv_score_mae_test,
                                              f'Log Bayesian Ridge - {t}' )
        
        scores_bayridge = pd.concat([scores_bayridge, scores_bayridge_temp], axis = 1)
        
scores_bayridge.T

[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.4s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.4s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.9s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.4s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.9s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    2.7s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:    0.3s finished
[Paral

Unnamed: 0,MAE Test,MAE Train,R^2 Test,R^2 Train
Log Bayesian Ridge - Age Adjusted Rate Endocrine,-11.363437,-11.033187,0.249097,0.165419
Log Bayesian Ridge - Age Adjusted Rate Obesity,-0.785815,-1.008403,-0.139697,0.133978
Log Bayesian Ridge - Age Adjusted Rate Metabolic,-2.481707,-2.620719,0.032559,0.154669
Log Bayesian Ridge - Age Adjusted Rate Diabetes,-9.247172,-8.816055,0.223827,0.214966
Log Bayesian Ridge - Age Adjusted Rate Circulatory,-43.044395,-45.11387,0.209416,0.193351
Log Bayesian Ridge - Age Adjusted Rate Pulmonary,-2.219322,-2.63224,-0.277198,0.084213
Log Bayesian Ridge - Age Adjusted Rate Arteries,-4.414087,-4.760098,0.00245,0.012423
Log Bayesian Ridge - Age Adjusted Rate Hypertension,-11.241181,-10.598566,0.136121,0.213792
Log Bayesian Ridge - Age Adjusted Rate External,-19.712721,-19.198607,0.133213,0.148983
Log Bayesian Ridge - Age Adjusted Rate Assault,-3.239536,-3.70291,-0.087069,0.011549


## <font color = darkorchid> Score Evaluation Ridge </font>

In [12]:
# Display scores for all diseases / all models
scores_combined = pd.merge(scores_lin_reg, scores_ridge, left_index = True, right_index = True)
scores_combined = pd.merge(scores_combined, scores_lasso, left_index = True, right_index = True)
scores_combined = pd.merge(scores_combined, scores_enet, left_index = True, right_index = True)
scores_combined = pd.merge(scores_combined, scores_bayridge, left_index = True, right_index = True)
scores_combined = scores_combined.T

scores_combined

Unnamed: 0,MAE Test,MAE Train,R^2 Test,R^2 Train
Log Linear Regression - Age Adjusted Rate Endocrine,-1.197030e+01,-11.225318,2.006464e-01,0.143241
Log Linear Regression - Age Adjusted Rate Obesity,-1.967871e+10,-0.861631,-2.525960e+22,-0.131764
Log Linear Regression - Age Adjusted Rate Metabolic,-2.453518e+00,-2.548962,6.107496e-02,0.065478
Log Linear Regression - Age Adjusted Rate Diabetes,-9.222473e+00,-8.815820,9.193800e-02,0.213337
Log Linear Regression - Age Adjusted Rate Circulatory,-4.563179e+01,-45.629989,1.139644e-01,0.169662
Log Linear Regression - Age Adjusted Rate Pulmonary,-3.350013e+00,-2.237147,-8.621794e-02,0.031992
Log Linear Regression - Age Adjusted Rate Arteries,-5.973244e+00,-5.475674,-1.906051e-01,0.040363
Log Linear Regression - Age Adjusted Rate Hypertension,-1.027322e+01,-9.740101,5.816369e-02,0.176077
Log Linear Regression - Age Adjusted Rate External,-2.090251e+01,-20.215868,1.608626e-01,0.151708
Log Linear Regression - Age Adjusted Rate Assault,-4.193660e+00,-4.296603,-2.057082e+00,-0.149568


In [13]:
scores_combined.to_csv('./foodaccess_mortality_GLM_Log10_scores.csv')