## Lasso and Ridge

In [1]:
from sklearn.pipeline import Pipeline
from matplotlib.pyplot import subplots
import sklearn.model_selection as skm
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler
from ISLP.models import ModelSpec as MS
from functools import partial
from sklearn.utils import resample

In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels import AbsorbingLS
from linearmodels import PanelOLS
import warnings
from linearmodels.iv.absorbing import AbsorbingEffectWarning
import psutil
import statsmodels.formula.api as smf
from sklearn.preprocessing import StandardScaler

## Data Preprocessing

In [5]:
# Load the analysis input file.
df = pd.read_csv("./Final Data/analysis_input.csv")

# Recode principal_city: the sentinel 999999 becomes NaN, 0 stays 0, and every
# other value becomes 1. The recoded values are stored in the new
# 'principal_city_binary' column and ALSO overwrite the raw 'principal_city'
# column, so the original codes are no longer available after this line.
# NOTE(review): a raw NaN compares unequal to both 999999 and 0 and is
# therefore coded 1 - confirm the raw column has no NaN values.
principal_city_flag = np.where(
    df['principal_city'] == 999999,
    np.nan,
    np.where(df['principal_city'] == 0, 0, 1),
)
df['principal_city_binary'] = principal_city_flag
df['principal_city'] = principal_city_flag

# Recode urbanrural: 'M' or 'U' -> 1, 'R' -> 0, anything else -> NaN.
df['urban_rural_binary'] = np.where(
    df['urbanrural'].isin(['M', 'U']),
    1,
    np.where(df['urbanrural'] == 'R', 0, np.nan),
)

# Merger Sample: keep only rows with a non-missing 'overlap' value.
df = df.dropna(subset=['overlap'])

control_vars = [
    'poptot',
    'popden',
    'pminority',
    'pcollege',
    'medincome',
    'total_branches_county',
    'avg_branch_growth_county',
]
# Every column whose name contains 'edum_'.
edum_vars = [name for name in df.columns if 'edum_' in name]

# Estimation sample: rows complete in all outcomes, edum_ columns, controls
# and the cluster identifier.
df_final = df.dropna(
    subset=[
        'branch_closures_tract',
        'total_branches_tract',
        'sbl_total',
        *edum_vars,
        *control_vars,
        'clustID',
    ]
)

### Lasso with Bootstrap

In [11]:
def lasso_bootstrap(df, Y, n_iterations, size, K, random_state=None):
    """Bootstrap the coefficients of a cross-validated lasso fit.

    Each iteration draws ``int(len(df) * size)`` rows from ``df`` and ``Y``
    with sklearn's ``resample``, standardizes the features, fits an
    ``ElasticNetCV`` with ``l1_ratio=1`` (a pure L1 / lasso penalty) whose
    penalty strength is chosen by K-fold cross-validation, and stores the
    fitted coefficients. Because the scaler precedes the lasso in the
    pipeline, the stored coefficients refer to the standardized features.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature matrix; its column names populate the 'Features' column of
        the result.
    Y : array-like
        Response, aligned row-for-row with ``df``.
    n_iterations : int
        Number of bootstrap replications; must be at least 1.
    size : float
        Bootstrap sample size expressed as a fraction of ``len(df)``.
    K : int
        Number of cross-validation folds used to tune the penalty.
    random_state : int, numpy.random.RandomState or None, default None
        Seed or generator for the bootstrap draws. ``None`` keeps the
        original behavior (``resample`` is called without a seed). An int
        seeds one generator that is shared by all iterations, so the
        replications differ from each other but the whole run is repeatable.

    Returns
    -------
    pandas.DataFrame
        One row per feature with columns 'Features', 'Bootstrapped Coef'
        (mean coefficient across replications) and 'Bootstrapped Std'
        (pandas sample standard deviation across replications; NaN when
        ``n_iterations == 1``).

    Raises
    ------
    ValueError
        If ``n_iterations`` is smaller than 1.
    """
    if n_iterations < 1:
        raise ValueError("n_iterations must be at least 1")

    # A single generator is advanced across iterations; re-seeding with the
    # same int inside the loop would produce the same sample every time.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    n_size = int(len(df) * size)

    # The fold splitter is seeded with a fixed int and does not depend on the
    # iteration, so it is built once and reused.
    kfold = skm.KFold(K,
                      random_state=0,
                      shuffle=True)

    coefficients = {}
    for i in range(n_iterations):
        # Bootstrap resample of features and response together.
        X_resampled, y_resampled = resample(df, Y,
                                            n_samples=n_size,
                                            random_state=rng)

        # NOTE(review): newer scikit-learn releases appear to deprecate
        # `n_alphas` in favour of `alphas=<int>` - confirm against the
        # installed version before changing.
        lassoCV = skl.ElasticNetCV(n_alphas=100,
                                   l1_ratio=1,
                                   cv=kfold)

        # Fresh scaler and estimator per replication so no fitted state is
        # shared between iterations.
        pipeCV = Pipeline(steps=[('scaler', StandardScaler(with_mean=True, with_std=True)),
                                 ('lasso', lassoCV)])
        pipeCV.fit(X_resampled, y_resampled)

        # Store the coefficients of the tuned lasso for this replication.
        coefficients[f'sample_{i}'] = pipeCV.named_steps['lasso'].coef_

    # Rows are features, columns are bootstrap replications.
    coef_summary = pd.DataFrame(coefficients)

    boot_mean = coef_summary.mean(axis=1)
    boot_std = coef_summary.std(axis=1)

    output_df = pd.DataFrame({'Features': df.columns,
                              'Bootstrapped Coef': boot_mean,
                              'Bootstrapped Std': boot_std})

    return output_df

In [12]:
# Feature matrix: the edum_ columns followed by the control variables.
df_X = df_final.loc[:, edum_vars + control_vars]

In [13]:
# Seed NumPy's global RNG before the stochastic runs. lasso_bootstrap draws
# its samples through sklearn's `resample`, which uses that global generator
# when no random_state is supplied, so seeding here makes every execution of
# this cell produce the same three result tables.
np.random.seed(0)

# Identical bootstrap settings for all three outcomes:
# 1000 replications, each 80% of the sample size, penalty tuned by 5-fold CV.
bootstrap_settings = dict(n_iterations=1000, size=0.8, K=5)

result_closure = lasso_bootstrap(df_X, df_final['branch_closures_tract'], **bootstrap_settings)
result_branch_total = lasso_bootstrap(df_X, df_final['total_branches_tract'], **bootstrap_settings)
result_sbl = lasso_bootstrap(df_X, df_final['sbl_total'], **bootstrap_settings)

In [14]:
# Bootstrapped lasso coefficient mean and std per feature, outcome 'branch_closures_tract'.
result_closure

Unnamed: 0,Features,Bootstrapped Coef,Bootstrapped Std
0,edum_lessm5,-0.000601,0.002457
1,edum_n5,-0.001582,0.00227
2,edum_n4,-0.002211,0.001882
3,edum_n3,-0.003576,0.002335
4,edum_n2,0.000732,0.001422
5,edum_0,-0.00074,0.001635
6,edum_1,0.008627,0.003885
7,edum_2,-0.000128,0.001165
8,edum_3,0.001086,0.002724
9,edum_4,2.6e-05,0.000651


In [15]:
# Bootstrapped lasso coefficient mean and std per feature, outcome 'total_branches_tract'.
result_branch_total

Unnamed: 0,Features,Bootstrapped Coef,Bootstrapped Std
0,edum_lessm5,0.000744,0.00606
1,edum_n5,0.012552,0.004734
2,edum_n4,0.014183,0.004129
3,edum_n3,0.026536,0.004849
4,edum_n2,0.017957,0.003585
5,edum_0,0.014696,0.004465
6,edum_1,-0.007934,0.004389
7,edum_2,-0.015439,0.004488
8,edum_3,-0.01843,0.00643
9,edum_4,-0.01889,0.007258


In [16]:
# Bootstrapped lasso coefficient mean and std per feature, outcome 'sbl_total'.
result_sbl

Unnamed: 0,Features,Bootstrapped Coef,Bootstrapped Std
0,edum_lessm5,-185.496013,63.14388
1,edum_n5,-182.166151,84.324609
2,edum_n4,-100.275448,52.402601
3,edum_n3,-111.515946,48.549592
4,edum_n2,-27.203459,32.834194
5,edum_0,-81.711278,37.334484
6,edum_1,-58.467109,39.311772
7,edum_2,555.616716,268.471662
8,edum_3,180.786526,94.880145
9,edum_4,87.974213,70.004196
