# Rerunning the DataWrangling12 processes with PM2.5 AQI only

# Rerun DataWrangling8 processes with alternative datasets from DataWrangling9

In [11]:
import numpy as np
import pandas as pd
import seaborn as sns
import sys
import os
import matplotlib.pyplot as plt
import klib

# `display` is part of the public IPython.display API; importing it from
# IPython.core.display has been deprecated since IPython 7.14 and may fail on
# newer IPython releases.
from IPython.display import display

# Optional display tweaks, left disabled on purpose:
# pd.set_option('display.max_columns', None)
# pd.reset_option('display.max_rows')
# np.set_printoptions(threshold=sys.maxsize)

# Plot styling for the notebook.
# NOTE(review): sns.set() runs last and rewrites many matplotlib rcParams, so it
# likely overrides part of the dark_background style and the grid settings set
# just above — confirm the intended order before relying on the look of plots.
plt.style.use('dark_background')
plt.rcParams.update({"grid.linewidth":0.5, "grid.alpha":0.5})
sns.set(style='ticks', context='talk')

# Load data and constants

In [12]:
# Numeric state codes of the ten states this notebook treats as the North East.
# They are compared against the 'State Code' column of the PM2.5 files.
MA = 25
CT = 9
ME = 23
NY = 36
NH = 33
NJ = 34
PA = 42
VT = 50
DE = 10
MD = 24
NORTH_EAST = [MA, CT, ME, NY, NH, NJ, PA, VT, DE, MD]

# Raw daily PM2.5 files, one per year.
pm_25_2017, pm_25_2018, pm_25_2019 = (
    pd.read_csv(raw_path)
    for raw_path in (
        '../../data/raw/daily_pm25_2017.csv',
        '../../data/raw/daily_pm25_2018.csv',
        '../../data/raw/daily_pm25_2019.csv',
    )
)

# Previously cleaned county-level tables covering 2017-2019
# (demographics, unemployment, poverty, education/vehicles).
(
    NE_dem_ratio_2017_2019,
    NE_unemploy_rate_2017_2019,
    NE_pov_ratio_2017_2019,
    NE_educ_vehicle_2017_2019,
) = (
    pd.read_csv(processed_path)
    for processed_path in (
        '../../data/processed/alt_clean_NE_dem_ratio_2017_2019.csv',
        '../../data/processed/alt_clean_NE_unemploy_rate_2017_2019.csv',
        '../../data/processed/alt_clean_NE_pov_rate_2017_2019.csv',
        '../../data/processed/alt_clean_NE_edu_vehicle_2017_2019.csv',
    )
)

In [13]:

def remove_col(df, col_name):
    '''Return a copy of ``df`` without the given column or columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is never modified.
    col_name : str or iterable
        A single column label, or an iterable of labels. Every label is
        converted with ``str()`` before the lookup, as before.

    Returns
    -------
    pandas.DataFrame
        A new frame with the requested columns dropped.

    Raises
    ------
    KeyError
        If any requested label is not a column of ``df``.
    '''
    if isinstance(col_name, str):
        labels = [col_name]
    else:
        try:
            # Any iterable of labels, including one-element and empty lists.
            # The old `len(col_name) > 1` test sent a one-element list down
            # the scalar path, where str(['a']) == "['a']" was looked up.
            labels = [str(label) for label in col_name]
        except TypeError:
            # Non-iterable scalar label (e.g. an int): treat it as one name.
            labels = [str(col_name)]

    # drop() returns a new frame, so the caller's frame is left untouched.
    return df.drop(columns=labels)

def get_NE_States(df, col):
    '''Build a boolean mask that is True where ``df[col]`` equals one of the
    North Eastern state codes (MA, CT, ME, NY, NH, NJ, PA, VT, DE, MD).'''
    codes = (MA, CT, ME, NY, NH, NJ, PA, VT, DE, MD)
    values = df[col]

    # OR the per-state equality masks together, one state at a time.
    NE_States = values == codes[0]
    for code in codes[1:]:
        NE_States = NE_States | (values == code)
    return NE_States
def get_df_with_geofips(df, state_code, county_code):
    '''Return a copy of ``df`` with a leading 'GeoFIPS' column.

    The GeoFIPS value of each row is the state code left-padded with zeros to
    two characters followed by the county code left-padded to three
    characters, e.g. state 9 and county 1 give '09001'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the two code columns; it is not modified.
    state_code : str
        Name of the column with the state codes.
    county_code : str
        Name of the column with the county codes.

    Returns
    -------
    pandas.DataFrame
        New frame with 'GeoFIPS' (strings) inserted as the first column.

    Raises
    ------
    ValueError
        If ``df`` already has a 'GeoFIPS' column (raised by ``insert``).
    '''
    # Codes are stringified first so the padding works on their text form.
    state_part = df[state_code].astype(str).str.zfill(2)
    county_part = df[county_code].astype(str).str.zfill(3)

    # Work on a copy: the input is often a filtered slice of a larger frame,
    # and inserting into it in place would silently change the caller's data.
    result = df.copy()
    result.insert(loc=0, column='GeoFIPS', value=(state_part + county_part).tolist())
    return result

## Retrieve NE PM2.5 AQI data

In [14]:
print(pm_25_2017.shape)

# One boolean row mask per yearly file, flagging the North East states.
NE_States_17, NE_States_18, NE_States_19 = (
    get_NE_States(yearly_pm25, 'State Code')
    for yearly_pm25 in (pm_25_2017, pm_25_2018, pm_25_2019)
)

# Keep only the flagged rows of each year.
NE_aqi_2017 = pm_25_2017[NE_States_17]
NE_aqi_2018 = pm_25_2018[NE_States_18]
NE_aqi_2019 = pm_25_2019[NE_States_19]

(450385, 29)


In [15]:
# Give every North East AQI frame a leading GeoFIPS column built from its
# state and county codes.
NE_aqi_2017, NE_aqi_2018, NE_aqi_2019 = (
    get_df_with_geofips(yearly_aqi, 'State Code', 'County Code')
    for yearly_aqi in (NE_aqi_2017, NE_aqi_2018, NE_aqi_2019)
)


In [16]:
# Drop every column that is not needed for the county-level AQI averages.

NE_aqi_2017.columns  # bare expression in the middle of the cell: displays nothing

cols_to_remove = [
    'State Code', 'County Code', 'Site Num', 'Parameter Code', 'POC',
    'Latitude', 'Longitude', 'Datum', 'Parameter Name', 'Sample Duration',
    'Pollutant Standard', 'Date Local', 'Units of Measure', 'Event Type',
    'Observation Count', 'Observation Percent', 'Arithmetic Mean',
    '1st Max Value', '1st Max Hour', 'Method Code', 'Method Name',
    'Local Site Name', 'Address', 'CBSA Name', 'Date of Last Change',
]

# remove_col returns a new frame each time; rebind the yearly names to it.
NE_aqi_2017, NE_aqi_2018, NE_aqi_2019 = (
    remove_col(yearly_aqi, cols_to_remove)
    for yearly_aqi in (NE_aqi_2017, NE_aqi_2018, NE_aqi_2019)
)

In [17]:
# Tag each frame's AQI column with its year so the three years can sit side by
# side after merging. The rename is done in place on each frame.
for yearly_aqi, yearly_label in (
    (NE_aqi_2017, 'AQI_2017'),
    (NE_aqi_2018, 'AQI_2018'),
    (NE_aqi_2019, 'AQI_2019'),
):
    yearly_aqi.rename(columns={'AQI': yearly_label}, inplace=True)

In [18]:
print(NE_aqi_2017.columns)

# Mean AQI per county and year; the grouping keys are kept as regular columns.
county_keys = ['GeoFIPS', 'State Name', 'County Name']
average_NE_aqi_2017 = NE_aqi_2017.groupby(county_keys, as_index=False)[['AQI_2017']].mean()
average_NE_aqi_2018 = NE_aqi_2018.groupby(county_keys, as_index=False)[['AQI_2018']].mean()
average_NE_aqi_2019 = NE_aqi_2019.groupby(county_keys, as_index=False)[['AQI_2019']].mean()

for yearly_average in (average_NE_aqi_2017, average_NE_aqi_2018, average_NE_aqi_2019):
    display(yearly_average)


Index(['GeoFIPS', 'AQI_2017', 'State Name', 'County Name', 'City Name'], dtype='object')


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017
0,09001,Connecticut,Fairfield,28.652830
1,09003,Connecticut,Hartford,28.213759
2,09005,Connecticut,Litchfield,15.083141
3,09009,Connecticut,New Haven,25.310385
4,09011,Connecticut,New London,20.155000
...,...,...,...,...
96,42129,Pennsylvania,Westmoreland,40.471667
97,42133,Pennsylvania,York,35.832845
98,50003,Vermont,Bennington,22.036254
99,50007,Vermont,Chittenden,18.308046


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2018
0,09001,Connecticut,Fairfield,30.493590
1,09003,Connecticut,Hartford,29.896907
2,09005,Connecticut,Litchfield,17.717489
3,09009,Connecticut,New Haven,30.825553
4,09011,Connecticut,New London,23.860335
...,...,...,...,...
98,42129,Pennsylvania,Westmoreland,25.054968
99,42133,Pennsylvania,York,38.688889
100,50003,Vermont,Bennington,24.665753
101,50007,Vermont,Chittenden,22.357733


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2019
0,09001,Connecticut,Fairfield,30.802145
1,09003,Connecticut,Hartford,28.745033
2,09005,Connecticut,Litchfield,19.460317
3,09009,Connecticut,New Haven,30.279693
4,09011,Connecticut,New London,25.900474
...,...,...,...,...
99,42131,Pennsylvania,Wyoming,34.259259
100,42133,Pennsylvania,York,35.691558
101,50003,Vermont,Bennington,23.117647
102,50007,Vermont,Chittenden,21.697842


In [19]:
# Merge the per-year North East PM2.5 AQI averages into one frame. No `on=` is
# given, so pandas joins on every column the two frames share; the inner join
# keeps only counties present in all three years.

avg_NE_aqi_2017_2018 = average_NE_aqi_2017.merge(average_NE_aqi_2018, how='inner')
avg_NE_aqi_2017_2019 = avg_NE_aqi_2017_2018.merge(average_NE_aqi_2019, how='inner')
avg_NE_aqi_2017_2019  # bare expression in the middle of the cell: displays nothing

# Convert the zero-padded GeoFIPS strings to integers ('09001' becomes 9001).
avg_NE_aqi_2017_2019['GeoFIPS'] = pd.to_numeric(avg_NE_aqi_2017_2019['GeoFIPS'])
avg_NE_aqi_2017_2019.dtypes

GeoFIPS          int64
State Name      object
County Name     object
AQI_2017       float64
AQI_2018       float64
AQI_2019       float64
dtype: object

* We now have the three individual years (2017, 2018, 2019) combined in one AQI dataset

In [20]:
# Show all five input tables, in the same order as before.
display(
    avg_NE_aqi_2017_2019,
    NE_unemploy_rate_2017_2019,
    NE_pov_ratio_2017_2019,
    NE_educ_vehicle_2017_2019,
    NE_dem_ratio_2017_2019,
)


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017,AQI_2018,AQI_2019
0,9001,Connecticut,Fairfield,28.652830,30.493590,30.802145
1,9003,Connecticut,Hartford,28.213759,29.896907,28.745033
2,9005,Connecticut,Litchfield,15.083141,17.717489,19.460317
3,9009,Connecticut,New Haven,25.310385,30.825553,30.279693
4,9011,Connecticut,New London,20.155000,23.860335,25.900474
...,...,...,...,...,...,...
94,42129,Pennsylvania,Westmoreland,40.471667,25.054968,28.891176
95,42133,Pennsylvania,York,35.832845,38.688889,35.691558
96,50003,Vermont,Bennington,22.036254,24.665753,23.117647
97,50007,Vermont,Chittenden,18.308046,22.357733,21.697842


Unnamed: 0,GeoFIPS,Stabr,area_name,Civilian_labor_force_2017,Employed_2017,Unemployed_2017,Unemployment_rate_2017,Civilian_labor_force_2018,Employed_2018,Unemployed_2018,Unemployment_rate_2018,Civilian_labor_force_2019,Employed_2019,Unemployed_2019,Unemployment_rate_2019
0,9001,CT,"Fairfield County, CT",479458,457625,21833,4.6,479179,460045,19134,4.0,481023,463547,17476,3.6
1,9003,CT,"Hartford County, CT",477390,454205,23185,4.9,478660,458138,20522,4.3,483303,464533,18770,3.9
2,9005,CT,"Litchfield County, CT",104051,99522,4529,4.4,104604,100568,4036,3.9,105182,101543,3639,3.5
3,9007,CT,"Middlesex County, CT",92583,88817,3766,4.1,92811,89517,3294,3.5,93696,90703,2993,3.2
4,9009,CT,"New Haven County, CT",457800,434645,23155,5.1,458799,438485,20314,4.4,461613,443514,18099,3.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,50019,VT,"Orleans County, VT",13601,12920,681,5.0,13501,12924,577,4.3,13275,12713,562,4.2
235,50021,VT,"Rutland County, VT",31096,30030,1066,3.4,30752,29828,924,3.0,30446,29608,838,2.8
236,50023,VT,"Washington County, VT",34260,33273,987,2.9,34512,33658,854,2.5,34360,33586,774,2.3
237,50025,VT,"Windham County, VT",22823,22132,691,3.0,22470,21856,614,2.7,21977,21416,561,2.6


Unnamed: 0,GeoFIPS,Postal Code,Name,"Poverty Estimate, All Ages_2017",90% CI LB All Ages_2017,90% CI UB All Ages_2017,"Poverty Percent, All Ages_2017",90% CI LB percent_2017,90% CI UB percent_2017,"Poverty Estimate, Age 0-17_2017",...,90% CI UB 0-17 percent_2019,"Poverty Estimate, Age 5-17 in Families_2019",90% CI LB 5-17 fam_2019,90% CI UB 5-17 fam_2019,"Poverty Percent, Age 5-17 in Families_2019",90% CI LB 5-17 percent_2019,90% CI UB percent 5-17 percent_2019,Median Household Income_2019,90% CI Lower Bound LB medh inc_2019,90% CI UB medh inc_2019
0,9001,CT,Fairfield County,82428.0,74608.0,90248.0,8.8,8.0,9.6,24161.0,...,13.3,18562.0,16078.0,21046.0,11.9,10.3,13.5,96966.0,93161.0,100771.0
1,9003,CT,Hartford County,96200.0,88256.0,104144.0,11.0,10.1,11.9,28488.0,...,16.0,17852.0,15166.0,20538.0,13.1,11.1,15.1,75336.0,73281.0,77391.0
2,9005,CT,Litchfield County,12481.0,10381.0,14581.0,6.9,5.7,8.1,2642.0,...,11.6,2077.0,1591.0,2563.0,8.5,6.5,10.5,81015.0,77226.0,84804.0
3,9007,CT,Middlesex County,10796.0,8906.0,12686.0,6.8,5.6,8.0,2169.0,...,9.1,1388.0,981.0,1795.0,6.6,4.7,8.5,81721.0,74613.0,88829.0
4,9009,CT,New Haven County,91639.0,82523.0,100755.0,11.0,9.9,12.1,26499.0,...,20.5,22220.0,19581.0,24859.0,17.8,15.7,19.9,69687.0,66999.0,72375.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
233,50019,VT,Orleans County,3962.0,3189.0,4735.0,15.2,12.2,18.2,1007.0,...,23.3,643.0,427.0,859.0,16.7,11.1,22.3,48826.0,43855.0,53797.0
234,50021,VT,Rutland County,6106.0,4691.0,7521.0,10.8,8.3,13.3,1518.0,...,16.4,849.0,480.0,1218.0,11.4,6.5,16.3,51903.0,49484.0,54322.0
235,50023,VT,Washington County,5252.0,4184.0,6320.0,9.4,7.5,11.3,1114.0,...,12.7,636.0,373.0,899.0,8.0,4.7,11.3,65879.0,61501.0,70257.0
236,50025,VT,Windham County,4827.0,3819.0,5835.0,11.6,9.2,14.0,1138.0,...,17.9,664.0,410.0,918.0,12.5,7.7,17.3,52068.0,47230.0,56906.0


Unnamed: 0,GeoFIPS,NAME,state,county,POP_2017,EDU_TOT_2017,LESS_HS_TOT_2017,HS_TOT_2017,COL_OR_ASSOC_TOT_2017,BACH_TOT_2017,...,BACH_TOT_2019,GRAD_TOT_2019,VEHICLE_TOT_2019,EDU_TOT_ratio_2019,LESS_HS_TOT_ratio_2019,HS_TOT_ratio_2019,COL_OR_ASSOC_TOT_ratio_2019,BACH_TOT_ratio_2019,GRAD_TOT_ratio_2019,VEHICLE_TOT_ratio_2019
0,36089,"St. Lawrence County, New York",36,89,109623,72555.0,9164.0,25858.0,20091.0,8279.0,...,8684.0,7733.0,33650,67.044737,8.564136,23.762762,19.480230,8.060145,7.177464,31.232597
1,36091,"Saratoga County, New York",36,91,229869,164414.0,10656.0,39033.0,45561.0,37886.0,...,40148.0,30363.0,104085,72.095118,4.492676,17.754924,19.172290,17.466056,13.209172,45.281320
2,36093,"Schenectady County, New York",36,93,155565,107453.0,10085.0,32144.0,35263.0,17210.0,...,22627.0,15566.0,59970,70.183324,5.047682,20.198456,20.343982,14.569959,10.023245,38.615831
3,36101,"Steuben County, New York",36,101,96281,68011.0,6510.0,25920.0,20685.0,7451.0,...,8066.0,8475.0,35505,71.044989,6.093585,25.791841,21.817171,8.456788,8.885604,37.225175
4,36103,"Suffolk County, New York",36,103,1492953,1036946.0,102621.0,277080.0,279213.0,208064.0,...,214035.0,182346.0,629360,70.148605,6.397598,18.389192,18.517663,14.495114,12.349037,42.622211
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,36079,"Putnam County, New York",36,79,99323,71447.0,5090.0,20597.0,18095.0,14929.0,...,15240.0,12261.0,42420,73.443857,5.721115,19.355167,20.396664,15.500407,12.470504,43.144833
148,36081,"Queens County, New York",36,81,2358582,1695302.0,304984.0,493794.0,372885.0,335990.0,...,334272.0,207624.0,387870,72.484558,12.328372,19.812340,16.300805,14.831103,9.211938,17.209159
149,36083,"Rensselaer County, New York",36,83,159722,111047.0,9095.0,31730.0,34954.0,20143.0,...,20896.0,18203.0,65615,70.327759,5.902441,19.919478,19.870963,13.165820,11.469058,41.341659
150,36085,"Richmond County, New York",36,85,479458,334079.0,39031.0,100730.0,83679.0,66108.0,...,71914.0,48875.0,128740,70.041143,7.701258,20.426427,16.545240,15.103446,10.264773,27.038096


Unnamed: 0,GeoFIPS,STNAME,CTYNAME,TOT_POP_2017,TOT_MALE_2017,TOT_FEMALE_2017,WA_MALE_2017,WA_FEMALE_2017,BA_MALE_2017,BA_FEMALE_2017,...,HWAC_MALE_ratio_2019,HWAC_FEMALE_ratio_2019,HBAC_MALE_ratio_2019,HBAC_FEMALE_ratio_2019,HIAC_MALE_ratio_2019,HIAC_FEMALE_ratio_2019,HAAC_MALE_ratio_2019,HAAC_FEMALE_ratio_2019,HNAC_MALE_ratio_2019,HNAC_FEMALE_ratio_2019
0,9001,Connecticut,Fairfield County,943038,459242,483796,366178,379174,54885,63641,...,9.077186,8.966514,1.128341,1.183995,0.319082,0.286113,0.119788,0.099647,0.077173,0.059682
1,9003,Connecticut,Hartford County,893076,433833,459243,327876,347484,66613,71919,...,7.626834,7.970103,1.458642,1.633585,0.303234,0.315458,0.137711,0.124815,0.069304,0.052819
2,9005,Connecticut,Litchfield County,181667,89749,91918,83969,86459,2095,1760,...,3.141965,3.005551,0.454714,0.417561,0.167468,0.144732,0.053789,0.051017,0.030499,0.022736
3,9007,Connecticut,Middlesex County,162942,79376,83566,70949,74431,4283,4437,...,2.782019,2.829422,0.419858,0.471570,0.088035,0.112044,0.056022,0.052944,0.029550,0.032013
4,9009,Connecticut,New Haven County,857748,413560,444188,324268,344780,59423,66910,...,8.058080,8.240939,1.302943,1.452343,0.285812,0.297628,0.097104,0.095115,0.048786,0.047733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,50019,Vermont,Orleans County,26811,13459,13352,12970,12884,128,83,...,0.828494,0.702741,0.073973,0.070274,0.059178,0.051781,0.022192,0.018493,0.011096,0.007397
235,50021,Vermont,Rutland County,59006,29118,29888,28147,28858,245,148,...,0.740664,0.704576,0.089361,0.044680,0.054991,0.049836,0.024059,0.015466,0.006874,0.005155
236,50023,Vermont,Washington County,58253,28808,29445,27604,28267,334,218,...,0.919379,0.910819,0.089027,0.051362,0.068483,0.053074,0.018833,0.020545,0.006848,0.000000
237,50025,Vermont,Windham County,42851,20998,21853,19944,20751,355,247,...,1.162901,1.025532,0.118422,0.146843,0.073421,0.066316,0.033158,0.028421,0.011842,0.004737


## Build a single dataframe for 2017
* The datasets were already cleaned in the Data_Wrangling4 and Data_Wrangling7 files
* So the only remaining step is to combine them into one dataframe

In [21]:
# Each 2017 table is the GeoFIPS key column plus the block of 2017 columns,
# picked by position from the combined 2017-2019 tables.

# AQI: the first four columns (three key columns and AQI_2017).
avg_NE_aqi_2017 = avg_NE_aqi_2017_2019.iloc[:, :4]

# Unemployment: column 0 plus columns 3-6.
NE_unemploy_rate_2017 = NE_unemploy_rate_2017_2019.iloc[:, [0, 3, 4, 5, 6]]

# Poverty: column 0 plus columns 3-23.
pov_col_2017 = [*NE_pov_ratio_2017_2019.columns[:1], *NE_pov_ratio_2017_2019.columns[3:24]]
NE_pov_ratio_2017 = NE_pov_ratio_2017_2019[pov_col_2017]

# Education / vehicles: column 0 plus columns 4-18.
educ_vehicle_2017_col = [*NE_educ_vehicle_2017_2019.columns[:1], *NE_educ_vehicle_2017_2019.columns[4:19]]
NE_educ_vehicle_2017 = NE_educ_vehicle_2017_2019[educ_vehicle_2017_col]

# Demographics: column 0 plus columns 3-147.
dem_ratio_2017_col = [*NE_dem_ratio_2017_2019.columns[:1], *NE_dem_ratio_2017_2019.columns[3:148]]
NE_dem_ratio_2017 = NE_dem_ratio_2017_2019[dem_ratio_2017_col]


In [22]:
# Chain inner joins to build the 2017 table. No `on=` is given, so each join
# uses the columns the two frames share.
NE_avg_aqi_unemploy_2017 = avg_NE_aqi_2017.merge(NE_unemploy_rate_2017, how='inner')
NE_avg_aqi_unemploy_pov_2017 = NE_avg_aqi_unemploy_2017.merge(NE_pov_ratio_2017, how='inner')
NE_avg_aqi_unemploy_pov_educ_veh_2017 = NE_avg_aqi_unemploy_pov_2017.merge(
    NE_educ_vehicle_2017, how='inner'
)
df_2017 = NE_avg_aqi_unemploy_pov_educ_veh_2017.merge(NE_dem_ratio_2017, how='inner')

df_2017

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017,Civilian_labor_force_2017,Employed_2017,Unemployed_2017,Unemployment_rate_2017,"Poverty Estimate, All Ages_2017",90% CI LB All Ages_2017,...,HWAC_MALE_ratio_2017,HWAC_FEMALE_ratio_2017,HBAC_MALE_ratio_2017,HBAC_FEMALE_ratio_2017,HIAC_MALE_ratio_2017,HIAC_FEMALE_ratio_2017,HAAC_MALE_ratio_2017,HAAC_FEMALE_ratio_2017,HNAC_MALE_ratio_2017,HNAC_FEMALE_ratio_2017
0,9001,Connecticut,Fairfield,28.652830,479458,457625,21833,4.6,82428.0,74608.0,...,8.771863,8.578764,1.047678,1.109499,0.298079,0.263828,0.113781,0.094164,0.076773,0.055883
1,9003,Connecticut,Hartford,28.213759,477390,454205,23185,4.9,96200.0,88256.0,...,7.358612,7.653212,1.394618,1.551380,0.293480,0.299639,0.128544,0.121266,0.068415,0.050612
2,9005,Connecticut,Litchfield,15.083141,104051,99522,4529,4.4,12481.0,10381.0,...,2.739078,2.630637,0.390825,0.361100,0.142569,0.131559,0.040183,0.039633,0.030826,0.019816
3,9009,Connecticut,New Haven,25.310385,457800,434645,23155,5.1,91639.0,82523.0,...,7.649333,7.797512,1.208747,1.338155,0.262198,0.275372,0.093034,0.090236,0.044419,0.045701
4,9011,Connecticut,New London,20.155000,137470,131267,6203,4.5,22246.0,18575.0,...,4.418908,4.215482,0.811835,0.856334,0.323088,0.322715,0.143595,0.118167,0.075163,0.048613
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,42101,Pennsylvania,Philadelphia,36.242007,704948,661229,43719,6.2,388221.0,371900.0,...,5.325632,5.327214,1.701125,1.813994,0.459952,0.474693,0.153296,0.158294,0.077502,0.089586
82,42125,Pennsylvania,Washington,35.712524,106359,100831,5528,5.2,18913.0,15932.0,...,0.809852,0.687191,0.104793,0.090305,0.054087,0.034770,0.021248,0.021731,0.005312,0.004346
83,42129,Pennsylvania,Westmoreland,40.471667,180797,171495,9302,5.1,34404.0,30495.0,...,0.542320,0.494310,0.085226,0.079544,0.031534,0.027556,0.008807,0.010227,0.004829,0.003693
84,42133,Pennsylvania,York,35.832845,234928,224744,10184,4.3,40966.0,36227.0,...,3.045227,2.871708,0.733809,0.703953,0.159153,0.140521,0.052303,0.054098,0.024692,0.024243


* df_2017 contains data on the North Eastern counties of the US
* The columns of df_2017 are, in order: average AQI, unemployment rate, poverty rate, education attainment ratios, total vehicle ratio, and demographic ratios


## Build a single dataframe for 2018
* The datasets were already cleaned in the Data_Wrangling4 and Data_Wrangling7 files
* So the only remaining step is to combine them into one dataframe

In [23]:
# Each 2018 table is the GeoFIPS key column plus the block of 2018 columns
# taken from the combined 2017-2019 tables.

# AQI: the three key columns plus AQI_2018 (position 4).
avg_NE_aqi_2018 = avg_NE_aqi_2017_2019.iloc[:, [0, 1, 2, 4]]

# Unemployment: column 0 plus columns 7-10.
unemploy_col_2018 = list(NE_unemploy_rate_2017_2019.columns[0:1]) + list(NE_unemploy_rate_2017_2019.columns[7:11])
NE_unemploy_rate_2018 = NE_unemploy_rate_2017_2019.loc[:, unemploy_col_2018]

# Poverty: column 0 plus columns 24-44.
pov_col_2018 = list(NE_pov_ratio_2017_2019.columns[0:1]) + list(NE_pov_ratio_2017_2019.columns[24:45])
NE_pov_ratio_2018 = NE_pov_ratio_2017_2019.loc[:, pov_col_2018]

# Education / vehicles: column 0 plus columns 19-33.
educ_vehicle_2018_col = list(NE_educ_vehicle_2017_2019.columns[0:1]) + list(NE_educ_vehicle_2017_2019.columns[19:34])
NE_educ_vehicle_2018 = NE_educ_vehicle_2017_2019.loc[:, educ_vehicle_2018_col]

# Demographics: select the 2018 block by its '_2018' suffix instead of by
# position. The earlier positional slice [149:293] was one column narrower
# than the 2017 slice [3:148] and skipped index 148, the first column of the
# 2018 block (presumably TOT_POP_2018 — verify against the CSV header).
dem_ratio_2018_col = list(NE_dem_ratio_2017_2019.columns[0:1]) + [
    col for col in NE_dem_ratio_2017_2019.columns if str(col).endswith('_2018')
]
NE_dem_ratio_2018 = NE_dem_ratio_2017_2019.loc[:, dem_ratio_2018_col]

In [24]:
# Chain inner joins to build the 2018 table. No `on=` is given, so each join
# uses the columns the two frames share.
NE_avg_aqi_unemploy_2018 = avg_NE_aqi_2018.merge(NE_unemploy_rate_2018, how='inner')
NE_avg_aqi_unemploy_pov_2018 = NE_avg_aqi_unemploy_2018.merge(NE_pov_ratio_2018, how='inner')
NE_avg_aqi_unemploy_pov_educ_veh_2018 = NE_avg_aqi_unemploy_pov_2018.merge(
    NE_educ_vehicle_2018, how='inner'
)
df_2018 = NE_avg_aqi_unemploy_pov_educ_veh_2018.merge(NE_dem_ratio_2018, how='inner')

df_2018

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2018,Civilian_labor_force_2018,Employed_2018,Unemployed_2018,Unemployment_rate_2018,"Poverty Estimate, All Ages_2018",90% CI LB All Ages_2018,...,HWAC_MALE_ratio_2018,HWAC_FEMALE_ratio_2018,HBAC_MALE_ratio_2018,HBAC_FEMALE_ratio_2018,HIAC_MALE_ratio_2018,HIAC_FEMALE_ratio_2018,HAAC_MALE_ratio_2018,HAAC_FEMALE_ratio_2018,HNAC_MALE_ratio_2018,HNAC_FEMALE_ratio_2018
0,9001,Connecticut,Fairfield,30.493590,479179,460045,19134,4.0,92971.0,86234.0,...,8.943071,8.786817,1.085838,1.145798,0.307001,0.280199,0.118542,0.098944,0.075426,0.059536
1,9003,Connecticut,Hartford,29.896907,478660,458138,20522,4.3,96957.0,89308.0,...,7.534003,7.856327,1.422730,1.596496,0.301149,0.312801,0.132425,0.121446,0.070806,0.051872
2,9005,Connecticut,Litchfield,17.717489,104604,100568,4036,3.9,12441.0,10517.0,...,2.973577,2.829454,0.425191,0.387642,0.155719,0.137497,0.043071,0.046937,0.025401,0.023744
3,9009,Connecticut,New Haven,30.825553,458799,438485,20314,4.4,96563.0,88623.0,...,7.905402,8.066201,1.249050,1.394563,0.274688,0.284257,0.096736,0.095686,0.047143,0.046909
4,9011,Connecticut,New London,23.860335,136856,131394,5462,4.0,25063.0,21810.0,...,4.566160,4.354733,0.846086,0.893779,0.344743,0.338359,0.143455,0.119045,0.077736,0.045816
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,42101,Pennsylvania,Philadelphia,35.325617,709058,670112,38946,5.5,372322.0,355281.0,...,5.468075,5.489545,1.741737,1.842583,0.470386,0.489267,0.158437,0.161658,0.080387,0.090490
82,42125,Pennsylvania,Washington,32.652945,106242,101702,4540,4.3,18274.0,15388.0,...,0.826981,0.706219,0.111584,0.092263,0.063280,0.040093,0.022220,0.018839,0.003381,0.003381
83,42129,Pennsylvania,Westmoreland,25.054968,179859,172002,7857,4.4,31749.0,27861.0,...,0.566115,0.507335,0.089597,0.082749,0.034241,0.027393,0.012555,0.011128,0.002568,0.003709
84,42133,Pennsylvania,York,38.688889,234583,225632,8951,3.8,38582.0,33740.0,...,3.207792,3.067565,0.766780,0.733956,0.162556,0.150944,0.055823,0.056046,0.026348,0.026795


* df_2018 contains data on the North Eastern counties of the US
* The columns of df_2018 are, in order: average AQI, unemployment rate, poverty rate, education attainment ratios, total vehicle ratio, and demographic ratios

## Build a single dataframe for 2019
* The datasets were already cleaned in the Data_Wrangling4 and Data_Wrangling7 files
* So the only remaining step is to combine them into one dataframe

In [25]:
# Each 2019 table is the GeoFIPS key column plus the trailing block of 2019
# columns, picked by position from the combined 2017-2019 tables.

# AQI: the three key columns plus AQI_2019 (position 5).
avg_NE_aqi_2019 = avg_NE_aqi_2017_2019.iloc[:, [0, 1, 2, 5]]

# Unemployment: column 0 plus everything from column 11 onwards.
unemploy_col_2019 = [*NE_unemploy_rate_2017_2019.columns[:1], *NE_unemploy_rate_2017_2019.columns[11:]]
NE_unemploy_rate_2019 = NE_unemploy_rate_2017_2019[unemploy_col_2019]

# Poverty: column 0 plus everything from column 45 onwards.
pov_col_2019 = [*NE_pov_ratio_2017_2019.columns[:1], *NE_pov_ratio_2017_2019.columns[45:]]
NE_pov_ratio_2019 = NE_pov_ratio_2017_2019[pov_col_2019]

# Education / vehicles: column 0 plus everything from column 34 onwards.
educ_vehicle_2019_col = [*NE_educ_vehicle_2017_2019.columns[:1], *NE_educ_vehicle_2017_2019.columns[34:]]
NE_educ_vehicle_2019 = NE_educ_vehicle_2017_2019[educ_vehicle_2019_col]

# Demographics: column 0 plus everything from column 293 onwards.
dem_ratio_2019_col = [*NE_dem_ratio_2017_2019.columns[:1], *NE_dem_ratio_2017_2019.columns[293:]]
NE_dem_ratio_2019 = NE_dem_ratio_2017_2019[dem_ratio_2019_col]

In [26]:
# Chain inner joins to build the 2019 table. No `on=` is given, so each join
# uses the columns the two frames share.
NE_avg_aqi_unemploy_2019 = avg_NE_aqi_2019.merge(NE_unemploy_rate_2019, how='inner')
NE_avg_aqi_unemploy_pov_2019 = NE_avg_aqi_unemploy_2019.merge(NE_pov_ratio_2019, how='inner')
NE_avg_aqi_unemploy_pov_educ_veh_2019 = NE_avg_aqi_unemploy_pov_2019.merge(
    NE_educ_vehicle_2019, how='inner'
)
df_2019 = NE_avg_aqi_unemploy_pov_educ_veh_2019.merge(NE_dem_ratio_2019, how='inner')

df_2019

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2019,Civilian_labor_force_2019,Employed_2019,Unemployed_2019,Unemployment_rate_2019,"Poverty Estimate, All Ages_2019",90% CI LB All Ages_2019,...,HWAC_MALE_ratio_2019,HWAC_FEMALE_ratio_2019,HBAC_MALE_ratio_2019,HBAC_FEMALE_ratio_2019,HIAC_MALE_ratio_2019,HIAC_FEMALE_ratio_2019,HAAC_MALE_ratio_2019,HAAC_FEMALE_ratio_2019,HNAC_MALE_ratio_2019,HNAC_FEMALE_ratio_2019
0,9001,Connecticut,Fairfield,30.802145,481023,463547,17476,3.6,83047.0,75296.0,...,9.077186,8.966514,1.128341,1.183995,0.319082,0.286113,0.119788,0.099647,0.077173,0.059682
1,9003,Connecticut,Hartford,28.745033,483303,464533,18770,3.9,93694.0,86070.0,...,7.626834,7.970103,1.458642,1.633585,0.303234,0.315458,0.137711,0.124815,0.069304,0.052819
2,9005,Connecticut,Litchfield,19.460317,105182,101543,3639,3.5,12741.0,10601.0,...,3.141965,3.005551,0.454714,0.417561,0.167468,0.144732,0.053789,0.051017,0.030499,0.022736
3,9009,Connecticut,New Haven,30.279693,461613,443514,18099,3.9,99423.0,91433.0,...,8.058080,8.240939,1.302943,1.452343,0.285812,0.297628,0.097104,0.095115,0.048786,0.047733
4,9011,Connecticut,New London,25.900474,137386,132423,4963,3.6,19739.0,16173.0,...,4.662036,4.452011,0.880448,0.915138,0.354441,0.351802,0.139891,0.122923,0.078430,0.045248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,42101,Pennsylvania,Philadelphia,32.890160,722078,682012,40066,5.5,352748.0,333894.0,...,5.527491,5.569409,1.758830,1.860847,0.472140,0.498591,0.158769,0.165397,0.083014,0.092799
82,42125,Pennsylvania,Washington,31.069617,107299,102475,4824,4.5,19860.0,17015.0,...,0.839195,0.734295,0.108283,0.094264,0.066710,0.042056,0.021753,0.020787,0.005801,0.003867
83,42129,Pennsylvania,Westmoreland,28.891176,181453,173320,8133,4.5,35870.0,31419.0,...,0.584983,0.526227,0.097449,0.082259,0.033534,0.028948,0.012898,0.011751,0.003439,0.004299
84,42133,Pennsylvania,York,35.691558,236377,227297,9080,3.8,40477.0,35485.0,...,3.301133,3.162175,0.806355,0.775401,0.172583,0.157218,0.057899,0.059458,0.029172,0.027836


* df_2019 contains data on the North Eastern counties of the US
* The columns of df_2019 are, in order: average AQI, unemployment rate, poverty rate, education attainment ratios, total vehicle ratio, and demographic ratios

# Merge dataframe into df 2017-2019

In [27]:
# Raise the column display limit so more of the wide merged table is shown.
pd.set_option('display.max_columns', 200)

# Join the three yearly tables into one 2017-2019 frame. No `on=` is given, so
# each inner join uses the columns the frames share.
df_2017_2018 = df_2017.merge(df_2018, how='inner')
df_2017_2019 = df_2017_2018.merge(df_2019, how='inner')

df_2017_2019

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017,Civilian_labor_force_2017,Employed_2017,Unemployed_2017,Unemployment_rate_2017,"Poverty Estimate, All Ages_2017",90% CI LB All Ages_2017,90% CI UB All Ages_2017,"Poverty Percent, All Ages_2017",90% CI LB percent_2017,90% CI UB percent_2017,"Poverty Estimate, Age 0-17_2017",90% CI LB percent 0-17_2017,90% CI UB 0-17_2017,"Poverty Percent, Age 0-17_2017",90% CI LB percent 0-17 percent_2017,90% CI UB 0-17 percent_2017,"Poverty Estimate, Age 5-17 in Families_2017",90% CI LB 5-17 fam_2017,90% CI UB 5-17 fam_2017,"Poverty Percent, Age 5-17 in Families_2017",90% CI LB 5-17 percent_2017,90% CI UB percent 5-17 percent_2017,Median Household Income_2017,90% CI Lower Bound LB medh inc_2017,90% CI UB medh inc_2017,POP_2017,EDU_TOT_2017,LESS_HS_TOT_2017,HS_TOT_2017,COL_OR_ASSOC_TOT_2017,BACH_TOT_2017,GRAD_TOT_2017,VEHICLE_TOT_2017,EDU_TOT_ratio_2017,LESS_HS_TOT_ratio_2017,HS_TOT_ratio_2017,COL_OR_ASSOC_TOT_ratio_2017,BACH_TOT_ratio_2017,GRAD_TOT_ratio_2017,VEHICLE_TOT_ratio_2017,TOT_POP_2017,TOT_MALE_2017,TOT_FEMALE_2017,WA_MALE_2017,WA_FEMALE_2017,BA_MALE_2017,BA_FEMALE_2017,IA_MALE_2017,IA_FEMALE_2017,AA_MALE_2017,AA_FEMALE_2017,NA_MALE_2017,NA_FEMALE_2017,TOM_MALE_2017,TOM_FEMALE_2017,WAC_MALE_2017,WAC_FEMALE_2017,BAC_MALE_2017,BAC_FEMALE_2017,IAC_MALE_2017,IAC_FEMALE_2017,AAC_MALE_2017,AAC_FEMALE_2017,NAC_MALE_2017,NAC_FEMALE_2017,NH_MALE_2017,NH_FEMALE_2017,NHWA_MALE_2017,NHWA_FEMALE_2017,NHBA_MALE_2017,NHBA_FEMALE_2017,NHIA_MALE_2017,NHIA_FEMALE_2017,NHAA_MALE_2017,NHAA_FEMALE_2017,NHNA_MALE_2017,NHNA_FEMALE_2017,NHTOM_MALE_2017,NHTOM_FEMALE_2017,NHWAC_MALE_2017,NHWAC_FEMALE_2017,NHBAC_MALE_2017,NHBAC_FEMALE_2017,NHIAC_MALE_2017,NHIAC_FEMALE_2017,NHAAC_MALE_2017,NHAAC_FEMALE_2017,NHNAC_MALE_2017,NHNAC_FEMALE_2017,H_MALE_2017,H_FEMALE_2017,HWA_MALE_2017,HWA_FEMALE_2017,HBA_MALE_2017,HBA_FEMALE_2017,HIA_MALE_2017,...,NHAAC_MALE_2019,NHAAC_FEMALE_2019,NHNAC_MALE_2019,NHNAC_FEMALE_2019,H_MALE_2019,H_FEMALE_2019,HWA_MALE_2019,HW
A_FEMALE_2019,HBA_MALE_2019,HBA_FEMALE_2019,HIA_MALE_2019,HIA_FEMALE_2019,HAA_MALE_2019,HAA_FEMALE_2019,HNA_MALE_2019,HNA_FEMALE_2019,HTOM_MALE_2019,HTOM_FEMALE_2019,HWAC_MALE_2019,HWAC_FEMALE_2019,HBAC_MALE_2019,HBAC_FEMALE_2019,HIAC_MALE_2019,HIAC_FEMALE_2019,HAAC_MALE_2019,HAAC_FEMALE_2019,HNAC_MALE_2019,HNAC_FEMALE_2019,TOT_MALE_ratio_2019,TOT_FEMALE_ratio_2019,WA_MALE_ratio_2019,WA_FEMALE_ratio_2019,BA_MALE_ratio_2019,BA_FEMALE_ratio_2019,IA_MALE_ratio_2019,IA_FEMALE_ratio_2019,AA_MALE_ratio_2019,AA_FEMALE_ratio_2019,NA_MALE_ratio_2019,NA_FEMALE_ratio_2019,TOM_MALE_ratio_2019,TOM_FEMALE_ratio_2019,WAC_MALE_ratio_2019,WAC_FEMALE_ratio_2019,BAC_MALE_ratio_2019,BAC_FEMALE_ratio_2019,IAC_MALE_ratio_2019,IAC_FEMALE_ratio_2019,AAC_MALE_ratio_2019,AAC_FEMALE_ratio_2019,NAC_MALE_ratio_2019,NAC_FEMALE_ratio_2019,NH_MALE_ratio_2019,NH_FEMALE_ratio_2019,NHWA_MALE_ratio_2019,NHWA_FEMALE_ratio_2019,NHBA_MALE_ratio_2019,NHBA_FEMALE_ratio_2019,NHIA_MALE_ratio_2019,NHIA_FEMALE_ratio_2019,NHAA_MALE_ratio_2019,NHAA_FEMALE_ratio_2019,NHNA_MALE_ratio_2019,NHNA_FEMALE_ratio_2019,NHTOM_MALE_ratio_2019,NHTOM_FEMALE_ratio_2019,NHWAC_MALE_ratio_2019,NHWAC_FEMALE_ratio_2019,NHBAC_MALE_ratio_2019,NHBAC_FEMALE_ratio_2019,NHIAC_MALE_ratio_2019,NHIAC_FEMALE_ratio_2019,NHAAC_MALE_ratio_2019,NHAAC_FEMALE_ratio_2019,NHNAC_MALE_ratio_2019,NHNAC_FEMALE_ratio_2019,H_MALE_ratio_2019,H_FEMALE_ratio_2019,HWA_MALE_ratio_2019,HWA_FEMALE_ratio_2019,HBA_MALE_ratio_2019,HBA_FEMALE_ratio_2019,HIA_MALE_ratio_2019,HIA_FEMALE_ratio_2019,HAA_MALE_ratio_2019,HAA_FEMALE_ratio_2019,HNA_MALE_ratio_2019,HNA_FEMALE_ratio_2019,HTOM_MALE_ratio_2019,HTOM_FEMALE_ratio_2019,HWAC_MALE_ratio_2019,HWAC_FEMALE_ratio_2019,HBAC_MALE_ratio_2019,HBAC_FEMALE_ratio_2019,HIAC_MALE_ratio_2019,HIAC_FEMALE_ratio_2019,HAAC_MALE_ratio_2019,HAAC_FEMALE_ratio_2019,HNAC_MALE_ratio_2019,HNAC_FEMALE_ratio_2019
0,9001,Connecticut,Fairfield,28.652830,479458,457625,21833,4.6,82428.0,74608.0,90248.0,8.8,8.0,9.6,24161.0,20926.0,27396.0,11.3,9.8,12.8,16941.0,14642.0,19240.0,10.5,9.1,11.9,91170.0,88902.0,93438.0,949921,647725.0,71384.0,138454.0,132522.0,168292.0,137073.0,357185,68.187249,7.514730,14.575317,13.950844,17.716421,14.429937,37.601548,943038,459242,483796,366178,379174,54885,63641,2544,2231,25280,28261,545,460,9810,10029,374704,387799,60472,69753,4568,4489,28796,31590,1345,1122,365528,391893,286103,300823,47287,55487,590,606,24652,27687,152,158,6744,7132,291982,306898,50592,59290,1757,2001,27723,30702,621,595,93714,91903,80075,78351,7598,8154,1954,...,28804,31628,620,626,97438,96334,82818,81793,8199,8693,2148,1783,640,599,389,337,3244,3129,85628,84584,10644,11169,3010,2699,1130,940,728,563,48.700776,51.299224,38.498959,39.859350,5.985591,6.946971,0.292474,0.249753,2.788838,3.093397,0.057032,0.052050,1.077881,1.097705,39.438289,40.808750,6.614956,7.623509,0.502792,0.490707,3.173220,3.452443,0.142898,0.126043,38.371644,41.087125,29.719653,31.188701,5.116438,6.025450,0.064770,0.060742,2.720993,3.029898,0.015795,0.016325,0.733994,0.766008,30.361103,31.842236,5.486616,6.439514,0.183711,0.204594,3.053432,3.352796,0.065724,0.066361,10.329131,10.212099,8.779306,8.670648,0.869153,0.921521,0.227704,0.189011,0.067845,0.063498,0.041237,0.035724,0.343887,0.331697,9.077186,8.966514,1.128341,1.183995,0.319082,0.286113,0.119788,0.099647,0.077173,0.059682
1,9003,Connecticut,Hartford,28.213759,477390,454205,23185,4.9,96200.0,88256.0,104144.0,11.0,10.1,11.9,28488.0,24981.0,31995.0,15.3,13.4,17.2,20592.0,17992.0,23192.0,14.8,12.9,16.7,70433.0,68420.0,72446.0,895388,625275.0,66723.0,169460.0,152999.0,130942.0,105151.0,368995,69.832855,7.451853,18.925873,17.087453,14.624051,11.743624,41.210626,893076,433833,459243,327876,347484,66613,71919,2453,2463,25325,25383,574,503,10992,11491,337633,357669,73775,79668,5198,5493,28242,28199,1042,917,355133,376784,265211,282304,56683,60761,804,838,24730,24727,122,146,7583,8008,271915,289320,61320,65813,2577,2817,27094,27116,431,465,78700,82459,62665,65180,9930,11158,1649,...,28907,28783,445,479,81461,85814,64744,67678,10311,11699,1691,1709,638,652,459,360,3618,3716,68010,71071,13007,14567,2704,2813,1228,1113,618,471,48.580720,51.419280,36.327883,38.455569,7.573341,8.251918,0.281142,0.287534,3.035482,3.026062,0.064931,0.057081,1.297941,1.341116,37.484524,39.647535,8.428318,9.160835,0.596936,0.629682,3.379424,3.352622,0.119208,0.106536,39.445454,41.795855,29.067308,30.865967,6.417037,6.939959,0.091509,0.095882,2.963935,2.952945,0.013457,0.016709,0.892208,0.924393,29.857691,31.677432,6.969677,7.527251,0.293702,0.314224,3.241713,3.227807,0.049904,0.053716,9.135267,9.623424,7.260575,7.589602,1.156305,1.311959,0.189634,0.191652,0.071547,0.073117,0.051474,0.040371,0.405733,0.416723,7.626834,7.970103,1.458642,1.633585,0.303234,0.315458,0.137711,0.124815,0.069304,0.052819
2,9005,Connecticut,Litchfield,15.083141,104051,99522,4529,4.4,12481.0,10381.0,14581.0,6.9,5.7,8.1,2642.0,1990.0,3294.0,7.9,5.9,9.9,1935.0,1456.0,2414.0,7.5,5.6,9.4,77968.0,74165.0,81771.0,182177,136414.0,10020.0,38651.0,37928.0,29259.0,20556.0,83455,74.879924,5.500145,21.216180,20.819313,16.060754,11.283532,45.809844,181667,89749,91918,83969,86459,2095,1760,305,271,1775,1890,66,58,1539,1480,85407,87822,2814,2478,802,749,2195,2286,129,128,83982,86448,79259,81946,1551,1279,154,143,1740,1855,29,36,1249,1189,80431,83043,2104,1822,543,510,2122,2214,73,92,5767,5470,4710,4513,544,481,151,...,2268,2312,78,88,6587,6213,5364,5123,627,547,182,149,43,45,37,24,334,325,5666,5420,820,753,302,261,97,92,55,41,49.515618,50.484382,46.003782,47.257573,1.331426,1.079115,0.185767,0.160259,1.055825,1.106287,0.039926,0.034935,0.898893,0.846212,46.839458,48.043342,1.757859,1.499448,0.463032,0.416452,1.311463,1.333089,0.073752,0.071534,45.862931,47.039089,43.029285,44.416718,0.983736,0.775787,0.084843,0.077634,1.031980,1.081333,0.019409,0.021627,0.713680,0.665990,43.697493,45.037791,1.303145,1.081887,0.295564,0.271720,1.257673,1.282073,0.043253,0.048799,3.652687,3.445293,2.974497,2.840856,0.347690,0.303328,0.100924,0.082625,0.023845,0.024954,0.020518,0.013309,0.185213,0.180222,3.141965,3.005551,0.454714,0.417561,0.167468,0.144732,0.053789,0.051017,0.030499,0.022736
3,9009,Connecticut,New Haven,25.310385,457800,434645,23155,5.1,91639.0,82523.0,100755.0,11.0,9.9,12.1,26499.0,22681.0,30317.0,15.3,13.1,17.5,18873.0,16055.0,21691.0,14.7,12.5,16.9,66764.0,64782.0,68746.0,860435,598860.0,55565.0,187277.0,151361.0,110253.0,94404.0,346640,69.599679,6.457780,21.765386,17.591218,12.813635,10.971660,40.286599,857748,413560,444188,324268,344780,59423,66910,2274,2252,17193,19087,439,444,9963,10715,333028,354141,66049,74163,4776,5115,19721,21692,777,787,337182,365574,261087,280503,51122,57678,786,772,16767,18664,155,141,7265,7816,267416,287258,55681,62685,2527,2753,18923,20918,396,395,76378,78614,63181,64277,8301,9232,1488,...,19327,21289,400,392,80460,83156,66259,67663,8910,9981,1612,1631,465,452,311,327,2903,3102,68877,70440,11137,12414,2443,2544,830,813,417,408,48.211246,51.788754,37.473808,39.822429,7.131384,8.055038,0.282302,0.282653,2.056257,2.273746,0.054167,0.053816,1.213327,1.301072,38.544288,40.959126,7.940034,8.947104,0.582271,0.618655,2.358214,2.585764,0.095583,0.093594,38.798044,42.060141,29.722015,31.906378,6.088982,6.887338,0.093711,0.091839,2.001855,2.220865,0.017783,0.015560,0.873699,0.938161,30.486208,32.718188,6.637091,7.494762,0.296459,0.321027,2.261110,2.490649,0.046797,0.045861,9.413202,9.728613,7.751794,7.916051,1.042402,1.167700,0.188592,0.190814,0.054401,0.052881,0.036385,0.038256,0.339629,0.362910,8.058080,8.240939,1.302943,1.452343,0.285812,0.297628,0.097104,0.095115,0.048786,0.047733
4,9011,Connecticut,New London,20.155000,137470,131267,6203,4.5,22246.0,18575.0,25917.0,8.6,7.2,10.0,6399.0,5037.0,7761.0,12.3,9.7,14.9,4164.0,3176.0,5152.0,10.8,8.2,13.4,71721.0,69358.0,74084.0,269033,188610.0,15060.0,54039.0,56512.0,33647.0,29352.0,114825,70.106641,5.597826,20.086383,21.005602,12.506644,10.910186,42.680638,267419,134088,133331,112027,112253,9771,8631,1608,1498,5630,5968,228,171,4824,4810,116235,116428,12475,11459,3228,3181,7040,7257,480,397,119737,119594,101034,101842,8220,7002,1083,1016,5429,5804,100,102,3871,3828,104418,105155,10304,9169,2364,2318,6656,6941,279,267,14351,13737,10993,10411,1551,1629,525,...,6609,6981,299,259,15042,14401,11470,10897,1669,1706,568,519,190,166,122,69,1023,1044,12364,11807,2335,2427,940,933,371,326,208,120,50.151957,49.848043,41.731333,41.827485,3.726914,3.265763,0.628191,0.575402,2.085926,2.239391,0.083709,0.061085,1.895885,1.878917,43.388913,43.459801,4.791370,4.379237,1.259021,1.228102,2.631916,2.755217,0.191172,0.142908,44.480140,44.417924,37.406394,37.718604,3.097592,2.622490,0.414018,0.379705,2.014283,2.176798,0.037707,0.035067,1.510147,1.485261,38.726876,39.007790,3.910922,3.464100,0.904580,0.876300,2.492025,2.632293,0.112743,0.097660,5.671817,5.430118,4.324940,4.108881,0.629322,0.643274,0.214173,0.195697,0.071642,0.062593,0.046002,0.026018,0.385738,0.393656,4.662036,4.452011,0.880448,0.915138,0.354441,0.351802,0.139891,0.122923,0.078430,0.045248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81,42101,Pennsylvania,Philadelphia,36.242007,704948,661229,43719,6.2,388221.0,371900.0,404542.0,25.3,24.2,26.4,108051.0,99494.0,116608.0,31.9,29.4,34.4,72959.0,66331.0,79587.0,31.0,28.2,33.8,40193.0,38699.0,41687.0,1580863,1075853.0,158205.0,384070.0,228729.0,176732.0,128117.0,358940,68.054790,10.007509,24.294958,14.468616,11.179463,8.104244,22.705320,1580601,747877,832724,346275,360779,317694,378714,6533,6943,56698,62773,1124,1257,19553,22258,361685,377889,331566,394926,12804,14599,62299,68869,2190,2472,632785,715790,266442,281102,295125,354782,1573,1925,55314,61306,208,199,14123,16476,277508,293687,304678,366254,5534,7096,59876,66367,965,1056,115092,116934,79833,79677,22569,23932,4960,...,62117,68891,985,1067,119459,121966,82969,83479,23254,24484,5075,5301,1425,1521,979,1096,5757,6085,87559,88223,27861,29477,7479,7898,2515,2620,1315,1470,47.317469,52.682531,21.940970,22.901789,19.876028,23.676505,0.415135,0.448846,3.708562,4.111703,0.073608,0.080868,1.303167,1.462820,22.973125,24.031163,20.805346,24.746349,0.821242,0.942071,4.080138,4.514401,0.145196,0.160158,39.776171,44.982968,16.703239,17.631863,18.408031,22.130861,0.094756,0.114200,3.618604,4.015684,0.011805,0.011679,0.939735,1.078681,17.445634,18.461754,19.046516,22.885502,0.349102,0.443480,3.921369,4.349004,0.062182,0.067358,7.541299,7.699563,5.237730,5.269926,1.467996,1.545645,0.320378,0.334646,0.089958,0.096019,0.061803,0.069189,0.363432,0.384139,5.527491,5.569409,1.758830,1.860847,0.472140,0.498591,0.158769,0.165397,0.083014,0.092799
82,42125,Pennsylvania,Washington,35.712524,106359,100831,5528,5.2,18913.0,15932.0,21894.0,9.4,7.9,10.9,4870.0,3863.0,5877.0,12.2,9.7,14.7,3206.0,2466.0,3946.0,10.8,8.3,13.3,60332.0,57217.0,63447.0,207298,148873.0,9667.0,54955.0,37634.0,30381.0,16236.0,83705,71.815936,4.663335,26.510145,18.154541,14.655713,7.832203,40.379068,207075,101716,105359,95005,98956,3528,3165,206,174,1065,1169,33,38,1879,1857,96802,100710,4794,4440,600,579,1383,1476,91,89,99785,103755,93446,97653,3402,3070,121,130,1034,1138,29,35,1753,1729,95125,99287,4577,4253,488,507,1339,1431,80,80,1931,1604,1559,1303,126,95,85,...,1433,1580,77,80,2036,1720,1632,1394,140,105,114,58,28,31,5,3,117,129,1736,1519,224,195,138,87,45,43,12,8,49.196336,50.803664,45.797017,47.545017,1.767820,1.562855,0.118918,0.091364,0.551567,0.619728,0.013052,0.015469,0.947961,0.969231,46.703889,48.467841,2.407367,2.228507,0.309864,0.291978,0.714476,0.784570,0.043023,0.042540,48.212119,49.972204,45.008097,46.871148,1.700143,1.512097,0.063810,0.063326,0.538032,0.604742,0.010635,0.014019,0.891403,0.906872,45.864694,47.733546,2.299084,2.134242,0.243154,0.249921,0.692722,0.763783,0.037222,0.038673,0.984217,0.831460,0.788920,0.673869,0.067677,0.050758,0.055108,0.028038,0.013535,0.014986,0.002417,0.001450,0.056559,0.062360,0.839195,0.734295,0.108283,0.094264,0.066710,0.042056,0.021753,0.020787,0.005801,0.003867
83,42129,Pennsylvania,Westmoreland,40.471667,180797,171495,9302,5.1,34404.0,30495.0,38313.0,9.9,8.8,11.0,8063.0,6548.0,9578.0,12.6,10.2,15.0,5834.0,4691.0,6977.0,12.2,9.8,14.6,60308.0,58585.0,62031.0,352627,261007.0,14708.0,93759.0,72911.0,51308.0,28321.0,151120,74.017872,4.170980,26.588718,20.676522,14.550219,8.031433,42.855482,352006,172009,179997,162682,171193,4829,4162,255,220,1550,1764,46,40,2647,2618,165221,173677,6635,5954,781,761,2040,2245,125,125,169823,177988,160939,169594,4659,3991,180,160,1532,1744,41,34,2472,2465,163312,171937,6335,5674,670,664,2009,2209,108,112,2186,2009,1743,1599,170,171,75,...,2078,2273,119,110,2356,2128,1867,1695,196,181,75,68,20,25,5,8,193,151,2041,1836,340,287,117,101,45,41,12,15,48.947976,51.052024,46.208788,48.470188,1.390087,1.208946,0.077673,0.066495,0.461452,0.517915,0.014331,0.012038,0.795646,0.776442,46.965454,49.210803,1.938670,1.741478,0.229006,0.220981,0.608486,0.663229,0.037547,0.035827,48.272709,50.442105,45.673676,47.984374,1.333910,1.157068,0.056177,0.047005,0.455719,0.510750,0.012898,0.009745,0.740329,0.733163,46.380471,48.684576,1.841221,1.659219,0.195472,0.192033,0.595588,0.651478,0.034107,0.031528,0.675267,0.609919,0.535112,0.485814,0.056177,0.051877,0.021496,0.019490,0.005732,0.007165,0.001433,0.002293,0.055317,0.043279,0.584983,0.526227,0.097449,0.082259,0.033534,0.028948,0.012898,0.011751,0.003439,0.004299
84,42133,Pennsylvania,York,35.832845,234928,224744,10184,4.3,40966.0,36227.0,45705.0,9.4,8.3,10.5,12784.0,10798.0,14770.0,13.2,11.2,15.2,8237.0,6666.0,9808.0,11.4,9.2,13.6,63493.0,61485.0,65501.0,446078,310781.0,32160.0,119750.0,80357.0,50969.0,27545.0,194485,69.669654,7.209501,26.845081,18.014114,11.426029,6.174929,43.598877,445484,220149,225335,196044,200991,15296,14906,851,753,2984,3689,149,165,4825,4831,200597,205506,18522,18197,1885,1825,3992,4634,289,313,203176,209317,183250,188951,12627,12376,381,347,2849,3540,72,93,3997,4010,187031,192713,15253,15061,1176,1199,3759,4393,179,205,16973,16018,12794,12040,2669,2530,470,...,3833,4518,201,208,18539,17726,13926,13297,2898,2773,504,450,154,162,91,77,966,967,14824,14200,3621,3482,775,706,260,267,131,125,49.432145,50.567855,43.777864,44.899991,3.600648,3.453229,0.202646,0.176369,0.683653,0.840203,0.037634,0.039416,1.129698,1.158648,44.840978,45.992054,4.369369,4.244441,0.440478,0.428675,0.911464,1.065564,0.073933,0.074155,45.303725,46.620481,40.676705,41.938903,2.955298,2.835714,0.090411,0.076159,0.649359,0.804128,0.017370,0.022269,0.914581,0.943308,41.539846,42.829879,3.563014,3.469040,0.267894,0.271457,0.853565,1.006106,0.044760,0.046319,4.128420,3.947374,3.101158,2.961087,0.645351,0.617515,0.112235,0.100210,0.034294,0.036076,0.020265,0.017147,0.215117,0.215340,3.301133,3.162175,0.806355,0.775401,0.172583,0.157218,0.057899,0.059458,0.029172,0.027836


# save data


In [28]:
# Export of the merged datasets. Every statement in this cell is commented out,
# so running the cell writes nothing to disk.
# NOTE(review): the target paths below carry no '.csv' extension, unlike the
# processed inputs read at the top of the notebook -- confirm the intended file
# names before re-enabling.
#save 2017-2019 dataset
# data_path = r'../../data/processed/alt_full_dem_df_2017_2019'
# df_2017_2019.to_csv(data_path, index=False)
#
# save datasets for individual years
# data_path = r'../../data/processed/alt_full_dem_df_2017'
# df_2017.to_csv(data_path, index=False)
# data_path = r'../../data/processed/alt_full_dem_df_2018'
# df_2018.to_csv(data_path, index=False)
# data_path = r'../../data/processed/alt_full_dem_df_2019'
# df_2019.to_csv(data_path, index=False)

In [33]:
def top_aqi_correlations(df, target, n=15):
    '''Returns the n largest Pearson correlations between the numeric columns of df and target

    df:     dataframe holding the target column and the candidate features
    target: name of the AQI column to correlate against (e.g. 'AQI_2017')
    n:      number of rows kept from the top of the ascending ranking

    The result is a Series sorted ascending, so the strongest positive
    correlation (the target with itself, 1.0) is the last entry.
    '''
    # Keep only numeric/bool columns before correlating. Older pandas dropped
    # non-numeric columns silently inside corr(); pandas >= 2.0 raises instead,
    # so the selection is made explicit here to behave the same on both.
    numeric_df = df.select_dtypes(include=['number', 'bool'])
    return numeric_df.corr()[target].sort_values().tail(n)


# One ranking per year, produced by the same helper instead of three copied lines.
# NOTE(review): GeoFIPS is a county identifier code, so its correlation with AQI
# carries no substantive meaning; it is left in so the output stays comparable
# with earlier runs.
for year_df, aqi_col in ((df_2017, 'AQI_2017'),
                         (df_2018, 'AQI_2018'),
                         (df_2019, 'AQI_2019')):
    display(top_aqi_correlations(year_df, aqi_col))

# Last expression of the cell: (rows, columns) of the 2017 frame.
df_2017.shape

90% CI LB 5-17 percent_2017                   0.129586
Poverty Percent, Age 5-17 in Families_2017    0.134345
90% CI UB percent 5-17 percent_2017           0.136621
BA_FEMALE_ratio_2017                          0.137848
BAC_FEMALE_ratio_2017                         0.140697
NHBA_FEMALE_ratio_2017                        0.146532
BA_MALE_ratio_2017                            0.149432
NHBAC_FEMALE_ratio_2017                       0.151226
BAC_MALE_ratio_2017                           0.151903
NHBA_MALE_ratio_2017                          0.158578
NHBAC_MALE_ratio_2017                         0.163254
Unemployment_rate_2017                        0.337539
HS_TOT_ratio_2017                             0.341866
GeoFIPS                                       0.460059
AQI_2017                                      1.000000
Name: AQI_2017, dtype: float64

NHBA_FEMALE_ratio_2018     0.260163
NHBAC_FEMALE_ratio_2018    0.264310
HAA_MALE_ratio_2018        0.266506
HAA_FEMALE_ratio_2018      0.270254
NHBA_MALE_ratio_2018       0.272856
BA_FEMALE_ratio_2018       0.273949
NHBAC_MALE_ratio_2018      0.276175
HAAC_FEMALE_ratio_2018     0.277638
Unemployment_rate_2018     0.277976
BAC_FEMALE_ratio_2018      0.279714
HAAC_MALE_ratio_2018       0.280254
BA_MALE_ratio_2018         0.286896
BAC_MALE_ratio_2018        0.291820
GeoFIPS                    0.369386
AQI_2018                   1.000000
Name: AQI_2018, dtype: float64

90% CI LB 5-17 percent_2019    0.185888
HAAC_MALE_ratio_2019           0.190961
HS_TOT_2019                    0.193945
Unemployed_2019                0.202138
NHBA_FEMALE_ratio_2019         0.221619
NHBAC_FEMALE_ratio_2019        0.225267
BA_FEMALE_ratio_2019           0.232023
NHBA_MALE_ratio_2019           0.232844
NHBAC_MALE_ratio_2019          0.236199
BAC_FEMALE_ratio_2019          0.236841
BA_MALE_ratio_2019             0.243260
BAC_MALE_ratio_2019            0.247715
Unemployment_rate_2019         0.331029
GeoFIPS                        0.365908
AQI_2019                       1.000000
Name: AQI_2019, dtype: float64

(86, 189)

In [30]:
# Disabled exploratory cell: for every df_2019 column from position 4 onward it
# would draw one scatter plot of that column against AQI_2019.
# NOTE(review): the legend call sits outside the loop, so if this is re-enabled
# only the last figure receives a legend -- confirm that is intended.
# # see scatter plots
# df_columns = df_2019.columns[4:]
# for col in df_columns:
#     fig, ax = plt.subplots(figsize=(10,10))
#
#     plt.title(' 2019: mean AQI vs ' + col)
#     plt.xlabel(col)
#     plt.ylabel('Mean AQI')
#     ax.scatter(df_2019[col], df_2019['AQI_2019'], label=col)
#
# ax.legend(bbox_to_anchor=(1.05,1))
# plt.show()


In [31]:
# df_2019.sort_values(['AQI_2019'], ascending=False)
# df_2019.sort_values(['HNAC_FEMALE_ratio_2019'], ascending=False)

In [32]:
# df_2019['AQI_2019'].hist()
# df_2019['AQI_2019'].describe()

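With PM2.5, the second largest pollutant, we have a smaller number of counties to work with: only about 89 (the 2017 frame above has 86 rows). With so few
observations, the correlation estimates are less reliable. In all 3 years the feature most highly correlated with AQI is GeoFIPS, which is only a county identifier code,
so none of the demographic features correlates with AQI more strongly than an arbitrary ID. This suggests that using PM2.5 as our pollutant is not viable.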