# Running processes in DataWrangling11 but with ozone aqi only

# Rerun DataWrangling8 processes with alternative datasets from DataWrangling9

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import sys
import os
import matplotlib.pyplot as plt
import klib

# `display` is part of the public IPython.display API; importing it from
# IPython.core.display is deprecated (since IPython 7.14).
from IPython.display import display

# Optional display tweaks, left disabled; uncomment while debugging.
# pd.set_option('display.max_columns', None)
# pd.reset_option('display.max_rows')
# np.set_printoptions(threshold=sys.maxsize)

# Plot styling for the notebook: dark matplotlib style, thinner/fainter grid
# lines, then the seaborn 'ticks' style with the 'talk' context.
plt.style.use('dark_background')
plt.rcParams.update({"grid.linewidth":0.5, "grid.alpha":0.5})
sns.set(style='ticks', context='talk')

# Load data and constants

In [2]:
# Numeric state codes of the North-East states (plus DC); they are compared
# against the 'State Code' column of the ozone files further down.
MA = 25
CT = 9
ME = 23
NY = 36
NH = 33
NJ = 34
PA = 42
VT = 50
DE = 10
MD = 24
RI = 44
DC = 11
NORTH_EAST = [MA, CT, ME, NY, NH, NJ, PA, VT, DE, MD, RI, DC]

# Raw daily ozone files, one per year.
ozone_2017, ozone_2018, ozone_2019 = (
    pd.read_csv('../../data/raw/daily_ozone_2017.csv'),
    pd.read_csv('../../data/raw/daily_ozone_2018.csv'),
    pd.read_csv('../../data/raw/daily_ozone_2019.csv'),
)

# Processed 2017-2019 tables: demographics, unemployment, poverty and
# education/vehicle data.
(
    NE_dem_ratio_2017_2019,
    NE_unemploy_rate_2017_2019,
    NE_pov_ratio_2017_2019,
    NE_educ_vehicle_2017_2019,
) = (
    pd.read_csv('../../data/processed/alt_clean_NE_dem_ratio_2017_2019.csv'),
    pd.read_csv('../../data/processed/alt_clean_NE_unemploy_rate_2017_2019.csv'),
    pd.read_csv('../../data/processed/alt_clean_NE_pov_rate_2017_2019.csv'),
    pd.read_csv('../../data/processed/alt_clean_NE_edu_vehicle_2017_2019.csv'),
)

In [3]:

def remove_col(df, col_name):
    '''Return a new dataframe without the given column or columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified.
    col_name : str or iterable
        A single column name, or a collection of column names. Names in a
        collection are converted with ``str()`` before lookup. Collections
        of any length are accepted (including one element or none).

    Returns
    -------
    pandas.DataFrame
        A new frame with the requested columns dropped.

    Raises
    ------
    KeyError
        If a requested column does not exist in ``df``.
    '''
    # A bare string is ONE column name, not an iterable of characters.
    if isinstance(col_name, str):
        names = [col_name]
    else:
        names = [str(name) for name in col_name]

    # drop() returns a new frame, so the caller's frame stays untouched.
    return df.drop(columns=names)

def get_NE_States(df, col, state_codes=None):
    '''Return a boolean mask selecting the rows of North Eastern US states.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding a state-code column.
    col : str
        Name of the column in ``df`` containing the state codes.
    state_codes : iterable, optional
        Codes to keep. Defaults to the North-East constants defined in this
        notebook (MA, CT, ME, NY, NH, NJ, PA, VT, DE, MD, RI, DC).

    Returns
    -------
    pandas.Series
        Boolean series aligned with ``df``; True where ``df[col]`` is one of
        the requested state codes.
    '''
    if state_codes is None:
        state_codes = [MA, CT, ME, NY, NH, NJ, PA, VT, DE, MD, RI, DC]
    # One vectorised membership test instead of a chain of == comparisons.
    return df[col].isin(list(state_codes))
def get_df_with_geofips(df, state_code, county_code):
    '''Return a copy of ``df`` with a leading 'GeoFIPS' column.

    The GeoFIPS value is the state code left-padded with zeros to 2
    characters followed by the county code left-padded to 3 characters
    (e.g. state 9, county 1 -> '09001').

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; it is not modified (the column is added to a copy).
    state_code : str
        Name of the column holding the state code.
    county_code : str
        Name of the column holding the county code.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the string column 'GeoFIPS' inserted at
        position 0.

    Raises
    ------
    ValueError
        If ``df`` already has a 'GeoFIPS' column (raised by ``insert``).
    '''
    state_part = df[state_code].astype(str).str.zfill(2)
    county_part = df[county_code].astype(str).str.zfill(3)

    # Work on a copy so a filtered slice passed in by the caller is never
    # mutated; to_numpy() makes the insert positional, not index-aligned.
    result = df.copy()
    result.insert(loc=0, column='GeoFIPS', value=(state_part + county_part).to_numpy())
    return result

## Retrieve NE ozone data

In [4]:
print(ozone_2017.shape)

# One boolean row mask per year, marking the North-East state rows.
NE_States_17, NE_States_18, NE_States_19 = [
    get_NE_States(yearly_ozone, 'State Code')
    for yearly_ozone in (ozone_2017, ozone_2018, ozone_2019)
]

# Keep only the masked rows of each yearly ozone table.
NE_aqi_2017 = ozone_2017.loc[NE_States_17, :]
NE_aqi_2018 = ozone_2018.loc[NE_States_18, :]
NE_aqi_2019 = ozone_2019.loc[NE_States_19, :]

(405603, 29)


In [5]:
# Add the GeoFIPS column (built from 'State Code' and 'County Code') to
# each yearly North-East AQI table.
NE_aqi_2017, NE_aqi_2018, NE_aqi_2019 = [
    get_df_with_geofips(yearly_aqi, 'State Code', 'County Code')
    for yearly_aqi in (NE_aqi_2017, NE_aqi_2018, NE_aqi_2019)
]

In [6]:
# Remove unnecessary columns: everything except GeoFIPS, AQI and the
# state / county / city names is dropped from the yearly AQI tables.
cols_to_remove = ['State Code', 'County Code', 'Site Num', 'Parameter Code',
       'POC', 'Latitude', 'Longitude', 'Datum', 'Parameter Name',
       'Sample Duration', 'Pollutant Standard', 'Date Local',
       'Units of Measure', 'Event Type', 'Observation Count',
       'Observation Percent', 'Arithmetic Mean', '1st Max Value',
       '1st Max Hour', 'Method Code', 'Method Name', 'Local Site Name',
       'Address', 'CBSA Name', 'Date of Last Change']

NE_aqi_2017 = remove_col(NE_aqi_2017, cols_to_remove)
NE_aqi_2018 = remove_col(NE_aqi_2018, cols_to_remove)
NE_aqi_2019 = remove_col(NE_aqi_2019, cols_to_remove)

In [7]:
# Tag each AQI column with its year so the yearly tables can be merged
# side by side later on.
NE_aqi_2017 = NE_aqi_2017.rename(columns={'AQI':'AQI_2017'})
NE_aqi_2018 = NE_aqi_2018.rename(columns={'AQI':'AQI_2018'})
NE_aqi_2019 = NE_aqi_2019.rename(columns={'AQI':'AQI_2019'})

In [8]:
print(NE_aqi_2017.columns)

# Mean AQI per county (one row per GeoFIPS / state / county) for each year.
average_NE_aqi_2017 = (
    NE_aqi_2017
    .groupby(['GeoFIPS', 'State Name', 'County Name'], as_index=False)[['AQI_2017']]
    .mean()
)
average_NE_aqi_2018 = (
    NE_aqi_2018
    .groupby(['GeoFIPS', 'State Name', 'County Name'], as_index=False)[['AQI_2018']]
    .mean()
)
average_NE_aqi_2019 = (
    NE_aqi_2019
    .groupby(['GeoFIPS', 'State Name', 'County Name'], as_index=False)[['AQI_2019']]
    .mean()
)

display(average_NE_aqi_2017, average_NE_aqi_2018, average_NE_aqi_2019)


Index(['GeoFIPS', 'AQI_2017', 'State Name', 'County Name', 'City Name'], dtype='object')


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017
0,09001,Connecticut,Fairfield,45.135453
1,09003,Connecticut,Hartford,34.949296
2,09005,Connecticut,Litchfield,38.488827
3,09007,Connecticut,Middlesex,43.651163
4,09009,Connecticut,New Haven,38.968085
...,...,...,...,...
134,44007,Rhode Island,Providence,38.049587
135,44009,Rhode Island,Washington,40.418605
136,50003,Vermont,Bennington,34.801120
137,50007,Vermont,Chittenden,34.774011


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2018
0,09001,Connecticut,Fairfield,47.718225
1,09003,Connecticut,Hartford,34.149856
2,09005,Connecticut,Litchfield,37.975069
3,09007,Connecticut,Middlesex,43.464455
4,09009,Connecticut,New Haven,38.446957
...,...,...,...,...
135,44007,Rhode Island,Providence,39.642458
136,44009,Rhode Island,Washington,42.051643
137,50003,Vermont,Bennington,34.044444
138,50007,Vermont,Chittenden,35.517906


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2019
0,09001,Connecticut,Fairfield,46.871944
1,09003,Connecticut,Hartford,36.150685
2,09005,Connecticut,Litchfield,37.840220
3,09007,Connecticut,Middlesex,44.316038
4,09009,Connecticut,New Haven,41.133913
...,...,...,...,...
135,44007,Rhode Island,Providence,39.906336
136,44009,Rhode Island,Washington,44.046729
137,50003,Vermont,Bennington,34.277008
138,50007,Vermont,Chittenden,34.721763


In [9]:
# Merge avg northeast ozone data
# The county key columns are spelled out so the join does not silently
# depend on whichever column names the yearly tables happen to share.
avg_NE_aqi_2017_2018 = pd.merge(
    average_NE_aqi_2017, average_NE_aqi_2018,
    how='inner', on=['GeoFIPS', 'State Name', 'County Name'],
)
avg_NE_aqi_2017_2019 = pd.merge(
    avg_NE_aqi_2017_2018, average_NE_aqi_2019,
    how='inner', on=['GeoFIPS', 'State Name', 'County Name'],
)

# GeoFIPS becomes numeric here (leading zeros are lost, e.g. '09001' -> 9001).
# NOTE(review): presumably needed so it matches the GeoFIPS values in the
# processed socio-economic tables - confirm their dtype.
avg_NE_aqi_2017_2019['GeoFIPS'] = pd.to_numeric(avg_NE_aqi_2017_2019['GeoFIPS'])
avg_NE_aqi_2017_2019.dtypes

GeoFIPS          int64
State Name      object
County Name     object
AQI_2017       float64
AQI_2018       float64
AQI_2019       float64
dtype: object

* Have 3 datasets for the individual years 2017, 2018, 2019

In [10]:
# Show every input table once before they are split per year and combined.
display(
    avg_NE_aqi_2017_2019,
    NE_unemploy_rate_2017_2019,
    NE_pov_ratio_2017_2019,
    NE_educ_vehicle_2017_2019,
    NE_dem_ratio_2017_2019,
)


Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017,AQI_2018,AQI_2019
0,9001,Connecticut,Fairfield,45.135453,47.718225,46.871944
1,9003,Connecticut,Hartford,34.949296,34.149856,36.150685
2,9005,Connecticut,Litchfield,38.488827,37.975069,37.840220
3,9007,Connecticut,Middlesex,43.651163,43.464455,44.316038
4,9009,Connecticut,New Haven,38.968085,38.446957,41.133913
...,...,...,...,...,...,...
133,44007,Rhode Island,Providence,38.049587,39.642458,39.906336
134,44009,Rhode Island,Washington,40.418605,42.051643,44.046729
135,50003,Vermont,Bennington,34.801120,34.044444,34.277008
136,50007,Vermont,Chittenden,34.774011,35.517906,34.721763


Unnamed: 0,GeoFIPS,Stabr,area_name,Civilian_labor_force_2017,Employed_2017,Unemployed_2017,Unemployment_rate_2017,Civilian_labor_force_2018,Employed_2018,Unemployed_2018,Unemployment_rate_2018,Civilian_labor_force_2019,Employed_2019,Unemployed_2019,Unemployment_rate_2019
0,9001,CT,"Fairfield County, CT",479458,457625,21833,4.6,479179,460045,19134,4.0,481023,463547,17476,3.6
1,9003,CT,"Hartford County, CT",477390,454205,23185,4.9,478660,458138,20522,4.3,483303,464533,18770,3.9
2,9005,CT,"Litchfield County, CT",104051,99522,4529,4.4,104604,100568,4036,3.9,105182,101543,3639,3.5
3,9007,CT,"Middlesex County, CT",92583,88817,3766,4.1,92811,89517,3294,3.5,93696,90703,2993,3.2
4,9009,CT,"New Haven County, CT",457800,434645,23155,5.1,458799,438485,20314,4.4,461613,443514,18099,3.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,50019,VT,"Orleans County, VT",13601,12920,681,5.0,13501,12924,577,4.3,13275,12713,562,4.2
240,50021,VT,"Rutland County, VT",31096,30030,1066,3.4,30752,29828,924,3.0,30446,29608,838,2.8
241,50023,VT,"Washington County, VT",34260,33273,987,2.9,34512,33658,854,2.5,34360,33586,774,2.3
242,50025,VT,"Windham County, VT",22823,22132,691,3.0,22470,21856,614,2.7,21977,21416,561,2.6


Unnamed: 0,GeoFIPS,Postal Code,Name,"Poverty Estimate, All Ages_2017",90% CI LB All Ages_2017,90% CI UB All Ages_2017,"Poverty Percent, All Ages_2017",90% CI LB percent_2017,90% CI UB percent_2017,"Poverty Estimate, Age 0-17_2017",...,90% CI UB 0-17 percent_2019,"Poverty Estimate, Age 5-17 in Families_2019",90% CI LB 5-17 fam_2019,90% CI UB 5-17 fam_2019,"Poverty Percent, Age 5-17 in Families_2019",90% CI LB 5-17 percent_2019,90% CI UB percent 5-17 percent_2019,Median Household Income_2019,90% CI Lower Bound LB medh inc_2019,90% CI UB medh inc_2019
0,9001,CT,Fairfield County,82428.0,74608.0,90248.0,8.8,8.0,9.6,24161.0,...,13.3,18562.0,16078.0,21046.0,11.9,10.3,13.5,96966.0,93161.0,100771.0
1,9003,CT,Hartford County,96200.0,88256.0,104144.0,11.0,10.1,11.9,28488.0,...,16.0,17852.0,15166.0,20538.0,13.1,11.1,15.1,75336.0,73281.0,77391.0
2,9005,CT,Litchfield County,12481.0,10381.0,14581.0,6.9,5.7,8.1,2642.0,...,11.6,2077.0,1591.0,2563.0,8.5,6.5,10.5,81015.0,77226.0,84804.0
3,9007,CT,Middlesex County,10796.0,8906.0,12686.0,6.8,5.6,8.0,2169.0,...,9.1,1388.0,981.0,1795.0,6.6,4.7,8.5,81721.0,74613.0,88829.0
4,9009,CT,New Haven County,91639.0,82523.0,100755.0,11.0,9.9,12.1,26499.0,...,20.5,22220.0,19581.0,24859.0,17.8,15.7,19.9,69687.0,66999.0,72375.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,50019,VT,Orleans County,3962.0,3189.0,4735.0,15.2,12.2,18.2,1007.0,...,23.3,643.0,427.0,859.0,16.7,11.1,22.3,48826.0,43855.0,53797.0
239,50021,VT,Rutland County,6106.0,4691.0,7521.0,10.8,8.3,13.3,1518.0,...,16.4,849.0,480.0,1218.0,11.4,6.5,16.3,51903.0,49484.0,54322.0
240,50023,VT,Washington County,5252.0,4184.0,6320.0,9.4,7.5,11.3,1114.0,...,12.7,636.0,373.0,899.0,8.0,4.7,11.3,65879.0,61501.0,70257.0
241,50025,VT,Windham County,4827.0,3819.0,5835.0,11.6,9.2,14.0,1138.0,...,17.9,664.0,410.0,918.0,12.5,7.7,17.3,52068.0,47230.0,56906.0


Unnamed: 0,GeoFIPS,NAME,state,county,POP_2017,EDU_TOT_2017,LESS_HS_TOT_2017,HS_TOT_2017,COL_OR_ASSOC_TOT_2017,BACH_TOT_2017,...,BACH_TOT_2019,GRAD_TOT_2019,VEHICLE_TOT_2019,EDU_TOT_ratio_2019,LESS_HS_TOT_ratio_2019,HS_TOT_ratio_2019,COL_OR_ASSOC_TOT_ratio_2019,BACH_TOT_ratio_2019,GRAD_TOT_ratio_2019,VEHICLE_TOT_ratio_2019
0,36089,"St. Lawrence County, New York",36,89,109623,72555.0,9164.0,25858.0,20091.0,8279.0,...,8684.0,7733.0,33650,67.044737,8.564136,23.762762,19.480230,8.060145,7.177464,31.232597
1,36091,"Saratoga County, New York",36,91,229869,164414.0,10656.0,39033.0,45561.0,37886.0,...,40148.0,30363.0,104085,72.095118,4.492676,17.754924,19.172290,17.466056,13.209172,45.281320
2,36093,"Schenectady County, New York",36,93,155565,107453.0,10085.0,32144.0,35263.0,17210.0,...,22627.0,15566.0,59970,70.183324,5.047682,20.198456,20.343982,14.569959,10.023245,38.615831
3,36101,"Steuben County, New York",36,101,96281,68011.0,6510.0,25920.0,20685.0,7451.0,...,8066.0,8475.0,35505,71.044989,6.093585,25.791841,21.817171,8.456788,8.885604,37.225175
4,36103,"Suffolk County, New York",36,103,1492953,1036946.0,102621.0,277080.0,279213.0,208064.0,...,214035.0,182346.0,629360,70.148605,6.397598,18.389192,18.517663,14.495114,12.349037,42.622211
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,36079,"Putnam County, New York",36,79,99323,71447.0,5090.0,20597.0,18095.0,14929.0,...,15240.0,12261.0,42420,73.443857,5.721115,19.355167,20.396664,15.500407,12.470504,43.144833
153,36081,"Queens County, New York",36,81,2358582,1695302.0,304984.0,493794.0,372885.0,335990.0,...,334272.0,207624.0,387870,72.484558,12.328372,19.812340,16.300805,14.831103,9.211938,17.209159
154,36083,"Rensselaer County, New York",36,83,159722,111047.0,9095.0,31730.0,34954.0,20143.0,...,20896.0,18203.0,65615,70.327759,5.902441,19.919478,19.870963,13.165820,11.469058,41.341659
155,36085,"Richmond County, New York",36,85,479458,334079.0,39031.0,100730.0,83679.0,66108.0,...,71914.0,48875.0,128740,70.041143,7.701258,20.426427,16.545240,15.103446,10.264773,27.038096


Unnamed: 0,GeoFIPS,STNAME,CTYNAME,TOT_POP_2017,TOT_MALE_2017,TOT_FEMALE_2017,WA_MALE_2017,WA_FEMALE_2017,BA_MALE_2017,BA_FEMALE_2017,...,HWAC_MALE_ratio_2019,HWAC_FEMALE_ratio_2019,HBAC_MALE_ratio_2019,HBAC_FEMALE_ratio_2019,HIAC_MALE_ratio_2019,HIAC_FEMALE_ratio_2019,HAAC_MALE_ratio_2019,HAAC_FEMALE_ratio_2019,HNAC_MALE_ratio_2019,HNAC_FEMALE_ratio_2019
0,9001,Connecticut,Fairfield County,943038,459242,483796,366178,379174,54885,63641,...,9.077186,8.966514,1.128341,1.183995,0.319082,0.286113,0.119788,0.099647,0.077173,0.059682
1,9003,Connecticut,Hartford County,893076,433833,459243,327876,347484,66613,71919,...,7.626834,7.970103,1.458642,1.633585,0.303234,0.315458,0.137711,0.124815,0.069304,0.052819
2,9005,Connecticut,Litchfield County,181667,89749,91918,83969,86459,2095,1760,...,3.141965,3.005551,0.454714,0.417561,0.167468,0.144732,0.053789,0.051017,0.030499,0.022736
3,9007,Connecticut,Middlesex County,162942,79376,83566,70949,74431,4283,4437,...,2.782019,2.829422,0.419858,0.471570,0.088035,0.112044,0.056022,0.052944,0.029550,0.032013
4,9009,Connecticut,New Haven County,857748,413560,444188,324268,344780,59423,66910,...,8.058080,8.240939,1.302943,1.452343,0.285812,0.297628,0.097104,0.095115,0.048786,0.047733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240,50019,Vermont,Orleans County,26811,13459,13352,12970,12884,128,83,...,0.828494,0.702741,0.073973,0.070274,0.059178,0.051781,0.022192,0.018493,0.011096,0.007397
241,50021,Vermont,Rutland County,59006,29118,29888,28147,28858,245,148,...,0.740664,0.704576,0.089361,0.044680,0.054991,0.049836,0.024059,0.015466,0.006874,0.005155
242,50023,Vermont,Washington County,58253,28808,29445,27604,28267,334,218,...,0.919379,0.910819,0.089027,0.051362,0.068483,0.053074,0.018833,0.020545,0.006848,0.000000
243,50025,Vermont,Windham County,42851,20998,21853,19944,20751,355,247,...,1.162901,1.025532,0.118422,0.146843,0.073421,0.066316,0.033158,0.028421,0.011842,0.004737


## Try to make one df for 2017
* Cleaning of the datasets was done in the Data_Wrangling4 and Data_Wrangling7 files
* So the datasets only need to be combined into one here

In [11]:
# Positional selection of the 2017 columns: column 0 (GeoFIPS) plus the
# 2017 block of each 2017-2019 table.
avg_NE_aqi_2017 = avg_NE_aqi_2017_2019.iloc[:, 0:4]
NE_unemploy_rate_2017 = NE_unemploy_rate_2017_2019.iloc[:, [0] + list(range(3, 7))]

pov_col_2017 = [
    *NE_pov_ratio_2017_2019.columns[:1],
    *NE_pov_ratio_2017_2019.columns[3:24],
]
NE_pov_ratio_2017 = NE_pov_ratio_2017_2019[pov_col_2017]

educ_vehicle_2017_col = [
    *NE_educ_vehicle_2017_2019.columns[:1],
    *NE_educ_vehicle_2017_2019.columns[4:19],
]
NE_educ_vehicle_2017 = NE_educ_vehicle_2017_2019[educ_vehicle_2017_col]

dem_ratio_2017_col = [
    *NE_dem_ratio_2017_2019.columns[:1],
    *NE_dem_ratio_2017_2019.columns[3:148],
]
NE_dem_ratio_2017 = NE_dem_ratio_2017_2019[dem_ratio_2017_col]


In [12]:
# Inner-join the 2017 tables one after another, starting from the AQI
# averages; each step keeps only counties present in both sides.
NE_avg_aqi_unemploy_2017 = avg_NE_aqi_2017.merge(NE_unemploy_rate_2017, how='inner')
NE_avg_aqi_unemploy_pov_2017 = NE_avg_aqi_unemploy_2017.merge(NE_pov_ratio_2017, how='inner')
NE_avg_aqi_unemploy_pov_educ_veh_2017 = NE_avg_aqi_unemploy_pov_2017.merge(
    NE_educ_vehicle_2017, how='inner'
)
df_2017 = NE_avg_aqi_unemploy_pov_educ_veh_2017.merge(NE_dem_ratio_2017, how='inner')

df_2017

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017,Civilian_labor_force_2017,Employed_2017,Unemployed_2017,Unemployment_rate_2017,"Poverty Estimate, All Ages_2017",90% CI LB All Ages_2017,...,HWAC_MALE_ratio_2017,HWAC_FEMALE_ratio_2017,HBAC_MALE_ratio_2017,HBAC_FEMALE_ratio_2017,HIAC_MALE_ratio_2017,HIAC_FEMALE_ratio_2017,HAAC_MALE_ratio_2017,HAAC_FEMALE_ratio_2017,HNAC_MALE_ratio_2017,HNAC_FEMALE_ratio_2017
0,9001,Connecticut,Fairfield,45.135453,479458,457625,21833,4.6,82428.0,74608.0,...,8.771863,8.578764,1.047678,1.109499,0.298079,0.263828,0.113781,0.094164,0.076773,0.055883
1,9003,Connecticut,Hartford,34.949296,477390,454205,23185,4.9,96200.0,88256.0,...,7.358612,7.653212,1.394618,1.551380,0.293480,0.299639,0.128544,0.121266,0.068415,0.050612
2,9005,Connecticut,Litchfield,38.488827,104051,99522,4529,4.4,12481.0,10381.0,...,2.739078,2.630637,0.390825,0.361100,0.142569,0.131559,0.040183,0.039633,0.030826,0.019816
3,9007,Connecticut,Middlesex,43.651163,92583,88817,3766,4.1,10796.0,8906.0,...,2.658001,2.714463,0.386641,0.427146,0.088375,0.102490,0.055848,0.054007,0.025162,0.025162
4,9009,Connecticut,New Haven,38.968085,457800,434645,23155,5.1,91639.0,82523.0,...,7.649333,7.797512,1.208747,1.338155,0.262198,0.275372,0.093034,0.090236,0.044419,0.045701
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,42133,Pennsylvania,York,35.621813,234928,224744,10184,4.3,40966.0,36227.0,...,3.045227,2.871708,0.733809,0.703953,0.159153,0.140521,0.052303,0.054098,0.024692,0.024243
113,44003,Rhode Island,Kent,39.743842,90048,86403,3645,4.0,13853.0,11035.0,...,2.093639,2.117486,0.344863,0.296558,0.124738,0.116789,0.058089,0.066038,0.036688,0.017732
114,44007,Rhode Island,Providence,38.049587,323972,308660,15312,4.7,90056.0,83306.0,...,9.005251,8.808762,2.077334,2.185199,0.602400,0.607920,0.165739,0.158169,0.115434,0.117641
115,44009,Rhode Island,Washington,40.418605,68725,66001,2724,4.0,10592.0,8668.0,...,1.298352,1.418687,0.184461,0.183669,0.127460,0.135377,0.041959,0.041959,0.015834,0.016625


* df_2017 contains data on NorthEastern counties of the US
* The columns of df_2017 go from average aqi, unemployment rate, poverty rate, education attainment ratio, total vehicle ratio, demographic ratios


## Try to make one df for 2018
* Cleaning of the datasets was done in the Data_Wrangling4 and Data_Wrangling7 files
* So the datasets only need to be combined into one here

In [13]:
# Select the 2018 columns: column 0 (GeoFIPS) plus the 2018 block of each
# 2017-2019 table.
avg_NE_aqi_2018 = avg_NE_aqi_2017_2019.iloc[:, [0, 1, 2, 4]]

unemploy_col_2018 = list(NE_unemploy_rate_2017_2019.columns[0:1]) + list(NE_unemploy_rate_2017_2019.columns[7:11])
NE_unemploy_rate_2018 = NE_unemploy_rate_2017_2019.loc[:, unemploy_col_2018]

pov_col_2018 = list(NE_pov_ratio_2017_2019.columns[0:1]) + list(NE_pov_ratio_2017_2019.columns[24:45])
NE_pov_ratio_2018 = NE_pov_ratio_2017_2019.loc[:, pov_col_2018]

educ_vehicle_2018_col = list(NE_educ_vehicle_2017_2019.columns[0:1]) + list(NE_educ_vehicle_2017_2019.columns[19:34])
NE_educ_vehicle_2018 = NE_educ_vehicle_2017_2019.loc[:, educ_vehicle_2018_col]

# The demographic columns are picked by their '_2018' suffix rather than by
# position. The earlier positional slice started at 149, although the 2017
# block ends at 147 and the 2019 block starts at 293, so column 148
# (presumably TOT_POP_2018 - TODO confirm) was left out of the 2018 table.
dem_ratio_2018_col = list(NE_dem_ratio_2017_2019.columns[0:1]) + [
    col for col in NE_dem_ratio_2017_2019.columns if str(col).endswith('_2018')
]
NE_dem_ratio_2018 = NE_dem_ratio_2017_2019.loc[:, dem_ratio_2018_col]

In [14]:
# Inner-join the 2018 tables one after another, starting from the AQI
# averages; each step keeps only counties present in both sides.
NE_avg_aqi_unemploy_2018 = avg_NE_aqi_2018.merge(NE_unemploy_rate_2018, how='inner')
NE_avg_aqi_unemploy_pov_2018 = NE_avg_aqi_unemploy_2018.merge(NE_pov_ratio_2018, how='inner')
NE_avg_aqi_unemploy_pov_educ_veh_2018 = NE_avg_aqi_unemploy_pov_2018.merge(
    NE_educ_vehicle_2018, how='inner'
)
df_2018 = NE_avg_aqi_unemploy_pov_educ_veh_2018.merge(NE_dem_ratio_2018, how='inner')

df_2018

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2018,Civilian_labor_force_2018,Employed_2018,Unemployed_2018,Unemployment_rate_2018,"Poverty Estimate, All Ages_2018",90% CI LB All Ages_2018,...,HWAC_MALE_ratio_2018,HWAC_FEMALE_ratio_2018,HBAC_MALE_ratio_2018,HBAC_FEMALE_ratio_2018,HIAC_MALE_ratio_2018,HIAC_FEMALE_ratio_2018,HAAC_MALE_ratio_2018,HAAC_FEMALE_ratio_2018,HNAC_MALE_ratio_2018,HNAC_FEMALE_ratio_2018
0,9001,Connecticut,Fairfield,47.718225,479179,460045,19134,4.0,92971.0,86234.0,...,8.943071,8.786817,1.085838,1.145798,0.307001,0.280199,0.118542,0.098944,0.075426,0.059536
1,9003,Connecticut,Hartford,34.149856,478660,458138,20522,4.3,96957.0,89308.0,...,7.534003,7.856327,1.422730,1.596496,0.301149,0.312801,0.132425,0.121446,0.070806,0.051872
2,9005,Connecticut,Litchfield,37.975069,104604,100568,4036,3.9,12441.0,10517.0,...,2.973577,2.829454,0.425191,0.387642,0.155719,0.137497,0.043071,0.046937,0.025401,0.023744
3,9007,Connecticut,Middlesex,43.464455,92811,89517,3294,3.5,10556.0,8754.0,...,2.716891,2.783815,0.400319,0.444526,0.098852,0.107448,0.049733,0.054031,0.024559,0.025173
4,9009,Connecticut,New Haven,38.446957,458799,438485,20314,4.4,96563.0,88623.0,...,7.905402,8.066201,1.249050,1.394563,0.274688,0.284257,0.096736,0.095686,0.047143,0.046909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,42133,Pennsylvania,York,33.888736,234583,225632,8951,3.8,38582.0,33740.0,...,3.207792,3.067565,0.766780,0.733956,0.162556,0.150944,0.055823,0.056046,0.026348,0.026795
113,44003,Rhode Island,Kent,41.521327,90521,87254,3267,3.6,12620.0,10058.0,...,2.246225,2.240130,0.368174,0.324285,0.145075,0.134103,0.065832,0.069490,0.048155,0.022554
114,44007,Rhode Island,Providence,39.642458,324904,310929,13975,4.3,98431.0,92071.0,...,9.290167,9.067074,2.132653,2.241296,0.609621,0.622652,0.172854,0.159195,0.118533,0.121359
115,44009,Rhode Island,Washington,42.051643,69017,66601,2416,3.5,9507.0,7499.0,...,1.364029,1.474262,0.187951,0.173676,0.132438,0.119749,0.038066,0.042031,0.013482,0.019033


* df_2018 contains data on NorthEastern counties of the US
* The columns of df_2018 go from average aqi, unemployment rate, poverty rate, education attainment ratio, total vehicle ratio, demographic ratios

## Try to make one df for 2019
* Cleaning of the datasets was done in the Data_Wrangling4 and Data_Wrangling7 files
* So the datasets only need to be combined into one here

In [15]:
# Positional selection of the 2019 columns: column 0 (GeoFIPS) plus the
# trailing block of each 2017-2019 table.
avg_NE_aqi_2019 = avg_NE_aqi_2017_2019.iloc[:, [0, 1, 2, 5]]

unemploy_col_2019 = [
    *NE_unemploy_rate_2017_2019.columns[:1],
    *NE_unemploy_rate_2017_2019.columns[11:],
]
NE_unemploy_rate_2019 = NE_unemploy_rate_2017_2019[unemploy_col_2019]

pov_col_2019 = [
    *NE_pov_ratio_2017_2019.columns[:1],
    *NE_pov_ratio_2017_2019.columns[45:],
]
NE_pov_ratio_2019 = NE_pov_ratio_2017_2019[pov_col_2019]

educ_vehicle_2019_col = [
    *NE_educ_vehicle_2017_2019.columns[:1],
    *NE_educ_vehicle_2017_2019.columns[34:],
]
NE_educ_vehicle_2019 = NE_educ_vehicle_2017_2019[educ_vehicle_2019_col]

dem_ratio_2019_col = [
    *NE_dem_ratio_2017_2019.columns[:1],
    *NE_dem_ratio_2017_2019.columns[293:],
]
NE_dem_ratio_2019 = NE_dem_ratio_2017_2019[dem_ratio_2019_col]

In [16]:
# Inner-join the 2019 tables one after another, starting from the AQI
# averages; each step keeps only counties present in both sides.
NE_avg_aqi_unemploy_2019 = avg_NE_aqi_2019.merge(NE_unemploy_rate_2019, how='inner')
NE_avg_aqi_unemploy_pov_2019 = NE_avg_aqi_unemploy_2019.merge(NE_pov_ratio_2019, how='inner')
NE_avg_aqi_unemploy_pov_educ_veh_2019 = NE_avg_aqi_unemploy_pov_2019.merge(
    NE_educ_vehicle_2019, how='inner'
)
df_2019 = NE_avg_aqi_unemploy_pov_educ_veh_2019.merge(NE_dem_ratio_2019, how='inner')

df_2019

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2019,Civilian_labor_force_2019,Employed_2019,Unemployed_2019,Unemployment_rate_2019,"Poverty Estimate, All Ages_2019",90% CI LB All Ages_2019,...,HWAC_MALE_ratio_2019,HWAC_FEMALE_ratio_2019,HBAC_MALE_ratio_2019,HBAC_FEMALE_ratio_2019,HIAC_MALE_ratio_2019,HIAC_FEMALE_ratio_2019,HAAC_MALE_ratio_2019,HAAC_FEMALE_ratio_2019,HNAC_MALE_ratio_2019,HNAC_FEMALE_ratio_2019
0,9001,Connecticut,Fairfield,46.871944,481023,463547,17476,3.6,83047.0,75296.0,...,9.077186,8.966514,1.128341,1.183995,0.319082,0.286113,0.119788,0.099647,0.077173,0.059682
1,9003,Connecticut,Hartford,36.150685,483303,464533,18770,3.9,93694.0,86070.0,...,7.626834,7.970103,1.458642,1.633585,0.303234,0.315458,0.137711,0.124815,0.069304,0.052819
2,9005,Connecticut,Litchfield,37.840220,105182,101543,3639,3.5,12741.0,10601.0,...,3.141965,3.005551,0.454714,0.417561,0.167468,0.144732,0.053789,0.051017,0.030499,0.022736
3,9007,Connecticut,Middlesex,44.316038,93696,90703,2993,3.2,11024.0,9322.0,...,2.782019,2.829422,0.419858,0.471570,0.088035,0.112044,0.056022,0.052944,0.029550,0.032013
4,9009,Connecticut,New Haven,41.133913,461613,443514,18099,3.9,99423.0,91433.0,...,8.058080,8.240939,1.302943,1.452343,0.285812,0.297628,0.097104,0.095115,0.048786,0.047733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,42133,Pennsylvania,York,34.520167,236377,227297,9080,3.8,40477.0,35485.0,...,3.301133,3.162175,0.806355,0.775401,0.172583,0.157218,0.057899,0.059458,0.029172,0.027836
113,44003,Rhode Island,Kent,41.431280,90729,87815,2914,3.2,13152.0,10490.0,...,2.382344,2.328172,0.396246,0.375551,0.146690,0.139995,0.071215,0.078519,0.057215,0.029825
114,44007,Rhode Island,Providence,39.906336,325490,312930,12560,3.9,85644.0,79094.0,...,9.502122,9.250608,2.183804,2.288510,0.617907,0.629645,0.172319,0.165589,0.120357,0.120357
115,44009,Rhode Island,Washington,44.046729,69050,66892,2158,3.1,9524.0,7463.0,...,1.399142,1.528942,0.194303,0.179969,0.129801,0.131393,0.049372,0.043002,0.018315,0.019908


* df_2019 contains data on NorthEastern counties of the US
* The columns of df_2019 go from average aqi, unemployment rate, poverty rate, education attainment ratio, total vehicle ratio, demographic ratios

# Merge dataframe into df 2017-2019

In [17]:
# Allow up to 200 columns in the rendered output of the wide merged frame.
pd.options.display.max_columns = 200

# Combine the yearly frames side by side; only counties present in all
# three years survive the inner joins.
df_2017_2018 = df_2017.merge(df_2018, how='inner')
df_2017_2019 = df_2017_2018.merge(df_2019, how='inner')

df_2017_2019

Unnamed: 0,GeoFIPS,State Name,County Name,AQI_2017,Civilian_labor_force_2017,Employed_2017,Unemployed_2017,Unemployment_rate_2017,"Poverty Estimate, All Ages_2017",90% CI LB All Ages_2017,90% CI UB All Ages_2017,"Poverty Percent, All Ages_2017",90% CI LB percent_2017,90% CI UB percent_2017,"Poverty Estimate, Age 0-17_2017",90% CI LB percent 0-17_2017,90% CI UB 0-17_2017,"Poverty Percent, Age 0-17_2017",90% CI LB percent 0-17 percent_2017,90% CI UB 0-17 percent_2017,"Poverty Estimate, Age 5-17 in Families_2017",90% CI LB 5-17 fam_2017,90% CI UB 5-17 fam_2017,"Poverty Percent, Age 5-17 in Families_2017",90% CI LB 5-17 percent_2017,90% CI UB percent 5-17 percent_2017,Median Household Income_2017,90% CI Lower Bound LB medh inc_2017,90% CI UB medh inc_2017,POP_2017,EDU_TOT_2017,LESS_HS_TOT_2017,HS_TOT_2017,COL_OR_ASSOC_TOT_2017,BACH_TOT_2017,GRAD_TOT_2017,VEHICLE_TOT_2017,EDU_TOT_ratio_2017,LESS_HS_TOT_ratio_2017,HS_TOT_ratio_2017,COL_OR_ASSOC_TOT_ratio_2017,BACH_TOT_ratio_2017,GRAD_TOT_ratio_2017,VEHICLE_TOT_ratio_2017,TOT_POP_2017,TOT_MALE_2017,TOT_FEMALE_2017,WA_MALE_2017,WA_FEMALE_2017,BA_MALE_2017,BA_FEMALE_2017,IA_MALE_2017,IA_FEMALE_2017,AA_MALE_2017,AA_FEMALE_2017,NA_MALE_2017,NA_FEMALE_2017,TOM_MALE_2017,TOM_FEMALE_2017,WAC_MALE_2017,WAC_FEMALE_2017,BAC_MALE_2017,BAC_FEMALE_2017,IAC_MALE_2017,IAC_FEMALE_2017,AAC_MALE_2017,AAC_FEMALE_2017,NAC_MALE_2017,NAC_FEMALE_2017,NH_MALE_2017,NH_FEMALE_2017,NHWA_MALE_2017,NHWA_FEMALE_2017,NHBA_MALE_2017,NHBA_FEMALE_2017,NHIA_MALE_2017,NHIA_FEMALE_2017,NHAA_MALE_2017,NHAA_FEMALE_2017,NHNA_MALE_2017,NHNA_FEMALE_2017,NHTOM_MALE_2017,NHTOM_FEMALE_2017,NHWAC_MALE_2017,NHWAC_FEMALE_2017,NHBAC_MALE_2017,NHBAC_FEMALE_2017,NHIAC_MALE_2017,NHIAC_FEMALE_2017,NHAAC_MALE_2017,NHAAC_FEMALE_2017,NHNAC_MALE_2017,NHNAC_FEMALE_2017,H_MALE_2017,H_FEMALE_2017,HWA_MALE_2017,HWA_FEMALE_2017,HBA_MALE_2017,HBA_FEMALE_2017,HIA_MALE_2017,...,NHAAC_MALE_2019,NHAAC_FEMALE_2019,NHNAC_MALE_2019,NHNAC_FEMALE_2019,H_MALE_2019,H_FEMALE_2019,HWA_MALE_2019,HW
A_FEMALE_2019,HBA_MALE_2019,HBA_FEMALE_2019,HIA_MALE_2019,HIA_FEMALE_2019,HAA_MALE_2019,HAA_FEMALE_2019,HNA_MALE_2019,HNA_FEMALE_2019,HTOM_MALE_2019,HTOM_FEMALE_2019,HWAC_MALE_2019,HWAC_FEMALE_2019,HBAC_MALE_2019,HBAC_FEMALE_2019,HIAC_MALE_2019,HIAC_FEMALE_2019,HAAC_MALE_2019,HAAC_FEMALE_2019,HNAC_MALE_2019,HNAC_FEMALE_2019,TOT_MALE_ratio_2019,TOT_FEMALE_ratio_2019,WA_MALE_ratio_2019,WA_FEMALE_ratio_2019,BA_MALE_ratio_2019,BA_FEMALE_ratio_2019,IA_MALE_ratio_2019,IA_FEMALE_ratio_2019,AA_MALE_ratio_2019,AA_FEMALE_ratio_2019,NA_MALE_ratio_2019,NA_FEMALE_ratio_2019,TOM_MALE_ratio_2019,TOM_FEMALE_ratio_2019,WAC_MALE_ratio_2019,WAC_FEMALE_ratio_2019,BAC_MALE_ratio_2019,BAC_FEMALE_ratio_2019,IAC_MALE_ratio_2019,IAC_FEMALE_ratio_2019,AAC_MALE_ratio_2019,AAC_FEMALE_ratio_2019,NAC_MALE_ratio_2019,NAC_FEMALE_ratio_2019,NH_MALE_ratio_2019,NH_FEMALE_ratio_2019,NHWA_MALE_ratio_2019,NHWA_FEMALE_ratio_2019,NHBA_MALE_ratio_2019,NHBA_FEMALE_ratio_2019,NHIA_MALE_ratio_2019,NHIA_FEMALE_ratio_2019,NHAA_MALE_ratio_2019,NHAA_FEMALE_ratio_2019,NHNA_MALE_ratio_2019,NHNA_FEMALE_ratio_2019,NHTOM_MALE_ratio_2019,NHTOM_FEMALE_ratio_2019,NHWAC_MALE_ratio_2019,NHWAC_FEMALE_ratio_2019,NHBAC_MALE_ratio_2019,NHBAC_FEMALE_ratio_2019,NHIAC_MALE_ratio_2019,NHIAC_FEMALE_ratio_2019,NHAAC_MALE_ratio_2019,NHAAC_FEMALE_ratio_2019,NHNAC_MALE_ratio_2019,NHNAC_FEMALE_ratio_2019,H_MALE_ratio_2019,H_FEMALE_ratio_2019,HWA_MALE_ratio_2019,HWA_FEMALE_ratio_2019,HBA_MALE_ratio_2019,HBA_FEMALE_ratio_2019,HIA_MALE_ratio_2019,HIA_FEMALE_ratio_2019,HAA_MALE_ratio_2019,HAA_FEMALE_ratio_2019,HNA_MALE_ratio_2019,HNA_FEMALE_ratio_2019,HTOM_MALE_ratio_2019,HTOM_FEMALE_ratio_2019,HWAC_MALE_ratio_2019,HWAC_FEMALE_ratio_2019,HBAC_MALE_ratio_2019,HBAC_FEMALE_ratio_2019,HIAC_MALE_ratio_2019,HIAC_FEMALE_ratio_2019,HAAC_MALE_ratio_2019,HAAC_FEMALE_ratio_2019,HNAC_MALE_ratio_2019,HNAC_FEMALE_ratio_2019
0,9001,Connecticut,Fairfield,45.135453,479458,457625,21833,4.6,82428.0,74608.0,90248.0,8.8,8.0,9.6,24161.0,20926.0,27396.0,11.3,9.8,12.8,16941.0,14642.0,19240.0,10.5,9.1,11.9,91170.0,88902.0,93438.0,949921,647725.0,71384.0,138454.0,132522.0,168292.0,137073.0,357185,68.187249,7.514730,14.575317,13.950844,17.716421,14.429937,37.601548,943038,459242,483796,366178,379174,54885,63641,2544,2231,25280,28261,545,460,9810,10029,374704,387799,60472,69753,4568,4489,28796,31590,1345,1122,365528,391893,286103,300823,47287,55487,590,606,24652,27687,152,158,6744,7132,291982,306898,50592,59290,1757,2001,27723,30702,621,595,93714,91903,80075,78351,7598,8154,1954,...,28804,31628,620,626,97438,96334,82818,81793,8199,8693,2148,1783,640,599,389,337,3244,3129,85628,84584,10644,11169,3010,2699,1130,940,728,563,48.700776,51.299224,38.498959,39.859350,5.985591,6.946971,0.292474,0.249753,2.788838,3.093397,0.057032,0.052050,1.077881,1.097705,39.438289,40.808750,6.614956,7.623509,0.502792,0.490707,3.173220,3.452443,0.142898,0.126043,38.371644,41.087125,29.719653,31.188701,5.116438,6.025450,0.064770,0.060742,2.720993,3.029898,0.015795,0.016325,0.733994,0.766008,30.361103,31.842236,5.486616,6.439514,0.183711,0.204594,3.053432,3.352796,0.065724,0.066361,10.329131,10.212099,8.779306,8.670648,0.869153,0.921521,0.227704,0.189011,0.067845,0.063498,0.041237,0.035724,0.343887,0.331697,9.077186,8.966514,1.128341,1.183995,0.319082,0.286113,0.119788,0.099647,0.077173,0.059682
1,9003,Connecticut,Hartford,34.949296,477390,454205,23185,4.9,96200.0,88256.0,104144.0,11.0,10.1,11.9,28488.0,24981.0,31995.0,15.3,13.4,17.2,20592.0,17992.0,23192.0,14.8,12.9,16.7,70433.0,68420.0,72446.0,895388,625275.0,66723.0,169460.0,152999.0,130942.0,105151.0,368995,69.832855,7.451853,18.925873,17.087453,14.624051,11.743624,41.210626,893076,433833,459243,327876,347484,66613,71919,2453,2463,25325,25383,574,503,10992,11491,337633,357669,73775,79668,5198,5493,28242,28199,1042,917,355133,376784,265211,282304,56683,60761,804,838,24730,24727,122,146,7583,8008,271915,289320,61320,65813,2577,2817,27094,27116,431,465,78700,82459,62665,65180,9930,11158,1649,...,28907,28783,445,479,81461,85814,64744,67678,10311,11699,1691,1709,638,652,459,360,3618,3716,68010,71071,13007,14567,2704,2813,1228,1113,618,471,48.580720,51.419280,36.327883,38.455569,7.573341,8.251918,0.281142,0.287534,3.035482,3.026062,0.064931,0.057081,1.297941,1.341116,37.484524,39.647535,8.428318,9.160835,0.596936,0.629682,3.379424,3.352622,0.119208,0.106536,39.445454,41.795855,29.067308,30.865967,6.417037,6.939959,0.091509,0.095882,2.963935,2.952945,0.013457,0.016709,0.892208,0.924393,29.857691,31.677432,6.969677,7.527251,0.293702,0.314224,3.241713,3.227807,0.049904,0.053716,9.135267,9.623424,7.260575,7.589602,1.156305,1.311959,0.189634,0.191652,0.071547,0.073117,0.051474,0.040371,0.405733,0.416723,7.626834,7.970103,1.458642,1.633585,0.303234,0.315458,0.137711,0.124815,0.069304,0.052819
2,9005,Connecticut,Litchfield,38.488827,104051,99522,4529,4.4,12481.0,10381.0,14581.0,6.9,5.7,8.1,2642.0,1990.0,3294.0,7.9,5.9,9.9,1935.0,1456.0,2414.0,7.5,5.6,9.4,77968.0,74165.0,81771.0,182177,136414.0,10020.0,38651.0,37928.0,29259.0,20556.0,83455,74.879924,5.500145,21.216180,20.819313,16.060754,11.283532,45.809844,181667,89749,91918,83969,86459,2095,1760,305,271,1775,1890,66,58,1539,1480,85407,87822,2814,2478,802,749,2195,2286,129,128,83982,86448,79259,81946,1551,1279,154,143,1740,1855,29,36,1249,1189,80431,83043,2104,1822,543,510,2122,2214,73,92,5767,5470,4710,4513,544,481,151,...,2268,2312,78,88,6587,6213,5364,5123,627,547,182,149,43,45,37,24,334,325,5666,5420,820,753,302,261,97,92,55,41,49.515618,50.484382,46.003782,47.257573,1.331426,1.079115,0.185767,0.160259,1.055825,1.106287,0.039926,0.034935,0.898893,0.846212,46.839458,48.043342,1.757859,1.499448,0.463032,0.416452,1.311463,1.333089,0.073752,0.071534,45.862931,47.039089,43.029285,44.416718,0.983736,0.775787,0.084843,0.077634,1.031980,1.081333,0.019409,0.021627,0.713680,0.665990,43.697493,45.037791,1.303145,1.081887,0.295564,0.271720,1.257673,1.282073,0.043253,0.048799,3.652687,3.445293,2.974497,2.840856,0.347690,0.303328,0.100924,0.082625,0.023845,0.024954,0.020518,0.013309,0.185213,0.180222,3.141965,3.005551,0.454714,0.417561,0.167468,0.144732,0.053789,0.051017,0.030499,0.022736
3,9007,Connecticut,Middlesex,43.651163,92583,88817,3766,4.1,10796.0,8906.0,12686.0,6.8,5.6,8.0,2169.0,1662.0,2676.0,7.4,5.7,9.1,1483.0,1103.0,1863.0,6.7,5.0,8.4,81533.0,76017.0,87049.0,163410,119342.0,6519.0,33922.0,31499.0,27567.0,19835.0,72830,73.032250,3.989352,20.758827,19.276054,16.869837,12.138180,44.568876,162942,79376,83566,70949,74431,4283,4437,205,223,2310,2749,50,54,1579,1672,72387,75952,5075,5320,599,641,2832,3301,128,145,74363,78426,66806,70229,3786,3908,116,128,2259,2707,27,36,1369,1418,68056,71529,4445,4624,455,474,2741,3213,87,104,5013,5140,4143,4202,497,529,89,...,2849,3399,82,115,5256,5396,4326,4374,548,582,84,107,50,51,29,24,219,258,4519,4596,682,766,143,182,91,86,48,52,48.671477,51.328523,43.282277,45.415425,2.731537,2.845428,0.133591,0.150829,1.473196,1.794553,0.034475,0.035706,1.016400,1.086582,44.213105,46.399197,3.241892,3.433352,0.385998,0.411855,1.809944,2.145460,0.080032,0.102810,45.435741,48.006600,40.619075,42.722672,2.394174,2.487133,0.081878,0.084957,1.442414,1.763156,0.016622,0.020931,0.881578,0.927750,41.431087,43.569775,2.822035,2.961782,0.297964,0.299810,1.753922,2.092516,0.050481,0.070797,3.235736,3.321924,2.663203,2.692753,0.337364,0.358295,0.051713,0.065872,0.030781,0.031397,0.017853,0.014775,0.134822,0.158832,2.782019,2.829422,0.419858,0.471570,0.088035,0.112044,0.056022,0.052944,0.029550,0.032013
4,9009,Connecticut,New Haven,38.968085,457800,434645,23155,5.1,91639.0,82523.0,100755.0,11.0,9.9,12.1,26499.0,22681.0,30317.0,15.3,13.1,17.5,18873.0,16055.0,21691.0,14.7,12.5,16.9,66764.0,64782.0,68746.0,860435,598860.0,55565.0,187277.0,151361.0,110253.0,94404.0,346640,69.599679,6.457780,21.765386,17.591218,12.813635,10.971660,40.286599,857748,413560,444188,324268,344780,59423,66910,2274,2252,17193,19087,439,444,9963,10715,333028,354141,66049,74163,4776,5115,19721,21692,777,787,337182,365574,261087,280503,51122,57678,786,772,16767,18664,155,141,7265,7816,267416,287258,55681,62685,2527,2753,18923,20918,396,395,76378,78614,63181,64277,8301,9232,1488,...,19327,21289,400,392,80460,83156,66259,67663,8910,9981,1612,1631,465,452,311,327,2903,3102,68877,70440,11137,12414,2443,2544,830,813,417,408,48.211246,51.788754,37.473808,39.822429,7.131384,8.055038,0.282302,0.282653,2.056257,2.273746,0.054167,0.053816,1.213327,1.301072,38.544288,40.959126,7.940034,8.947104,0.582271,0.618655,2.358214,2.585764,0.095583,0.093594,38.798044,42.060141,29.722015,31.906378,6.088982,6.887338,0.093711,0.091839,2.001855,2.220865,0.017783,0.015560,0.873699,0.938161,30.486208,32.718188,6.637091,7.494762,0.296459,0.321027,2.261110,2.490649,0.046797,0.045861,9.413202,9.728613,7.751794,7.916051,1.042402,1.167700,0.188592,0.190814,0.054401,0.052881,0.036385,0.038256,0.339629,0.362910,8.058080,8.240939,1.302943,1.452343,0.285812,0.297628,0.097104,0.095115,0.048786,0.047733
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,42133,Pennsylvania,York,35.621813,234928,224744,10184,4.3,40966.0,36227.0,45705.0,9.4,8.3,10.5,12784.0,10798.0,14770.0,13.2,11.2,15.2,8237.0,6666.0,9808.0,11.4,9.2,13.6,63493.0,61485.0,65501.0,446078,310781.0,32160.0,119750.0,80357.0,50969.0,27545.0,194485,69.669654,7.209501,26.845081,18.014114,11.426029,6.174929,43.598877,445484,220149,225335,196044,200991,15296,14906,851,753,2984,3689,149,165,4825,4831,200597,205506,18522,18197,1885,1825,3992,4634,289,313,203176,209317,183250,188951,12627,12376,381,347,2849,3540,72,93,3997,4010,187031,192713,15253,15061,1176,1199,3759,4393,179,205,16973,16018,12794,12040,2669,2530,470,...,3833,4518,201,208,18539,17726,13926,13297,2898,2773,504,450,154,162,91,77,966,967,14824,14200,3621,3482,775,706,260,267,131,125,49.432145,50.567855,43.777864,44.899991,3.600648,3.453229,0.202646,0.176369,0.683653,0.840203,0.037634,0.039416,1.129698,1.158648,44.840978,45.992054,4.369369,4.244441,0.440478,0.428675,0.911464,1.065564,0.073933,0.074155,45.303725,46.620481,40.676705,41.938903,2.955298,2.835714,0.090411,0.076159,0.649359,0.804128,0.017370,0.022269,0.914581,0.943308,41.539846,42.829879,3.563014,3.469040,0.267894,0.271457,0.853565,1.006106,0.044760,0.046319,4.128420,3.947374,3.101158,2.961087,0.645351,0.617515,0.112235,0.100210,0.034294,0.036076,0.020265,0.017147,0.215117,0.215340,3.301133,3.162175,0.806355,0.775401,0.172583,0.157218,0.057899,0.059458,0.029172,0.027836
113,44003,Rhode Island,Kent,39.743842,90048,86403,3645,4.0,13853.0,11035.0,16671.0,8.5,6.8,10.2,3443.0,2559.0,4327.0,11.3,8.4,14.2,2370.0,1730.0,3010.0,10.5,7.7,13.3,72765.0,68574.0,76956.0,163760,120853.0,11115.0,32713.0,36533.0,25771.0,14721.0,77890,73.798852,6.787372,19.976185,22.308867,15.737054,8.989375,47.563508,163543,79060,84483,72967,78600,1920,1631,353,353,2085,2236,79,44,1656,1619,74485,80067,2712,2416,863,864,2602,2753,156,111,74959,80440,69759,75345,1508,1283,216,231,2025,2162,34,29,1417,1390,71061,76604,2148,1931,659,673,2507,2645,96,82,4101,4043,3208,3255,412,348,137,...,2776,2879,105,97,4728,4568,3668,3599,468,437,162,150,73,88,80,31,277,263,3914,3825,651,617,241,230,117,129,94,49,48.412582,51.587418,44.266915,47.596962,1.314124,1.154651,0.244686,0.239817,1.402990,1.502812,0.069997,0.037738,1.113870,1.055438,45.284007,48.541012,1.857668,1.686631,0.569717,0.561805,1.760889,1.830886,0.121126,0.088866,45.534780,48.807002,42.034305,45.406350,1.029265,0.888662,0.146081,0.148516,1.358557,1.449249,0.021304,0.018869,0.945268,0.895357,42.901663,46.212841,1.461422,1.311080,0.423027,0.421810,1.689674,1.752368,0.063911,0.059041,2.877803,2.780415,2.232610,2.190612,0.284859,0.265990,0.098605,0.091301,0.044433,0.053563,0.048694,0.018869,0.168602,0.160081,2.382344,2.328172,0.396246,0.375551,0.146690,0.139995,0.071215,0.078519,0.057215,0.029825
114,44007,Rhode Island,Providence,38.049587,323972,308660,15312,4.7,90056.0,83306.0,96806.0,14.7,13.6,15.8,28944.0,26202.0,31686.0,22.4,20.3,24.5,20048.0,18117.0,21979.0,21.4,19.3,23.5,55848.0,53668.0,58028.0,637357,435612.0,63196.0,146232.0,101179.0,77636.0,47369.0,246030,68.346625,9.915322,22.943499,15.874777,12.180928,7.432098,38.601600,634130,308406,325724,242195,256185,37600,39157,4284,4303,13698,14809,829,861,9800,10409,250638,264977,44369,46396,7011,7424,15843,17071,1298,1349,235960,253889,187810,203021,26845,27756,1294,1354,13053,14197,243,250,6715,7311,193533,209118,31196,32539,3191,3569,14792,16068,566,603,72446,71835,54385,53164,10755,11401,2990,...,15318,16585,578,624,76736,75870,57732,56208,11331,11978,3059,3084,657,648,600,641,3357,3311,60712,59105,13953,14622,3948,4023,1101,1058,769,769,48.735466,51.264534,37.958089,39.950480,6.122883,6.375806,0.684894,0.701484,2.213228,2.389460,0.132878,0.140704,1.623493,1.706601,39.366223,41.401810,7.250079,7.572649,1.123439,1.194026,2.569761,2.761331,0.210821,0.218020,36.725405,39.390012,28.922372,31.153286,4.349452,4.501112,0.206126,0.218803,2.110400,2.288040,0.038971,0.040380,1.098084,1.188391,29.864101,32.151203,5.066275,5.284139,0.505532,0.564380,2.397442,2.595742,0.090464,0.097663,12.010061,11.874522,9.035717,8.797194,1.773431,1.874694,0.478768,0.482681,0.102828,0.101419,0.093907,0.100324,0.525409,0.518209,9.502122,9.250608,2.183804,2.288510,0.617907,0.629645,0.172319,0.165589,0.120357,0.120357
115,44009,Rhode Island,Washington,40.418605,68725,66001,2724,4.0,10592.0,8668.0,12516.0,8.9,7.3,10.5,2217.0,1705.0,2729.0,10.6,8.2,13.0,1550.0,1165.0,1935.0,9.5,7.1,11.9,78599.0,72939.0,84259.0,126150,85775.0,4859.0,20014.0,23771.0,21776.0,15355.0,54910,67.994451,3.851764,15.865240,18.843440,17.261990,12.172017,43.527547,126314,61166,65148,57126,61137,977,842,605,623,1166,1366,20,30,1272,1150,58256,62129,1597,1414,1089,1064,1573,1727,75,92,59233,63048,55615,59467,834,702,500,511,1147,1346,12,18,1125,1004,56616,60337,1364,1182,928,893,1520,1674,55,71,1933,2100,1511,1670,143,140,105,...,1530,1697,61,58,2071,2210,1616,1789,152,134,115,97,23,22,12,15,153,153,1757,1920,244,226,163,165,62,54,23,25,48.414917,51.585083,45.140432,48.381471,0.807473,0.692006,0.471424,0.459479,0.940459,1.093353,0.018315,0.023890,1.036814,0.934885,46.064168,49.193722,1.329065,1.145114,0.844104,0.828177,1.267748,1.394364,0.066891,0.066095,46.765729,49.825207,43.853572,46.956847,0.686431,0.585298,0.379847,0.382236,0.922143,1.075834,0.008760,0.011945,0.914976,0.813047,44.665026,47.664779,1.134762,0.965145,0.714303,0.696784,1.218376,1.351362,0.048576,0.046187,1.649187,1.759876,1.286860,1.424624,0.121041,0.106707,0.091577,0.077243,0.018315,0.017519,0.009556,0.011945,0.121838,0.121838,1.399142,1.528942,0.194303,0.179969,0.129801,0.131393,0.049372,0.043002,0.018315,0.019908


# save data


In [18]:
#save 2017-2019 dataset
# data_path = r'../../data/processed/alt_full_dem_df_2017_2019'
# df_2017_2019.to_csv(data_path, index=False)
#
# save datasets for individual years
# data_path = r'../../data/processed/alt_full_dem_df_2017'
# df_2017.to_csv(data_path, index=False)
# data_path = r'../../data/processed/alt_full_dem_df_2018'
# df_2018.to_csv(data_path, index=False)
# data_path = r'../../data/processed/alt_full_dem_df_2019'
# df_2019.to_csv(data_path, index=False)

In [19]:
# For each year, show the 15 features with the largest (most positive) Pearson
# correlation against that year's AQI column; the AQI column itself appears
# last with a correlation of 1.0.
#
# Numeric/bool columns are selected explicitly before calling .corr():
# pandas >= 2.0 no longer drops non-numeric columns silently and raises a
# ValueError if any string column is present. select_dtypes reproduces the old
# implicit filtering and works on both old and new pandas versions.
for yr, yearly_df in ((2017, df_2017), (2018, df_2018), (2019, df_2019)):
    numeric_cols_df = yearly_df.select_dtypes(include=['number', 'bool'])
    display(numeric_cols_df.corr()[f'AQI_{yr}'].sort_values().tail(15))

NHBAC_MALE_ratio_2017                  0.128785
TOM_MALE_ratio_2017                    0.131311
NHNA_MALE_ratio_2017                   0.136361
COL_OR_ASSOC_TOT_ratio_2017            0.143769
NH_MALE_ratio_2017                     0.161447
NHNAC_FEMALE_ratio_2017                0.180514
NH_FEMALE_ratio_2017                   0.184572
NHNAC_MALE_ratio_2017                  0.209573
NHTOM_FEMALE_ratio_2017                0.301019
NHTOM_MALE_ratio_2017                  0.324952
VEHICLE_TOT_ratio_2017                 0.336422
90% CI Lower Bound LB medh inc_2017    0.347750
Median Household Income_2017           0.356259
90% CI UB medh inc_2017                0.362795
AQI_2017                               1.000000
Name: AQI_2017, dtype: float64

NHBAC_MALE_ratio_2018                  0.108336
BACH_TOT_ratio_2018                    0.108645
VEHICLE_TOT_2018                       0.119622
NHNA_MALE_ratio_2018                   0.121961
TOM_FEMALE_ratio_2018                  0.134906
TOM_MALE_ratio_2018                    0.146463
NHNAC_FEMALE_ratio_2018                0.176328
NHNAC_MALE_ratio_2018                  0.212885
VEHICLE_TOT_ratio_2018                 0.225245
NHTOM_FEMALE_ratio_2018                0.240164
NHTOM_MALE_ratio_2018                  0.255688
90% CI Lower Bound LB medh inc_2018    0.379322
Median Household Income_2018           0.380913
90% CI UB medh inc_2018                0.381038
AQI_2018                               1.000000
Name: AQI_2018, dtype: float64

TOM_FEMALE_ratio_2019                  0.191134
TOM_MALE_ratio_2019                    0.197492
NHBA_FEMALE_ratio_2019                 0.203204
NHBAC_FEMALE_ratio_2019                0.208261
NHBA_MALE_ratio_2019                   0.208927
VEHICLE_TOT_ratio_2019                 0.212629
NHBAC_MALE_ratio_2019                  0.213676
NHNAC_FEMALE_ratio_2019                0.214389
NHNAC_MALE_ratio_2019                  0.248085
NHTOM_FEMALE_ratio_2019                0.263421
NHTOM_MALE_ratio_2019                  0.268511
90% CI Lower Bound LB medh inc_2019    0.411154
Median Household Income_2019           0.418417
90% CI UB medh inc_2019                0.423820
AQI_2019                               1.000000
Name: AQI_2019, dtype: float64

In [20]:
# # see scatter plots
# df_columns = df_2019.columns[4:]
# for col in df_columns:
#     fig, ax = plt.subplots(figsize=(10,10))
#
#     plt.title(' 2019: mean AQI vs ' + col)
#     plt.xlabel(col)
#     plt.ylabel('Mean AQI')
#     ax.scatter(df_2019[col], df_2019['AQI_2019'], label=col)
#
# ax.legend(bbox_to_anchor=(1.05,1))
# plt.show()


In [21]:
# df_2019.sort_values(['AQI_2019'], ascending=False)
# df_2019.sort_values(['HNAC_FEMALE_ratio_2019'], ascending=False)

In [22]:
# df_2019['AQI_2019'].hist()
# df_2019['AQI_2019'].describe()

When checking the Pearson correlation for counties using only the pollutant ozone, which is the most frequently recorded
pollutant in the Northeastern county datasets, we found a moderate-strength correlation with median household
income, non-Hispanic ethnicities, and medical health insurance metrics; this contrasts with the results from averaging all air pollutants.

A caveat of using only the ozone pollutant, however, is that fewer counties are available to work with.
With ozone, we do not find educational attainment levels making their way into the correlations.