# Stage 3 Analysis

Stage 3 of the analysis adds pandemic-specific variables: pandemic policy indices and health statistics related to Covid-19 (deaths and vaccination rates). Personal median income is also included as an economic indicator.

In [1]:
# Import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
# NOTE(review): the wildcard import below copies every public name of `os`
# into the notebook namespace. That includes `os.open`, which shadows the
# builtin `open`. Confirm nothing depends on it before removing it.
from os import *
import sys
# Add the parent directory to the module search path.
sys.path.append("../")
import urllib.request
import requests
import json

In [2]:
# Change directory
def change_dir(newpath):
    """Switch the working directory to ``newpath`` under the project root.

    The project root is taken to be the parent directory of ``sys.path[0]``
    (presumably the folder that contains the notebook -- TODO confirm for the
    kernel in use).

    Parameters
    ----------
    newpath : str
        Directory path relative to the project root, e.g. ``'output'`` or
        ``'output//stage2'``. Repeated or mixed separators are normalised.

    Raises
    ------
    OSError
        Propagated from ``os.chdir`` when the target directory does not exist
        or cannot be entered.
    """
    # os.path works with the separator of the running platform, so the same
    # call resolves correctly on Windows, Linux and macOS.
    project_root = os.path.dirname(sys.path[0])
    target = os.path.normpath(os.path.join(project_root, newpath))
    os.chdir(target)

In [3]:
# Load the merged stage 2 dataset.
# 'Unnamed: 0' is discarded right after reading (presumably the index column
# written by an earlier to_csv call -- TODO confirm).
change_dir('output//stage2')
stage2_file = pd.read_csv('output.csv').drop(columns=['Unnamed: 0'])
stage2_file

Unnamed: 0,area_code,hispanic,white_nonhispanic,black_nonhispanic,other_nonhispanic,asian_nonhispanic,above_65,prepandemic_growth,leisure_by_emp,total_emp_feb2020,...,hs_nodiploma,ged,college_nodegree,associates,bachelors_graduate,CBSA Title,division,region,state,pop_by_metro
0,10180.0,24.12,63.99,7.49,2.46,1.94,15.271998,0.061584,0.111297,71.2,...,7.73,32.28,24.66,7.70,23.28,"Abilene, TX",West South Central Division,South Region,48,172060.0
1,10420.0,2.22,79.34,12.49,2.48,3.47,18.158182,0.008289,0.096521,336.5,...,5.90,31.68,20.30,8.27,31.69,"Akron, OH",East North Central Division,Midwest Region,39,703479.0
2,10500.0,3.12,41.22,55.03,1.45,1.25,16.949280,0.027823,,62.2,...,11.13,31.97,22.81,9.55,20.19,"Albany, GA",South Atlantic Division,South Region,13,146726.0
3,10540.0,9.52,84.34,0.60,4.20,1.34,19.069126,0.106132,0.079524,46.6,...,7.60,29.00,31.20,10.00,19.30,"Albany, OR",Pacific Division,West Region,41,129749.0
4,10580.0,5.42,79.32,8.20,2.53,4.53,17.980738,0.033501,0.082655,466.1,...,5.29,26.17,16.21,12.37,37.53,"Albany-Schenectady-Troy, NY",Middle Atlantic Division,Northeast Region,36,880381.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364,49620.0,8.08,82.62,5.79,2.02,1.49,17.954696,0.040929,0.084918,185.4,...,7.00,39.60,16.30,9.20,24.90,"York-Hanover, PA",Middle Atlantic Division,Northeast Region,42,449058.0
365,49660.0,4.47,83.14,9.36,2.02,1.01,21.324866,-0.040301,0.109646,215.3,...,6.53,37.65,21.11,8.45,23.66,"Youngstown-Warren-Boardman, OH-PA",East North Central Division,Midwest Region,39,536081.0
366,49700.0,30.65,48.98,2.82,5.18,12.37,14.441553,0.132029,0.098186,44.6,...,10.09,23.43,28.08,10.80,17.71,"Yuba City, CA",Pacific Division,West Region,6,175639.0
367,49740.0,64.61,30.15,1.86,2.10,1.28,19.319229,0.086792,0.106338,59.1,...,13.20,25.70,24.80,7.70,15.00,"Yuma, AZ",Mountain Division,West Region,4,213787.0


## Pandemic Policies

The averages of four indices (stringency, government response, containment health, and economic support) are computed at the state level over the period from March 2020 to June 2021.

In [4]:
# Clean up pandemic policies file
change_dir('output')
pandemic_policies = pd.read_csv("pandemic_policies.csv")

# Build a "<year>-<month>" label (month is not zero-padded) to filter on.
pandemic_policies["year_month"] = pandemic_policies['year'].astype(str) + str('-') + pandemic_policies['month'].astype(str)

# Only these four labels are removed; any other month present in the file is
# kept and enters the averages.
months_to_exclude = ['2020-1','2020-2','2021-7','2021-8']
pandemic_policies = pandemic_policies[~pandemic_policies['year_month'].isin(months_to_exclude)]

# Average every numeric column per state. numeric_only=True is required because
# the frame carries the string column 'year_month': pandas >= 2.0 raises a
# TypeError when asked to average it, while older versions dropped it silently.
pandemic_policies = pandemic_policies.groupby('state').mean(numeric_only=True).reset_index()

# The averaged year/month/index columns carry no meaning, so drop them.
pandemic_policies = pandemic_policies.drop(columns=['year','month','Unnamed: 0'])

# After this rename 'state' holds the (averaged, hence float) FIPS code that the
# later merge joins on, and 'state_name' holds the state's name.
pandemic_policies = pandemic_policies.rename(columns={'state':'state_name','fips':'state'})
pandemic_policies

Unnamed: 0,state_name,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,state
0,Alabama,46.763696,51.103417,52.59512,41.381048,1.0
1,Alaska,56.752059,57.579288,57.956765,54.937836,2.0
2,Arizona,52.6127,54.854439,55.531136,50.730847,4.0
3,Arkansas,54.760006,57.27313,59.352424,42.704133,5.0
4,California,63.051351,67.141762,64.823398,80.771169,6.0
5,Colorado,57.806356,58.67133,57.8033,64.742944,8.0
6,Connecticut,62.514697,66.263142,65.050539,74.746304,9.0
7,Delaware,60.338009,62.0787,61.929548,63.13004,10.0
8,District of Columbia,66.044084,64.805929,64.572282,66.439852,11.0
9,Florida,52.481696,54.34097,54.719222,51.695228,12.0


In [5]:
# Attach the state-level policy indices to each stage 2 row.
# A left join on the 'state' code keeps every stage 2 row.
merged = pd.merge(stage2_file, pandemic_policies, how='left', on='state')
merged

Unnamed: 0,area_code,hispanic,white_nonhispanic,black_nonhispanic,other_nonhispanic,asian_nonhispanic,above_65,prepandemic_growth,leisure_by_emp,total_emp_feb2020,...,CBSA Title,division,region,state,pop_by_metro,state_name,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex
0,10180.0,24.12,63.99,7.49,2.46,1.94,15.271998,0.061584,0.111297,71.2,...,"Abilene, TX",West South Central Division,South Region,48,172060.0,Texas,53.639829,52.692697,54.371357,40.948421
1,10420.0,2.22,79.34,12.49,2.48,3.47,18.158182,0.008289,0.096521,336.5,...,"Akron, OH",East North Central Division,Midwest Region,39,703479.0,Ohio,58.787620,59.645174,62.129232,42.263105
2,10500.0,3.12,41.22,55.03,1.45,1.25,16.949280,0.027823,,62.2,...,"Albany, GA",South Atlantic Division,South Region,13,146726.0,Georgia,55.921999,56.254949,55.945863,58.414579
3,10540.0,9.52,84.34,0.60,4.20,1.34,19.069126,0.106132,0.079524,46.6,...,"Albany, OR",Pacific Division,West Region,41,129749.0,Oregon,60.793028,59.832710,58.962529,66.683468
4,10580.0,5.42,79.32,8.20,2.53,4.53,17.980738,0.033501,0.082655,466.1,...,"Albany-Schenectady-Troy, NY",Middle Atlantic Division,Northeast Region,36,880381.0,New York,66.881749,71.821582,68.375444,95.937500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364,49620.0,8.08,82.62,5.79,2.02,1.49,17.954696,0.040929,0.084918,185.4,...,"York-Hanover, PA",Middle Atlantic Division,Northeast Region,42,449058.0,Pennsylvania,55.666169,59.711847,59.995945,57.721774
365,49660.0,4.47,83.14,9.36,2.02,1.01,21.324866,-0.040301,0.109646,215.3,...,"Youngstown-Warren-Boardman, OH-PA",East North Central Division,Midwest Region,39,536081.0,Ohio,58.787620,59.645174,62.129232,42.263105
366,49700.0,30.65,48.98,2.82,5.18,12.37,14.441553,0.132029,0.098186,44.6,...,"Yuba City, CA",Pacific Division,West Region,6,175639.0,California,63.051351,67.141762,64.823398,80.771169
367,49740.0,64.61,30.15,1.86,2.10,1.28,19.319229,0.086792,0.106338,59.1,...,"Yuma, AZ",Mountain Division,West Region,4,213787.0,Arizona,52.612700,54.854439,55.531136,50.730847


## Death rates

Covid deaths are reported at the county level. The cumulative totals as of June 30th, 2021 are used; they are summed by area code and divided by total population to obtain death rates.

In [6]:
# Retrieve and clean-up covid deaths by county database
change_dir('output')
death_by_county = pd.read_csv('health_stats_county.csv')

# Keep only the identifier columns and the 6/30/2021 column. `columns=` is used
# because the positional `axis` argument of DataFrame.drop was removed in
# pandas 2.0. The original column order is preserved.
death_columns = ['FIPS', 'Admin2', 'Province_State', '6/30/2021']
death_by_county = death_by_county.drop(columns=death_by_county.columns.difference(death_columns))

# Discard rows labelled 'Unassigned' and rows without a FIPS code.
death_by_county = death_by_county[death_by_county['Admin2'] != 'Unassigned']
death_by_county = death_by_county[death_by_county['FIPS'].notna()]
death_by_county = death_by_county.rename(columns={'FIPS':'fips','Admin2':'Name','Province_State':'state_proper','6/30/2021':'deaths'})

# Remove the listed territories and cruise-ship entries.
states_exclude = ['American Samoa','Diamond Princess','Grand Princess','Puerto Rico','Northern Mariana Islands','Guam','Virgin Islands']
death_by_county = death_by_county[~death_by_county['state_proper'].isin(states_exclude)]
death_by_county

0        1001.0
1        1003.0
2        1005.0
3        1007.0
4        1009.0
         ...   
3285    56037.0
3286    56039.0
3287    56041.0
3288    56043.0
3289    56045.0
Name: fips, Length: 3195, dtype: float64

In [7]:
# Add state abbreviations to the covid deaths dataset
change_dir('output//stage2')
delineation_files = pd.read_csv('delineation_files.csv')

# Reduce the lookup to distinct (state_proper, state_abbr) pairs before merging.
# Merging against the full delineation table repeats every county row once per
# delineation row of its state, only for drop_duplicates to remove the copies.
state_lookup = delineation_files[['state_proper','state_abbr']].drop_duplicates()

# The trailing drop_duplicates is kept so the resulting rows match the previous
# behaviour exactly; only the row index labels differ.
death_by_county = pd.merge(death_by_county, state_lookup, on='state_proper', how='left').drop_duplicates()
death_by_county = death_by_county.rename(columns={'state_abbr':'State'})
death_by_county

Unnamed: 0,fips,Name,state_proper,deaths,State
0,1001.0,Autauga,Alabama,113,AL
43,1003.0,Baldwin,Alabama,315,AL
86,1005.0,Barbour,Alabama,60,AL
129,1007.0,Bibb,Alabama,64,AL
172,1009.0,Blount,Alabama,139,AL
...,...,...,...,...,...
173019,56037.0,Sweetwater,Wyoming,40,WY
173028,56039.0,Teton,Wyoming,11,WY
173037,56041.0,Uinta,Wyoming,13,WY
173046,56043.0,Washakie,Wyoming,26,WY


In [8]:
# Get county and states codes
change_dir('input')
fips1 = pd.read_csv('US_FIPS_Codes.csv')
fips2 = pd.read_csv('fips_state_county.csv')

# Combine the two FIPS reference tables on county name + state abbreviation.
fips2 = fips2.merge(fips1,on=['Name','State'],how='left')
fips2 = fips2.rename(columns={'FIPS State':'fips_state','FIPS County':'county'})

# Look up the area code and CBSA title of every county. A left join keeps
# counties without a match (their area_code stays NaN).
fips2 = pd.merge(fips2, delineation_files[['fips_state','county','area_code','CBSA Title']], on=['fips_state','county'], how='left')

# Keep only the columns used downstream. `columns=` is used because the
# positional `axis` argument of DataFrame.drop was removed in pandas 2.0.
fips_columns = ['FIPS', 'Name', 'State', 'fips_state','county','area_code','CBSA Title']
fips2 = fips2.drop(columns=fips2.columns.difference(fips_columns))
fips2

Unnamed: 0,FIPS,Name,State,fips_state,county,area_code,CBSA Title
0,1001,Autauga,AL,1.0,1.0,33860.0,"Montgomery, AL"
1,1003,Baldwin,AL,1.0,3.0,19300.0,"Daphne-Fairhope-Foley, AL"
2,1005,Barbour,AL,1.0,5.0,21640.0,"Eufaula, AL-GA"
3,1007,Bibb,AL,1.0,7.0,13820.0,"Birmingham-Hoover, AL"
4,1009,Blount,AL,1.0,9.0,13820.0,"Birmingham-Hoover, AL"
...,...,...,...,...,...,...,...
3137,56037,Sweetwater,WY,56.0,37.0,40540.0,"Rock Springs, WY"
3138,56039,Teton,WY,56.0,39.0,27220.0,"Jackson, WY-ID"
3139,56041,Uinta,WY,56.0,41.0,21740.0,"Evanston, WY"
3140,56043,Washakie,WY,56.0,43.0,,


In [9]:
# Merge the deaths dataset with the area codes
# NOTE(review): this cell overwrites `death_by_county`, so it cannot be re-run
# on its own without re-running the cells that build the county-level frame.
# NOTE(review): the join key is county name + state abbreviation; spelling
# differences between the two sources would leave deaths as NaN -- TODO confirm
# coverage, or join on the FIPS code instead.
death_by_county = fips2.merge(death_by_county, on=['Name','State'], how='left')

# Sum per area code. numeric_only=True restricts the sum to numeric columns;
# without it pandas >= 2.0 also tries to add up the string columns (names,
# titles) instead of silently dropping them. The summed code columns
# (FIPS, fips_state, county, fips) are meaningless; only 'deaths' is used later.
death_by_county = death_by_county.groupby('area_code').sum(numeric_only=True).reset_index()
death_by_county

Unnamed: 0,area_code,FIPS,fips_state,county,fips,deaths
0,10100.0,92058,92.0,58.0,92058.0,109.0
1,10140.0,53027,53.0,27.0,53027.0,76.0
2,10180.0,144753,144.0,753.0,144753.0,511.0
3,10220.0,40123,40.0,123.0,40123.0,80.0
4,10300.0,26091,26.0,91.0,26091.0,170.0
...,...,...,...,...,...,...
926,49700.0,12216,12.0,216.0,12216.0,165.0
927,49740.0,4027,4.0,27.0,4027.0,848.0
928,49780.0,39119,39.0,119.0,39119.0,137.0
929,49820.0,48505,48.0,505.0,48505.0,36.0


In [10]:
# Attach the per-area death totals, then express them per resident.
merged2 = (
    merged
    .merge(death_by_county[['area_code','deaths']], how='left', on='area_code')
    .assign(deaths_by_pop=lambda frame: frame['deaths'] / frame['total_pop'])
)
merged2

Unnamed: 0,area_code,hispanic,white_nonhispanic,black_nonhispanic,other_nonhispanic,asian_nonhispanic,above_65,prepandemic_growth,leisure_by_emp,total_emp_feb2020,...,region,state,pop_by_metro,state_name,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,deaths,deaths_by_pop
0,10180.0,24.12,63.99,7.49,2.46,1.94,15.271998,0.061584,0.111297,71.2,...,South Region,48,172060.0,Texas,53.639829,52.692697,54.371357,40.948421,511.0,0.002970
1,10420.0,2.22,79.34,12.49,2.48,3.47,18.158182,0.008289,0.096521,336.5,...,Midwest Region,39,703479.0,Ohio,58.787620,59.645174,62.129232,42.263105,1231.0,0.001750
2,10500.0,3.12,41.22,55.03,1.45,1.25,16.949280,0.027823,,62.2,...,South Region,13,146726.0,Georgia,55.921999,56.254949,55.945863,58.414579,499.0,0.003401
3,10540.0,9.52,84.34,0.60,4.20,1.34,19.069126,0.106132,0.079524,46.6,...,West Region,41,129749.0,Oregon,60.793028,59.832710,58.962529,66.683468,81.0,0.000624
4,10580.0,5.42,79.32,8.20,2.53,4.53,17.980738,0.033501,0.082655,466.1,...,Northeast Region,36,880381.0,New York,66.881749,71.821582,68.375444,95.937500,892.0,0.001013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364,49620.0,8.08,82.62,5.79,2.02,1.49,17.954696,0.040929,0.084918,185.4,...,Northeast Region,42,449058.0,Pennsylvania,55.666169,59.711847,59.995945,57.721774,826.0,0.001839
365,49660.0,4.47,83.14,9.36,2.02,1.01,21.324866,-0.040301,0.109646,215.3,...,Midwest Region,39,536081.0,Ohio,58.787620,59.645174,62.129232,42.263105,1367.0,0.002081
366,49700.0,30.65,48.98,2.82,5.18,12.37,14.441553,0.132029,0.098186,44.6,...,West Region,6,175639.0,California,63.051351,67.141762,64.823398,80.771169,165.0,0.000939
367,49740.0,64.61,30.15,1.86,2.10,1.28,19.319229,0.086792,0.106338,59.1,...,West Region,4,213787.0,Arizona,52.612700,54.854439,55.531136,50.730847,848.0,0.003967


## Vaccines

Vaccination counts are sourced at the county level. Cumulative totals as at June 30th, 2021 are used for analysis.

In [11]:
# Retrieve and clean up vaccination data
change_dir('output')
vaccs = pd.read_csv('vaccines.csv')

# Only the 2021-06-30 column is kept; the other month-end columns are dropped.
vaccs = vaccs.drop(columns=['2020-12-31','2021-01-31','2021-02-28','2021-03-31','2021-04-30','2021-05-31','2021-07-31'])

# Remove the listed territory/unknown state codes and rows whose FIPS is "UNK".
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not write into a slice (SettingWithCopyWarning).
states_exclude = ['GU', 'PR', 'VI', 'AS','FM', 'MH', 'MP', 'PW', 'UNK']
rows_to_keep = ~vaccs['Recip_State'].isin(states_exclude) & (vaccs['FIPS'] != "UNK")
vaccs = vaccs[rows_to_keep].copy()

# Split the FIPS text into its first two characters (state) and the rest (county).
fips_text = vaccs['FIPS'].astype(str)
vaccs['state'] = fips_text.str[:2]
vaccs['county'] = fips_text.str[2:]

# Missing values become 0 so the integer cast below succeeds.
vaccs = vaccs.fillna(0)
integer_columns = ['FIPS','2021-06-30','state','county']
vaccs[integer_columns] = vaccs[integer_columns].astype(int)
vaccs = vaccs.rename(columns={'2021-06-30':'vaccines'})

In [12]:
# Merge area codes
# NOTE(review): no `on=` is given, so pandas joins on every column name the two
# frames share. Spell the keys out once the intended ones are confirmed.
vaccs = delineation_files.merge(vaccs).drop_duplicates()

# Sum per area code. numeric_only=True restricts the sum to numeric columns;
# without it pandas >= 2.0 also tries to add up the string columns instead of
# silently dropping them. Only 'vaccines' is used later; the summed code
# columns are meaningless.
vaccs = vaccs.groupby('area_code').sum(numeric_only=True).reset_index()
vaccs

Unnamed: 0,area_code,fips_state,county,state,FIPS,vaccines
0,10100,92,58,92,92058,17521
1,10140,53,27,53,53027,32982
2,10180,144,753,144,144753,0
3,10220,40,123,40,40123,14142
4,10300,26,91,26,26091,37788
...,...,...,...,...,...,...
927,49700,12,216,12,12216,62086
928,49740,4,27,4,4027,97902
929,49780,39,119,39,39119,32476
930,49820,48,505,48,48505,0


In [13]:
# Attach the per-area vaccination totals to the main dataset, then express
# them per resident.
vaccs = (
    merged2
    .merge(vaccs[['area_code','vaccines']], how='left', on='area_code')
    .assign(vaccines_by_pop=lambda frame: frame['vaccines'] / frame['total_pop'])
)
vaccs

Unnamed: 0,area_code,hispanic,white_nonhispanic,black_nonhispanic,other_nonhispanic,asian_nonhispanic,above_65,prepandemic_growth,leisure_by_emp,total_emp_feb2020,...,pop_by_metro,state_name,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,deaths,deaths_by_pop,vaccines,vaccines_by_pop
0,10180.0,24.12,63.99,7.49,2.46,1.94,15.271998,0.061584,0.111297,71.2,...,172060.0,Texas,53.639829,52.692697,54.371357,40.948421,511.0,0.002970,0,0.000000
1,10420.0,2.22,79.34,12.49,2.48,3.47,18.158182,0.008289,0.096521,336.5,...,703479.0,Ohio,58.787620,59.645174,62.129232,42.263105,1231.0,0.001750,333431,0.473974
2,10500.0,3.12,41.22,55.03,1.45,1.25,16.949280,0.027823,,62.2,...,146726.0,Georgia,55.921999,56.254949,55.945863,58.414579,499.0,0.003401,17340,0.118179
3,10540.0,9.52,84.34,0.60,4.20,1.34,19.069126,0.106132,0.079524,46.6,...,129749.0,Oregon,60.793028,59.832710,58.962529,66.683468,81.0,0.000624,53524,0.412520
4,10580.0,5.42,79.32,8.20,2.53,4.53,17.980738,0.033501,0.082655,466.1,...,880381.0,New York,66.881749,71.821582,68.375444,95.937500,892.0,0.001013,516496,0.586673
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364,49620.0,8.08,82.62,5.79,2.02,1.49,17.954696,0.040929,0.084918,185.4,...,449058.0,Pennsylvania,55.666169,59.711847,59.995945,57.721774,826.0,0.001839,200064,0.445519
365,49660.0,4.47,83.14,9.36,2.02,1.01,21.324866,-0.040301,0.109646,215.3,...,536081.0,Ohio,58.787620,59.645174,62.129232,42.263105,1367.0,0.002081,299053,0.455314
366,49700.0,30.65,48.98,2.82,5.18,12.37,14.441553,0.132029,0.098186,44.6,...,175639.0,California,63.051351,67.141762,64.823398,80.771169,165.0,0.000939,62086,0.353486
367,49740.0,64.61,30.15,1.86,2.10,1.28,19.319229,0.086792,0.106338,59.1,...,213787.0,Arizona,52.612700,54.854439,55.531136,50.730847,848.0,0.003967,97902,0.457942


## Income

Personal median income data for the year 2019 is sourced at the MSA level from the Bureau of Economic Analysis (BEA): https://www.bea.gov/data/income-saving/personal-income-county-metro-and-other-areas

In [18]:
# Attach the income figure of each MSA, matched on the CBSA title.
change_dir('output//stage2')
income = pd.read_csv('bea.csv').astype({'income': int})
# Left join keeps every row of the main dataset; exact duplicate rows are removed.
merged3 = pd.merge(vaccs, income, how='left', on='CBSA Title').drop_duplicates()
merged3

Unnamed: 0,area_code,hispanic,white_nonhispanic,black_nonhispanic,other_nonhispanic,asian_nonhispanic,above_65,prepandemic_growth,leisure_by_emp,total_emp_feb2020,...,state_name,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,deaths,deaths_by_pop,vaccines,vaccines_by_pop,income
0,10180.0,24.12,63.99,7.49,2.46,1.94,15.271998,0.061584,0.111297,71.2,...,Texas,53.639829,52.692697,54.371357,40.948421,511.0,0.002970,0,0.000000,26603
1,10420.0,2.22,79.34,12.49,2.48,3.47,18.158182,0.008289,0.096521,336.5,...,Ohio,58.787620,59.645174,62.129232,42.263105,1231.0,0.001750,333431,0.473974,51095
2,10500.0,3.12,41.22,55.03,1.45,1.25,16.949280,0.027823,,62.2,...,Georgia,55.921999,56.254949,55.945863,58.414579,499.0,0.003401,17340,0.118179,39922
3,10540.0,9.52,84.34,0.60,4.20,1.34,19.069126,0.106132,0.079524,46.6,...,Oregon,60.793028,59.832710,58.962529,66.683468,81.0,0.000624,53524,0.412520,44830
4,10580.0,5.42,79.32,8.20,2.53,4.53,17.980738,0.033501,0.082655,466.1,...,New York,66.881749,71.821582,68.375444,95.937500,892.0,0.001013,516496,0.586673,60767
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364,49620.0,8.08,82.62,5.79,2.02,1.49,17.954696,0.040929,0.084918,185.4,...,Pennsylvania,55.666169,59.711847,59.995945,57.721774,826.0,0.001839,200064,0.445519,51585
365,49660.0,4.47,83.14,9.36,2.02,1.01,21.324866,-0.040301,0.109646,215.3,...,Ohio,58.787620,59.645174,62.129232,42.263105,1367.0,0.002081,299053,0.455314,43167
366,49700.0,30.65,48.98,2.82,5.18,12.37,14.441553,0.132029,0.098186,44.6,...,California,63.051351,67.141762,64.823398,80.771169,165.0,0.000939,62086,0.353486,45782
367,49740.0,64.61,30.15,1.86,2.10,1.28,19.319229,0.086792,0.106338,59.1,...,Arizona,52.612700,54.854439,55.531136,50.730847,848.0,0.003967,97902,0.457942,36570


## Finalizing

In [20]:
# Display the column labels of the merged dataset.
merged3.columns

Index(['area_code', 'hispanic', 'white_nonhispanic', 'black_nonhispanic',
       'other_nonhispanic', 'asian_nonhispanic', 'above_65',
       'prepandemic_growth', 'leisure_by_emp', 'total_emp_feb2020',
       'manufacturing_emp_feb2020', 'service_emp_feb2020', 'total_emp_jun2021',
       'manufacturing_emp_jun2021', 'service_emp_jun2021', 'total_employment',
       'manufacturing', 'service', 'emp_tot_change_feb2020_jun2021',
       'manufacturing_change_feb2020_jun2021',
       'service_change_feb2020_jun2021', 'occ_15', 'total_pop', 'pop_excl',
       'less_than_9grade', 'hs_nodiploma', 'ged', 'college_nodegree',
       'associates', 'bachelors_graduate', 'CBSA Title', 'division', 'region',
       'state', 'pop_by_metro', 'state_name', 'StringencyIndex',
       'GovernmentResponseIndex', 'ContainmentHealthIndex',
       'EconomicSupportIndex', 'deaths', 'deaths_by_pop', 'vaccines',
       'vaccines_by_pop', 'income'],
      dtype='object')

In [24]:
# Select the columns of the final dataset and put them in presentation order.
# 'pop_excl' is not listed, so it is dropped here.
final_columns = [
    # identifiers and geography
    'area_code', 'CBSA Title', 'state_name', 'state', 'division', 'region',
    # population and income
    'total_pop', 'pop_by_metro', 'income',
    # demographics
    'hispanic', 'white_nonhispanic', 'black_nonhispanic', 'other_nonhispanic',
    'asian_nonhispanic', 'above_65',
    # education
    'less_than_9grade', 'hs_nodiploma', 'ged', 'college_nodegree',
    'associates', 'bachelors_graduate',
    # employment
    'total_emp_feb2020', 'manufacturing_emp_feb2020', 'service_emp_feb2020',
    'total_emp_jun2021', 'manufacturing_emp_jun2021', 'service_emp_jun2021',
    'total_employment', 'manufacturing', 'service',
    'emp_tot_change_feb2020_jun2021', 'manufacturing_change_feb2020_jun2021',
    'service_change_feb2020_jun2021',
    'occ_15', 'leisure_by_emp', 'prepandemic_growth',
    # pandemic policy indices
    'StringencyIndex', 'GovernmentResponseIndex', 'ContainmentHealthIndex',
    'EconomicSupportIndex',
    # health outcomes
    'deaths', 'deaths_by_pop', 'vaccines', 'vaccines_by_pop',
]
merged3 = merged3[final_columns]
merged3

Unnamed: 0,area_code,CBSA Title,state_name,state,division,region,total_pop,pop_by_metro,income,hispanic,...,leisure_by_emp,prepandemic_growth,StringencyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,deaths,deaths_by_pop,vaccines,vaccines_by_pop
0,10180.0,"Abilene, TX",Texas,48,West South Central Division,South Region,172060,172060.0,26603,24.12,...,0.111297,0.061584,53.639829,52.692697,54.371357,40.948421,511.0,0.002970,0,0.000000
1,10420.0,"Akron, OH",Ohio,39,East North Central Division,Midwest Region,703479,703479.0,51095,2.22,...,0.096521,0.008289,58.787620,59.645174,62.129232,42.263105,1231.0,0.001750,333431,0.473974
2,10500.0,"Albany, GA",Georgia,13,South Atlantic Division,South Region,146726,146726.0,39922,3.12,...,,0.027823,55.921999,56.254949,55.945863,58.414579,499.0,0.003401,17340,0.118179
3,10540.0,"Albany, OR",Oregon,41,Pacific Division,West Region,129749,129749.0,44830,9.52,...,0.079524,0.106132,60.793028,59.832710,58.962529,66.683468,81.0,0.000624,53524,0.412520
4,10580.0,"Albany-Schenectady-Troy, NY",New York,36,Middle Atlantic Division,Northeast Region,880381,880381.0,60767,5.42,...,0.082655,0.033501,66.881749,71.821582,68.375444,95.937500,892.0,0.001013,516496,0.586673
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
364,49620.0,"York-Hanover, PA",Pennsylvania,42,Middle Atlantic Division,Northeast Region,449058,449058.0,51585,8.08,...,0.084918,0.040929,55.666169,59.711847,59.995945,57.721774,826.0,0.001839,200064,0.445519
365,49660.0,"Youngstown-Warren-Boardman, OH-PA",Ohio,39,East North Central Division,Midwest Region,656806,536081.0,43167,4.47,...,0.109646,-0.040301,58.787620,59.645174,62.129232,42.263105,1367.0,0.002081,299053,0.455314
366,49700.0,"Yuba City, CA",California,6,Pacific Division,West Region,175639,175639.0,45782,30.65,...,0.098186,0.132029,63.051351,67.141762,64.823398,80.771169,165.0,0.000939,62086,0.353486
367,49740.0,"Yuma, AZ",Arizona,4,Mountain Division,West Region,213787,213787.0,36570,64.61,...,0.106338,0.086792,52.612700,54.854439,55.531136,50.730847,848.0,0.003967,97902,0.457942


In [25]:
# Save the final dataset.
change_dir('output//stage2')
# `index` expects a boolean. The previous value "ignore" is not a recognised
# option; it only worked because a non-empty string is truthy, so the row index
# was written anyway. index=True states that explicitly and keeps the file
# layout unchanged.
merged3.to_csv('final_output.csv', index=True)