In [None]:
from google.colab import drive
drive.mount('/content/gdrive') 
%cd "/content/gdrive/My Drive/Econ488Paper/final_data" 

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/My Drive/Econ488Paper/final_data


In [None]:
import pandas as pd
import numpy as np 
from datetime import datetime
from numpy.linalg import norm 
import matplotlib.pyplot as plt 

import warnings
warnings.filterwarnings("ignore") 

In [None]:
data = pd.read_csv("final_cases.csv", index_col = 0)      
mask_data = pd.read_csv("cleaned_mask_data.csv", index_col = 0) 
demo_data = pd.read_csv("age_data.csv", index_col = 0)
race_data = pd.read_csv("demographics_race.csv", index_col = 0)

demo_data.columns = demo_data.columns.str.lower() 
mask_data.columns = mask_data.columns.str.lower()    
race_data.columns = race_data.columns.str.lower()    

In [None]:
data.head(2)

Unnamed: 0,date,county,fips,cases,time_index,county_num,treatment_indicator,x_cases
0,2020-07-10,Abbeville South Carolina,45001,141,1,1,0,141
1,2020-07-11,Abbeville South Carolina,45001,150,2,1,0,150


In [None]:
mask_data.head(2)

Unnamed: 0,countyfp,never,rarely,sometimes,frequently,always,similarity
0,46093,0.123,0.078,0.161,0.214,0.425,0.0
1,20083,0.126,0.075,0.163,0.225,0.412,0.017664


In [None]:
demo_data.head(2)

Unnamed: 0,year,county,pct_18_29
11,12,Autauga Alabama,0.147917
23,12,Baldwin Alabama,0.127324


In [None]:
race_data.head(2)

Unnamed: 0,ctyname,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island
209,Autauga Alabama,0.762623,0.201131,0.004761,0.011742,0.001038
437,Baldwin Alabama,0.87441,0.087769,0.007803,0.010661,0.00069


In [None]:
len(data.fips.unique()), len(demo_data.county.unique()), len(mask_data.countyfp.unique())  

(2971, 2971, 2971)

In [None]:
unique_counties = set(data.county.unique()).intersection(set(race_data.ctyname.unique()))  

In [None]:
len(unique_counties)

2971

In [None]:
race_data = race_data[race_data.ctyname.isin(unique_counties)] 

len(race_data.ctyname.unique())  

2971

In [None]:
race_data["county"] = race_data.ctyname 
race_data.drop(["ctyname"], axis = 1, inplace = True) 

In [None]:
county_fips = data[["county", "fips"]].drop_duplicates() 

demo_temp = demo_data.merge(county_fips, how = "left", on = "county") 
race_data = race_data.merge(county_fips, how = "left", on = "county") 

In [None]:
demo_temp.head()

Unnamed: 0,year,county,pct_18_29,fips
0,12,Autauga Alabama,0.147917,1001
1,12,Baldwin Alabama,0.127324,1003
2,12,Barbour Alabama,0.153326,1005
3,12,Bibb Alabama,0.156024,1007
4,12,Blount Alabama,0.139211,1009


In [None]:
race_data.head()

Unnamed: 0,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island,county,fips
0,0.762623,0.201131,0.004761,0.011742,0.001038,Autauga Alabama,1001
1,0.87441,0.087769,0.007803,0.010661,0.00069,Baldwin Alabama,1003
2,0.491534,0.482298,0.006886,0.004699,0.002106,Barbour Alabama,1005
3,0.767661,0.212691,0.004599,0.002143,0.001161,Bibb Alabama,1007
4,0.958254,0.016913,0.006399,0.003199,0.001159,Blount Alabama,1009


In [None]:
mask_temp = mask_data.drop(["similarity"], axis = 1) 

In [None]:
meade = demo_data[demo_data.county == "Meade South Dakota"]

In [None]:
meade

Unnamed: 0,year,county,pct_18_29
551,12,Meade South Dakota,1.52033


In [None]:
######## MATCH ON MEDIAN AGE ###########

In [None]:
mask_data = mask_data.drop(["similarity"], axis = 1)

In [None]:
mask_data.head()

Unnamed: 0,countyfp,never,rarely,sometimes,frequently,always
0,46093,0.123,0.078,0.161,0.214,0.425
1,20083,0.126,0.075,0.163,0.225,0.412
2,21213,0.137,0.066,0.162,0.208,0.426
3,46107,0.131,0.067,0.147,0.225,0.431
4,13269,0.126,0.069,0.182,0.201,0.422


In [None]:
mask_data["fips"] = mask_data.countyfp
mask_data.drop(["countyfp"], axis = 1, inplace = True) 

In [None]:
county_data = mask_data.merge(race_data, how = "left", on = "fips") 

In [None]:
county_data.head()

Unnamed: 0,never,rarely,sometimes,frequently,always,fips,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island,county
0,0.123,0.078,0.161,0.214,0.425,46093,0.905125,0.019801,0.03166,0.010306,0.000918,Meade South Dakota
1,0.126,0.075,0.163,0.225,0.412,20083,0.92252,0.013378,0.006132,0.042921,0.0,Hodgeman Kansas
2,0.137,0.066,0.162,0.208,0.426,21213,0.870289,0.095736,0.003661,0.008454,0.000915,Simpson Kentucky
3,0.131,0.067,0.147,0.225,0.431,46107,0.940084,0.005574,0.026475,0.00836,0.000464,Potter South Dakota
4,0.126,0.069,0.182,0.201,0.422,13269,0.59788,0.376434,0.002993,0.008728,0.000125,Taylor Georgia


In [None]:
county_data.columns

Index(['never', 'rarely', 'sometimes', 'frequently', 'always', 'fips',
       'pct_white', 'pct_black', 'pct_american_indian', 'pct_asian',
       'pct_pacific_island', 'county'],
      dtype='object')

In [None]:
county_data = county_data[['fips', 'county', 'never', 'rarely', 'sometimes', 'frequently', 'always', 
                            'pct_white', 'pct_black', 'pct_american_indian', 'pct_asian',
                            'pct_pacific_island']]   

county_data.head()

Unnamed: 0,fips,county,never,rarely,sometimes,frequently,always,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island
0,46093,Meade South Dakota,0.123,0.078,0.161,0.214,0.425,0.905125,0.019801,0.03166,0.010306,0.000918
1,20083,Hodgeman Kansas,0.126,0.075,0.163,0.225,0.412,0.92252,0.013378,0.006132,0.042921,0.0
2,21213,Simpson Kentucky,0.137,0.066,0.162,0.208,0.426,0.870289,0.095736,0.003661,0.008454,0.000915
3,46107,Potter South Dakota,0.131,0.067,0.147,0.225,0.431,0.940084,0.005574,0.026475,0.00836,0.000464
4,13269,Taylor Georgia,0.126,0.069,0.182,0.201,0.422,0.59788,0.376434,0.002993,0.008728,0.000125


In [None]:
demo_data.head()  

Unnamed: 0,year,county,pct_18_29
11,12,Autauga Alabama,0.147917
23,12,Baldwin Alabama,0.127324
35,12,Barbour Alabama,0.153326
47,12,Bibb Alabama,0.156024
59,12,Blount Alabama,0.139211


In [None]:
county_data = county_data.merge(demo_data[["county", "pct_18_29"]], how = "left", on = "county")  
county_data.head()

Unnamed: 0,fips,county,never,rarely,sometimes,frequently,always,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island,pct_18_29
0,46093,Meade South Dakota,0.123,0.078,0.161,0.214,0.425,0.905125,0.019801,0.03166,0.010306,0.000918,1.52033
1,20083,Hodgeman Kansas,0.126,0.075,0.163,0.225,0.412,0.92252,0.013378,0.006132,0.042921,0.0,1.511706
2,21213,Simpson Kentucky,0.137,0.066,0.162,0.208,0.426,0.870289,0.095736,0.003661,0.008454,0.000915,1.491008
3,46107,Potter South Dakota,0.131,0.067,0.147,0.225,0.431,0.940084,0.005574,0.026475,0.00836,0.000464,1.492336
4,13269,Taylor Georgia,0.126,0.069,0.182,0.201,0.422,0.59788,0.376434,0.002993,0.008728,0.000125,1.476309


In [None]:
norms = np.linalg.norm(demo_data.iloc[:, 2:] - demo_data.iloc[0][2: ].astype(float), ord = 2, axis = 1)   

In [None]:
county_data["similarity"] = norms  

In [None]:
county_data.head(3)

Unnamed: 0,fips,county,never,rarely,sometimes,frequently,always,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island,pct_18_29,similarity
0,46093,Meade South Dakota,0.123,0.078,0.161,0.214,0.425,0.905125,0.019801,0.03166,0.010306,0.000918,1.52033,0.0
1,20083,Hodgeman Kansas,0.126,0.075,0.163,0.225,0.412,0.92252,0.013378,0.006132,0.042921,0.0,1.511706,0.020594
2,21213,Simpson Kentucky,0.137,0.066,0.162,0.208,0.426,0.870289,0.095736,0.003661,0.008454,0.000915,1.491008,0.005408


In [98]:
county_data = county_data.sort_values(by = ["similarity"]) 
county_data.head()

Unnamed: 0,fips,county,never,rarely,sometimes,frequently,always,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island,pct_18_29,similarity
0,46093,Meade South Dakota,0.123,0.078,0.161,0.214,0.425,0.905125,0.019801,0.03166,0.010306,0.000918,1.52033,0.0
27,46069,Hyde South Dakota,0.142,0.053,0.142,0.234,0.43,0.874712,0.007686,0.089931,0.002306,0.000769,1.518832,4.7e-05
12,46033,Custer South Dakota,0.132,0.088,0.156,0.229,0.396,0.927998,0.005907,0.037784,0.005907,0.000223,1.49543,0.000385
8,13185,Lowndes Georgia,0.108,0.101,0.149,0.208,0.435,0.575516,0.373882,0.004633,0.02172,0.00161,1.482624,0.000426
32,38003,Barnes North Dakota,0.111,0.079,0.137,0.248,0.425,0.942487,0.018339,0.011426,0.009121,0.000672,1.504081,0.000605


In [99]:
county_data.pct_18_29 /= 10 

county_data.head()

Unnamed: 0,fips,county,never,rarely,sometimes,frequently,always,pct_white,pct_black,pct_american_indian,pct_asian,pct_pacific_island,pct_18_29,similarity
0,46093,Meade South Dakota,0.123,0.078,0.161,0.214,0.425,0.905125,0.019801,0.03166,0.010306,0.000918,0.152033,0.0
27,46069,Hyde South Dakota,0.142,0.053,0.142,0.234,0.43,0.874712,0.007686,0.089931,0.002306,0.000769,0.151883,4.7e-05
12,46033,Custer South Dakota,0.132,0.088,0.156,0.229,0.396,0.927998,0.005907,0.037784,0.005907,0.000223,0.149543,0.000385
8,13185,Lowndes Georgia,0.108,0.101,0.149,0.208,0.435,0.575516,0.373882,0.004633,0.02172,0.00161,0.148262,0.000426
32,38003,Barnes North Dakota,0.111,0.079,0.137,0.248,0.425,0.942487,0.018339,0.011426,0.009121,0.000672,0.150408,0.000605


In [100]:
county_data.to_csv("county_demographics_final.csv") 