# Health Statistics

The state data is sourced from CDC's COVID-19 Death Data and Resources database. The data link is the SODA API which is updated on a daily basis. The program filters the obtained data for US states, and focuses on mainly COVID-19 deaths. The County data is sourced from COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University. The data is updated on a daily basis.

In [5]:
# Change the directory
def change_dir(folder):
    get_path = sys.path[0].split("\\")      
    del get_path[-1]                                                   
    get_path.append(folder)                    
    path = "\\".join(get_path)              
    os.chdir(path) 

## State data

In [6]:
# Import libraries
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
import json
import requests
import os
import sys
import urllib.request
sys.path.append("../")
from pandas.tseries.offsets import MonthEnd

In [3]:
def get_cdcdata(i):
    api_query = "https://data.cdc.gov/resource/r8kw-7aab.json"
    api_query = api_query + "?month=" + str(i)
    response = requests.get(api_query)
    formattedResponse = json.loads(response.text)
    return formattedResponse

In [4]:
appended_data = pd.DataFrame()
for i in range(1,13):
    df = pd.DataFrame(get_cdcdata(i))
    appended_data = pd.concat([appended_data, df])
appended_data

Unnamed: 0,data_as_of,start_date,end_date,group,year,month,state,covid_19_deaths,total_deaths,percent_of_expected_deaths,pneumonia_deaths,pneumonia_and_covid_19_deaths,influenza_deaths,pneumonia_influenza_or_covid_19_deaths,footnote
0,2021-08-11T00:00:00.000,2020-01-01T00:00:00.000,2020-01-31T00:00:00.000,By Month,2020,1,United States,6,264680,98.00,17909,3,2124,20036,
1,2021-08-11T00:00:00.000,2021-01-01T00:00:00.000,2021-01-31T00:00:00.000,By Month,2021,1,United States,104964,372697,138.00,69945,55504,143,119486,
2,2021-08-11T00:00:00.000,2020-01-01T00:00:00.000,2020-01-31T00:00:00.000,By Month,2020,1,Alabama,,4729,94.00,282,0,35,318,One or more data cells have counts between 1-9...
3,2021-08-11T00:00:00.000,2021-01-01T00:00:00.000,2021-01-31T00:00:00.000,By Month,2021,1,Alabama,2396,7787,154.00,1159,886,,2673,One or more data cells have counts between 1-9...
4,2021-08-11T00:00:00.000,2020-01-01T00:00:00.000,2020-01-31T00:00:00.000,By Month,2020,1,Alaska,0,422,107.00,10,0,,13,One or more data cells have counts between 1-9...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49,2021-08-11T00:00:00.000,2020-12-01T00:00:00.000,2020-12-31T00:00:00.000,By Month,2020,12,Washington,937,6118,122.00,811,551,0,1197,
50,2021-08-11T00:00:00.000,2020-12-01T00:00:00.000,2020-12-31T00:00:00.000,By Month,2020,12,West Virginia,755,2993,147.00,483,344,0,894,
51,2021-08-11T00:00:00.000,2020-12-01T00:00:00.000,2020-12-31T00:00:00.000,By Month,2020,12,Wisconsin,1771,6654,140.0,790,593,,1968,One or more data cells have counts between 1-9...
52,2021-08-11T00:00:00.000,2020-12-01T00:00:00.000,2020-12-31T00:00:00.000,By Month,2020,12,Wyoming,183,611,147.00,113,83,0,213,


In [5]:
appended_data = appended_data.fillna(0)

In [6]:
# Drop irrelevant columns
appended_data.drop(appended_data.columns.difference(['year','month','state','covid_19_deaths']), 1, inplace=True)
appended_data

Unnamed: 0,year,month,state,covid_19_deaths
0,2020,1,United States,6
1,2021,1,United States,104964
2,2020,1,Alabama,0
3,2021,1,Alabama,2396
4,2020,1,Alaska,0
...,...,...,...,...
49,2020,12,Washington,937
50,2020,12,West Virginia,755
51,2020,12,Wisconsin,1771
52,2020,12,Wyoming,183


In [7]:
# Go to the Input folder
change_dir('input')  

In [8]:
# Merge state codes
state_codes = pd.read_csv("State_msa_names.csv")
merged = pd.merge(appended_data, state_codes, left_on='state', right_on='state_proper')
merged

Unnamed: 0,year,month,state,covid_19_deaths,fips,state_proper,division,region,state_abbr
0,2020,1,Alabama,0,1,Alabama,East South Central Division,South Region,AL
1,2021,1,Alabama,2396,1,Alabama,East South Central Division,South Region,AL
2,2020,2,Alabama,0,1,Alabama,East South Central Division,South Region,AL
3,2021,2,Alabama,964,1,Alabama,East South Central Division,South Region,AL
4,2020,3,Alabama,48,1,Alabama,East South Central Division,South Region,AL
...,...,...,...,...,...,...,...,...,...
1015,2021,8,Wyoming,0,56,Wyoming,Mountain Division,West Region,WY
1016,2020,9,Wyoming,12,56,Wyoming,Mountain Division,West Region,WY
1017,2020,10,Wyoming,71,56,Wyoming,Mountain Division,West Region,WY
1018,2020,11,Wyoming,163,56,Wyoming,Mountain Division,West Region,WY


In [9]:
# Save the output csv
change_dir('output')
merged.to_csv('health_stats_states.csv')

## County data - Deaths

In [53]:
# Read the dataset in csv format
data = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv')

In [54]:
data

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,...,8/1/21,8/2/21,8/3/21,8/4/21,8/5/21,8/6/21,8/7/21,8/8/21,8/9/21,8/10/21
0,84001001,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,...,114,114,114,114,114,114,114,114,114,114
1,84001003,US,USA,840,1003.0,Baldwin,Alabama,US,30.727750,-87.722071,...,329,329,329,329,330,330,330,330,330,332
2,84001005,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,...,61,61,61,61,61,63,63,63,63,63
3,84001007,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,...,65,65,65,66,66,66,66,66,66,66
4,84001009,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,...,139,139,139,140,140,140,140,140,140,140
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3337,84056039,US,USA,840,56039.0,Teton,Wyoming,US,43.935225,-110.589080,...,11,11,11,11,11,11,11,11,11,11
3338,84056041,US,USA,840,56041.0,Uinta,Wyoming,US,41.287818,-110.547578,...,14,14,14,14,14,14,14,14,14,14
3339,84090056,US,USA,840,90056.0,Unassigned,Wyoming,US,0.000000,0.000000,...,0,0,0,0,0,0,0,0,0,0
3340,84056043,US,USA,840,56043.0,Washakie,Wyoming,US,43.904516,-107.680187,...,26,26,26,26,26,26,26,26,26,26


In [55]:
# Transpose the date to clean up date columns
data_transpose = data.transpose().reset_index()

In [56]:
# Convert dates to a datetime format
data_transpose['index'][12:] = pd.to_datetime(data_transpose['index'][12:], infer_datetime_format=True).dt.date
dates = data_transpose.loc[12:]         # Save the dataframe as a separate dataframe
dates

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341
12,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
14,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
15,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
16,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
574,2021-08-06,114,330,63,66,140,42,72,335,125,...,34,15,31,9,45,11,14,0,26,6
575,2021-08-07,114,330,63,66,140,41,72,336,125,...,34,15,31,9,45,11,14,0,26,6
576,2021-08-08,114,330,63,66,140,41,72,336,125,...,34,15,31,9,45,11,14,0,26,6
577,2021-08-09,114,330,63,66,140,41,72,336,125,...,34,15,31,9,45,11,14,0,26,6


In [57]:
# Find the end date of the month for every date
dates['End Date'] = pd.to_datetime(dates['index'], infer_datetime_format=True) + MonthEnd(0)
dates

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dates['End Date'] = pd.to_datetime(dates['index'], infer_datetime_format=True) + MonthEnd(0)


Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,3333,3334,3335,3336,3337,3338,3339,3340,3341,End Date
12,2020-01-22,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2020-01-31
13,2020-01-23,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2020-01-31
14,2020-01-24,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2020-01-31
15,2020-01-25,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2020-01-31
16,2020-01-26,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2020-01-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
574,2021-08-06,114,330,63,66,140,42,72,335,125,...,15,31,9,45,11,14,0,26,6,2021-08-31
575,2021-08-07,114,330,63,66,140,41,72,336,125,...,15,31,9,45,11,14,0,26,6,2021-08-31
576,2021-08-08,114,330,63,66,140,41,72,336,125,...,15,31,9,45,11,14,0,26,6,2021-08-31
577,2021-08-09,114,330,63,66,140,41,72,336,125,...,15,31,9,45,11,14,0,26,6,2021-08-31


In [58]:
# Filter rows for the last date of the month
dates = dates[dates['index'] == dates['End Date']]
dates.head(20)

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,3333,3334,3335,3336,3337,3338,3339,3340,3341,End Date
21,2020-01-31,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2020-01-31
50,2020-02-29,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2020-02-29
81,2020-03-31,0,1,0,0,0,0,0,0,4,...,0,0,0,0,0,0,0,0,0,2020-03-31
111,2020-04-30,4,3,1,0,0,0,1,3,21,...,0,0,0,0,0,0,6,0,0,2020-04-30
142,2020-05-31,4,9,1,1,1,5,18,3,25,...,0,0,0,0,0,0,15,0,0,2020-05-31
172,2020-06-30,11,9,1,1,1,9,27,5,27,...,0,0,0,0,0,0,19,0,0,2020-06-30
203,2020-07-31,20,21,5,2,3,11,35,9,38,...,0,0,0,0,0,0,25,0,0,2020-07-31
234,2020-08-31,22,38,7,7,11,13,36,30,39,...,0,0,0,0,0,0,36,0,0,2020-08-31
264,2020-09-30,27,52,7,11,15,15,40,44,42,...,1,4,1,2,1,2,0,6,0,2020-09-30
295,2020-10-31,31,71,9,15,25,17,41,65,47,...,2,4,1,2,1,3,10,7,0,2020-10-31


In [59]:
# Delete the extra 'End Date' column
dates = dates.drop(['End Date'],axis=1)
dates

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341
21,2020-01-31,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
50,2020-02-29,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
81,2020-03-31,0,1,0,0,0,0,0,0,4,...,0,0,0,0,0,0,0,0,0,0
111,2020-04-30,4,3,1,0,0,0,1,3,21,...,0,0,0,0,0,0,0,6,0,0
142,2020-05-31,4,9,1,1,1,5,18,3,25,...,0,0,0,0,0,0,0,15,0,0
172,2020-06-30,11,9,1,1,1,9,27,5,27,...,0,0,0,0,0,0,0,19,0,0
203,2020-07-31,20,21,5,2,3,11,35,9,38,...,0,0,0,0,0,0,0,25,0,0
234,2020-08-31,22,38,7,7,11,13,36,30,39,...,0,0,0,0,0,0,0,36,0,0
264,2020-09-30,27,52,7,11,15,15,40,44,42,...,2,1,4,1,2,1,2,0,6,0
295,2020-10-31,31,71,9,15,25,17,41,65,47,...,3,2,4,1,2,1,3,10,7,0


In [60]:
# Retrieve the columns containing the other variables
variables = data_transpose.loc[1:11]
variables

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,3332,3333,3334,3335,3336,3337,3338,3339,3340,3341
1,iso2,US,US,US,US,US,US,US,US,US,...,US,US,US,US,US,US,US,US,US,US
2,iso3,USA,USA,USA,USA,USA,USA,USA,USA,USA,...,USA,USA,USA,USA,USA,USA,USA,USA,USA,USA
3,code3,840,840,840,840,840,840,840,840,840,...,840,840,840,840,840,840,840,840,840,840
4,FIPS,1001.0,1003.0,1005.0,1007.0,1009.0,1011.0,1013.0,1015.0,1017.0,...,56029.0,56031.0,56033.0,56035.0,56037.0,56039.0,56041.0,90056.0,56043.0,56045.0
5,Admin2,Autauga,Baldwin,Barbour,Bibb,Blount,Bullock,Butler,Calhoun,Chambers,...,Park,Platte,Sheridan,Sublette,Sweetwater,Teton,Uinta,Unassigned,Washakie,Weston
6,Province_State,Alabama,Alabama,Alabama,Alabama,Alabama,Alabama,Alabama,Alabama,Alabama,...,Wyoming,Wyoming,Wyoming,Wyoming,Wyoming,Wyoming,Wyoming,Wyoming,Wyoming,Wyoming
7,Country_Region,US,US,US,US,US,US,US,US,US,...,US,US,US,US,US,US,US,US,US,US
8,Lat,32.539527,30.72775,31.868263,32.996421,33.982109,32.100305,31.753001,33.774837,32.913601,...,44.521575,42.132991,44.790489,42.765583,41.659439,43.935225,41.287818,0.0,43.904516,43.839612
9,Long_,-86.644082,-87.722071,-85.387129,-87.125115,-86.567906,-85.712655,-86.680575,-85.826304,-85.390727,...,-109.585283,-104.966331,-106.886239,-109.913092,-108.882788,-110.58908,-110.547578,0.0,-107.680187,-104.567488
10,Combined_Key,"Autauga, Alabama, US","Baldwin, Alabama, US","Barbour, Alabama, US","Bibb, Alabama, US","Blount, Alabama, US","Bullock, Alabama, US","Butler, Alabama, US","Calhoun, Alabama, US","Chambers, Alabama, US",...,"Park, Wyoming, US","Platte, Wyoming, US","Sheridan, Wyoming, US","Sublette, Wyoming, US","Sweetwater, Wyoming, US","Teton, Wyoming, US","Uinta, Wyoming, US","Unassigned, Wyoming, US","Washakie, Wyoming, US","Weston, Wyoming, US"


In [61]:
# Append the variables dataframe with the cleaned up dates dataframe
appended_df = variables.append(dates).transpose()
appended_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,295,325,356,387,415,446,476,507,537,568
index,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,...,2020-10-31,2020-11-30,2020-12-31,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31
0,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",...,31,42,48,69,91,99,107,110,113,114
1,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",...,71,98,161,224,283,301,305,311,315,329
2,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",...,9,11,32,40,51,55,56,59,60,61
3,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",...,15,17,46,52,60,58,63,64,64,65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3337,US,USA,840,56039.0,Teton,Wyoming,US,43.935225,-110.58908,"Teton, Wyoming, US",...,1,2,4,6,9,9,9,9,11,11
3338,US,USA,840,56041.0,Uinta,Wyoming,US,41.287818,-110.547578,"Uinta, Wyoming, US",...,3,4,7,12,12,12,12,13,13,14
3339,US,USA,840,90056.0,Unassigned,Wyoming,US,0.0,0.0,"Unassigned, Wyoming, US",...,10,0,0,0,0,0,0,0,0,0
3340,US,USA,840,56043.0,Washakie,Wyoming,US,43.904516,-107.680187,"Washakie, Wyoming, US",...,7,8,19,25,26,26,26,26,26,26


In [62]:
# Clean up the headers
appended_df.columns = appended_df.iloc[0]
appended_df = appended_df.drop('index')
appended_df

index,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,...,2020-10-31,2020-11-30,2020-12-31,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31
0,US,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",...,31,42,48,69,91,99,107,110,113,114
1,US,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",...,71,98,161,224,283,301,305,311,315,329
2,US,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",...,9,11,32,40,51,55,56,59,60,61
3,US,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",...,15,17,46,52,60,58,63,64,64,65
4,US,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",...,25,40,63,100,127,131,135,139,139,139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3337,US,USA,840,56039.0,Teton,Wyoming,US,43.935225,-110.58908,"Teton, Wyoming, US",...,1,2,4,6,9,9,9,9,11,11
3338,US,USA,840,56041.0,Uinta,Wyoming,US,41.287818,-110.547578,"Uinta, Wyoming, US",...,3,4,7,12,12,12,12,13,13,14
3339,US,USA,840,90056.0,Unassigned,Wyoming,US,0.0,0.0,"Unassigned, Wyoming, US",...,10,0,0,0,0,0,0,0,0,0
3340,US,USA,840,56043.0,Washakie,Wyoming,US,43.904516,-107.680187,"Washakie, Wyoming, US",...,7,8,19,25,26,26,26,26,26,26


In [63]:
# States have unassigned county deaths - filter the actual counties in a separate dataframe
counties = appended_df[appended_df['Admin2'] != 'Unassigned']
counties_vars = counties[counties.columns[1:11]]

In [64]:
# Filter month columns for counties
counties_bymonth = counties[counties.columns[11:]]
counties_bymonth

index,2020-01-31,2020-02-29,2020-03-31,2020-04-30,2020-05-31,2020-06-30,2020-07-31,2020-08-31,2020-09-30,2020-10-31,2020-11-30,2020-12-31,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31
0,0,0,0,4,4,11,20,22,27,31,42,48,69,91,99,107,110,113,114
1,0,0,1,3,9,9,21,38,52,71,98,161,224,283,301,305,311,315,329
2,0,0,0,1,1,1,5,7,7,9,11,32,40,51,55,56,59,60,61
3,0,0,0,0,1,1,2,7,11,15,17,46,52,60,58,63,64,64,65
4,0,0,0,0,1,1,3,11,15,25,40,63,100,127,131,135,139,139,139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3336,0,0,0,0,0,0,0,0,2,2,6,16,32,34,37,37,39,40,44
3337,0,0,0,0,0,0,0,0,1,1,2,4,6,9,9,9,9,11,11
3338,0,0,0,0,0,0,0,0,2,3,4,7,12,12,12,12,13,13,14
3340,0,0,0,0,0,0,0,0,6,7,8,19,25,26,26,26,26,26,26


In [65]:
"""# Calcuate the difference between each consecutive month to get deaths per month
counties_bymonth = counties_bymonth.diff(axis=1)
counties_bymonth"""

'# Calcuate the difference between each consecutive month to get deaths per month\ncounties_bymonth = counties_bymonth.diff(axis=1)\ncounties_bymonth'

In [66]:
merged_counties = pd.concat([counties_vars, counties_bymonth], axis=1)
merged_counties

index,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Population,...,2020-10-31,2020-11-30,2020-12-31,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31
0,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",55869,...,31,42,48,69,91,99,107,110,113,114
1,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",223234,...,71,98,161,224,283,301,305,311,315,329
2,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",24686,...,9,11,32,40,51,55,56,59,60,61
3,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",22394,...,15,17,46,52,60,58,63,64,64,65
4,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",57826,...,25,40,63,100,127,131,135,139,139,139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3336,USA,840,56037.0,Sweetwater,Wyoming,US,41.659439,-108.882788,"Sweetwater, Wyoming, US",42343,...,2,6,16,32,34,37,37,39,40,44
3337,USA,840,56039.0,Teton,Wyoming,US,43.935225,-110.58908,"Teton, Wyoming, US",23464,...,1,2,4,6,9,9,9,9,11,11
3338,USA,840,56041.0,Uinta,Wyoming,US,41.287818,-110.547578,"Uinta, Wyoming, US",20226,...,3,4,7,12,12,12,12,13,13,14
3340,USA,840,56043.0,Washakie,Wyoming,US,43.904516,-107.680187,"Washakie, Wyoming, US",7805,...,7,8,19,25,26,26,26,26,26,26


In [67]:
# Filter the unassigned counties and merge it with the counties dataset
unassigned_counties = appended_df[appended_df['Admin2'] == 'Unassigned']
merged_all_counties = merged_counties.append(unassigned_counties)
merged_all_counties

index,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Population,...,2020-11-30,2020-12-31,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31,iso2
0,USA,840,1001.0,Autauga,Alabama,US,32.539527,-86.644082,"Autauga, Alabama, US",55869,...,42,48,69,91,99,107,110,113,114,
1,USA,840,1003.0,Baldwin,Alabama,US,30.72775,-87.722071,"Baldwin, Alabama, US",223234,...,98,161,224,283,301,305,311,315,329,
2,USA,840,1005.0,Barbour,Alabama,US,31.868263,-85.387129,"Barbour, Alabama, US",24686,...,11,32,40,51,55,56,59,60,61,
3,USA,840,1007.0,Bibb,Alabama,US,32.996421,-87.125115,"Bibb, Alabama, US",22394,...,17,46,52,60,58,63,64,64,65,
4,USA,840,1009.0,Blount,Alabama,US,33.982109,-86.567906,"Blount, Alabama, US",57826,...,40,63,100,127,131,135,139,139,139,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3134,USA,840,90051.0,Unassigned,Virginia,US,0.0,0.0,"Unassigned, Virginia, US",0,...,0,0,0,0,0,0,0,0,0,US
3180,USA,840,90053.0,Unassigned,Washington,US,0.0,0.0,"Unassigned, Washington, US",0,...,3,5,4,4,4,3,3,3,3,US
3235,USA,840,90054.0,Unassigned,West Virginia,US,0.0,0.0,"Unassigned, West Virginia, US",0,...,0,0,0,0,0,0,0,0,0,US
3306,USA,840,90055.0,Unassigned,Wisconsin,US,0.0,0.0,"Unassigned, Wisconsin, US",0,...,0,0,0,0,0,0,0,0,0,US


In [68]:
# Drop irrelevant columns
merged_all_counties.drop((['iso2','iso3','code3','Lat','Long_']), 1, inplace=True)
merged_all_counties

index,FIPS,Admin2,Province_State,Country_Region,Combined_Key,Population,2020-01-31,2020-02-29,2020-03-31,2020-04-30,...,2020-10-31,2020-11-30,2020-12-31,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31
0,1001.0,Autauga,Alabama,US,"Autauga, Alabama, US",55869,0,0,0,4,...,31,42,48,69,91,99,107,110,113,114
1,1003.0,Baldwin,Alabama,US,"Baldwin, Alabama, US",223234,0,0,1,3,...,71,98,161,224,283,301,305,311,315,329
2,1005.0,Barbour,Alabama,US,"Barbour, Alabama, US",24686,0,0,0,1,...,9,11,32,40,51,55,56,59,60,61
3,1007.0,Bibb,Alabama,US,"Bibb, Alabama, US",22394,0,0,0,0,...,15,17,46,52,60,58,63,64,64,65
4,1009.0,Blount,Alabama,US,"Blount, Alabama, US",57826,0,0,0,0,...,25,40,63,100,127,131,135,139,139,139
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3134,90051.0,Unassigned,Virginia,US,"Unassigned, Virginia, US",0,0,0,12,0,...,0,0,0,0,0,0,0,0,0,0
3180,90053.0,Unassigned,Washington,US,"Unassigned, Washington, US",0,0,0,0,0,...,3,3,5,4,4,4,3,3,3,3
3235,90054.0,Unassigned,West Virginia,US,"Unassigned, West Virginia, US",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3306,90055.0,Unassigned,Wisconsin,US,"Unassigned, Wisconsin, US",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [69]:
# Convert the output to csv
change_dir('output')
merged_all_counties.to_csv("health_stats_county.csv")

## County data - Vaccinations

In [18]:
# CDC Vaccination API
change_dir('input')
url = 'https://data.cdc.gov/api/views/8xkx-amqh/rows.csv?accessType=DOWNLOAD'
urllib.request.urlretrieve(url, 'vaccinations.csv')
vaccines = pd.read_csv('vaccinations.csv')
vaccines

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,Date,FIPS,MMWR_week,Recip_County,Recip_State,Series_Complete_Pop_Pct,Series_Complete_Yes,Series_Complete_12Plus,Series_Complete_12PlusPop_Pct,Series_Complete_18Plus,...,Administered_Dose1_Recip_12PlusPop_Pct,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_18PlusPop_Pct,Administered_Dose1_Recip_65Plus,Administered_Dose1_Recip_65PlusPop_Pct,SVI_CTGY,Series_Complete_Pop_Pct_SVI,Series_Complete_12PlusPop_Pct_SVI,Series_Complete_18PlusPop_Pct_SVI,Series_Complete_65PlusPop_Pct_SVI
0,08/11/2021,13095,32,Dougherty County,GA,15.2,13401,13401.0,18.1,12517,...,0.0,,0.0,,0.0,High,Low VC/High SVI,Low VC/High SVI,Low VC/High SVI,Low VC/High SVI
1,08/11/2021,08027,32,Custer County,CO,39.4,1995,1995.0,43.4,1979,...,46.8,2127.0,48.7,1158.0,69.2,Low,Low-Mod VC/Low SVI,Mod-High VC/Low SVI,Mod-High VC/Low SVI,Mod-High VC/Low SVI
2,08/11/2021,05127,32,Scott County,AR,30.2,3102,3099.0,35.4,3028,...,45.5,3849.0,48.6,1432.0,64.9,High,Low-Mod VC/High SVI,Low-Mod VC/High SVI,Low-Mod VC/High SVI,Mod-High VC/High SVI
3,08/11/2021,16071,32,Oneida County,ID,30.2,1367,,,1366,...,,1540.0,47.4,666.0,71.9,Low-Mod,Low-Mod VC/Low-Mod SVI,Low-Mod VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI,Mod-High VC/Low-Mod SVI
4,08/11/2021,28037,32,Franklin County,MS,32.2,2483,2483.0,37.5,2446,...,45.0,2848.0,47.7,1080.0,67.4,Mod-High,Low-Mod VC/Mod-High SVI,Low-Mod VC/Mod-High SVI,Mod-High VC/Mod-High SVI,Mod-High VC/Mod-High SVI
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
793997,12/13/2020,53003,51,Asotin County,WA,0.0,0,0.0,0.0,0,...,0.0,,0.0,,0.0,Low-Mod,,,,
793998,12/13/2020,46127,51,Union County,SD,0.0,0,0.0,0.0,0,...,0.0,,0.0,,0.0,Low,,,,
793999,12/13/2020,48317,51,Martin County,TX,0.0,0,0.0,0.0,0,...,0.0,,0.0,,0.0,Low-Mod,,,,
794000,12/13/2020,6019,51,Fresno County,CA,0.0,0,0.0,0.0,0,...,0.0,,0.0,,0.0,High,,,,


In [19]:
vaccines.columns

Index(['Date', 'FIPS', 'MMWR_week', 'Recip_County', 'Recip_State',
       'Series_Complete_Pop_Pct', 'Series_Complete_Yes',
       'Series_Complete_12Plus', 'Series_Complete_12PlusPop_Pct',
       'Series_Complete_18Plus', 'Series_Complete_18PlusPop_Pct',
       'Series_Complete_65Plus', 'Series_Complete_65PlusPop_Pct',
       'Completeness_pct', 'Administered_Dose1_Recip',
       'Administered_Dose1_Pop_Pct', 'Administered_Dose1_Recip_12Plus',
       'Administered_Dose1_Recip_12PlusPop_Pct',
       'Administered_Dose1_Recip_18Plus',
       'Administered_Dose1_Recip_18PlusPop_Pct',
       'Administered_Dose1_Recip_65Plus',
       'Administered_Dose1_Recip_65PlusPop_Pct', 'SVI_CTGY',
       'Series_Complete_Pop_Pct_SVI', 'Series_Complete_12PlusPop_Pct_SVI',
       'Series_Complete_18PlusPop_Pct_SVI',
       'Series_Complete_65PlusPop_Pct_SVI'],
      dtype='object')

In [20]:
# Drop irrelevant columns
vaccines.drop(vaccines.columns.difference(['Date','FIPS','Recip_County','Recip_State','Series_Complete_Yes']), 1, inplace=True)
vaccines

Unnamed: 0,Date,FIPS,Recip_County,Recip_State,Series_Complete_Yes
0,08/11/2021,13095,Dougherty County,GA,13401
1,08/11/2021,08027,Custer County,CO,1995
2,08/11/2021,05127,Scott County,AR,3102
3,08/11/2021,16071,Oneida County,ID,1367
4,08/11/2021,28037,Franklin County,MS,2483
...,...,...,...,...,...
793997,12/13/2020,53003,Asotin County,WA,0
793998,12/13/2020,46127,Union County,SD,0
793999,12/13/2020,48317,Martin County,TX,0
794000,12/13/2020,6019,Fresno County,CA,0


In [21]:
vaccines['Date'] = pd.to_datetime(vaccines['Date'], infer_datetime_format=True)

In [22]:
# Find the end date of the month for every date
vaccines['End Date'] = pd.to_datetime(vaccines['Date'], infer_datetime_format=True) + MonthEnd(0)
vaccines

Unnamed: 0,Date,FIPS,Recip_County,Recip_State,Series_Complete_Yes,End Date
0,2021-08-11,13095,Dougherty County,GA,13401,2021-08-31
1,2021-08-11,08027,Custer County,CO,1995,2021-08-31
2,2021-08-11,05127,Scott County,AR,3102,2021-08-31
3,2021-08-11,16071,Oneida County,ID,1367,2021-08-31
4,2021-08-11,28037,Franklin County,MS,2483,2021-08-31
...,...,...,...,...,...,...
793997,2020-12-13,53003,Asotin County,WA,0,2020-12-31
793998,2020-12-13,46127,Union County,SD,0,2020-12-31
793999,2020-12-13,48317,Martin County,TX,0,2020-12-31
794000,2020-12-13,6019,Fresno County,CA,0,2020-12-31


In [23]:
# Filter rows for the last date of the month
vaccines = vaccines[vaccines['Date'] == vaccines['End Date']]
vaccines

Unnamed: 0,Date,FIPS,Recip_County,Recip_State,Series_Complete_Yes,End Date
36102,2021-07-31,16041,Franklin County,ID,4011,2021-07-31
36103,2021-07-31,51185,Tazewell County,VA,10118,2021-07-31
36104,2021-07-31,30087,Rosebud County,MT,5363,2021-07-31
36105,2021-07-31,46083,Lincoln County,SD,22037,2021-07-31
36106,2021-07-31,49017,Garfield County,UT,2162,2021-07-31
...,...,...,...,...,...,...
735163,2020-12-31,53005,Benton County,WA,0,2020-12-31
735164,2020-12-31,20157,Republic County,KS,0,2020-12-31
735165,2020-12-31,48063,Camp County,TX,0,2020-12-31
735166,2020-12-31,48429,Stephens County,TX,0,2020-12-31


In [24]:
vaccines['Date'].unique()

array(['2021-07-31T00:00:00.000000000', '2021-06-30T00:00:00.000000000',
       '2021-05-31T00:00:00.000000000', '2021-04-30T00:00:00.000000000',
       '2021-03-31T00:00:00.000000000', '2021-02-28T00:00:00.000000000',
       '2021-01-31T00:00:00.000000000', '2020-12-31T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [25]:
# Parse months as columns 
vaccines = vaccines.set_index(['FIPS','Recip_County','Recip_State','Date'])['Series_Complete_Yes'].unstack()
vaccines

Unnamed: 0_level_0,Unnamed: 1_level_0,Date,2020-12-31,2021-01-31,2021-02-28,2021-03-31,2021-04-30,2021-05-31,2021-06-30,2021-07-31
FIPS,Recip_County,Recip_State,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
01001,Autauga County,AL,0.0,439.0,1919.0,5091.0,9585.0,12227.0,13813.0,14671.0
01003,Baldwin County,AL,0.0,2817.0,16356.0,32263.0,49335.0,59021.0,67145.0,70605.0
01005,Barbour County,AL,0.0,121.0,1027.0,2584.0,4031.0,4936.0,6086.0,6865.0
01007,Bibb County,AL,0.0,188.0,1319.0,2148.0,3245.0,3895.0,4708.0,5092.0
01009,Blount County,AL,0.0,367.0,2479.0,4327.0,7405.0,9413.0,10654.0,11231.0
...,...,...,...,...,...,...,...,...,...,...
UNK,Unknown County,VT,17.0,4089.0,12367.0,27902.0,58670.0,90896.0,107199.0,110221.0
UNK,Unknown County,WA,77.0,7882.0,28448.0,55233.0,106183.0,143210.0,161016.0,168345.0
UNK,Unknown County,WI,42.0,5236.0,13056.0,24780.0,58439.0,77947.0,85447.0,89189.0
UNK,Unknown County,WV,6.0,32326.0,94673.0,154901.0,253455.0,279854.0,292196.0,295907.0


In [26]:
# Calcuate the difference between each consecutive month to get deaths per month
#vaccines = vaccines.diff(axis=1)
#vaccines

In [28]:
# Convert the output to csv - keep the cumulative vaccines
change_dir('output')
vaccines.to_csv("vaccines.csv")