<p>Module:  pull_hhs_national_10_years.py</p>
<p>Version:  March 31, 2018</p>
<p>Input:  HHS vaccination data for six years (2012-2017), pulled from the HHS API.</p>
<p>Output:  HHS vaccinations JSON file covering those six years.</p>

In [3]:
# Dependencies
import requests
import json
import numpy as np
import pandas as pd

In [4]:
# Build one HHS dataframe from multiple API calls, one call per year.
# The per-year frames are collected in a list and concatenated once at the
# end: DataFrame.append was removed in pandas 2.0, and calling it inside the
# loop re-copied the whole accumulated frame on every iteration.
year_list = [2012, 2013, 2014, 2015, 2016, 2017]
yearly_frames = []
for year_entry in year_list:
    url = "https://fluvaccineapi.hhs.gov/api/v2/vaccination_rates/trends/{}/national.json?ethnicity=T&medicare_status=A".format(year_entry)
    # A timeout keeps the notebook from hanging forever on a stalled
    # connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    response.raise_for_status()
    national_dict = response.json()
    df = pd.DataFrame(national_dict)
    yearly_frames.append(df)

# ignore_index=True renumbers the rows 0..n-1 across all years.
df1 = pd.concat(yearly_frames, ignore_index=True)

In [5]:
# Preview the first five rows of the freshly downloaded HHS dataframe.
df1.head(n=5)

Unnamed: 0,count,disparity,ethnicity,fips,medicare_status,name,percentage,short_name,week,week_start,year
0,26043833,0.0,T,0,A,National,0.512392,US,43,01JUN,2012
1,26043833,0.0,T,0,A,National,0.512401,US,44,08JUN,2012
2,26043833,0.0,T,0,A,National,0.512409,US,45,15JUN,2012
3,26043833,0.0,T,0,A,National,0.512415,US,46,22JUN,2012
4,26043833,0.0,T,0,A,National,0.512419,US,47,29JUN,2012


In [6]:
# Persist the raw HHS data to disk so later steps can restart from this point.
df1.to_json(path_or_buf='../data/raw_hhs_data.json')

In [7]:
# Reload the file written above into a new dataframe.
# This serves as a checkpoint in the process.
# NOTE(review): the preview of df2 lists row labels as 0, 1, 10, 100, 101
# after this round trip, so do not rely on row order here; the data is
# sorted by year and week before it is analysed.
df2 = pd.read_json(path_or_buf='../data/raw_hhs_data.json')

In [8]:
# Preview the first five rows of the reloaded HHS data.
df2.head(n=5)

Unnamed: 0,count,disparity,ethnicity,fips,medicare_status,name,percentage,short_name,week,week_start,year
0,26043833,0,T,0,A,National,0.512392,US,43,01JUN,2012
1,26043833,0,T,0,A,National,0.512401,US,44,08JUN,2012
10,27278865,0,T,0,A,National,0.000349,US,1,10AUG,2013
100,28830312,0,T,0,A,National,0.501679,US,39,02MAY,2014
101,28830312,0,T,0,A,National,0.501692,US,40,09MAY,2014


In [9]:
# Keep only the columns the analysis needs; everything else is dropped.
needed_columns = ['count', 'percentage', 'year', 'week']
df3 = df2[needed_columns]

In [10]:
# Give the columns the names used by the companion data set
# (the original note says "CCD data" - presumably CDC; TODO confirm).
column_map = {"count": "vaccinations", "percentage": "vac_percent"}
df4 = df3.rename(columns=column_map)

In [11]:
# Order the HHS rows chronologically (year first, then week) for analysis.
sort_keys = ['year', 'week']
df5 = df4.sort_values(by=sort_keys)

In [12]:
# Preview the first five rows of the sorted HHS data.
df5.head(n=5)

Unnamed: 0,vaccinations,vac_percent,year,week
0,26043833,0.512392,2012,43
1,26043833,0.512401,2012,44
2,26043833,0.512409,2012,45
3,26043833,0.512415,2012,46
4,26043833,0.512419,2012,47


In [13]:
# Preview the last five rows of the sorted HHS data.
df5.tail(n=5)

Unnamed: 0,vaccinations,vac_percent,year,week
238,31422333,0.458187,2017,21
239,31422333,0.459196,2017,22
240,31422333,0.459847,2017,23
241,31422333,0.459873,2017,24
242,31422333,0.459873,2017,25


In [14]:
# Create the "vaccinations percent by week" column, initialised to 0.0 for
# every row; the real values are filled in by a later step.
df5.loc[:, 'vac_pct_week'] = 0.0

In [15]:
# Convert the vaccination figure from a ratio (0-1) to a percentage (0-100).
df5['vac_percent'] = df5['vac_percent'].mul(100)

In [16]:
# Preview the first five rows after the percentage conversion.
df5.head(n=5)

Unnamed: 0,vaccinations,vac_percent,year,week,vac_pct_week
0,26043833,51.2392,2012,43,0.0
1,26043833,51.2401,2012,44,0.0
2,26043833,51.2409,2012,45,0.0
3,26043833,51.2415,2012,46,0.0
4,26043833,51.2419,2012,47,0.0


In [17]:
# Calculate the vaccination rate percentage per week by taking the
# difference in the cumulative vaccination rate between consecutive weeks.
#
# The previous week is found by row position (shift) rather than by
# subtracting 1 from the index label: label arithmetic only works while the
# labels happen to be consecutive integers in year/week order, and raises
# KeyError or picks the wrong row otherwise. df5 is already sorted by year
# and week at this point, so the row above is the preceding week.
previous_percent = df5['vac_percent'].shift(1)

# The very first row has no predecessor; its weekly change stays 0.0.
weekly_change = (df5['vac_percent'] - previous_percent).fillna(0.0)

# Week 1 restarts the cumulative count, so that week's change is simply the
# cumulative value itself.
starts_new_count = df5['week'] == 1
df5['vac_pct_week'] = weekly_change.where(~starts_new_count, df5['vac_percent'])

In [19]:
# Preview the last five rows to check the calculated weekly vaccination rate.
df5.tail(n=5)

Unnamed: 0,vaccinations,vac_percent,year,week,vac_pct_week
238,31422333,45.8187,2017,21,0.2095
239,31422333,45.9196,2017,22,0.1009
240,31422333,45.9847,2017,23,0.0651
241,31422333,45.9873,2017,24,0.0026
242,31422333,45.9873,2017,25,0.0


In [20]:
# Write the national-level HHS data to a JSON file.
# NOTE(review): the file name says "10_years", but the download step only
# requests six years (2012-2017).
df5.to_json(path_or_buf='../data/hhs_national_10_years.json')

In [21]:
# Load the file just written back into a dataframe as a sanity check.
df6 = pd.read_json(path_or_buf='../data/hhs_national_10_years.json')

In [22]:
# Put the reloaded rows back into year/week order so they are easy to inspect.
df7 = df6.sort_values(by=['year', 'week'])

In [23]:
# Preview the first five rows of the validated HHS data.
df7.head(n=5)

Unnamed: 0,vac_pct_week,vac_percent,vaccinations,week,year
0,0.0,51.2392,26043833,43,2012
1,0.0009,51.2401,26043833,44,2012
2,0.0008,51.2409,26043833,45,2012
3,0.0006,51.2415,26043833,46,2012
4,0.0004,51.2419,26043833,47,2012


In [24]:
# Preview the last five rows of the validated HHS data.
df7.tail(n=5)

Unnamed: 0,vac_pct_week,vac_percent,vaccinations,week,year
238,0.2095,45.8187,31422333,21,2017
239,0.1009,45.9196,31422333,22,2017
240,0.0651,45.9847,31422333,23,2017
241,0.0026,45.9873,31422333,24,2017
242,0.0,45.9873,31422333,25,2017
