In [None]:
%matplotlib inline

import requests
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from time import sleep
import seaborn as sns
sns.set()

In [None]:
# Folder that holds the weekly CDC snapshot CSVs used by the rest of the notebook.
data_path = r'./data/cdcpniweekly/'
# exist_ok=True creates the folder (and any missing parents) when it is absent
# and is a no-op when it is already there, avoiding the check-then-create race
# of a separate os.path.exists() test.
os.makedirs(data_path, exist_ok=True)

Uncomment the download cell below (the one that loops over `years` and `weeks`) to pull the data down to your local machine.

**Please don't repeatedly pull down the data, as it creates unnecessary work for the CDC's servers.** I have included the result of running this code in the repo, so unless you want to grab more recent snapshots down the road, there's no point in running it.

In [None]:
# Year and week-number ranges iterated by the (commented-out) download loop:
# years 2009 through 2020 and weeks 1 through 53, both inclusive.
first_year, last_year = 2009, 2020
years = range(first_year, last_year + 1)
weeks = range(1, 53 + 1)

In [None]:
# Download loop (kept commented out so running the notebook does not hit the
# CDC servers). For each (year, week) pair it requests the weekly
# `nchsdata<WW>.csv` file from the CDC flu weekly archive and, when the
# response status is OK, writes the body to ./data/cdcpniweekly/<year>-<WW>.csv.
# Example of the URL format:
# https://www.cdc.gov/flu/weekly/weeklyarchives2017-2018/data/nchsdata42.csv
# NOTE(review): `next` below is a bare expression (a no-op), not `continue`;
# the out-of-range weeks are skipped only because the download lives in the
# `else` branch.
# for year in years:
#     for week in weeks:
#         if (year == 2009 and week < 40) or (year == 2020 and week > 13):
#             next
#         else: 
#             request_url = ('https://www.cdc.gov/flu/weekly/weeklyarchives' +
#                            str(year - 1) + '-' + str(year) +
#                            '/data/nchsdata' + str(week).zfill(2) + '.csv')
#             print(request_url)
#             request = requests.get(request_url)
#             isStatusOK = request.status_code == requests.codes.ok
#             if isStatusOK:
#                 decoded_content = request.content.decode('utf-8')
#                 with open('./data/cdcpniweekly/' + str(year) + '-' + str(week).zfill(2) + '.csv', 'w') as my_data_file:
#                     print('isOK')
#                     my_data_file.write(decoded_content)

In [None]:
# Load every weekly snapshot CSV found under `data_path` and stack them into
# one long frame, tagging each row with the snapshot (year + week) it came from.
weekly_reports = []
path = data_path  # use your path
# glob returns matches in arbitrary order; sorting keeps the row order of
# `combined_data` (and of the CSV written from it) identical on every run.
all_files = sorted(glob.glob(os.path.join(path, "*.csv")))
for filename in all_files:
    # Snapshot files are named "<year>-<week>.csv". Parse the file name itself
    # instead of slicing the full path at fixed character offsets, which
    # silently yields garbage as soon as `data_path` changes length.
    stem = os.path.splitext(os.path.basename(filename))[0]
    if 'combined' in stem:
        # Skip the combined export that is written into this same folder.
        continue
    year, _, week = stem.partition('-')
    df = pd.read_csv(filename, index_col=None, header=0)
    df['snapshotYear'] = year
    df['snapshotWeek'] = week
    # Snapshot id is the year followed by the zero-padded week, e.g. '201604'.
    df['snapshot'] = year + week
    weekly_reports.append(df)
if not weekly_reports:
    # pd.concat would raise a ValueError on an empty list anyway; raise the
    # same exception type with a message that points at the actual problem.
    raise ValueError("No weekly snapshot CSV files found in " + repr(path))
combined_data = pd.concat(weekly_reports, ignore_index=True)

In [None]:
# Persist the stacked snapshots as combined.csv inside the data folder, using
# to_csv's default options.
combined_csv_path = data_path + 'combined.csv'
combined_data.to_csv(combined_csv_path)

In [None]:
# Distinct snapshot ids present in the combined frame, in order of appearance.
combined_data.loc[:, 'snapshot'].unique()

In [None]:
# Spot-check a single snapshot: show the first rows tagged with id '201604'
# (year 2016, week 04).
snapshot_id = '201604'
data = combined_data.loc[combined_data['snapshot'] == snapshot_id]
data.head(3)

In [None]:
# Same listing of distinct snapshot ids as shown a few cells earlier.
combined_data.loc[:, 'snapshot'].unique()

In [None]:
# Sorted list of distinct snapshot ids. The ids are fixed-width
# "<year><zero-padded week>" strings, so lexicographic order is also
# chronological order.
# sorted() returns the new list directly; the previous `x = lst.sort()` form
# bound a (misspelled) name to None because list.sort() sorts in place.
snapshot_ids = sorted(combined_data['snapshot'].unique())

In [None]:
# One wide table per snapshot id: rows are indexed by 'Week', there is one
# column per 'Year', and the cells hold the 'All Deaths' values (combined with
# pivot_table's default aggregation when a Week/Year pair repeats).
snapshots = {}
for snapshot in snapshot_ids:
    rows_for_snapshot = combined_data[combined_data['snapshot'] == snapshot]
    snapshots[snapshot] = rows_for_snapshot.pivot_table(
        index='Week',
        columns='Year',
        values='All Deaths',
    )

In [None]:
# Scratch preview: a list of twelve empty strings.
["" for _ in range(12)]

In [None]:
# Plot styling shared by `plot_data`.

# Fifteen empty dash specs, passed to seaborn as the `dashes` argument.
dash_styles = [""] * 15

# Twelve colors from the "husl" palette, assigned in order to the years
# 2009 through 2020 (one fixed color per year across all snapshots).
colors = sns.color_palette("husl", 12)
pallete = dict(zip(range(2009, 2021), colors))

# Dark background, large fonts and thick lines for the plots that follow.
sns.set_style("dark")
sns.set_context("notebook", font_scale=2.5, rc={"lines.linewidth": 5})
def plot_data(x):
    """Draw the weekly 'All Deaths' curves, one line per year, for one snapshot.

    Parameters
    ----------
    x : str
        Key into `snapshots`. Its first four characters are shown as the year
        and its last two as the week in the plot title.

    Returns
    -------
    None
        The figure is produced as a side effect.
    """
    # Bound to `grid`, not `plt`: relplot returns a FacetGrid, and reusing the
    # name `plt` would shadow the matplotlib.pyplot module inside this function.
    #
    # `snapshots[x]` is wide-form (indexed by Week, one column per Year), so
    # seaborn takes the line colors from the columns on its own. No `hue=` is
    # passed because seaborn >= 0.11 raises a ValueError when a semantic
    # variable is assigned together with wide-form data.
    #
    # dashes=False requests solid lines for every year without depending on a
    # dash list whose length has to match the number of year columns.
    grid = sns.relplot(kind="line",
                       data=snapshots[x],
                       dashes=False,
                       height=9,
                       aspect=1.5,
                       palette=pallete)
    # Fixed axis limits keep the frames comparable when stepping through snapshots.
    grid.set(xlim=(0, 53),
             ylim=(0, 70000),
             title="All Deaths as of " + x[:4] + ' week ' + x[-2:],
             xlabel='Week of Year',
             ylabel='Death Count')

# Dropdown listing every snapshot id, initially set to the first one, wired to
# `plot_data` through `interact` as its `x` argument.
initial_snapshot = snapshot_ids[0]
w = widgets.Dropdown(options=snapshot_ids, value=initial_snapshot)
interact(plot_data, x=w)

This loop is what creates the animation.

In [None]:
# Step the dropdown through every snapshot id in order; the dropdown is the
# one wired to `plot_data` via `interact`, so each assignment selects the next
# snapshot to draw.
for snapshot_key in snapshot_ids:
    w.value = snapshot_key