
## CDC case counts
Data is scraped from https://www.cdc.gov/coronavirus/2019-ncov/cases-updates/cases-in-us.html

In [1]:
import pandas as pd

import plotly.graph_objects as go
import plotly.express as px

In [2]:
# The CSV read here is generated beforehand by running `python data.py`.
# Both date columns are parsed into datetimes at load time.
df = pd.read_csv(
    '../data/cases_by_onset.csv',
    parse_dates=['date_time', 'report_date'],
)

In [3]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

Unnamed: 0,date_time,cases,report_date
0,2020-01-12,0,2020-04-02
1,2020-01-13,0,2020-04-02
2,2020-01-14,3,2020-04-02
3,2020-01-15,0,2020-04-02
4,2020-01-16,1,2020-04-02


## Distribution of case delays

In [4]:
# Compare the counts published on two consecutive report dates (April 8 and
# April 9) to see how many cases were added for each onset date in one day.
report_apr8 = pd.Timestamp('2020-04-08')
report_apr9 = pd.Timestamp('2020-04-09')
onset_columns = ['date_time', 'cases']

df1 = df.loc[df['report_date'] == report_apr8, onset_columns]
df2 = df.loc[df['report_date'] == report_apr9, onset_columns]

# Inner-join the two snapshots on onset date, then derive:
#   delay_days - days between the onset date and the April 9 report
#   new_cases  - cases added for that onset date between the two reports
df_delay = df1.merge(df2, on='date_time', suffixes=('_Apr8', '_Apr9')).assign(
    delay_days=lambda merged: (report_apr9 - merged['date_time']).dt.days,
    new_cases=lambda merged: merged['cases_Apr9'] - merged['cases_Apr8'],
)

# Evaluated but not displayed: only the cell's last expression is rendered.
df_delay['new_cases'].sum()
df_delay.head()

Unnamed: 0,date_time,cases_Apr8,cases_Apr9,delay_days,new_cases
0,2020-01-12,0,0,88,0
1,2020-01-13,0,0,87,0
2,2020-01-14,3,4,86,1
3,2020-01-15,2,2,85,0
4,2020-01-16,1,1,84,0


In [5]:
# Expand the per-onset-date counts into one entry per newly reported case, so
# that `srs` holds the delay (in days) of every individual new case.
# Series.repeat does this in a single vectorised step rather than growing a
# Python list one row at a time.
# Counts are clipped at zero so that onset dates whose count was revised
# downward contribute no entries (negative repeat counts are not valid).
repeat_counts = df_delay['new_cases'].clip(lower=0).to_numpy()

# Series.repeat keeps the column name ('delay_days'); the index is reset so the
# result is numbered 0..n-1 like a freshly built Series.
srs = df_delay['delay_days'].repeat(repeat_counts).reset_index(drop=True)
srs.describe()

count    74834.000000
mean        15.047291
std          8.000357
min          1.000000
25%          7.000000
50%         14.000000
75%         22.000000
max         86.000000
Name: delay_days, dtype: float64

In [10]:
# Fraction of newly reported cases whose delay exceeds 11 days
# (11 days is the stated delay referred to in the final chart's title).
srs.gt(11).mean()

0.5791217895608948

In [9]:
# Histogram of the per-case delays stored in `srs`.
histogram_layout = dict(
    title_text='Distribution of delay (report - onset) of cases reported on April 9',
    yaxis_title_text='Cases',
    xaxis_title_text='Delay (days)',
    bargap=0.2,       # spacing between bars at adjacent x positions
    bargroupgap=0.1,  # spacing between bars sharing an x position
)
fig = px.histogram(srs, x='delay_days')
fig.update_layout(**histogram_layout)
fig.show()

## Make CDC plot, overlaying future data

In [7]:
# Compare the counts published one week apart (April 2 and April 9) to see how
# many cases were added for each onset date over that week.
report_apr2 = pd.Timestamp('2020-04-02')
report_apr9 = pd.Timestamp('2020-04-09')
onset_columns = ['date_time', 'cases']

df1 = df.loc[df['report_date'] == report_apr2, onset_columns]
df2 = df.loc[df['report_date'] == report_apr9, onset_columns]

# Inner-join the two snapshots on onset date; new_cases is the number of cases
# added for that onset date between the two reports.
df_delay = df1.merge(df2, on='date_time', suffixes=('_Apr2', '_Apr9')).assign(
    new_cases=lambda merged: merged['cases_Apr9'] - merged['cases_Apr2'],
)

# Evaluated but not displayed: only the cell's last expression is rendered.
df_delay['new_cases'].sum()
df_delay.head()

Unnamed: 0,date_time,cases_Apr2,cases_Apr9,new_cases
0,2020-01-12,0,0,0
1,2020-01-13,0,0,0
2,2020-01-14,3,4,1
3,2020-01-15,0,2,2
4,2020-01-16,1,1,0


In [8]:
# Restrict the chart to onset dates from 2020-02-20 onward.
recent_mask = df_delay['date_time'] >= '2020-02-20'
d = df_delay.loc[recent_mask]

# Two stacked bar series per onset date: the count as of the April 2 report,
# and on top of it the cases added by the April 9 report.
bars = [
    go.Bar(name='Cases reported by April 2', x=d['date_time'], y=d['cases_Apr2']),
    go.Bar(name='Additional reported by April 9', x=d['date_time'], y=d['new_cases']),
]
fig = go.Figure(data=bars)

# Vertical gray marker line, drawn from zero up to the largest April 9 count.
# NOTE(review): 2020-03-23 is 10 days before the 2020-04-02 report date, while
# the title refers to an 11-day delay -- confirm the intended cutoff date.
marker_date = pd.to_datetime('2020-03-23')
fig.add_shape(dict(
    type="line",
    x0=marker_date,
    y0=0,
    x1=marker_date,
    y1=d['cases_Apr9'].max(),
    line=dict(color="Gray", width=3),
))
# (A text label for the marker line, "11 day stated delay", was previously
# sketched as a go.Scatter trace in text mode and is currently disabled.)

fig.update_layout(
    title_text='Why the CDC stated delay of 11 days is insufficient',
    yaxis_title_text='Cases',
    bargap=0.2,       # spacing between bars at adjacent x positions
    bargroupgap=0.1,  # spacing between bars sharing an x position
    barmode='stack',
)
fig.show()