In [1]:
import pandas as pd

In [2]:
# setup plotting
import altair as alt

def line_chart(data, x, y, color, title):
    """A line chart with tooltips"""
    line = alt.Chart(data).mark_line().encode(x=x, y=y, color=color)

    # Create a selection that chooses the nearest point & selects based on x-value
    nearest = alt.selection(type='single', nearest=True, on='mouseover',
                        fields=[x], empty='none')

    # Transparent selectors across the chart. This is what tells us
    # the x-value of the cursor
    selectors = alt.Chart(data).mark_point().encode(
        x=x,
        opacity=alt.value(0),
    ).add_selection(
        nearest
    )

    # Draw points on the line, and highlight based on selection
    points = line.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0))
    )

    # Draw text labels near the points, and highlight based on selection
    text = line.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(nearest, y, alt.value(' '))
    )

    # Draw a rule at the location of the selection
    rules = alt.Chart(data).mark_rule(color='gray').encode(
        x=x,
    ).transform_filter(
        nearest
    )

    # Put the five layers into a chart and bind the data
    return alt.layer(
        line, selectors, points, rules, text
    ).properties(
        title=title,
        width=600, height=300
    )

In [3]:
data = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
                   parse_dates=['date'],
                   usecols = ['state', 'county', 'date', 'cases', 'deaths'],
                   index_col=['state', 'county', 'date'])
data = data.sort_index()  # avoid PerformanceWarning:indexing past lexsort depth may impact performance.
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cases,deaths
state,county,date,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,Autauga,2020-03-24,1,0
Alabama,Autauga,2020-03-25,4,0
Alabama,Autauga,2020-03-26,6,0
Alabama,Autauga,2020-03-27,6,0
Alabama,Autauga,2020-03-28,6,0
...,...,...,...,...
Wyoming,Washakie,2020-04-24,4,0
Wyoming,Washakie,2020-04-25,4,0
Wyoming,Washakie,2020-04-26,4,0
Wyoming,Washakie,2020-04-27,4,0


In [4]:
# New Cases: Difference between current day and prior day total reported cases
data['new cases'] = data.groupby(['state', 'county'])['cases'].diff()

In [5]:
# Recovered Cases: Assuming 14 day recovery period, total cases reported 14 days prior less any deaths.
recovery_period = 14  # days
shifted_cases = data.groupby(['state', 'county'])['cases'].shift(recovery_period)
#shifted_cases
data['recovered cases'] = data.apply(lambda row: shifted_cases[row.name] - row['deaths'], axis=1)

In [6]:
# Active Cases: Difference between current day total reported cases less reported deaths and recovered cases.
data['active cases'] = data.apply(lambda row: row['cases'] - row['deaths'] - row['recovered cases'], axis=1)

In [7]:
state = 'Texas'
county = 'Travis'

In [8]:
county_data = data.loc[(state, county)]
county_data

Unnamed: 0_level_0,cases,deaths,new cases,recovered cases,active cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-13,4,0,,,
2020-03-14,4,0,0.0,,
2020-03-15,6,0,2.0,,
2020-03-16,6,0,0.0,,
2020-03-17,6,0,0.0,,
2020-03-18,6,0,0.0,,
2020-03-19,7,0,1.0,,
2020-03-20,21,0,14.0,,
2020-03-21,62,0,41.0,,
2020-03-22,79,0,17.0,,


In [9]:
long_form_county_data = county_data.reset_index().melt('date', var_name='indicator', value_name='count')
long_form_county_data

Unnamed: 0,date,indicator,count
0,2020-03-13,cases,4.0
1,2020-03-14,cases,4.0
2,2020-03-15,cases,6.0
3,2020-03-16,cases,6.0
4,2020-03-17,cases,6.0
...,...,...,...
230,2020-04-24,active cases,737.0
231,2020-04-25,active cases,706.0
232,2020-04-26,active cases,638.0
233,2020-04-27,active cases,608.0


In [10]:
line_chart(long_form_county_data, x='date', y='count:Q', color='indicator:N', title=f"{county} County, {state} COVID-19")

In [11]:
data_by_state = data.groupby(['state', 'date']).sum()
data_by_state

Unnamed: 0_level_0,Unnamed: 1_level_0,cases,deaths,new cases,recovered cases,active cases
state,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,2020-03-13,6,0,0.0,0.0,0.0
Alabama,2020-03-14,12,0,5.0,0.0,0.0
Alabama,2020-03-15,23,0,8.0,0.0,0.0
Alabama,2020-03-16,29,0,6.0,0.0,0.0
Alabama,2020-03-17,39,0,8.0,0.0,0.0
...,...,...,...,...,...,...
Wyoming,2020-04-24,349,7,17.0,232.0,109.0
Wyoming,2020-04-25,362,7,13.0,254.0,100.0
Wyoming,2020-04-26,371,7,9.0,263.0,101.0
Wyoming,2020-04-27,389,7,18.0,268.0,114.0


In [12]:
state_data = data_by_state.loc[state]
state_data

Unnamed: 0_level_0,cases,deaths,new cases,recovered cases,active cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-12,1,0,0.0,0.0,0.0
2020-02-13,2,0,1.0,0.0,0.0
2020-02-14,2,0,0.0,0.0,0.0
2020-02-15,2,0,0.0,0.0,0.0
2020-02-16,2,0,0.0,0.0,0.0
...,...,...,...,...,...
2020-04-24,23650,625,942.0,11666.0,11304.0
2020-04-25,24494,662,844.0,12547.0,11225.0
2020-04-26,25206,676,712.0,13102.0,11364.0
2020-04-27,25960,699,753.0,13790.0,11403.0


In [13]:
long_form_state_data = state_data.reset_index().melt('date', var_name='indicator', value_name='count')
long_form_state_data

Unnamed: 0,date,indicator,count
0,2020-02-12,cases,1.0
1,2020-02-13,cases,2.0
2,2020-02-14,cases,2.0
3,2020-02-15,cases,2.0
4,2020-02-16,cases,2.0
...,...,...,...
380,2020-04-24,active cases,11304.0
381,2020-04-25,active cases,11225.0
382,2020-04-26,active cases,11364.0
383,2020-04-27,active cases,11403.0


In [14]:
line_chart(long_form_state_data, x='date', y='count:Q', color='indicator:N', title=f"{state} COVID-19")

In [15]:
us_data = data.groupby('date').sum()
us_data

Unnamed: 0_level_0,cases,deaths,new cases,recovered cases,active cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-21,1,0,0.0,0.0,0.0
2020-01-22,1,0,0.0,0.0,0.0
2020-01-23,1,0,0.0,0.0,0.0
2020-01-24,2,0,0.0,0.0,0.0
2020-01-25,3,0,0.0,0.0,0.0
...,...,...,...,...,...
2020-04-24,904606,46503,37269.0,449910.0,407403.0
2020-04-25,938760,48334,34134.0,479640.0,409879.0
2020-04-26,965217,49464,26474.0,505477.0,409887.0
2020-04-27,988256,50836,23012.0,529845.0,407188.0


In [16]:
long_form_us_data = us_data.reset_index().melt('date', var_name='indicator', value_name='count')
long_form_us_data

Unnamed: 0,date,indicator,count
0,2020-01-21,cases,1.0
1,2020-01-22,cases,1.0
2,2020-01-23,cases,1.0
3,2020-01-24,cases,2.0
4,2020-01-25,cases,3.0
...,...,...,...
490,2020-04-24,active cases,407403.0
491,2020-04-25,active cases,409879.0
492,2020-04-26,active cases,409887.0
493,2020-04-27,active cases,407188.0


In [17]:
line_chart(long_form_us_data, x='date', y='count:Q', color='indicator:N', title="United States COVID-19")