In [1]:
import pandas as pd

In [2]:
# setup plotting
%matplotlib inline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import cufflinks as cf
cf.go_offline()

In [3]:
data = pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv",
                   parse_dates=['date'],
                   usecols = ['state', 'county', 'date', 'cases', 'deaths'],
                   index_col=['state', 'county', 'date'])
data = data.sort_index()  # avoid PerformanceWarning:indexing past lexsort depth may impact performance.
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,cases,deaths
state,county,date,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,Autauga,2020-03-24,1,0
Alabama,Autauga,2020-03-25,4,0
Alabama,Autauga,2020-03-26,6,0
Alabama,Autauga,2020-03-27,6,0
Alabama,Autauga,2020-03-28,6,0
...,...,...,...,...
Wyoming,Washakie,2020-04-05,4,0
Wyoming,Washakie,2020-04-06,4,0
Wyoming,Washakie,2020-04-07,4,0
Wyoming,Washakie,2020-04-08,4,0


In [4]:
# New Cases: Difference between current day and prior day total reported cases
data['new cases'] = data.groupby(['state', 'county'])['cases'].diff()

In [5]:
# Recovered Cases: Assuming 14 day recovery period, total cases reported 14 days prior less any deaths.
recovery_period = 14  # days
shifted_cases = data.groupby(['state', 'county'])['cases'].shift(recovery_period)
#shifted_cases
data['recovered cases'] = data.apply(lambda row: shifted_cases[row.name] - row['deaths'], axis=1)

In [6]:
# Active Cases: Difference between current day total reported cases less reported deaths and recovered cases.
data['active cases'] = data.apply(lambda row: row['cases'] - row['deaths'] - row['recovered cases'], axis=1)

In [7]:
state = 'Texas'
county = 'Travis'

In [8]:
county_data = data.loc[(state, county)]
county_data

Unnamed: 0_level_0,cases,deaths,new cases,recovered cases,active cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-03-13,4,0,,,
2020-03-14,4,0,0.0,,
2020-03-15,6,0,2.0,,
2020-03-16,6,0,0.0,,
2020-03-17,6,0,0.0,,
2020-03-18,6,0,0.0,,
2020-03-19,7,0,1.0,,
2020-03-20,21,0,14.0,,
2020-03-21,62,0,41.0,,
2020-03-22,79,0,17.0,,


In [9]:
county_data.iplot(title=f"{county} County, {state} COVID-19")

In [10]:
data_by_state = data.groupby(['state', 'date']).sum()
data_by_state

Unnamed: 0_level_0,Unnamed: 1_level_0,cases,deaths,new cases,recovered cases,active cases
state,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,2020-03-13,6,0,0.0,0.0,0.0
Alabama,2020-03-14,12,0,5.0,0.0,0.0
Alabama,2020-03-15,23,0,8.0,0.0,0.0
Alabama,2020-03-16,29,0,6.0,0.0,0.0
Alabama,2020-03-17,39,0,8.0,0.0,0.0
...,...,...,...,...,...,...
Wyoming,2020-04-05,200,0,11.0,26.0,140.0
Wyoming,2020-04-06,213,0,13.0,28.0,147.0
Wyoming,2020-04-07,221,0,8.0,37.0,151.0
Wyoming,2020-04-08,200,0,-21.0,49.0,126.0


In [11]:
state_data = data_by_state.loc[state]
state_data

Unnamed: 0_level_0,cases,deaths,new cases,recovered cases,active cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-12,1,0,0.0,0.0,0.0
2020-02-13,2,0,1.0,0.0,0.0
2020-02-14,2,0,0.0,0.0,0.0
2020-02-15,2,0,0.0,0.0,0.0
2020-02-16,2,0,0.0,0.0,0.0
2020-02-17,2,0,0.0,0.0,0.0
2020-02-18,2,0,0.0,0.0,0.0
2020-02-19,2,0,0.0,0.0,0.0
2020-02-20,2,0,0.0,0.0,0.0
2020-02-21,4,0,2.0,0.0,0.0


In [12]:
state_data.iplot(title=f"{state} COVID-19")

In [13]:
us_data = data.groupby('date').sum()
us_data

Unnamed: 0_level_0,cases,deaths,new cases,recovered cases,active cases
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-21,1,0,0.0,0.0,0.0
2020-01-22,1,0,0.0,0.0,0.0
2020-01-23,1,0,0.0,0.0,0.0
2020-01-24,2,0,0.0,0.0,0.0
2020-01-25,3,0,0.0,0.0,0.0
...,...,...,...,...,...
2020-04-05,336410,9661,25523.0,24314.0,293735.0
2020-04-06,366362,11709,29248.0,32713.0,310570.0
2020-04-07,397752,12959,33514.0,41187.0,334545.0
2020-04-08,429283,14803,31464.0,54040.0,353657.0


In [14]:
us_data.iplot(title=f"United States COVID-19")