# Interactive Plots of COVID-19 Data
This is a notebook to interact with COVID-19 data using [Jupyter](https://jupyter.org/) and [Hvplot](https://hvplot.holoviz.org/). Currently we are focused on data from the US but may expand our analyses in the near future.

## Load Johns Hopkins COVID-19 Data
Here we load the COVID-19 confirmed case data from the [The Center for Systems Science and Engineering (CSSE)](https://systems.jhu.edu) at Johns Hopkins University. The CCSE COVID-19 [GitHub Repo](https://github.com/CSSEGISandData/COVID-19) has more information about these data and their sources.

In [255]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 6)
import hvplot.pandas
import datetime


In [256]:
dr='https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'

In [257]:
tday=datetime.date.today()
tday

datetime.date(2020, 4, 2)

In [258]:
tday=datetime.date.today()
day=datetime.timedelta(days=1)
yday=tday-day*1
fname=yday.strftime('%m-%d-%Y.csv')

In [259]:
src = dr + fname
src

'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-01-2020.csv'

In [260]:
df1 = pd.read_csv(src)
df1

Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
0,45001.0,Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4,0,0,0,"Abbeville, South Carolina, US"
1,22001.0,Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47,1,0,0,"Acadia, Louisiana, US"
2,51001.0,Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7,0,0,0,"Accomack, Virginia, US"
...,...,...,...,...,...,...,...,...,...,...,...,...
2482,,,,West Bank and Gaza,2020-04-01 21:58:34,31.952200,35.233200,134,1,18,115,West Bank and Gaza
2483,,,,Zambia,2020-04-01 21:58:34,-13.133897,27.849332,36,0,0,36,Zambia
2484,,,,Zimbabwe,2020-04-01 21:58:34,-19.015438,29.154857,8,1,0,7,Zimbabwe


In [261]:
dfus=df1[['Combined_Key','Admin2','Province_State','Country_Region','Last_Update', 'Lat', 'Long_','Confirmed']]
dfus=dfus[dfus.Country_Region=='US']
dfus.reset_index(drop=True, inplace=True)
dfus.set_index('Combined_Key', inplace=True)
dfus

Unnamed: 0_level_0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed
Combined_Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7
...,...,...,...,...,...,...,...
"Puerto Rico, US",,Puerto Rico,US,2020-04-01 21:58:49,18.220800,-66.590100,286
"Recovered, US",,Recovered,US,2020-04-01 21:58:49,0.000000,0.000000,0
",Virgin Islands,US",,Virgin Islands,US,2020-04-01 21:58:49,18.335800,-64.896300,30


In [262]:
#dOld=dfus['Last_Update'][0]
dOld=pd.to_datetime(dfus['Last_Update'][0]).date()
dOld

datetime.date(2020, 4, 1)

In [265]:
dfus1=dfus.rename(columns={'Confirmed':dOld})

In [266]:
#dfus1=dfus.copy()
dfus1

Unnamed: 0_level_0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,2020-04-01
Combined_Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7
...,...,...,...,...,...,...,...
"Puerto Rico, US",,Puerto Rico,US,2020-04-01 21:58:49,18.220800,-66.590100,286
"Recovered, US",,Recovered,US,2020-04-01 21:58:49,0.000000,0.000000,0
",Virgin Islands,US",,Virgin Islands,US,2020-04-01 21:58:49,18.335800,-64.896300,30


In [267]:
i=2
dday=tday-day*i
fname2=dday.strftime('%m-%d-%Y.csv')

In [268]:
df2=pd.read_csv(dr+fname2)

In [269]:
fname2

'03-31-2020.csv'

In [270]:
dfus=df2[['Combined_Key','Admin2','Province_State','Country_Region','Last_Update', 'Lat', 'Long_','Confirmed']]
dfus=dfus[dfus.Country_Region=='US']
dfus.reset_index(drop=True, inplace=True)
dfus.set_index('Combined_Key', inplace=True)
dfus

Unnamed: 0_level_0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed
Combined_Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-03-31 23:43:56,34.223334,-82.461707,4
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-03-31 23:43:56,30.295065,-92.414197,39
"Accomack, Virginia, US",Accomack,Virginia,US,2020-03-31 23:43:56,37.767072,-75.632346,7
...,...,...,...,...,...,...,...
"Puerto Rico, US",,Puerto Rico,US,2020-03-31 23:43:56,18.220800,-66.590100,239
"Recovered, US",,Recovered,US,2020-03-31 23:43:56,0.000000,0.000000,0
",Virgin Islands,US",,Virgin Islands,US,2020-03-31 23:43:56,18.335800,-64.896300,30


In [283]:
dNew=pd.to_datetime(dfus['Last_Update'][0]).date()
dNew

datetime.date(2020, 3, 31)

In [284]:
dfus2=dfus.rename(columns={
                     'Admin2':'aNew', 
                     'Province_State': 'psNew',
                     'Country_Region': 'crNew', 
                     'Last_Update': 'luNew',
                     'Lat': 'latNew',
                     'Long_': 'lonNew'})

In [285]:
#dfus2=dfus.copy()
dfus2

Unnamed: 0_level_0,aNew,psNew,crNew,luNew,latNew,lonNew,Confirmed
Combined_Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-03-31 23:43:56,34.223334,-82.461707,4
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-03-31 23:43:56,30.295065,-92.414197,39
"Accomack, Virginia, US",Accomack,Virginia,US,2020-03-31 23:43:56,37.767072,-75.632346,7
...,...,...,...,...,...,...,...
"Puerto Rico, US",,Puerto Rico,US,2020-03-31 23:43:56,18.220800,-66.590100,239
"Recovered, US",,Recovered,US,2020-03-31 23:43:56,0.000000,0.000000,0
",Virgin Islands,US",,Virgin Islands,US,2020-03-31 23:43:56,18.335800,-64.896300,30


In [286]:
dfusc=pd.concat([dfus1,dfus2], axis=1, join='outer')
dfusc

Unnamed: 0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,2020-04-01,aNew,psNew,crNew,luNew,latNew,lonNew,Confirmed
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4.0,Abbeville,South Carolina,US,2020-03-31 23:43:56,34.223334,-82.461707,4.0
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47.0,Acadia,Louisiana,US,2020-03-31 23:43:56,30.295065,-92.414197,39.0
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7.0,Accomack,Virginia,US,2020-03-31 23:43:56,37.767072,-75.632346,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Navajo, Arizona, US",,,,,,,,Navajo,Arizona,US,2020-03-31 23:43:56,35.399771,-110.321898,91.0
"San Juan, New Mexico, US",,,,,,,,San Juan,New Mexico,US,2020-03-31 23:43:56,36.508383,-108.320437,32.0
"Unassigned, Delaware, US",,,,,,,,Unassigned,Delaware,US,2020-03-31 23:43:56,0.000000,0.000000,0.0


In [287]:
#df1.set_index('Combined_Key', inplace=True)
#df2.set_index('Combined_Key', inplace=True)
#dfc=pd.merge(df1, df2, on=['Combined_Key','Combined_Key'])
#dfc

## Now clean the table

In [288]:
dfusc.rename(columns={'Confirmed':dNew}, inplace=True)

In [289]:
dfusc

Unnamed: 0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,2020-04-01,aNew,psNew,crNew,luNew,latNew,lonNew,2020-03-31
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4.0,Abbeville,South Carolina,US,2020-03-31 23:43:56,34.223334,-82.461707,4.0
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47.0,Acadia,Louisiana,US,2020-03-31 23:43:56,30.295065,-92.414197,39.0
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7.0,Accomack,Virginia,US,2020-03-31 23:43:56,37.767072,-75.632346,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Navajo, Arizona, US",,,,,,,,Navajo,Arizona,US,2020-03-31 23:43:56,35.399771,-110.321898,91.0
"San Juan, New Mexico, US",,,,,,,,San Juan,New Mexico,US,2020-03-31 23:43:56,36.508383,-108.320437,32.0
"Unassigned, Delaware, US",,,,,,,,Unassigned,Delaware,US,2020-03-31 23:43:56,0.000000,0.000000,0.0


In [290]:
dfusc.drop(columns={'aNew','psNew', 'crNew', 'luNew', 'lonNew', 'latNew'}, inplace=True)

In [291]:
dfusc

Unnamed: 0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,2020-04-01,2020-03-31
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4.0,4.0
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47.0,39.0
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7.0,7.0
...,...,...,...,...,...,...,...,...
"Navajo, Arizona, US",,,,,,,,91.0
"San Juan, New Mexico, US",,,,,,,,32.0
"Unassigned, Delaware, US",,,,,,,,0.0


## Make a function

In [292]:
dfus1

Unnamed: 0_level_0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,2020-04-01
Combined_Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7
...,...,...,...,...,...,...,...
"Puerto Rico, US",,Puerto Rico,US,2020-04-01 21:58:49,18.220800,-66.590100,286
"Recovered, US",,Recovered,US,2020-04-01 21:58:49,0.000000,0.000000,0
",Virgin Islands,US",,Virgin Islands,US,2020-04-01 21:58:49,18.335800,-64.896300,30


In [354]:
def appendData(dfAll,i,tday):
        print(i)
        #print(id(dfAll))
        #print(dfAll.columns)
        day=datetime.timedelta(days=1)
        dday=yday-day*i
        fname2=dday.strftime('%m-%d-%Y.csv')
        df2=pd.read_csv(dr+fname2)
        dfus=df2[['Combined_Key','Admin2','Province_State','Country_Region','Last_Update', 'Lat', 'Long_','Confirmed']]
        dfus=dfus[dfus.Country_Region=='US']
        dfus.reset_index(drop=True, inplace=True)
        dfus.set_index('Combined_Key', inplace=True)
        dNew=pd.to_datetime(dfus['Last_Update'][0]).date()
        print(dNew)
        dfus2=dfus.rename(columns={
                             'Admin2':'aNew', 
                             'Province_State': 'psNew',
                             'Country_Region': 'crNew', 
                             'Last_Update': 'luNew',
                             'Lat': 'latNew',
                             'Long_': 'lonNew'})
        #dfus2=dfus.copy()
        dfusc=pd.concat([dfAll,dfus2], axis=1, join='outer')
        dfusc2=dfusc.rename(columns={'Confirmed':dNew})
        dfusc3=dfusc2.drop(columns={'aNew','psNew', 'crNew', 'luNew', 'lonNew', 'latNew'})
        dfusc3.iloc[:,-1]=pd.to_numeric(dfusc3.iloc[:,-1], errors='ignore', downcast='float')
        return dfusc3

In [355]:
#dfTest=appendData(dfus1,1,tday)
#dfTest

## Run the function

In [356]:
dfus1

Unnamed: 0_level_0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,2020-04-01
Combined_Key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7
...,...,...,...,...,...,...,...
"Puerto Rico, US",,Puerto Rico,US,2020-04-01 21:58:49,18.220800,-66.590100,286
"Recovered, US",,Recovered,US,2020-04-01 21:58:49,0.000000,0.000000,0
",Virgin Islands,US",,Virgin Islands,US,2020-04-01 21:58:49,18.335800,-64.896300,30


In [357]:
#ndays=pd.to_datetime(dOld).date()-pd.to_datetime(dNew).date()
deltaDay=datetime.date.today()-datetime.date(2020,3,23)
ndays=int(deltaDay/day)
ndays


10

In [358]:
days=range(1, ndays, 1)
tday=datetime.date.today()

In [413]:
dfAll=dfus1.copy()
for i in days:
    dfAll=appendData(dfAll,i,tday)
    #print('outside')
    #print(dfAll.columns)

1
2020-03-31
2
2020-03-30
3
2020-03-29
4
2020-03-28
5
2020-03-27
6
2020-03-26
7
2020-03-25
8
2020-03-24
9
2020-03-23


In [414]:
dfAll

Unnamed: 0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,2020-04-01,2020-03-31,2020-03-30,2020-03-29,2020-03-28,2020-03-27,2020-03-26,2020-03-25,2020-03-24,2020-03-23
"Abbeville, South Carolina, US",Abbeville,South Carolina,US,2020-04-01 21:58:49,34.223334,-82.461707,4.0,4.0,3.0,3.0,3.0,4.0,3.0,3.0,1.0,1.0
"Acadia, Louisiana, US",Acadia,Louisiana,US,2020-04-01 21:58:49,30.295065,-92.414197,47.0,39.0,11.0,9.0,9.0,8.0,3.0,2.0,2.0,1.0
"Accomack, Virginia, US",Accomack,Virginia,US,2020-04-01 21:58:49,37.767072,-75.632346,7.0,7.0,6.0,3.0,2.0,2.0,2.0,2.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Hawaii,Hawaii,US",,,,,,,,,,,,,,,,3.0
"Unassigned, Indiana, US",,,,,,,,,,,,,,,,56.0
"Walla Walla County, Washington, US",,,,,,,,,,,,,,,,1.0


In [415]:
dfUS=dfAll[dfAll.Country_Region=='US']

In [416]:
dfUS.Admin2.fillna('Total', inplace=True)
#dfm['id'] = dfm[['Admin2', 'Province_State']].agg('-'.join, axis=1)
dfUS['Country_Region'] = dfUS['Admin2'] + ', ' + dfAll['Province_State']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfUS['Country_Region'] = dfUS['Admin2'] + ', ' + dfAll['Province_State']


In [417]:
dfUS.rename(columns = {'Lat':'lat', 'Long_':'lon','Country_Region':'id'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [418]:
dfUS


Unnamed: 0,Admin2,Province_State,id,Last_Update,lat,lon,2020-04-01,2020-03-31,2020-03-30,2020-03-29,2020-03-28,2020-03-27,2020-03-26,2020-03-25,2020-03-24,2020-03-23
"Abbeville, South Carolina, US",Abbeville,South Carolina,"Abbeville, South Carolina",2020-04-01 21:58:49,34.223334,-82.461707,4.0,4.0,3.0,3.0,3.0,4.0,3.0,3.0,1.0,1.0
"Acadia, Louisiana, US",Acadia,Louisiana,"Acadia, Louisiana",2020-04-01 21:58:49,30.295065,-92.414197,47.0,39.0,11.0,9.0,9.0,8.0,3.0,2.0,2.0,1.0
"Accomack, Virginia, US",Accomack,Virginia,"Accomack, Virginia",2020-04-01 21:58:49,37.767072,-75.632346,7.0,7.0,6.0,3.0,2.0,2.0,2.0,2.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Puerto Rico, US",Total,Puerto Rico,"Total, Puerto Rico",2020-04-01 21:58:49,18.220800,-66.590100,286.0,239.0,174.0,127.0,100.0,79.0,64.0,51.0,39.0,31.0
"Recovered, US",Total,Recovered,"Total, Recovered",2020-04-01 21:58:49,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
",Virgin Islands,US",Total,Virgin Islands,"Total, Virgin Islands",2020-04-01 21:58:49,18.335800,-64.896300,30.0,30.0,,,,,,,,


In [419]:
dfUS.drop(columns={'Admin2','Province_State','Last_Update'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [420]:
#dfUS[(dfUS.ck=='Suffolk, Massachusetts, US')]
dfUS.loc['Suffolk, Massachusetts, US']

id            Suffolk, Massachusetts
lat                           42.328
lon                         -71.0785
                       ...          
2020-03-25                       342
2020-03-24                       234
2020-03-23                       154
Name: Suffolk, Massachusetts, US, Length: 13, dtype: object

In [421]:
dfUS

Unnamed: 0,id,lat,lon,2020-04-01,2020-03-31,2020-03-30,2020-03-29,2020-03-28,2020-03-27,2020-03-26,2020-03-25,2020-03-24,2020-03-23
"Abbeville, South Carolina, US","Abbeville, South Carolina",34.223334,-82.461707,4.0,4.0,3.0,3.0,3.0,4.0,3.0,3.0,1.0,1.0
"Acadia, Louisiana, US","Acadia, Louisiana",30.295065,-92.414197,47.0,39.0,11.0,9.0,9.0,8.0,3.0,2.0,2.0,1.0
"Accomack, Virginia, US","Accomack, Virginia",37.767072,-75.632346,7.0,7.0,6.0,3.0,2.0,2.0,2.0,2.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Puerto Rico, US","Total, Puerto Rico",18.220800,-66.590100,286.0,239.0,174.0,127.0,100.0,79.0,64.0,51.0,39.0,31.0
"Recovered, US","Total, Recovered",0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
",Virgin Islands,US","Total, Virgin Islands",18.335800,-64.896300,30.0,30.0,,,,,,,,


In [422]:
dfm=pd.melt(dfUS, id_vars=dfUS.columns.values[0:3], var_name="Date", value_name="Value")

In [423]:
dfm

Unnamed: 0,id,lat,lon,Date,Value
0,"Abbeville, South Carolina",34.223334,-82.461707,2020-04-01,4.0
1,"Acadia, Louisiana",30.295065,-92.414197,2020-04-01,47.0
2,"Accomack, Virginia",37.767072,-75.632346,2020-04-01,7.0
...,...,...,...,...,...
22297,"Total, Puerto Rico",18.220800,-66.590100,2020-03-23,31.0
22298,"Total, Recovered",0.000000,0.000000,2020-03-23,
22299,"Total, Virgin Islands",18.335800,-64.896300,2020-03-23,


In [425]:
dfm.to_csv('US_covid_conf.csv', index=False)

In [324]:
#dff.Admin2.fillna('Total', inplace=True)
#dff.set_index(['Province_State', 'Admin2'], inplace=True)
#dff.sort_index(0)