In [112]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
import heapq
import math
import plotly.express as px
import statsmodels.formula.api as sm

In [113]:
# Read the county-level CSV snapshot into a DataFrame. At most 100000 data rows
# are read and no leading rows are skipped; the bare `df` renders a preview.
csv_path='./20200408_us-counties.csv'
df=pd.read_csv(csv_path,nrows=100000,skiprows=0)
df

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0
...,...,...,...,...,...,...
40715,2020-04-07,Sublette,Wyoming,56035.0,1,0
40716,2020-04-07,Sweetwater,Wyoming,56037.0,6,0
40717,2020-04-07,Teton,Wyoming,56039.0,44,0
40718,2020-04-07,Uinta,Wyoming,56041.0,3,0


In [115]:
#add a new column since_days ('sdate'): whole days elapsed since 2020-01-20
# Work on a deep copy so the raw frame `df` is left untouched.
dff=df.copy(deep=True)
# Parse the 'YYYY-MM-DD' strings in one vectorized call instead of a per-cell
# strptime through DataFrame.applymap (deprecated in recent pandas releases).
parsed_dates=pd.to_datetime(dff['date'],format='%Y-%m-%d')
# Keep 'date' as plain datetime.date objects, exactly as the column held before.
dff['date']=parsed_dates.dt.date
# Day offset from the 2020-01-20 baseline, stored as an integer number of days.
dff['sdate']=(parsed_dates-pd.Timestamp(2020,1,20)).dt.days
dff

Unnamed: 0,date,county,state,fips,cases,deaths,sdate
0,2020-01-21,Snohomish,Washington,53061.0,1,0,1
1,2020-01-22,Snohomish,Washington,53061.0,1,0,2
2,2020-01-23,Snohomish,Washington,53061.0,1,0,3
3,2020-01-24,Cook,Illinois,17031.0,1,0,4
4,2020-01-24,Snohomish,Washington,53061.0,1,0,4
...,...,...,...,...,...,...,...
40715,2020-04-07,Sublette,Wyoming,56035.0,1,0,78
40716,2020-04-07,Sweetwater,Wyoming,56037.0,6,0,78
40717,2020-04-07,Teton,Wyoming,56039.0,44,0,78
40718,2020-04-07,Uinta,Wyoming,56041.0,3,0,78


In [202]:
#calculate daily/accumulated deaths/cases and make dataframe
# totaldays is the last day covered, counted from the 2020-01-20 baseline used
# for 'sdate' (2020-04-07 -> 78). Row 0 of the result is the all-zero baseline.
totaldays=78
t1=dff['sdate']
# Sum deaths/cases over all rows of each day with a single groupby. This avoids
# rescanning the frame once per day and avoids using positional indices from
# np.where as index labels (which is only correct on a default RangeIndex).
# Only days 1..totaldays contribute; days with no rows are filled with 0.
window=dff[dff['sdate'].between(1,totaldays)]
accumulated=(window.groupby('sdate')[['deaths','cases']].sum()
             .reindex(range(totaldays+1),fill_value=0))
# Daily increase = this day's total minus the previous day's total
# (day 0 is compared against 0, so it stays 0).
daily=accumulated-accumulated.shift(1,fill_value=0)
c1=accumulated['deaths'].tolist() #accumulative deaths
c2=accumulated['cases'].tolist() #accumulative cases
d1=daily['deaths'].tolist() #daily deaths
d2=daily['cases'].tolist() #daily cases
# Assemble the summary frame directly, in the same column order as before.
df1=pd.DataFrame({'ddeath':d1,
                  'adeath':c1,
                  'dcase':d2,
                  'acase':c2,
                  'time':np.arange(totaldays+1)})
df1

Unnamed: 0,ddeath,adeath,dcase,acase,time
0,0,0,0,0,0
1,0,0,1,1,1
2,0,0,0,1,2
3,0,0,0,1,3
4,0,0,1,2,4
...,...,...,...,...,...
74,1634,7136,32194,276201,74
75,1369,8505,34641,310842,75
76,1156,9661,25568,336410,76
77,2048,11709,29952,366362,77


In [252]:
#plot value
# Line charts of the raw counts: one figure for deaths, one for confirmed cases.
# Each figure shows the accumulated total ('a*' column of df1) and the daily
# increase ('d*' column of df1) against df1['time'].
import plotly
import plotly.graph_objs as go
trace0 = go.Scatter(
    x = df1['time'],
    y = df1['adeath'],
    mode = 'lines+markers',
    name = 'accumulated deaths'
)
trace1 = go.Scatter(
    x = df1['time'],
    y = df1['ddeath'],
    mode = 'lines+markers',
    name = 'daily increased deaths'
)
# The two case traces previously carried each other's legend label:
# 'acase' is the accumulated series and 'dcase' is the daily increase.
trace2 = go.Scatter(
    x = df1['time'],
    y = df1['acase'],
    mode = 'lines+markers',
    name = 'accumulated cases'
)
trace3 = go.Scatter(
    x = df1['time'],
    y = df1['dcase'],
    mode = 'lines+markers',
    name = 'daily increased cases'
)
data1 = [trace0,trace1]
data2 = [trace2,trace3]
# Axis titles so each figure can be read on its own.
layout1 = dict(title ='value of deaths of COVID-19',
               xaxis =dict(title ='days since 2020-01-20'),
               yaxis =dict(title ='deaths'))
layout2 = dict(title ='value of confirmed cases of COVID-19',
               xaxis =dict(title ='days since 2020-01-20'),
               yaxis =dict(title ='confirmed cases'))
fig1 = dict(data =data1, layout =layout1)
fig2 = dict(data =data2, layout =layout2)
plotly.offline.iplot(fig1)
plotly.offline.iplot(fig2)

In [251]:
#plot log
# Same four series as the value plots, drawn as the natural log of (value + 1);
# the +1 keeps zero counts finite (log(1) == 0).
trace4 = go.Scatter(
    x = df1['time'],
    y = np.log(df1['adeath']+1),
    mode = 'lines+markers',
    name = 'accumulated deaths'
)
trace5 = go.Scatter(
    x = df1['time'],
    y = np.log(df1['ddeath']+1),
    mode = 'lines+markers',
    name = 'daily increased deaths'
)
# The two case traces previously carried each other's legend label:
# 'acase' is the accumulated series and 'dcase' is the daily increase.
trace6 = go.Scatter(
    x = df1['time'],
    y = np.log(1+df1['acase']),
    mode = 'lines+markers',
    name = 'accumulated cases'
)
trace7 = go.Scatter(
    x = df1['time'],
    y = np.log(df1['dcase']+1),
    mode = 'lines+markers',
    name = 'daily increased cases'
)
data3 = [trace4,trace5]
data4 = [trace6,trace7]
# Axis titles so each figure can be read on its own.
layout3 = dict(title ='logvalue of deaths of COVID-19',
               xaxis =dict(title ='days since 2020-01-20'),
               yaxis =dict(title ='ln(deaths + 1)'))
layout4 = dict(title ='logvalue of confirmed cases of COVID-19',
               xaxis =dict(title ='days since 2020-01-20'),
               yaxis =dict(title ='ln(confirmed cases + 1)'))
fig3 = dict(data =data3, layout =layout3)
fig4 = dict(data =data4, layout =layout4)
plotly.offline.iplot(fig3)
plotly.offline.iplot(fig4)

In [250]:
# Grouped bar charts of the raw counts: one figure for deaths, one for
# confirmed cases, each pairing the accumulated total with the daily increase.
trace8 = go.Bar(
    x = df1['time'],
    y = df1['adeath'],
    name = 'accumulated deaths'
)
trace9 = go.Bar(
    x = df1['time'],
    y = df1['ddeath'],
    name = 'daily increased deaths'
)
# The two case traces previously carried each other's legend label:
# 'acase' is the accumulated series and 'dcase' is the daily increase.
trace10 = go.Bar(
    x = df1['time'],
    y = df1['acase'],
    name = 'accumulated cases'
)
trace11 = go.Bar(
    x = df1['time'],
    y = df1['dcase'],
    name = 'daily increased cases'
)
data5 = [trace8, trace9]
data6 = [trace10, trace11]
# barmode='group' draws the two series of a figure side by side per day.
layout5 = dict(title ='Deaths of COVID-19 in the States',barmode='group',
               xaxis =dict(title ='days since 2020-01-20'),
               yaxis =dict(title ='deaths'))
layout6 = dict(title ='Confirmed Cases of COVID-19 in the States',barmode='group',
               xaxis =dict(title ='days since 2020-01-20'),
               yaxis =dict(title ='confirmed cases'))
fig5 = go.Figure(data=data5, layout=layout5)
fig6 = go.Figure(data=data6, layout=layout6)
plotly.offline.iplot(fig5)
plotly.offline.iplot(fig6)

In [153]:
# Distinct values of the 'state' column, in order of first appearance,
# followed by a printout of the values and of the array shape.
states=pd.unique(df['state'])
for summary in (states,states.shape):
    print(summary)

['Washington' 'Illinois' 'California' 'Arizona' 'Massachusetts'
 'Wisconsin' 'Texas' 'Nebraska' 'Utah' 'Oregon' 'Florida' 'New York'
 'Rhode Island' 'Georgia' 'New Hampshire' 'North Carolina' 'New Jersey'
 'Colorado' 'Maryland' 'Nevada' 'Tennessee' 'Hawaii' 'Indiana' 'Kentucky'
 'Minnesota' 'Oklahoma' 'Pennsylvania' 'South Carolina'
 'District of Columbia' 'Kansas' 'Missouri' 'Vermont' 'Virginia'
 'Connecticut' 'Iowa' 'Louisiana' 'Ohio' 'Michigan' 'South Dakota'
 'Arkansas' 'Delaware' 'Mississippi' 'New Mexico' 'North Dakota' 'Wyoming'
 'Alaska' 'Maine' 'Alabama' 'Idaho' 'Montana' 'Puerto Rico'
 'Virgin Islands' 'Guam' 'West Virginia' 'Northern Mariana Islands']
(55,)


In [216]:
# Snapshot of the final day: exactly the rows whose 'sdate' equals totaldays.
# list2 holds the positional row numbers of those rows.
list2=np.where(dff['sdate']==totaldays)[0]
# Select the matching rows by position. Slicing from the first match to the end
# of the frame would also pull in any non-matching rows that follow it (unsorted
# data, or later days) and raised IndexError when no row matched; this yields an
# empty frame in that case instead.
df2=dff.iloc[list2]
df2

Unnamed: 0,date,county,state,fips,cases,deaths,sdate
38181,2020-04-07,Autauga,Alabama,1001.0,12,1,78
38182,2020-04-07,Baldwin,Alabama,1003.0,42,1,78
38183,2020-04-07,Barbour,Alabama,1005.0,3,0,78
38184,2020-04-07,Bibb,Alabama,1007.0,8,0,78
38185,2020-04-07,Blount,Alabama,1009.0,10,0,78
...,...,...,...,...,...,...,...
40715,2020-04-07,Sublette,Wyoming,56035.0,1,0,78
40716,2020-04-07,Sweetwater,Wyoming,56037.0,6,0,78
40717,2020-04-07,Teton,Wyoming,56039.0,44,0,78
40718,2020-04-07,Uinta,Wyoming,56041.0,3,0,78


In [224]:
# Per-state totals of deaths and cases over the rows of df2, ordered like `states`
# so that cnt1[i] / cnt2[i] belong to states[i].
# The count follows `states` rather than a hard-coded 55, so values and labels
# cannot get out of step.
cnt_states=len(states)
# Group by state instead of slicing from a state's first to last row: the sums
# no longer depend on each state's rows being contiguous, and a state with no
# rows in df2 gets 0 instead of raising IndexError.
state_totals=(df2.groupby('state')[['deaths','cases']].sum()
              .reindex(states,fill_value=0))
cnt1=state_totals['deaths'].tolist() #death
cnt2=state_totals['cases'].tolist() #case

In [245]:
def _donut_figure(values, trace_name, title):
    """Return a plotly figure dict for a donut-style pie chart over `states`.

    values     -- one number per entry of `states` (slice sizes)
    trace_name -- name of the pie trace (shown in the hover text)
    title      -- figure title
    """
    # Text annotation placed at the centre of the plot area (x=0.5, y=0.5).
    center_label = {"font": {"size": 20}, "showarrow": False,
                    "text": "COVID-19", "x": 0.5, "y": 0.5}
    pie_trace = {"values": values, "labels": states,
                 "domain": {"x": [0, 1]}, "name": trace_name,
                 "hoverinfo": "label+percent+name", "hole": .4, "type": "pie"}
    return {"data": [pie_trace],
            "layout": {"title": title, "annotations": [center_label]}}

# One chart for deaths (cnt1) and one for confirmed cases (cnt2).
fig0 = _donut_figure(cnt1, "Deaths", "Pie Chart of States' Deaths by Apr 7th")
fig00 = _donut_figure(cnt2, "Cases", "Pie Chart of States' Confirmed Cases by Apr 7th")
for figure in (fig0, fig00):
    plotly.offline.iplot(figure)