# <center> PSM </center>
# <center> EDA on COVID-19 </center>

In [1]:
#import lib
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot, plot_mpl
import plotly.offline as py
# Switch plotly's offline module into notebook mode (see plotly.offline.init_notebook_mode
# for what connected=True implies).
init_notebook_mode(connected=True)
# Default font size applied to every matplotlib figure created in this notebook.
plt.rcParams.update({'font.size': 14})
# IPython magic selecting matplotlib's inline backend.
%matplotlib inline


In [2]:
# Load the COVID-19 dataset; the relative path is resolved against the notebook's working directory.
covidDf = pd.read_csv("training_LSTM(1).csv")

In [3]:
# Preview the first rows to check which columns were loaded.
covidDf.head()

Unnamed: 0,Country_Region,Province_State,Date,ConfirmedCases,Recoveries,Fatalities,Id
0,Afghanistan,,2020-01-22,0,0,0,1
1,Afghanistan,,2020-01-23,0,0,0,2
2,Afghanistan,,2020-01-24,0,0,0,3
3,Afghanistan,,2020-01-25,0,0,0,4
4,Afghanistan,,2020-01-26,0,0,0,5


In [4]:
# Worldwide totals per date: sum the three counters over every row that shares a date,
# then turn the 'Date' group key back into an ordinary column.
covidDfDate = (
    covidDf
    .groupby('Date')[['ConfirmedCases', 'Recoveries', 'Fatalities']]
    .sum()
    .reset_index()
)


In [5]:
# Show the last rows of the per-date worldwide totals.
covidDfDate.tail()

Unnamed: 0,Date,ConfirmedCases,Recoveries,Fatalities
113,2020-05-14,4367195,1551704,296823
114,2020-05-15,4466195,1600057,301984
115,2020-05-16,4556658,1655262,305978
116,2020-05-17,4635069,1695296,309279
117,2020-05-18,4722305,1747520,312518


In [6]:
# Helper that derives rate / change columns from the cumulative counters.
def add_rates(df):
    """Return a copy of ``df`` extended with derived rate columns.

    The input frame is left untouched; all new columns are written to a copy,
    so the caller's aggregated frame can still be inspected or reused after
    the call.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Date', 'ConfirmedCases', 'Recoveries' and
        'Fatalities', one row per date in chronological order.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with 'Date' converted via ``pd.to_datetime`` and these
        columns appended:

        * 'Confirmed Change'   - ConfirmedCases of the NEXT row minus this row
          (``diff()`` followed by ``shift(-1)``), so the last row is NaN.
        * 'Mortality Rate'     - Fatalities / ConfirmedCases.
        * 'Recovery Rate'      - Recoveries / ConfirmedCases.
        * 'Growth Rate'        - 'Confirmed Change' / ConfirmedCases.
        * 'Growth Rate Change' - Growth Rate of the next row minus this row.
        * 'Growth Rate Accel'  - 'Growth Rate Change' / 'Growth Rate'.
        * 'Patient in Care'    - ConfirmedCases - Fatalities - Recoveries.

    Notes
    -----
    Rows whose denominator is 0 yield NaN or +/-inf under pandas float
    division; nothing is clipped here, callers are expected to filter.
    """
    # Work on a copy so the caller's frame is not mutated as a side effect.
    out = df.copy()

    # Forward-looking difference: change from this row to the next one.
    out['Confirmed Change'] = out['ConfirmedCases'].diff().shift(-1)

    out['Mortality Rate'] = out['Fatalities'] / out['ConfirmedCases']
    out['Recovery Rate'] = out['Recoveries'] / out['ConfirmedCases']
    out['Growth Rate'] = out['Confirmed Change'] / out['ConfirmedCases']
    out['Growth Rate Change'] = out['Growth Rate'].diff().shift(-1)
    out['Growth Rate Accel'] = out['Growth Rate Change'] / out['Growth Rate']
    out['Patient in Care'] = out['ConfirmedCases'] - out['Fatalities'] - out['Recoveries']
    out['Date'] = pd.to_datetime(out['Date'])
    return out

# Derive the rate columns for the worldwide per-date totals.
rateDf = add_rates(covidDfDate)

In [7]:
# Headline figures. Worldwide totals come from the last row of rateDf (one row per date);
# the mean / max figures are computed over every row of the raw covidDf frame.
latest_date = pd.to_datetime(rateDf["Date"].iloc[-1]).date()  # date of the row the totals are read from
n_days = rateDf.shape[0]                                     # number of dates covered
n_hours = n_days * 24
confirmed_total = rateDf["ConfirmedCases"].iloc[-1]
recovered_total = rateDf["Recoveries"].iloc[-1]
deaths_total = rateDf["Fatalities"].iloc[-1]

# The header date is read from the data instead of being hard-coded, so it stays correct
# when the CSV is refreshed.
print(f"Basic Information as on {latest_date}")
print("Total number of countries with Disease Spread: ", len(covidDf["Country_Region"].unique()))
print("Total number of Confirmed Cases around the World: ", confirmed_total)
print("Total number of Recovered Cases around the World: ", recovered_total)
print("Total number of Deaths Cases around the World: ", deaths_total)
print("Total number of Active Cases around the World: ", (confirmed_total - recovered_total - deaths_total))
print("Total number of Closed Cases around the World: ", recovered_total + deaths_total)
print("Total Mean for Confirmed Cases = ", covidDf['ConfirmedCases'].mean())
print("Total Max Values for Confirmed Cases = ", covidDf['ConfirmedCases'].max())
print("Total Mean for Fatalities Cases = ", covidDf['Fatalities'].mean())
print("Total Max Values for Fatalities Cases = ", covidDf['Fatalities'].max())
print("Total Mean for Recoveries Cases = ", covidDf['Recoveries'].mean())
print("Max Values for Recoveries Cases = ", covidDf['Recoveries'].max())
# Averages: latest cumulative total divided by the number of dates (or hours) covered.
print("Approximate number of Confirmed Cases per Day around the World: ", np.round(confirmed_total / n_days))
print("Approximate number of Recovered Cases per Day around the World: ", np.round(recovered_total / n_days))
print("Approximate number of Death Cases per Day around the World: ", np.round(deaths_total / n_days))
print("Approximate number of Confirmed Cases per hour around the World: ", np.round(confirmed_total / n_hours))
print("Approximate number of Recovered Cases per hour around the World: ", np.round(recovered_total / n_hours))
print("Approximate number of Death Cases per hour around the World: ", np.round(deaths_total / n_hours))

Basic Information as on 2020-05-18
Totol number of countries with Disease Spread:  184
Total number of Confirmed Cases around the World:  4722305
Total number of Recovered Cases around the World:  1747520
Total number of Deaths Cases around the World:  312518
Total number of Active Cases around the World:  2662267
Total number of Closed Cases around the World:  2060038
Total Mean for Confirmed Cases =  4919.05731400177
Total Max Values for Confirmed Cases =  1508308
Total Mean for Fatalities Cases =  328.31369545980533
Total Max Values for Fatalities Cases =  90347
Total Mean for Recoveries Cases =  1488.9104553808454
Max Values for Recoveries Cases =  283178
Approximate number of Confirmed Cases per Day around the World:  40020.0
Approximate number of Recovered Cases per Day around the World:  14809.0
Approximate number of Death Cases per Day around the World:  2648.0
Approximate number of Confirmed Cases per hour around the World:  1667.0
Approximate number of Recovered Cases per hou

In [9]:
# Worldwide cumulative counts over time, one line per series.
fig = go.Figure()
fig.update_layout(template='plotly_dark', title='Summary of the cases (worldwide)')

# (column of rateDf, legend label, line colour) - traces are added in this order.
for column, label, colour in [
    ('ConfirmedCases', 'Confirmed Cases', 'yellow'),
    ('Fatalities', 'Fatalities', 'red'),
    ('Recoveries', 'Recoveries', 'green'),
    ('Patient in Care', 'Active Cases', 'Orange'),
]:
    fig.add_trace(go.Scatter(
        x=rateDf['Date'],
        y=rateDf[column],
        mode='lines',
        name=label,
        line=dict(color=colour, width=2),
    ))

# Bare last expression so the notebook renders the figure.
fig

In [10]:
# Worldwide mortality, recovery and growth rates over time; the y axis is shown as percentages.
fig = go.Figure()
fig.update_layout(
    template='plotly_white',
    title='Summary of Rate Cases',
    yaxis=dict(tickformat=".2%"),
)

# (column of rateDf, legend label, line colour) - traces are added in this order.
for column, label, colour in [
    ('Mortality Rate', 'Mortality rate', 'red'),
    ('Recovery Rate', 'Recovery rate', 'Green'),
    ('Growth Rate', 'Growth rate confirmed', 'Yellow'),
]:
    fig.add_trace(go.Scatter(
        x=rateDf['Date'],
        y=rateDf[column],
        mode='lines',
        name=label,
        line=dict(color=colour, width=2),
    ))

# Bare last expression so the notebook renders the figure.
fig

In [11]:
# Worldwide growth acceleration over time, y axis shown as percentages.
fig = go.Figure()
fig.update_layout(
    template='plotly_white',
    title='Rate of Growth per day',
    yaxis=dict(tickformat=".2%"),
)

# Keep only rows whose acceleration is below 10; the comparison is False for NaN and +inf,
# so those rows are dropped as well.
tmp_df = rateDf.loc[rateDf['Growth Rate Accel'].lt(10)]

# add_trace returns the figure, so the notebook renders it as the cell output.
fig.add_trace(go.Scatter(
    x=tmp_df['Date'],
    y=tmp_df['Growth Rate Accel'],
    mode='lines',
    name='Growth Acceleration',
    line=dict(color='Magenta', width=3),
))

In [12]:
# Malaysia only: filter the raw rows, rebuild the per-date totals, then derive the rate columns.
myCovidDf = covidDf.loc[covidDf['Country_Region'].eq("Malaysia")]
myCovidDfDate = (
    myCovidDf
    .groupby('Date')[['ConfirmedCases', 'Recoveries', 'Fatalities']]
    .sum()
    .reset_index()
)

myRateDf = add_rates(myCovidDfDate)

In [13]:
# Summary graph for Malaysia: cumulative counts over time plus a marker for the MCO start.
fig = go.Figure()
fig.update_layout(template='plotly_white', title='Summary of the cases (Malaysia)')

# (column of myRateDf, legend label, line colour) - traces are added in this order.
for column, label, colour in [
    ('ConfirmedCases', 'Confirmed Cases', 'yellow'),
    ('Fatalities', 'Fatalities', 'red'),
    ('Recoveries', 'Recoveries', 'green'),
    ('Patient in Care', 'Active Cases', 'Orange'),
]:
    fig.add_trace(go.Scatter(
        x=myRateDf['Date'],
        y=myRateDf[column],
        mode='lines',
        name=label,
        line=dict(color=colour, width=2),
    ))

# Vertical dashed marker at 2020-03-18. Its y extent (0 to 7000) is a fixed value,
# not derived from the data. add_trace returns the figure, so the cell renders it.
fig.add_trace(go.Scatter(
    x=['2020-03-18'] * 2,
    y=[0, 7000],
    line=go.scatter.Line(color="cyan", dash='dash'),
    name='MCO Begins',
))

In [14]:
# Rate chart for Malaysia: mortality, recovery and growth rates over time, plus the MCO marker.
fig = go.Figure()

# The rates are fractions; format the y axis as percentages so this chart matches the
# worldwide rate chart and the Malaysia growth chart, which both use the same tickformat.
fig.update_layout(
    template='plotly_white',
    title='Summary of Rate Cases (Malaysia)',
    yaxis=dict(tickformat=".2%"),
)

# (column of myRateDf, legend label, line colour) - traces are added in this order.
for column, label, colour in [
    ('Mortality Rate', 'Mortality rate', 'red'),
    ('Recovery Rate', 'Recovery rate', 'Green'),
    ('Growth Rate', 'Growth rate confirmed', 'Yellow'),
]:
    fig.add_trace(go.Scatter(
        x=myRateDf['Date'],
        y=myRateDf[column],
        mode='lines',
        name=label,
        line=dict(color=colour, width=2),
    ))

# Vertical dashed marker at 2020-03-18. Its y extent (0 to 0.82, in the same fractional
# units as the rates) is a fixed value, not derived from the data.
# add_trace returns the figure, so the notebook renders it as the cell output.
fig.add_trace(go.Scatter(
    x=['2020-03-18', '2020-03-18'],
    y=[0, 0.82],
    line=go.scatter.Line(color="cyan", dash='dash'),
    name='MCO Begins',
))


In [15]:
# Growth acceleration per day for Malaysia, y axis shown as percentages, plus the MCO marker.
fig = go.Figure()
fig.update_layout(
    template='plotly_white',
    title='Rate of Growth per day (Malaysia)',
    yaxis=dict(tickformat=".2%"),
)

# Keep only rows whose acceleration is below 10; the comparison is False for NaN and +inf,
# so those rows are dropped as well.
tmp_df = myRateDf.loc[myRateDf['Growth Rate Accel'].lt(10)]

fig.add_trace(go.Scatter(
    x=tmp_df['Date'],
    y=tmp_df['Growth Rate Accel'],
    mode='lines',
    name='Growth Acceleration',
    line=dict(color='Magenta', width=3),
))

# Vertical dashed marker at 2020-03-18. Its y extent (-1 to 3) is a fixed value,
# not derived from the data. add_trace returns the figure, so the cell renders it.
fig.add_trace(go.Scatter(
    x=['2020-03-18'] * 2,
    y=[-1, 3],
    line=go.scatter.Line(color="cyan", dash='dash'),
    name='MCO Begins',
))