In [21]:
import os
from glob import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

In [22]:
# COLORS
# One distinct hex colour per epidemic. COVID-19 previously reused the MERS
# yellow ('#FED23F'), so two epidemics were indistinguishable in the palette.
sars_color = '#442288'
ebola_color = '#B5D33D'
mers_color = '#FED23F'
covid_color = '#EB7D5B'

# Palette order: SARS, Ebola, MERS, COVID-19.
color_palette = [sars_color, ebola_color, mers_color, covid_color]

## COVID-19 Preprocessing

In [23]:
# Load the cleaned COVID-19 export; 'Date' is parsed to datetime on read.
covid_df = pd.read_csv('output/COVID9-cleaned_03-05-2020_21:13.csv', parse_dates=['Date'])

# Keep only the columns used below and rename 'Confirmed' -> 'Cases' so the
# frame has the same column names as the other epidemics.
covid_df = covid_df[['Country', 'Date', 'Confirmed', 'Deaths']]
covid_df.columns = ['Country', 'Date', 'Cases', 'Deaths']

# Missing counts are treated as zero so both columns can be stored as integers.
covid_df = (
    covid_df
    .fillna({'Cases': 0, 'Deaths': 0})
    .astype({'Cases': 'int', 'Deaths': 'int'})
)

# Collapse to one row per (Date, Country).
# The column selector must be a list: indexing a GroupBy with a bare tuple
# (['Cases', 'Deaths'] without the inner brackets) was deprecated and is
# rejected by pandas 2.x.
covid_df = covid_df.groupby(['Date', 'Country'])[['Cases', 'Deaths']].sum().reset_index()

# Day by day: totals over all countries for each date.
c_dbd = covid_df.groupby('Date')[['Cases', 'Deaths']].sum().reset_index()

# nth-day: whole days elapsed since the earliest date in the dataset.
covid_df['nth_day'] = (covid_df['Date'] - covid_df['Date'].min()).dt.days
# Both results are ordered by Date, so the values line up row for row.
c_dbd['nth_day'] = covid_df.groupby('Date')['nth_day'].max().values

c_dbd['epidemic'] = 'COVID-19'
# Rows with at least one case. NOTE: `temp` is reassigned by later cells.
temp = covid_df[covid_df['Cases'] > 0]

## Ebola Preprocessing

In [24]:
# Load the cleaned 2014-2016 Ebola dataset; 'Date' is parsed to datetime on read.
ebola_df = pd.read_csv("Dataset/Ebola/ebola_2014_2016_clean.csv", parse_dates=['Date'])

# Keep the combined (confirmed + probable + suspected) counts only.
ebola_df = ebola_df[[
    'Date',
    'Country',
    'No. of confirmed, probable and suspected cases',
    'No. of confirmed, probable and suspected deaths']]

ebola_df.columns = ['Date', 'Country', 'Cases', 'Deaths']

# Missing counts are treated as zero so both columns can be stored as integers.
ebola_df = (
    ebola_df
    .fillna({'Cases': 0, 'Deaths': 0})
    .astype({'Cases': 'int', 'Deaths': 'int'})
)

# Day by day: totals over all countries for each date.
# The column selector must be a list: indexing a GroupBy with a bare tuple
# was deprecated and is rejected by pandas 2.x.
e_dbd = ebola_df.groupby('Date')[['Cases', 'Deaths']].sum().reset_index()

# nth-day: whole days elapsed since the earliest date in the dataset.
ebola_df['nth_day'] = (ebola_df['Date'] - ebola_df['Date'].min()).dt.days
# Both results are ordered by Date, so the values line up row for row.
e_dbd['nth_day'] = ebola_df.groupby('Date')['nth_day'].max().values

e_dbd['epidemic'] = 'EBOLA'
# Rows with at least one case. NOTE: `temp` is reassigned by later cells.
temp = ebola_df[ebola_df['Cases'] > 0]

## MERS Preprocessing

In [25]:
# Weekly MERS counts; this file has 'Year' and 'Week' columns rather than a date.
mers_df = pd.read_csv("Dataset/MERS/MERS_weekly_clean.csv")

year_str = mers_df['Year'].astype(str)
week_str = mers_df['Week'].astype(str)

# Readable "<year> - <week>" label (discarded by the column selection below).
mers_df['Year-Week'] = year_str + ' - ' + week_str

# Build "<week><year>-1" and parse it as ISO week / ISO year / ISO weekday;
# the trailing "-1" pins each week to its Monday.
mers_df['Date'] = pd.to_datetime(week_str + year_str.add('-1'), format='%V%G-%u')

# Select and rename columns
cols = ['Date', 'Region', 'New Cases']
mers_df = mers_df[cols].rename(columns={'Region': 'Country', 'New Cases': 'Cases'})

# Day by day: total cases over all regions for each week-start date.
m_dbd = (
    mers_df
    .groupby('Date')['Cases']
    .sum()
    .reset_index()
)

# nth-day: whole days elapsed since the earliest date in the dataset.
mers_df['nth_day'] = (mers_df['Date'] - min(mers_df['Date'])).dt.days
m_dbd['nth_day'] = (
    mers_df
    .groupby('Date')['nth_day']
    .max()
    .values
)

m_dbd['epidemic'] = 'MERS'
# Rows with at least one case. NOTE: `temp` is reassigned by later cells.
temp = mers_df[mers_df['Cases'] > 0]


## SARS Preprocessing

In [26]:
# Load the cleaned 2003 SARS dataset; 'Date' is parsed to datetime on read.
sars_df = pd.read_csv("Dataset/SARS/sars_2003_complete_dataset_clean.csv", parse_dates=['Date'])

# Select and rename columns ('Cases' holds the cumulative case count column).
cols = ['Date', 'Country', 'Cumulative number of case(s)', 'Number of deaths']
sars_df = sars_df[cols]
sars_df.columns = ['Date', 'Country', 'Cases', 'Deaths']

# Day by day: totals over all countries for each date.
# The column selector must be a list: indexing a GroupBy with a bare tuple
# was deprecated and is rejected by pandas 2.x.
s_dbd = sars_df.groupby(['Date'])[['Cases', 'Deaths']].sum().reset_index()

# nth-day: whole days elapsed since the earliest date in the dataset.
sars_df['nth_day'] = (sars_df['Date'] - sars_df['Date'].min()).dt.days
# Both results are ordered by Date, so the values line up row for row.
s_dbd['nth_day'] = sars_df.groupby('Date')['nth_day'].max().values

s_dbd['epidemic'] = 'SARS'
# Rows with at least one case. NOTE: `temp` is reassigned by later cells.
temp = sars_df[sars_df['Cases'] > 0]


### Verifying the form of the DataFrames

In [27]:
# Print the last row of each day-by-day frame to compare their columns.
for heading, daily in (
    ("COVID\n", c_dbd),
    ("\n\nSARS\n", s_dbd),
    ("\n\nEBOLA\n", e_dbd),
    ("\n\nMERS\n", m_dbd),
):
    print(heading, daily.tail(1))

COVID
          Date    Cases  Deaths  nth_day  epidemic
96 2020-04-27  3041764  211167       96  COVID-19


SARS
          Date  Cases  Deaths  nth_day epidemic
95 2003-07-11   8432     813      116     SARS


EBOLA
           Date  Cases  Deaths  nth_day epidemic
258 2016-03-23  28642   11319      572    EBOLA


MERS
           Date  Cases  nth_day epidemic
374 2019-06-10      2     2639     MERS


## Plotting Comparison


In [28]:
# Stack the COVID-19, SARS and Ebola daily totals row-wise into one long frame;
# sort=True orders the columns alphabetically. m_dbd (MERS) is not included.
temp = pd.concat(
    (c_dbd, s_dbd, e_dbd),
    axis="index",
    sort=True,
)

In [29]:
df = temp

# Deaths against nth_day, one line per epidemic, x-axis limited to 0-100.
fig = px.line(
    df,
    x="nth_day",
    y="Deaths",
    color='epidemic',
    range_x=[0, 100],
)

# Fixed 800x800 canvas with the plain white template.
layout_options = dict(
    title='The first 100 days of COVID-19',
    autosize=False,
    width=800,
    height=800,
    template='simple_white',
)
fig.update_layout(**layout_options)

# Linear y-axis.
fig.update_yaxes(type="linear")

#fig.write_image("images/linear_comparison_2.pdf")
fig.show()

In [30]:
df = temp

# Deaths against nth_day, one line per epidemic, x-axis limited to 0-100.
fig = px.line(
    df,
    x="nth_day",
    y="Deaths",
    color='epidemic',
    range_x=[0, 100],
)

# Fixed 800x800 canvas with the plain white template.
layout_options = dict(
    title='The first 100 days of COVID-19',
    autosize=False,
    width=800,
    height=800,
    template='simple_white',
    # Disabled: horizontal reference line at the maximum of e_dbd['Cases'].
    #shapes=[
    #dict(
    #  type= 'line',
    #  yref= 'y', y0 = max(e_dbd['Cases']), y1 = max(e_dbd['Cases']),
    #  xref= 'paper', x0 = 0, x1 = 1
    #    )
    #]
)
fig.update_layout(**layout_options)

# Logarithmic y-axis.
fig.update_yaxes(type="log")

#fig.write_image("images/log_comparison_2.pdf")
fig.show()