# Visualization and Analysis of Covid-19
**Tianbai Hsiao**

With acknowledgement to https://github.com/twiecki/covid19.

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib

import numpy as np
import pandas as pd
import seaborn as sns
import covid

# Global plotting configuration for every figure in this notebook.
sns.set_context('talk')

# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*'
# and newer releases no longer ship the old names. Use the new name when this
# matplotlib provides it and fall back to the legacy name on older installs.
whitegrid_style = (
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)
plt.style.use(whitegrid_style)

In [2]:
# Load the case data through the project's `covid` helper module.
df = covid.load_data(drop_states=True)

# Footer annotation shared by the plotting cells below, which call
# `ax.annotate(**annotate_kwargs)`. It must be defined here, otherwise those
# cells raise NameError on a fresh kernel.
# Assumes `df` is indexed by date (the label formats `df.index.max()` with
# strftime) -- TODO confirm against covid.load_data.
# `text=` is the keyword name in matplotlib >= 3.3; older releases called it `s`.
annotate_kwargs = dict(
    text='Data from Johns Hopkins CSSE ({})\nBy T. Hsiao'.format(df.index.max().strftime('%B %d, %Y')),
    xy=(0.05, 0.01), xycoords='figure fraction', fontsize=10)

ParserError: Error tokenizing data. C error: Expected 1 fields in line 33, saw 2


In [None]:
# Distinct values of the `country` column, shown as the cell output.
df['country'].unique()

In [None]:
# Countries to compare, grouped by region. The names are passed as-is to the
# plotting cells, so they must match the values in df.country.
european_countries = [
    'Italy',
    'Germany',
    'France (total)',
    'Spain',
]
american_countries = [
    'US (total)',
    'Canada (total)',
]
asian_countries = [
    'Japan',
    'Korea, South',
    'Iran',
]

# Region groups in plotting order.
country_groups = [
    european_countries,
    american_countries,
    asian_countries,
]

# Matplotlib line-style codes; the plotting cells zip these with
# country_groups, so each group is drawn with one style.
line_styles = [
    '-',
    ':',
    '--',
    '-.',
]

In [None]:
# Show the loaded DataFrame as this cell's output for a quick visual check.
df

## China's benchmark

In [None]:
# Benchmark figure: China's confirmed-case curve next to two reference models
# (12% compound daily growth and a straight line).
benchmark_palette = sns.hls_palette(8, l=.45, s=.8)
sns.set_palette(benchmark_palette)
fig, ax = plt.subplots(figsize=(12, 8))

# A single country group, drawn with the first of the shared line styles.
countries, ls = ["China (total)"], line_styles[0]
covid.plot_countries(df, countries, ls=ls)

# Reference curves run from 0 to one less than the current upper x-limit.
x_upper = plt.xlim()[1] - 1
x = np.linspace(0, x_upper)
exponential_reference = 100 * (1.12) ** x
linear_reference = 1350 * x
ax.plot(x, exponential_reference, ls='--', label='12% daily growth')
ax.plot(x, linear_reference, ls='--', color='k', label='linear model')

ax.set_title('Growth of COVID-19 cases')
ax.set_xlabel('Days from first 100 confirmed cases')
ax.set_ylabel('Confirmed cases')
ax.legend(bbox_to_anchor=(1.0, 1.0))

## Subsequent outbreaks in other countries

In [None]:
# Log-scale comparison: every country group against a 33% daily-growth line.
fig, ax = plt.subplots(figsize=(12, 8))

# One plot_countries call per group; each group gets its own line style.
for countries, ls in zip(country_groups, line_styles):
    covid.plot_countries(df, countries, ls=ls)

# Reference curve runs from 0 to one less than the current upper x-limit.
x_upper = plt.xlim()[1] - 1
x = np.linspace(0, x_upper)
growth_reference = 100 * (1.33) ** x
ax.plot(x, growth_reference, ls='--', color='k', label='33% daily growth')

ax.set_yscale('log')
ax.set_title('Growth of COVID-19 cases')
ax.set_xlabel('Days from first 100 confirmed cases')
ax.set_ylabel('Confirmed cases (log scale)')
# Set after switching to log scale so tick labels print as plain numbers.
ax.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))
ax.annotate(**annotate_kwargs)
sns.despine();

In [None]:
# Linear-scale version of the comparison, with the y-axis capped at 30000.
fig, ax = plt.subplots(figsize=(12, 8))

# One plot_countries call per group; each group gets its own line style.
for countries, ls in zip(country_groups, line_styles):
    covid.plot_countries(df, countries, ls=ls)

# Reference curve runs from 0 to one less than the current upper x-limit.
x_upper = plt.xlim()[1] - 1
x = np.linspace(0, x_upper)
growth_reference = 100 * (1.33) ** x
ax.plot(x, growth_reference, ls='--', color='k', label='33% daily growth')

ax.set_title('Growth of COVID-19 cases')
ax.set_xlabel('Days from first 100 confirmed cases')
ax.set_ylabel('Confirmed cases')
ax.set_ylim((0, 30000))
ax.legend(bbox_to_anchor=(1.0, 1.0))
ax.annotate(**annotate_kwargs);

In [None]:
# Smoothed day-over-day growth of confirmed cases, one line per country.
smooth_days = 4
fig, ax = plt.subplots(figsize=(14, 8))

# Day-over-day relative change of `confirmed`, computed within each country.
daily_change = df.groupby('country')['confirmed'].pct_change()
# NOTE(review): the rolling mean runs over the whole combined series, not per
# country -- confirm this is intended.
df['pct_change'] = daily_change.rolling(smooth_days).mean()

for countries, ls in zip(country_groups, line_styles):
    # Rows for this group of countries, in the order the group lists them.
    group_rows = df.set_index('country').loc[countries]
    # Keep only observations with more than 100 confirmed cases.
    past_100 = group_rows.loc[group_rows.confirmed > 100]
    # Re-index by days since the 100th case so the curves share an x-axis.
    by_day = past_100.reset_index().set_index('days_since_100')
    by_day.groupby('country', sort=False)['pct_change'].plot(ls=ls)

ax.set_ylim((0, 1))
ax.set_xlim((0, 20))
ax.set_title('Are we seeing changes in daily growth rate?')
ax.set_xlabel('Days from first 100 confirmed cases')
ax.set_ylabel('Daily percent change (smoothed over {} days)'.format(smooth_days))
# Horizontal reference line at 0.33.
ax.axhline(.33, ls='--', color='k')
ax.yaxis.set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, .1))
sns.despine()
ax.annotate(**annotate_kwargs);

In [None]:
#collapse-hide
# Estimated critical cases in Germany against ICU capacity, with a 33%
# daily-growth reference curve.
sns.set_palette(sns.hls_palette(8, l=.45, s=.8)) # 8 countries max
fig, ax = plt.subplots(figsize=(12, 8))
# Fraction of confirmed cases assumed to be critical.
p_crit = .05
# 28000 ICU beds total, 80% occupied
icu_germany = 28000
icu_germany_free = .2

# Critical-case estimate for Germany once confirmed cases exceed 100.
df_tmp = df.loc[lambda x: (x.country == 'Germany') & (x.confirmed > 100)].critical_estimate
df_tmp.plot(ax=ax)

# Reference curve: one point per calendar day, starting at 100 * p_crit.
# The day offsets must be the integers 0..n_days-1 so they line up with the
# daily date_range below. np.linspace(0, 30, 30) steps by 30/29 instead, which
# made the curve grow faster than the 33% per day its label states.
# Assumes df_tmp is indexed by date (its first index value seeds date_range)
# -- TODO confirm against covid.load_data.
n_days = 30
x = np.arange(n_days)
pd.Series(index=pd.date_range(df_tmp.index[0], periods=n_days),
          data=100*p_crit * (1.33) ** x).plot(ax=ax, ls='--', color='k', label='33% daily growth')

ax.axhline(icu_germany, color='.3', ls='-.', label='Total ICU beds')
ax.axhline(icu_germany * icu_germany_free, color='.5', ls=':', label='Free ICU beds')
ax.set(yscale='log',
       title='When will Germany run out of ICU beds?',
       ylabel='Expected critical cases (assuming {:.0f}% critical)'.format(100 * p_crit),
)
# Set after switching to log scale so tick labels print as plain numbers.
ax.get_yaxis().set_major_formatter(matplotlib.ticker.ScalarFormatter())
ax.legend(bbox_to_anchor=(1.0, 1.0))
sns.despine()
ax.annotate(**annotate_kwargs);