In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import bokeh
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.models.tools import HoverTool
from bokeh.models import PrintfTickFormatter

In [3]:
# Load the country-code lookup table (datasets/country-codes on GitHub) into a DataFrame.
url_cc = 'https://github.com/datasets/country-codes/raw/master/data/country-codes.csv'
df_cc = pd.read_csv(url_cc)

In [4]:
# Show the column labels of the country-code table.
df_cc.columns

Index(['FIFA', 'Dial', 'ISO3166-1-Alpha-3', 'MARC', 'is_independent',
       'ISO3166-1-numeric', 'GAUL', 'FIPS', 'WMO', 'ISO3166-1-Alpha-2', 'ITU',
       'IOC', 'DS', 'UNTERM Spanish Formal', 'Global Code',
       'Intermediate Region Code', 'official_name_fr', 'UNTERM French Short',
       'ISO4217-currency_name', 'Developed / Developing Countries',
       'UNTERM Russian Formal', 'UNTERM English Short',
       'ISO4217-currency_alphabetic_code',
       'Small Island Developing States (SIDS)', 'UNTERM Spanish Short',
       'ISO4217-currency_numeric_code', 'UNTERM Chinese Formal',
       'UNTERM French Formal', 'UNTERM Russian Short', 'M49',
       'Sub-region Code', 'Region Code', 'official_name_ar',
       'ISO4217-currency_minor_unit', 'UNTERM Arabic Formal',
       'UNTERM Chinese Short', 'Land Locked Developing Countries (LLDC)',
       'Intermediate Region Name', 'official_name_es', 'UNTERM English Formal',
       'official_name_cn', 'official_name_en', 'ISO4217-currency_count

In [5]:
# Unique English official names, in order of first appearance.
# NOTE(review): the [1:] slice discards the first unique value by position --
# presumably a NaN/blank placeholder; confirm against the CSV before relying on it.
onm = list(pd.unique(df_cc['official_name_en']))[1:]

In [6]:
# Fallback mapping from a country label to a three-letter country code
# (same style as the 'Country Code' values in the population table, e.g. 'USA').
# Its only visible use is in the commented-out lookup draft further down, for
# labels that do not match 'official_name_en' exactly.
badctrcode = {
    'Brunei': 'BRN',
    'US': 'USA',
    'Iran': 'IRN',
    'Korea, South': 'KOR',
    'Vietnam': 'VNM',
    'Russia': 'RUS',
    'Moldova': 'MDA',
    'Bolivia': 'BOL',
    'United Kingdom': 'GBR',
    'Venezuela': 'VEN',
}

In [7]:
# Load the population table (datasets/population on GitHub) into a DataFrame.
url_pop = 'https://github.com/datasets/population/raw/master/data/population.csv'
df_pop = pd.read_csv(url_pop)
# Sanity check: sort the USA rows by Year and display the Value of the last
# (largest-Year) row as an integer.
df_pop.loc[(df_pop['Country Code']=='USA')].sort_values(by='Year').iloc[-1].Value.astype(int)

323127513

In [8]:
# Preview the first rows of the population table.
df_pop.head()

Unnamed: 0,Country Name,Country Code,Year,Value
0,Arab World,ARB,1960,92490932.0
1,Arab World,ARB,1961,95044497.0
2,Arab World,ARB,1962,97682294.0
3,Arab World,ARB,1963,100411076.0
4,Arab World,ARB,1964,103239902.0


In [13]:
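# NOTE: disabled draft, kept for reference only. As written it would not run:
# df_pop has no 'Country/Region' column (its columns are Country Name, Country Code,
# Year, Value), so the loop was presumably meant to iterate over the case table's
# 'Country/Region' values instead.
# for ctr in list(pd.unique(df_pop['Country/Region'])):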
# #     print(ctr)
#     if (df_cc['official_name_en']==ctr).sum()>0:
#         ctr_code = df_cc.loc[df_cc['official_name_en']==ctr]['ISO3166-1-Alpha-3'].item()
#     else:
#         if ctr in badctrcode.keys():
#             ctr_code = badctrcode[ctr]
#             print(ctr,ctr_code)
            
#     if (df_pop['Country Code']==ctr_code).sum()>0:
#         ctr_pop = df_pop.loc[(df_pop['Country Code']==ctr_code)].sort_values(by='Year').iloc[-1].Value.astype(int)
#     else: 
#         pass # print(ctr)
# #     print(ctr,ctr_pop)
#             #             print(ctr_code,ctr,ctr_pop)

In [14]:
plt.style.use("fivethirtyeight")   # style options: 'dark_background', 'bmh', 'grayscale', 'ggplot', 'fivethirtyeight'

In [15]:
def get_data(url=None):
    ''' Read the JHU CSSE global confirmed-cases time series into a DataFrame.

    Parameters
    ----------
    url : str, path or file-like, optional
        source handed straight to ``pd.read_csv``. When omitted, the
        ``time_series_covid19_confirmed_global.csv`` file from the
        CSSEGISandData/COVID-19 GitHub repository is downloaded.

    Returns
    -------
    df : pd dataframe
        the table exactly as parsed by ``pd.read_csv``
    '''
    if url is None:
        # An earlier revision of this notebook read
        # time_series_19-covid-Confirmed.csv from the same directory.
        url = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'

    return pd.read_csv(url)

In [16]:
# Download the confirmed-cases table and preview its first rows.
df = get_data()
df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,22,22,22,24,24,40,40,74,84,94
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,55,59,64,70,76,89,104,123,146,174
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,60,74,87,90,139,201,230,264,302,367
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,39,39,53,75,88,113,133,164,188,224
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,0,0,0,1,2,2,3,3,3,4


In [18]:
# Parse the label of the last column as a timestamp and report it as the latest update.
# NOTE(review): assumes the final column of df is a date label such as '3/26/20'.
ltime = pd.Timestamp(df.columns[-1])
print(f'The last date of update is {ltime:%b %d, %Y}')

The last date of update is Mar 26, 2020


In [19]:
def data_1cty(df, cty):
    ''' Sum the daily case counts over every row belonging to one country.

    Parameters
    ----------
    df : pd dataframe
        case table with a 'Country/Region' column; every column from the
        fifth one onward is summed
    cty : str
        the country name to look up in 'Country/Region'

    Returns
    -------
    series : pd Series or None
        per-column totals, indexed by a DatetimeIndex built from the column
        labels; None when no row matches ``cty``
    '''

    is_cty = df['Country/Region'] == cty

    # Guard clause: an unknown country has nothing to add up.
    if not is_cty.any():
        return None

    totals = df.loc[is_cty].iloc[:, 4:].sum(axis=0)
    totals.index = pd.DatetimeIndex(totals.index)
    return totals

In [20]:
def data_countries(df):
    ''' Build a date-by-country table of total daily cases.

    Parameters
    ----------
    df : pd dataframe
        case table with a 'Country/Region' column; every column from the
        fifth one onward is treated as a daily count (the same slice that
        data_1cty sums)

    Returns
    -------
    df_ctr : pd dataframe
        one column per country (sorted by name), one row per date, indexed by
        a DatetimeIndex built from the daily column labels
    '''
    daily = df.iloc[:, 4:]

    # A single grouped sum replaces the per-country filter-and-insert loop:
    # the column order is deterministic (sorted) and the frame is built in one go.
    totals = daily.groupby(df['Country/Region']).sum().T
    totals.index = pd.DatetimeIndex(totals.index)

    # Drop the 'Country/Region' axis label left behind by the groupby so the
    # result has plain, unnamed columns.
    totals.columns.name = None
    return totals

In [21]:
# Per-country totals: one column per country, built from the confirmed-cases table.
df_ctr = data_countries(df)

In [22]:
def plot_ctrs_ts(df_ctr, ctr_nm_all, colors=('blue', 'green', 'red', 'cyan', 'magenta', 'yellow')):
    ''' Plot the case time series of several countries on one log-scale Bokeh figure.

    Parameters
    ----------
    df_ctr : pd dataframe
        one column per country, indexed by date
    ctr_nm_all : list of str
        names of the df_ctr columns to draw
    colors : sequence of str, optional
        line/marker colors; reused cyclically when there are more countries
        than colors

    Returns
    -------
    None
        the figure is rendered in the notebook through ``show``

    Raises
    ------
    ValueError
        if ``colors`` is empty
    '''
    palette = list(colors)
    if not palette:
        raise ValueError('colors must contain at least one color')

    output_notebook()
    source = ColumnDataSource(df_ctr)

    # width/height are used instead of plot_width/plot_height, which newer
    # Bokeh releases no longer accept.
    p = figure(width=900, height=500, x_axis_type='datetime', y_axis_type='log')
    p.title.text = 'CoV19 cases'
    p.xaxis.axis_label = 'Date'
    p.yaxis.axis_label = 'Number of cases'
    p.yaxis.formatter = PrintfTickFormatter(format="%5f")

    # The hovered x value is a date, so it needs the datetime formatter.
    fmt = {'$x': 'datetime'}

    for i, ctr_nm in enumerate(ctr_nm_all):
        # Cycle through the palette so no country is silently dropped when
        # more countries than colors are requested.
        c = palette[i % len(palette)]

        p.line(x='index', y=ctr_nm, source=source, line_width=2, color=c, alpha=0.4)

        # Braces around the field name keep the tooltip working for column
        # names that contain spaces or punctuation (e.g. 'United Kingdom').
        ttp = [("date", "$x{%F}"), ("cases", "@{" + ctr_nm + "}{int}"), ("country", ctr_nm)]
        cr = p.circle(x='index', y=ctr_nm, source=source, fill_color=c, size=8, alpha=0.4, line_color=None,
                      hover_fill_color='firebrick', hover_line_color='firebrick',
                      legend_label=ctr_nm, muted_color=c, muted_alpha=1)

        # One hover tool per country, bound to that country's markers only.
        p.add_tools(HoverTool(tooltips=ttp, renderers=[cr], formatters=fmt))

    p.legend.location = 'top_left'
    p.legend.click_policy = 'mute'
    show(p)

In [23]:
# Draw the US, Italy and Spain series together on one chart.
plot_ctrs_ts(df_ctr, ['US','Italy','Spain'])