In [None]:
from urllib.request import Request, urlopen
import json
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

from bs4 import BeautifulSoup

In [None]:
# fetch data
# A browser-like user-agent is sent with the request; `headers` is reused by the
# country-mapping request further down.
headers = {'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'}
req = Request('https://thevirustracker.com/timeline/map-data.json',
              headers=headers)
# Bound the wait so a stalled connection raises instead of hanging the kernel.
with urlopen(req, timeout=30) as res:
    data = json.load(res)

In [None]:
# obtain country mapping
# Scrape the first <tbody> of the API page. For every row, the first two characters
# of the first link's text become the key and the first cell's text becomes the value;
# the plotting functions use the value as the display label for a country code.
req = Request('https://thevirustracker.com/api',
              headers=headers)
# Bound the wait so a stalled connection raises instead of hanging the kernel.
with urlopen(req, timeout=30) as res:
    html = res.read().decode('utf-8')
soup = BeautifulSoup(html, 'html.parser')
result = soup.find('tbody')
if result is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError('no <tbody> found in the API page; cannot build country_map')
country_map = {child.a.string[:2]: child.td.string for child in result.find_all('tr')}
# 'Others' is the index label top_countries_proportion gives to the summed remainder.
country_map['Others'] = 'Others'

In [None]:
# spot check: show the entry stored under the code 'BH'
country_map['BH']

In [None]:
def intize(i):
    """Convert ``i`` to ``int``, returning ``0`` when the conversion fails.

    Parameters
    ----------
    i : object
        Any value accepted by ``int()``; other values are tolerated.

    Returns
    -------
    int
        ``int(i)``, or ``0`` if ``int(i)`` raises ``TypeError`` (e.g. ``None``),
        ``ValueError`` (e.g. ``''``, ``'abc'``, NaN) or ``OverflowError``
        (e.g. infinity).
    """
    try:
        return int(i)
    # Catch only conversion failures: a bare ``except`` would also swallow
    # KeyboardInterrupt/SystemExit and make a long-running cell uninterruptible.
    except (TypeError, ValueError, OverflowError):
        return 0

In [None]:
# clean data
df = pd.DataFrame(data['data'])
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%y')
# Coerce the three counters to int; anything intize cannot convert becomes 0.
# Column-wise Series.map is used because DataFrame.applymap is deprecated in
# recent pandas releases.
count_columns = ['cases', 'deaths', 'recovered']
df[count_columns] = df[count_columns].apply(lambda column: column.map(intize))
# Reassign instead of sorting in place; later cells rely on rows being ordered
# by date within each country.
df = df.sort_values(['countrycode', 'date'])
df.head()

In [None]:
# display the cleaned frame (sorted by countrycode, then date)
df

In [None]:
# index by (countrycode, date) so that df_country.loc[code] selects one country's
# rows, indexed by date
df_country = df.set_index(['countrycode', 'date'])
df_country

In [None]:
# Take the last value of every column per country (df is sorted by date within
# each country), then build one descending ranking per metric.
today = df.groupby('countrycode').last()
today_cases, today_deaths, today_recovered = (
    today.sort_values(metric, ascending=False)
    for metric in ('cases', 'deaths', 'recovered')
)

In [None]:
# arguments for plotting, unpacked positionally as (data, category, cmap):
# the ranked series, the word inserted into plot titles, and a matplotlib colormap name
cases = [today_cases['cases'], 'infections', 'Oranges']
deaths = [today_deaths['deaths'], 'deaths', 'Reds']
# NOTE: the variable is spelled `recoverd` (sic); the plotting cells below use this spelling
recoverd = [today_recovered['recovered'], 'recovered', 'Greens']

In [None]:
textprops = dict(fontsize='large', fontfamily='sans serif')


def top_countries_proportion(data, category, cmap, n=8):
    """Draw a pie chart of the first ``n`` entries of ``data`` plus an 'Others' slice.

    Parameters
    ----------
    data : pandas.Series
        Values indexed by country code. The first ``n`` entries are shown
        individually, so the series is expected to be sorted in descending order.
    category : str
        Word inserted into the chart title.
    cmap : str
        Name of the matplotlib colormap used to shade slices by value.
    n : int, default 8
        Number of leading entries shown individually; all remaining entries are
        summed into a single slice labelled 'Others'.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    leading = data.iloc[:n]
    remainder = data.iloc[n:].sum()
    # pd.concat replaces Series.append, which was removed in pandas 2.0.
    today_top = pd.concat([leading, pd.Series(data=remainder, index=['Others'])])

    fig, ax = plt.subplots(figsize=(16, 6.6))
    # One offset per slice, sized from the data so inputs shorter than n still work.
    explode = np.full(len(today_top), 0.03)
    # Widen the value range so neither extreme lands on the very end of the colormap.
    norm = mpl.colors.Normalize(vmin=today_top.min()/1.25, vmax=today_top.max()*1.25)
    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in matplotlib 3.9.
    colors = plt.get_cmap(cmap)(norm(today_top))
    # Fall back to the raw code when it has no entry in country_map.
    slice_labels = [country_map.get(code, code) for code in today_top.index]
    patches, texts, autotexts = ax.pie(today_top,
                                       labels=slice_labels,
                                       pctdistance=0.7, explode=explode,
                                       autopct='%.1f%%', textprops=textprops,
                                       colors=colors)
    for t, p in zip(autotexts, patches):
        t.set_fontsize(10)
        # Use white percentage text when the product of the slice's RGBA
        # components is small (a cheap darkness heuristic).
        if np.array(p.get_facecolor()).prod() < 0.1:
            t.set_color('w')
    ax.set_title('Global {} proportions'.format(category))
    plt.show()

In [None]:
# pie chart: share of infections per country
top_countries_proportion(*cases)

In [None]:
# pie chart: share of deaths per country
top_countries_proportion(*deaths)

In [None]:
# pie chart: share of recovered per country (`recoverd` is the name defined in the arguments cell)
top_countries_proportion(*recoverd)

In [None]:
font_dict = dict(fontsize='large', fontfamily='sans serif')

def label(rects, ax):
    """Annotate every bar in ``rects`` with its width.

    ``rects`` is an iterable of bar patches (objects exposing ``get_width``,
    ``get_height`` and ``get_y``). Each annotation is anchored at
    ``x = width`` on the bar's vertical midpoint and shifted 20 points to the
    right; ``ax`` is the axes that receives the annotations.
    """
    for bar in rects:
        bar_length = bar.get_width()
        # vertical midpoint of the bar
        mid_y = bar.get_y() + bar.get_height() / 2
        ax.annotate(f'{bar_length}',
                    xy=(bar_length, mid_y),
                    xytext=(20, 0), textcoords="offset points",
                    ha='center', va='center')

def top_countries(data, category, cmap, n=20):
    """Draw a horizontal bar chart of the first ``n`` entries of ``data``.

    Parameters
    ----------
    data : pandas.Series
        Values indexed by country code; the first ``n`` entries are plotted, so
        the series is expected to be sorted in descending order.
    category : str
        Word inserted into the title and the x-axis label.
    cmap : str
        Name of the matplotlib colormap used to shade bars by value (log scale).
    n : int, default 20
        Maximum number of bars; fewer are drawn when ``data`` is shorter.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    data_top = data.iloc[:n]
    # Derive positions from the data actually plotted so ticks and labels always
    # match, even when fewer than n rows are available.
    positions = list(range(len(data_top)))

    fig, ax = plt.subplots(figsize=(15, 10))
    # LogNorm needs a strictly positive range; fall back to 1 when the smallest
    # (or largest) value is 0.
    lowest = data_top.min() if data_top.min() > 0 else 1
    highest = max(data_top.max(), lowest)
    norm = mpl.colors.LogNorm(vmin=lowest/1.5, vmax=highest*1.5)
    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in matplotlib 3.9.
    color = plt.get_cmap(cmap)(norm(data_top))
    rects = ax.barh(positions, data_top, color=color)
    label(rects, ax)

    # Label the axis with the plotted metric instead of a hard-coded 'cases'.
    ax.set_xlabel('{} ($\\times10^3$)'.format(category), fontdict=font_dict)
    # Pin the tick positions before replacing their labels, otherwise the labels
    # can drift from the ticks on redraw. set_xticks may widen the view, so the
    # limits are restored afterwards.
    xlim = ax.get_xlim()
    ticks = ax.get_xticks()
    ax.set_xticks(ticks)
    # fontdict is keyword-only for set_*ticklabels in current matplotlib.
    ax.set_xticklabels(ticks / 1e3, fontdict=font_dict)
    ax.set_xlim(xlim)
    ax.set_yticks(positions)
    # Fall back to the raw code when it has no entry in country_map.
    ax.set_yticklabels([country_map.get(code, code) for code in data_top.index],
                       fontdict=font_dict)
    # Largest value on top.
    ax.invert_yaxis()
    ax.set_title('Countries with the most {}'.format(category), fontdict=font_dict)
    ax.grid(True, axis='x')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    plt.show()

In [None]:
# bar chart: countries with the most infections
top_countries(*cases)

In [None]:
# bar chart: countries with the most deaths
top_countries(*deaths)

In [None]:
# bar chart: countries with the most recovered (`recoverd` is the name defined in the arguments cell)
top_countries(*recoverd)

In [None]:
colors = ['tab:orange', 'tab:red', 'tab:green']

def draw_timeline(countrycode):
    """Plot one country's time series and save the figure as a PNG.

    One line is drawn per column of ``df_country.loc[countrycode]``, paired in
    column order with the module-level ``colors``. The latest value is written
    at the end of each line, and the figure is saved as
    ``./country_timeline/<display name>.png`` (the directory is created when
    missing). The figure is left open; the caller is responsible for closing it.

    Parameters
    ----------
    countrycode : str
        First-level key of ``df_country``. It is also looked up in
        ``country_map`` for the title and file name; the code itself is used
        when it has no entry there.
    """
    fig, ax = plt.subplots(figsize=(9, 6))
    data = df_country.loc[countrycode]
    # Most recent row (rows are ordered by date within each country).
    latest = data.iloc[-1]
    d, r = latest['deaths'], latest['recovered']
    # The third series' value label is nudged up or down depending on how deaths
    # compare with recovered, presumably to keep the two labels from overlapping.
    # The 1e-10 avoids a division by zero.
    third_offset = 5 if (d - r) / (r + 1e-10) <= 0.15 else -5
    for (series_name, content), color, dy in zip(data.items(), colors, [0, 0, third_offset]):
        last_value = content.iloc[-1]
        ax.plot(content, label=series_name, color=color)
        # Anchor the label at the last data point. 'data' coordinates let
        # matplotlib convert the date through the axis' unit converter.
        ax.annotate(str(last_value), xy=(content.index[-1], last_value),
                    xycoords='data', xytext=(0, dy), textcoords='offset points',
                    va='center', ha='left', fontsize=8)
    display_name = country_map.get(countrycode, countrycode)
    ax.set_title('{} ({})'.format(display_name, countrycode))
    ax.get_xaxis().set_major_formatter(mpl.dates.DateFormatter('%m-%d'))
    ax.grid()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.legend()
    out_dir = './country_timeline'
    # savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)
    plt.savefig(os.path.join(out_dir, '{}.png'.format(display_name)))

In [None]:
# render and save one timeline per country; close the current figure after each
# call so open figures do not accumulate across the loop
for countrycode in today.index:
    draw_timeline(countrycode)
    plt.close()