In [1]:
%load_ext lab_black

In [2]:
import os
import json
from typing import Tuple
import pycountry
import numpy as np
import pandas as pd
import geopandas as gpd
from bokeh.plotting import figure
from bokeh.io import output_notebook, show, output_file
from bokeh.models import (
    GeoJSONDataSource,
    HoverTool,
    LinearColorMapper,
)
from bokeh.palettes import brewer

In [3]:
output_notebook()

### Fetching data and normalizing

In [4]:
raw_confirmed_df = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv"
)

raw_death_df = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv"
)

raw_recovered_df = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv"
)

In [5]:
def melt_raw_df(df: pd.DataFrame, val_name: str) -> pd.DataFrame:
    return df.melt(
        id_vars=["Province/State", "Country/Region", "Lat", "Long"],
        var_name="EventDate",
        value_name=val_name,
    )

In [6]:
confirmed_df = melt_raw_df(raw_confirmed_df, "Confirmed")
death_df = melt_raw_df(raw_death_df, "Death")
recovered_df = melt_raw_df(raw_recovered_df, "Recovered")

In [7]:
combined_df = confirmed_df.merge(death_df, how="left").merge(recovered_df, how="left")

In [8]:
combined_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,EventDate,Confirmed,Death,Recovered
0,Anhui,Mainland China,31.82571,117.2264,1/22/20,1,0,0
1,Beijing,Mainland China,40.18238,116.4142,1/22/20,14,0,0
2,Chongqing,Mainland China,30.05718,107.874,1/22/20,6,0,0
3,Fujian,Mainland China,26.07783,117.9895,1/22/20,1,0,0
4,Gansu,Mainland China,36.0611,103.8343,1/22/20,0,0,0


In [9]:
combined_df[["Confirmed", "Death", "Recovered"]] = (
    combined_df[["Confirmed", "Death", "Recovered"]].fillna(0).astype(int)
)

In [10]:
combined_df["EventDate"] = pd.to_datetime(combined_df["EventDate"])

In [11]:
print(f'Total number of confirmed cases: {combined_df["Confirmed"].sum()}')

Total number of confirmed cases: 1058633


In [12]:
print(f'Death rate: {combined_df["Death"].sum()/combined_df["Confirmed"].sum() * 100}')

Death rate: 2.4678996403852893


In [13]:
print(
    f'Recovery rate: {combined_df["Recovered"].sum()/combined_df["Confirmed"].sum() * 100}'
)

Recovery rate: 13.420326024221804


#### View trend of outbreak

In [14]:
timeseries_df = (
    combined_df.groupby("EventDate")[["Confirmed", "Death", "Recovered"]]
    .sum()
    .reset_index()
)

In [15]:
timeseries_df.head()

Unnamed: 0,EventDate,Confirmed,Death,Recovered
0,2020-01-22,555,17,28
1,2020-01-23,653,18,30
2,2020-01-24,941,26,36
3,2020-01-25,1434,42,39
4,2020-01-26,2118,56,52


In [16]:
p = figure(
    title="COVID-19 Trend",
    height=350,
    sizing_mode="scale_width",
    x_axis_type="datetime",
)

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.outline_line_color = None
p.yaxis.visible = False

names = ["Confirmed", "Death", "Recovered"]

p.varea_stack(
    stackers=names,
    x="EventDate",
    color=brewer["YlGnBu"][3],
    legend_label=names,
    source=timeseries_df,
)

p.legend.location = "top_left"
p.toolbar.logo = None
p.toolbar_location = None

show(p)

#### View map of outbreak

In [17]:
country_df = (
    combined_df.groupby("Country/Region")[["Confirmed", "Death", "Recovered"]]
    .sum()
    .reset_index()
)

In [18]:
country_df.head()

Unnamed: 0,Country/Region,Confirmed,Death,Recovered
0,Australia,343,0,111
1,Belgium,18,0,5
2,Cambodia,26,0,10
3,Canada,149,0,12
4,Egypt,8,0,0


In [19]:
shapefile = os.path.join(
    "..", "data", "countries_110m", "ne_110m_admin_0_countries.shp"
)
gdf = gpd.read_file(shapefile)[["ADMIN", "ADM0_A3", "geometry"]]

In [20]:
code_mapping = {
    "Mainland China": "China",
    "UK": "United Kingdom",
    "South Korea": "Korea, Republic of",
    "Macau": "Macao",
    "Others": "",
}


def get_country_code(country: str) -> str:
    if country == "Others":
        return ""

    country = code_mapping.get(country, country)
    try:
        return pycountry.countries.search_fuzzy(country)[0].alpha_3
    except:
        return ""

In [21]:
country_df["ADM0_A3"] = (
    country_df["Country/Region"]
    .apply(lambda country: get_country_code(country))
    .tolist()
)

In [22]:
new_gdf = gdf.merge(country_df, how="left", on="ADM0_A3")

In [23]:
new_gdf[["Confirmed", "Death", "Recovered"]] = new_gdf[
    ["Confirmed", "Death", "Recovered"]
].fillna(0)

In [24]:
gdf_json = json.loads(new_gdf.to_json())
grid = json.dumps(gdf_json)

In [25]:
geosource = GeoJSONDataSource(geojson=grid)

palette = brewer["YlGnBu"][8]

palette = palette[::-1]

color_mapper = LinearColorMapper(palette=palette, low=0, high=1000)

hover = HoverTool(
    tooltips=[
        ("Country Name", "@ADMIN"),
        ("Confirmed", "@Confirmed"),
        ("Death", "@Death"),
        ("Recovered", "@Recovered"),
    ]
)

p = figure(
    tools=[hover, "wheel_zoom"],
    active_scroll="wheel_zoom",
    height=350,
    sizing_mode="scale_width",
)

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.outline_line_color = None
p.axis.visible = False


p.patches(
    "xs",
    "ys",
    source=geosource,
    fill_color={"field": "Confirmed", "transform": color_mapper},
    line_color="black",
    line_width=0.25,
    fill_alpha=1,
)


p.toolbar.logo = None
p.toolbar_location = None

In [None]:
show(p)