## Visualising COVID-19 data using ipyleaflet and bqplot

In [1]:
import os
import json

import numpy as np
import pandas as pd
import geopandas as gpd
import datetime
from branca.colormap import linear

from ipywidgets import link, Text, HTML

from bqplot import Lines, Figure, LinearScale, DateScale, Axis, OrdinalScale

from ipyleaflet import Map, GeoJSON, WidgetControl, GeoData, basemaps, FullScreenControl

### Geodata
"Natural lowres" from geopandas datasets. The columns we will be using are the 'name' column (contains country name) and the 'geometry' column (country geometry).



In [2]:
# read the data to a dataframe
countries = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
countries

Unnamed: 0,pop_est,continent,name,iso_a3,gdp_md_est,geometry
0,920938,Oceania,Fiji,FJI,8374.0,"MULTIPOLYGON (((180.00000 -16.06713, 180.00000..."
1,53950935,Africa,Tanzania,TZA,150600.0,"POLYGON ((33.90371 -0.95000, 34.07262 -1.05982..."
2,603253,Africa,W. Sahara,ESH,906.5,"POLYGON ((-8.66559 27.65643, -8.66512 27.58948..."
3,35623680,North America,Canada,CAN,1674000.0,"MULTIPOLYGON (((-122.84000 49.00000, -122.9742..."
4,326625791,North America,United States of America,USA,18560000.0,"MULTIPOLYGON (((-122.84000 49.00000, -120.0000..."
...,...,...,...,...,...,...
172,7111024,Europe,Serbia,SRB,101800.0,"POLYGON ((18.82982 45.90887, 18.82984 45.90888..."
173,642550,Europe,Montenegro,MNE,10610.0,"POLYGON ((20.07070 42.58863, 19.80161 42.50009..."
174,1895250,Europe,Kosovo,-99,18490.0,"POLYGON ((20.59025 41.85541, 20.52295 42.21787..."
175,1218208,North America,Trinidad and Tobago,TTO,43570.0,"POLYGON ((-61.68000 10.76000, -61.10500 10.890..."


### Coronavirus dataset
I am using dataframes from the Jons Hopkins university CSSEGISandData github page. The dataframes provided contain columns which show either the number of reported cases, number of deaths or the number of recovered patients. It also contains columns with country names which we will be using to link our dataframes. 
One thing i should point out is that the case numbers are divided by province/state of the country in question (eg. Canada is split into Alberta-Canada, British Columbia-Canada, Nova Scotia-Canada etc...). I would much prefer if the data was showing the sum of all cases so we will have to group the data by country.

In [3]:
#Jons Hopkins university CSSEGISandData raw 
corona_confirmed_cases_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
corona_deaths_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
corona_recovered_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

### Cleaning the data
Before we do anything with the data we need to make sure that most of the country names match. We can't match the data names completely since the 'naturalearth_lowres' dataset contains far more countries than the Jons Hopkins dataset.

In [4]:
def data_clean_countries(data):
    
    #matching names
    data = data.replace({'W. Sahara':'Western Sahara',
                                 'Dem. Rep. Congo':'Dr. Kongo',
                                 'Dominican Rep.':'Dominican Republic',
                                 'Central African Rep.':'Central African Republic',
                                 'Eq. Guinea':'Equatorial Guinea',
                                 'eSwatini':'Eswatini',
                                 'Bosnia and Herz.':'Bosnia and Herzegovina',
                                 'Macedonia':'North Macedonia',
                                 'S. Sudan':'South Sudan'})
    #use only columns that are relevant
    data = data[['name', 'geometry']]

    return data

countries = countries.apply(data_clean_countries, axis = 1)


In [5]:
def data_cleaning1(url):
    
    #reading data
    data = pd.read_csv(url)
    #renaming data column for easier typing
    data = data.rename(columns = {'Country/Region' : 'Country'})
    #matching names
    data = data.replace({'US':'United States of America',
                         'Congo (Kinshasa)':'Dr. Kongo',
                         'Congo (Brazzaville)': 'Kongo',
                         'Korea, South':'South Korea'})
    data  = data.drop(columns = ['Province/State', 'Lat', 'Long'])
    #grouping the data by 'Country' and calculating their sum
    data = data.groupby(['Country']).agg('sum')
    data = data.reset_index()
    
    return data

corona_confirmed_cases = data_cleaning1(corona_confirmed_cases_url)
corona_deaths = data_cleaning1(corona_deaths_url)
corona_recovered = data_cleaning1(corona_recovered_url)

In [6]:
#function used to combine all the separate date values into a single list and parsing the list into column of its own
def cleaning_data2(row, new_column):
    
    row[new_column] = np.array(row[1:].values)
    row = row[['Country', new_column]]
    
    return row


In [7]:
corona_confirmed_cases = corona_confirmed_cases.apply(cleaning_data2, new_column = 'Reported cases', axis = 1)
corona_deaths = corona_deaths.apply(cleaning_data2, new_column = 'Deaths', axis = 1)
corona_recovered = corona_recovered.apply(cleaning_data2, new_column = 'Recovered', axis = 1)

In [8]:
#merging the data into a single pd dataframe
corona_data = pd.merge(pd.merge(corona_confirmed_cases, corona_deaths, on='Country'), corona_recovered, on='Country')
corona_data

Unnamed: 0,Country,Reported cases,Deaths,Recovered
0,Afghanistan,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Albania,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,Algeria,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,Andorra,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,Angola,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...
180,West Bank and Gaza,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
181,Western Sahara,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
182,Yemen,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
183,Zambia,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [9]:
#The data doesn't contain the number of active cases since it can be easily calculated as: Rep.cases - Deaths - A.cases
def get_active_cases(row, target_col = ['Reported cases', 'Deaths', 'Recovered'], new_col = 'Active cases'):
    
    row[new_col] = row[target_col[0]] - row[target_col[1]] - row[target_col[2]]
    
    return row

corona_data = corona_data.apply(get_active_cases, axis = 1)
corona_data

Unnamed: 0,Country,Reported cases,Deaths,Recovered,Active cases
0,Afghanistan,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,Albania,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,Algeria,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,Andorra,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,Angola,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...
180,West Bank and Gaza,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
181,Western Sahara,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
182,Yemen,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
183,Zambia,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


### Ipyleaflet and bqplot
We will be using lines to represent our data. For Bqplot.marks.Lines scales for "x" and "y" MUST be provided. The x axis will show dates so we will need to make a "date scale" and the y values are integer so it will use a linear scale. The y values need to be type np.int32 or np.float64 in order to work so we will need to convert it.

In [10]:
#The x axis will show dates so we will need to extract the column names and make datetime data out of them. 
#his will also be used to make a scale for our x values.

def get_date(url):
    
    data = pd.read_csv(url)
    #converting values into datetime
    date = [datetime.datetime.strptime(i+'20', '%m/%d/%Y') for i in  data.iloc[:,4:].columns.values]
    date_start = date[0]
    date_end = date[-1]
    #creating a scale for our x values
    date_scale = DateScale(min=date_start, max=date_end)
    date_data = pd.DatetimeIndex(date, freq='D')

    return date_scale, date_data
    

date_scale, date_data = get_date(corona_confirmed_cases_url)

#y values use a linear scale
y_scale = LinearScale()

#setting a value for y (any value)
#bqplot x and y takes only numpys data types x has to be np.int32 or np.float64 or else it will not work hense the 'np.int_'
y_data = [np.int_(corona_data.loc[1:2]['Reported cases'].values[0]),
          np.int_(corona_data.loc[1:2]['Active cases'].values[0]),
          np.int_(corona_data[1:2]['Deaths'].values[0]),
          np.int_(corona_data[1:2]['Recovered'].values[0])]

#set country name (any name)
country_name = 'Italy'

In [11]:
lines = Lines(x=date_data, 
              y=y_data, 
              scales={'x': date_scale, 'y': y_scale}, 
              display_legend = True, 
              colors = ['#6b32a8','#3268a8', 'red', 'green'],
              labels=['Reported cases', 'Active cases','Deaths', 'Recovered'],
              )
 

ax_x = Axis(label='Date', 
            scale=date_scale, 
            num_ticks=10, 
            tick_style={'font-size': 11}, 
            label_offset='25')

ax_y = Axis(label='Reported cases', 
            scale=y_scale, 
            orientation='vertical', 
            side='left',
            label_offset='-50')

figure = Figure(axes=[ax_x, ax_y], 
                title=country_name, marks=[lines], 
                animation_duration=500,
                layout={'max_height': '300px', 'min_width': '550px'}, 
                legend_location = 'top-left',
                fig_margin={'top':40, 'bottom':45, 'left':65, 'right':10},
                legend_style = {'width': '120px'},
                legend_text = {'font-size':11, 'font-family':'Arial, Helvetica, sans-serif'})
figure

Figure(animation_duration=500, axes=[Axis(label='Date', label_offset='25', num_ticks=10, scale=DateScale(max=d…

In [12]:
#updating y values when you hover over countries
def update_figure(country_name):
    
    lines.y = [np.int_(corona_data.loc[corona_data['Country'] == country_name]['Reported cases'].values[0]),
               np.int_(corona_data.loc[corona_data['Country'] == country_name]['Active cases'].values[0]),
               np.int_(corona_data.loc[corona_data['Country'] == country_name]['Deaths'].values[0]), 
               np.int_(corona_data.loc[corona_data['Country'] == country_name]['Recovered'].values[0])]
    
    figure.title = country_name

In [13]:
#creating a map with Esri basemap

m = Map(zoom=2, basemap= basemaps.Esri.WorldTopoMap, scroll_wheel_zoom = True)
#we added a hover style to the map
geo = GeoData(geo_dataframe=countries,
              style={'opacity':0.05, 'color': 'black', 'fillColor': 'white', 'weight': 0.5}, 
              hover_style={'fillColor': '#1f77b4'}, name='Countries',
             )

m.add_layer(geo)

m



Map(center=[0.0, 0.0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…

IndexError: index 0 is out of bounds for axis 0 with size 0

IndexError: index 0 is out of bounds for axis 0 with size 0

IndexError: index 0 is out of bounds for axis 0 with size 0

IndexError: index 0 is out of bounds for axis 0 with size 0

IndexError: index 0 is out of bounds for axis 0 with size 0

IndexError: index 0 is out of bounds for axis 0 with size 0

IndexError: index 0 is out of bounds for axis 0 with size 0

In [14]:
widget_control1 = WidgetControl(widget=figure, position='bottomright')

m.add_control(widget_control1)


def on_hover(event, feature, **kwargs):
    
    global country_name
    country_name = feature['properties']['name']
    #if you pass over a country that isnt in the corona dataframe an IndexError appears
    try:
        update_figure(country_name)
    except IndexError:
        pass
        
geo.on_hover(on_hover)



In [15]:
#a fullscreen button
control2 = FullScreenControl()
m.add_control(control2)


In [16]:
#adding a html widget to show precise numbers 
html = HTML('''Hover over a country''')
html.layout.margin = '0px 20px 20px 20px'
control = WidgetControl(widget=html, position='topright')
m.add_control(control)

In [18]:
#updating html values
def update_html(feature,  **kwargs):

        html.value = '''
        <h3><b>{}</b></h3>
        <h4>Reported cases: {}</h4> 
        <h4>Active cases: {}</h4>
        <h4>Deaths: {}</h4>
        <h4>Recovered: {}</h4>
    '''.format(country_name,
               np.int_(corona_data.loc[corona_data['Country'] == country_name]['Reported cases'].values[0][-1]),
               np.int_(corona_data.loc[corona_data['Country'] == country_name]['Active cases'].values[0][-1]),
               np.int_(corona_data.loc[corona_data['Country'] == country_name]['Deaths'].values[0][-1]), 
               np.int_(corona_data.loc[corona_data['Country'] == country_name]['Recovered'].values[0][-1]))

geo.on_hover(update_html)
