# COVID-19 in the Czech Republic

In [None]:
# Imports
import geopandas as gpd
import pandas as pd
from math import ceil
import json
import os
from datetime import datetime
import requests

# Bokeh
from bokeh.io import output_notebook, show, output_file, export_png
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, LogColorMapper, ColorBar, LogTicker
from bokeh import palettes

# Image Processing
from PIL import Image, ImageDraw, ImageFont

In [65]:
# Sources
covid_data_source = "https://onemocneni-aktualne.mzcr.cz/api/v2/covid-19/kraj-okres-nakazeni-vyleceni-umrti.csv"
# looks like they've removed this data set, might have to shift source to another one?

# covid_data_source = "https://onemocneni-aktualne.mzcr.cz/api/v2/covid-19/kraj-okres-testy.csv"
covidfile = './sources/kraj-okres-nakazeni-vyleceni-umrti.csv'

shapefile = './sources/cz_shapefile/JTSK/SPH_OKRES.shp'

In [66]:
# Get fresh covid data if file more than 1 day old

def hours_since_modified(target):
    now = datetime.timestamp(datetime.now())
    target = os.stat(target).st_mtime
    diff_hours = ( now - target ) / 60 / 60
    return round(diff_hours)

def download_covid_data(url):
    r = requests.get(url)
    with open(covidfile, 'w') as f:
        f.write(r.text)

if not os.path.exists(covidfile) or hours_since_modified(covidfile) < 24:
    print('file is {} hours old. Updating file..'.format(hours_since_modified(covidfile)), end="")
    download_covid_data(covid_data_source)
    size = round(os.path.getsize(covidfile) / (1024*1024), 2)
    with open(covidfile) as f:
        last_line = f.readlines()[-1]
    print('Downloaded {} MB!\nLast line: {}'.format(size, last_line))

file is 0 hours old. Updating file..Downloaded 0.86 MB!
Last line: 2021-01-25,,,29,1908,0



In [67]:
# Load into DataFrames

# Read data into Pandas
gdf = gpd.read_file(shapefile)[['KOD_LAU1','NAZEV_LAU1','geometry']]
df = pd.read_csv(covidfile)

# Simplify topology 
gdf['geometry'] = gdf['geometry'].simplify(300) # Probably need to preserve_topology=True

# Drop NaN values
df.dropna(inplace=True)

In [68]:
df

Unnamed: 0,datum,kraj_nuts_kod,okres_lau_kod,kumulativni_pocet_nakazenych,kumulativni_pocet_vylecenych,kumulativni_pocet_umrti
0,2020-03-01,CZ010,CZ0100,2,0,0
1,2020-03-01,CZ020,CZ020A,0,0,0
2,2020-03-01,CZ020,CZ020B,0,0,0
3,2020-03-01,CZ020,CZ020C,0,0,0
4,2020-03-01,CZ020,CZ0201,0,0,0
...,...,...,...,...,...,...
25812,2021-01-25,CZ080,CZ0802,20736,19518,391
25813,2021-01-25,CZ080,CZ0803,23484,21864,429
25814,2021-01-25,CZ080,CZ0804,13546,12625,182
25815,2021-01-25,CZ080,CZ0805,20688,19227,289


In [69]:
gdf

Unnamed: 0,KOD_LAU1,NAZEV_LAU1,geometry
0,CZ0100,Hlavní město Praha,"POLYGON ((-736538.020 -1053708.250, -737677.83..."
1,CZ0201,Benešov,"POLYGON ((-746500.570 -1072617.070, -746261.63..."
2,CZ0202,Beroun,"POLYGON ((-760901.670 -1049328.700, -761489.83..."
3,CZ0203,Kladno,"POLYGON ((-776276.650 -1024382.940, -776267.08..."
4,CZ0204,Kolín,"POLYGON ((-675684.020 -1058939.760, -677411.67..."
...,...,...,...
72,CZ0806,Ostrava-město,"POLYGON ((-468269.230 -1094196.750, -467350.78..."
73,CZ0724,Zlín,"POLYGON ((-511885.360 -1151869.790, -511474.07..."
74,CZ0721,Kroměříž,"POLYGON ((-559576.450 -1158750.950, -558171.22..."
75,CZ0722,Uherské Hradiště,"POLYGON ((-536685.800 -1172033.730, -535832.16..."


In [70]:
# Calculate and add columns with new cases per day and 7 day running sum of new cases

df_extended = pd.DataFrame()

# iterate through regions
for okres in df.okres_lau_kod.unique():
    
    # Subselect items for current region only
    okres_data = df.loc[ df.okres_lau_kod == okres].sort_values('datum')
    
    # Cases
    okres_data['new_cases'] =  okres_data.kumulativni_pocet_nakazenych - okres_data.kumulativni_pocet_nakazenych.shift(1)
    okres_data['week_total'] = okres_data['new_cases'].rolling(7).sum()
    
    # Deaths (WIP)
    okres_data['new_deaths'] =  okres_data.kumulativni_pocet_umrti - okres_data.kumulativni_pocet_umrti.shift(1)
    okres_data['week_deaths'] = okres_data['new_deaths'].rolling(7).sum()
    
    df_extended = df_extended.append(okres_data)

df_extended.fillna(0, inplace=True)

max_cases = ceil( df_extended['week_total'].max() / 1000 ) * 1000
max_deaths = ceil( df_extended['week_deaths'].max() / 20 ) * 20

In [None]:
# Iterate over days and export visualisation to PNG
for date in df_extended.datum.unique():
    
    # Limit to one day for now
    df_today = df_extended.loc[df_extended['datum'] == str(date)]
    
    # Merge datasets
    
    # A: New Cases
#     merged = gdf.merge(df_today, left_on='KOD_LAU1', right_on='okres_lau_kod')[
#         ['okres_lau_kod','NAZEV_LAU1','datum','geometry','new_cases','week_total']
#     ]
    
    # B: Deaths
    merged = gdf.merge(df_today, left_on='KOD_LAU1', right_on='okres_lau_kod')[
        ['okres_lau_kod','NAZEV_LAU1','datum','geometry','new_deaths','week_deaths']
    ]
    
    merged_json = json.loads(merged.to_json())
    
    json_data = json.dumps(merged_json)

    # Export JSON to file if ya wanna
#     with open('merged_json.json', 'w') as f:
#         json.dump(merged_json, f)

    geosource = GeoJSONDataSource(geojson = json_data)

    # Color palette
    palette = palettes.Plasma256

    # Invert pallete so that highest number is darkest
    palette = palette[::-1]
    palette = tuple(list(['#e4e4e4']) + list(palette)) # Add gray for 0 value color

    # A: Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
    color_mapper = LinearColorMapper(palette = palette, low = 1, high = max_deaths)
    
    # B: Or do it in Log
#     color_mapper = LogColorMapper(palette = palette, low = 1e0, high = 1e5)
#     color_mapper = LogColorMapper(palette = palette, low = 1, high = 10000)
    
    #Create color bar. 
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=8,width = 500, height = 15,
        border_line_color='white',location = (0,50), orientation = 'horizontal')
        # removed ticker=LogTicker() for log
    
    #Create figure object.
    p = figure(title = None, plot_height = 600 , plot_width = 950, toolbar_location = None)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    p.axis.visible = False

    #Add patch renderer to figure. 
    p.patches('xs','ys', 
              source = geosource,fill_color = {'field' :'week_deaths', 'transform' : color_mapper},
              line_color = 'white', line_width = 1, fill_alpha = 1)

    #Specify figure layout.
    p.add_layout(color_bar, 'below')
    
    # Export to PNG
    export_png(p, filename=f'./output/frames/DEATHS_v2_{date}.png')

    #Display figure inline in Jupyter Notebook.
    output_notebook()
    show(p)

## Add Text and Export to GIF

In [85]:
# Iterate over exported PNGs, add text, combine to GIF

frames = []
ms_per_frame = 15
    
for date in df.datum.unique():
    img = Image.open(f'./output/frames/DEATHS_v2_{date}.png')
    draw = ImageDraw.Draw(img)
    
    # Add large date title
    fnt = ImageFont.truetype('./fonts/Ubuntu-Medium.ttf', size=50)
    draw.text((640, 40), date, fill="black", font=fnt)
    
    # Add title to legend
    fnt = ImageFont.truetype('./fonts/Ubuntu-Medium.ttf', size=12)
    draw.text((30, 470), "CZECH REPUBLIC", fill="black", font=fnt)
    
    # Add title to legend
    fnt = ImageFont.truetype('./fonts/Ubuntu-Medium.ttf', size=12)
    draw.text((30, 485), "COVID-19 DEATHS IN LAST 7 DAYS", fill="black", font=fnt)

    frames.append(img)

for i in range(0, round(2000/ms_per_frame)):
    frames.append(frames[-1])
    
# gif_palette = ImagePalette.ImagePalette(mode='HEX', palette=list(palette[:-1]), size=256)
    
frames[0].save('./output/export_DEATHS_v2_2021-02-01.gif', format='GIF',
               append_images=frames[1:], save_all=True, duration=ms_per_frame, 
               disposal=1)