Please head over to this website to view the results: https://walterra.github.io/covid-19/

*I really appreciate it if you're interested in the code; however, proceed with caution: all of this is a bunch of spaghetti code that gets the job done. There's a lot of room to improve things and make the Python code more readable and simple. (It wouldn't change a lot about the generated Vega specs though; they are fine, thanks to Altair!)*

In [1]:
import altair as alt
import json
import pandas as pd

In [2]:
# County codes used as column-name prefixes in the data set.
counties = ['B', 'K', 'Noe', 'Ooe', 'S', 'St', 'T', 'V', 'W']
# Column selections per metric: the timestamp column followed by one
# '<code>_<metric>' column for every county code, in the same order.
counties_confirmed = ['dataTime'] + [code + '_confirmed' for code in counties]
counties_death = ['dataTime'] + [code + '_death' for code in counties]
counties_recovered = ['dataTime'] + [code + '_recovered' for code in counties]



In [3]:
# Load the raw data and parse the 'dataTime' strings (day.month.year h:m:s).
df = pd.read_csv("../data/corin_data.csv")
# Use this line if only morning data is available and not the full day (removes last row)
# df = df[:-1]
df = df.assign(dataTime=pd.to_datetime(df['dataTime'], format='%d.%m.%Y %H:%M:%S'))

# Ignore the 9:00 numbers: keep only rows stamped after the 12 o'clock hour.
df = df.loc[df['dataTime'].dt.hour > 12]

# Timestamp plus the per-county confirmed columns; preview the last rows.
df_confirmed = df.loc[:, counties_confirmed]
df_confirmed.tail()

Unnamed: 0,dataTime,B_confirmed,K_confirmed,Noe_confirmed,Ooe_confirmed,S_confirmed,St_confirmed,T_confirmed,V_confirmed,W_confirmed
312,2020-09-07 15:00:00,548.0,666.0,4309.0,4923.0,1749.0,2654.0,4453.0,1185.0,9086.0
314,2020-09-08 15:00:00,557.0,672.0,4389.0,4969.0,1756.0,2666.0,4512.0,1210.0,9367.0
316,2020-09-09 15:00:00,567.0,674.0,4416.0,5043.0,1766.0,2700.0,4582.0,1249.0,9638.0
318,2020-09-10 15:00:00,576.0,680.0,4505.0,5086.0,1788.0,2733.0,4691.0,1271.0,9983.0
320,2020-09-11 15:00:00,588.0,686.0,4595.0,5154.0,1797.0,2782.0,4791.0,1329.0,10414.0


In [4]:
def c_line(att='total_confirmed',col='blue',scaleType='linear',label='value',highlight=False):
    """Draw one column of ``df_confirmed`` as a daily line chart.

    Rows where the column is not above zero are dropped, ``dataTime`` is
    truncated to the calendar day, and the per-day maximum of the column is
    plotted on a 180x120 chart.

    Args:
        att: Name of the column to plot.
        col: Color applied to every mark.
        scaleType: Scale type of the y axis (for example ``'linear'``).
        label: Tooltip title shown for the plotted value.
        highlight: When exactly ``False`` only the line is returned; any
            other value adds the latest-value label, the end-point dot and
            the hover targets.

    Returns:
        The line chart, or a layered chart when highlighting is requested.
    """
    def titled_x():
        # x channel with an explicitly blank title (line and hover layers)
        return alt.X('date:T', title='')

    def axis_y():
        # y channel with blank title and a minimum axis extent of 50
        return alt.Y('max_att:Q', scale=alt.Scale(type=scaleType), title='', axis=alt.Axis(minExtent=50))

    def bare_y():
        # y channel with blank title and no axis override (annotation layers)
        return alt.Y('max_att:Q', scale=alt.Scale(type=scaleType), title='')

    per_day = alt.Chart(df_confirmed, width=180, height=120)
    per_day = per_day.transform_filter(alt.datum[att] > 0)
    per_day = per_day.transform_timeunit(date='yearmonthdate(dataTime)')
    per_day = per_day.transform_aggregate(max_att='max(' + att + ')', groupby=['date'])

    line = per_day.mark_line(color=col, size=2).encode(titled_x(), axis_y())

    if highlight is False:
        return line

    # Rank the days newest-first and keep rank 1, i.e. the most recent day.
    newest = per_day.transform_window(
        sort=[alt.SortField("date", order="descending")],
        rank="rank(date)"
    ).transform_filter(alt.datum.rank == 1)

    # Latest value printed to the right of the line's end point.
    value_label = newest.mark_text(align='left', color=col, dx=5, width=500).encode(
        alt.X('date:T'),
        bare_y(),
        text=alt.Text("max_att:Q")
    )

    # Transparent fixed-width text so every small multiple has the same right padding.
    spacer_label = newest.mark_text(align='left', color=col, dx=5, opacity=0).encode(
        alt.X('date:T'),
        bare_y(),
        text=alt.value('100000')
    )

    # Solid dot marking the end point of the line.
    end_dot = newest.mark_circle(color=col, opacity=1).encode(
        alt.X('date:T'),
        bare_y()
    )

    # Invisible, enlarged circles on every day that carry the tooltip.
    hover_targets = per_day.mark_circle(color=col, size=200, opacity=0).encode(
        titled_x(),
        axis_y(),
        tooltip=[alt.Tooltip('date:T', title='Datum'), alt.Tooltip('max_att:Q', title=label)]
    )

    return line + spacer_label + value_label + end_dot + hover_targets

# ['B', 'K', 'Noe', 'Ooe', 'S', 'St', 'T', 'V', 'W']
def county_sm(highlight='T',title='Tirol'):
    """Build one small multiple of confirmed cases with a single county emphasized.

    All nine county lines are layered; the county whose code equals
    ``highlight`` is drawn in the accent color and the rest in light gray.
    On top of those, the highlighted county is drawn once more with the
    annotations that ``c_line`` adds in highlight mode.

    Args:
        highlight: County code to emphasize (prefix of the ``_confirmed`` column).
        title: Chart title, also used as the tooltip label of the emphasized line.

    Returns:
        The layered chart with ``title`` set.
    """
    accent = '#1f77b4'
    muted = '#ddd'
    county_names = (
        ('B', 'Burgenland'),
        ('K', 'Kärnten'),
        ('Noe', 'Niederösterreich'),
        ('Ooe', 'Oberösterreich'),
        ('S', 'Salzburg'),
        ('St', 'Steiermark'),
        ('T', 'Tirol'),
        ('V', 'Vorarlberg'),
        ('W', 'Wien'),
    )

    layers = []
    for code, name in county_names:
        shade = accent if highlight == code else muted
        layers.append(c_line(code + '_confirmed', shade, 'linear', name))

    # Annotated copy of the emphasized county goes last so it sits on top.
    layers.append(c_line(highlight + '_confirmed', accent, 'linear', title, True))

    return alt.layer(*layers).properties(title=title)

# Lay the nine county panels out as a 3x3 grid: `|` places charts side by
# side within a row, `&` stacks the rows vertically.
row_top = county_sm('B', 'Burgenland') | county_sm('K', 'Kärnten') | county_sm('Noe', 'Niederösterreich')
row_middle = county_sm('Ooe', 'Oberösterreich') | county_sm('S', 'Salzburg') | county_sm('St', 'Steiermark')
row_bottom = county_sm('T', 'Tirol') | county_sm('V', 'Vorarlberg') | county_sm('W', 'Wien')

# Shared look: light grid and axis colors, no view border, small gray titles.
chart_confirmed = (row_top & row_middle & row_bottom).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=12,
    fontWeight='normal',
    # anchor='start',  (disabled)
    color='gray'
)

# Last expression of the cell, so the notebook renders the chart.
chart_confirmed

In [5]:
# Timestamp plus the per-county death columns.
df_death = df.loc[:, counties_death]

def c_line(att='total_confirmed',col='blue',scaleType='linear',label='value',highlight=False):
    """Draw one column of ``df_death`` as a daily line chart.

    Rows where the column is not above zero are dropped, ``dataTime`` is
    truncated to the calendar day, and the per-day maximum of the column is
    plotted on a 180x120 chart.

    Args:
        att: Name of the column to plot.
        col: Color applied to every mark.
        scaleType: Scale type of the y axis (for example ``'linear'``).
        label: Tooltip title shown for the plotted value.
        highlight: When exactly ``False`` only the line is returned; any
            other value adds the latest-value label, the end-point dot and
            the hover targets.

    Returns:
        The line chart, or a layered chart when highlighting is requested.
    """
    def titled_x():
        # x channel with an explicitly blank title (line and hover layers)
        return alt.X('date:T', title='')

    def axis_y():
        # y channel with blank title and a minimum axis extent of 50
        return alt.Y('max_att:Q', scale=alt.Scale(type=scaleType), title='', axis=alt.Axis(minExtent=50))

    def bare_y():
        # y channel with blank title and no axis override (annotation layers)
        return alt.Y('max_att:Q', scale=alt.Scale(type=scaleType), title='')

    per_day = alt.Chart(df_death, width=180, height=120)
    per_day = per_day.transform_filter(alt.datum[att] > 0)
    per_day = per_day.transform_timeunit(date='yearmonthdate(dataTime)')
    per_day = per_day.transform_aggregate(max_att='max(' + att + ')', groupby=['date'])

    line = per_day.mark_line(color=col, size=2).encode(titled_x(), axis_y())

    if highlight is False:
        return line

    # Rank the days newest-first and keep rank 1, i.e. the most recent day.
    newest = per_day.transform_window(
        sort=[alt.SortField("date", order="descending")],
        rank="rank(date)"
    ).transform_filter(alt.datum.rank == 1)

    # Latest value printed to the right of the line's end point.
    value_label = newest.mark_text(align='left', color=col, dx=5, width=500).encode(
        alt.X('date:T'),
        bare_y(),
        text=alt.Text("max_att:Q")
    )

    # Transparent dummy text so every small multiple has the same right padding.
    spacer_label = newest.mark_text(align='left', color=col, dx=5, opacity=0).encode(
        alt.X('date:T'),
        bare_y(),
        text=alt.value('100000')
    )

    # Solid dot marking the end point of the line.
    end_dot = newest.mark_circle(color=col, opacity=1).encode(
        alt.X('date:T'),
        bare_y()
    )

    # Invisible, enlarged circles on every day that carry the tooltip.
    hover_targets = per_day.mark_circle(color=col, size=200, opacity=0).encode(
        titled_x(),
        axis_y(),
        tooltip=[alt.Tooltip('date:T', title='Datum'), alt.Tooltip('max_att:Q', title=label)]
    )

    return line + spacer_label + value_label + end_dot + hover_targets

# ['B', 'K', 'Noe', 'Ooe', 'S', 'St', 'T', 'V', 'W']
def county_sm(highlight='T',title='Tirol'):
    """Build one small multiple of deaths with a single county emphasized.

    All nine county lines are layered; the county whose code equals
    ``highlight`` is drawn in the accent color and the rest in light gray.
    On top of those, the highlighted county is drawn once more with the
    annotations that ``c_line`` adds in highlight mode.

    Args:
        highlight: County code to emphasize (prefix of the ``_death`` column).
        title: Chart title, also used as the tooltip label of the emphasized line.

    Returns:
        The layered chart with ``title`` set.
    """
    accent = '#d62728'
    muted = '#ddd'
    county_names = (
        ('B', 'Burgenland'),
        ('K', 'Kärnten'),
        ('Noe', 'Niederösterreich'),
        ('Ooe', 'Oberösterreich'),
        ('S', 'Salzburg'),
        ('St', 'Steiermark'),
        ('T', 'Tirol'),
        ('V', 'Vorarlberg'),
        ('W', 'Wien'),
    )

    layers = []
    for code, name in county_names:
        shade = accent if highlight == code else muted
        layers.append(c_line(code + '_death', shade, 'linear', name))

    # Annotated copy of the emphasized county goes last so it sits on top.
    layers.append(c_line(highlight + '_death', accent, 'linear', title, True))

    return alt.layer(*layers).properties(title=title)

# Lay the nine county panels out as a 3x3 grid: `|` places charts side by
# side within a row, `&` stacks the rows vertically.
row_top = county_sm('B', 'Burgenland') | county_sm('K', 'Kärnten') | county_sm('Noe', 'Niederösterreich')
row_middle = county_sm('Ooe', 'Oberösterreich') | county_sm('S', 'Salzburg') | county_sm('St', 'Steiermark')
row_bottom = county_sm('T', 'Tirol') | county_sm('V', 'Vorarlberg') | county_sm('W', 'Wien')

# Shared look: light grid and axis colors, no view border, small gray titles.
chart_death = (row_top & row_middle & row_bottom).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=12,
    fontWeight='normal',
    # anchor='start',  (disabled)
    color='gray'
)

# Last expression of the cell, so the notebook renders the chart.
chart_death

In [6]:
# Timestamp plus the per-county recovered columns.
df_recovered = df.loc[:, counties_recovered]

def c_line(att='total_confirmed',col='blue',scaleType='linear',label='value',highlight=False):
    """Draw one column of ``df_recovered`` as a daily line chart.

    Rows where the column is not above zero are dropped, ``dataTime`` is
    truncated to the calendar day, and the per-day maximum of the column is
    plotted on a 180x120 chart.

    Args:
        att: Name of the column to plot.
        col: Color applied to every mark.
        scaleType: Scale type of the y axis (for example ``'linear'``).
        label: Tooltip title shown for the plotted value.
        highlight: When exactly ``False`` only the line is returned; any
            other value adds the latest-value label, the end-point dot and
            the hover targets.

    Returns:
        The line chart, or a layered chart when highlighting is requested.
    """
    def titled_x():
        # x channel with an explicitly blank title (line and hover layers)
        return alt.X('date:T', title='')

    def axis_y():
        # y channel with blank title and a minimum axis extent of 50
        return alt.Y('max_att:Q', scale=alt.Scale(type=scaleType), title='', axis=alt.Axis(minExtent=50))

    def bare_y():
        # y channel with blank title and no axis override (annotation layers)
        return alt.Y('max_att:Q', scale=alt.Scale(type=scaleType), title='')

    per_day = alt.Chart(df_recovered, width=180, height=120)
    per_day = per_day.transform_filter(alt.datum[att] > 0)
    per_day = per_day.transform_timeunit(date='yearmonthdate(dataTime)')
    per_day = per_day.transform_aggregate(max_att='max(' + att + ')', groupby=['date'])

    line = per_day.mark_line(color=col, size=2).encode(titled_x(), axis_y())

    if highlight is False:
        return line

    # Rank the days newest-first and keep rank 1, i.e. the most recent day.
    newest = per_day.transform_window(
        sort=[alt.SortField("date", order="descending")],
        rank="rank(date)"
    ).transform_filter(alt.datum.rank == 1)

    # Latest value printed to the right of the line's end point.
    value_label = newest.mark_text(align='left', color=col, dx=5, width=500).encode(
        alt.X('date:T'),
        bare_y(),
        text=alt.Text("max_att:Q")
    )

    # Transparent fixed-width text so every small multiple has the same right padding.
    spacer_label = newest.mark_text(align='left', color=col, dx=5, opacity=0).encode(
        alt.X('date:T'),
        bare_y(),
        text=alt.value('100000')
    )

    # Solid dot marking the end point of the line.
    end_dot = newest.mark_circle(color=col, opacity=1).encode(
        alt.X('date:T'),
        bare_y()
    )

    # Invisible, enlarged circles on every day that carry the tooltip.
    hover_targets = per_day.mark_circle(color=col, size=200, opacity=0).encode(
        titled_x(),
        axis_y(),
        tooltip=[alt.Tooltip('date:T', title='Datum'), alt.Tooltip('max_att:Q', title=label)]
    )

    return line + spacer_label + value_label + end_dot + hover_targets

# ['B', 'K', 'Noe', 'Ooe', 'S', 'St', 'T', 'V', 'W']
def county_sm(highlight='T',title='Tirol'):
    """Build one small multiple of recoveries with a single county emphasized.

    All nine county lines are layered; the county whose code equals
    ``highlight`` is drawn in the accent color and the rest in light gray.
    On top of those, the highlighted county is drawn once more with the
    annotations that ``c_line`` adds in highlight mode.

    Args:
        highlight: County code to emphasize (prefix of the ``_recovered`` column).
        title: Chart title, also used as the tooltip label of the emphasized line.

    Returns:
        The layered chart with ``title`` set.
    """
    accent = '#ff7f0e'
    muted = '#ddd'
    county_names = (
        ('B', 'Burgenland'),
        ('K', 'Kärnten'),
        ('Noe', 'Niederösterreich'),
        ('Ooe', 'Oberösterreich'),
        ('S', 'Salzburg'),
        ('St', 'Steiermark'),
        ('T', 'Tirol'),
        ('V', 'Vorarlberg'),
        ('W', 'Wien'),
    )

    layers = []
    for code, name in county_names:
        shade = accent if highlight == code else muted
        layers.append(c_line(code + '_recovered', shade, 'linear', name))

    # Annotated copy of the emphasized county goes last so it sits on top.
    layers.append(c_line(highlight + '_recovered', accent, 'linear', title, True))

    return alt.layer(*layers).properties(title=title)

# Lay the nine county panels out as a 3x3 grid: `|` places charts side by
# side within a row, `&` stacks the rows vertically.
row_top = county_sm('B', 'Burgenland') | county_sm('K', 'Kärnten') | county_sm('Noe', 'Niederösterreich')
row_middle = county_sm('Ooe', 'Oberösterreich') | county_sm('S', 'Salzburg') | county_sm('St', 'Steiermark')
row_bottom = county_sm('T', 'Tirol') | county_sm('V', 'Vorarlberg') | county_sm('W', 'Wien')

# Shared look: light grid and axis colors, no view border, small gray titles.
chart_recovered = (row_top & row_middle & row_bottom).configure_axis(
    grid=True,
    gridColor="#eee",
    domainColor="#ddd",
    tickColor="#ddd",
    labelColor="gray",
    labelBound=True
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=12,
    fontWeight='normal',
    # anchor='start',  (disabled)
    color='gray'
)

# Last expression of the cell, so the notebook renders the chart.
chart_recovered

In [7]:
# Write the confirmed-cases chart spec to disk as JSON, re-serialized with
# 4-space indentation and sorted keys.
with open('../data/chart_counties_confirmed.json', 'w') as jsonFile:
    jsonFile.write(
        json.dumps(json.loads(chart_confirmed.to_json()), indent=4, sort_keys=True)
    )

In [8]:
# Write the deaths chart spec to disk as JSON, re-serialized with
# 4-space indentation and sorted keys.
with open('../data/chart_counties_death.json', 'w') as jsonFile:
    jsonFile.write(
        json.dumps(json.loads(chart_death.to_json()), indent=4, sort_keys=True)
    )

In [9]:
# Write the recoveries chart spec to disk as JSON, re-serialized with
# 4-space indentation and sorted keys.
with open('../data/chart_counties_recovered.json', 'w') as jsonFile:
    jsonFile.write(
        json.dumps(json.loads(chart_recovered.to_json()), indent=4, sort_keys=True)
    )