In [1]:
import json
import numpy as np
import pandas as pd

In [2]:
# Load the obituary records from the comma-separated export (path is relative to this notebook).
df = pd.read_csv('../data/tirol_obituaries_deduped.csv', sep=',')

In [3]:
# The raw 'date' column is day.month.year text; replace it with parsed datetimes.
df = df.assign(date=pd.to_datetime(df['date'], format='%d.%m.%Y'))
df.head()

Unnamed: 0,date,year,week,municipaly,district,hash
0,2020-11-21,2020,47,Ramsau im Zillertal,Schwaz,1848114505
1,2020-11-21,2020,47,Ellmau,Kufstein,1983012736
2,2020-11-20,2020,47,Sistrans,Innsbruck-Land,-831010056
3,2020-11-20,2020,47,Landeck,Landeck,398421071
4,2020-11-20,2020,47,Erl,Kufstein,1674161021


In [4]:
# Count obituaries per calendar day, then order the result chronologically.
obituary_days = pd.to_datetime(df['date']).dt.floor('d')
obituaries_per_day = obituary_days.value_counts().rename_axis('date')
df_count_by_day = obituaries_per_day.reset_index(name='count').sort_values('date')

df_count_by_day.head(10)

Unnamed: 0,date,count
1632,2016-06-01,2
1382,2016-06-02,10
1194,2016-06-03,11
1308,2016-06-04,10
1337,2016-06-05,10
946,2016-06-06,13
1226,2016-06-07,11
1095,2016-06-08,12
488,2016-06-09,16
1081,2016-06-10,12


In [5]:
# One row per (date, municipality) pair with the number of obituaries on that day.
grouped_by_day_and_location = df.groupby(['date', 'municipaly'])
df_by_day_by_location = grouped_by_day_and_location.size().reset_index(name='count')
df_by_day_by_location.head(10)

Unnamed: 0,date,municipaly,count
0,2016-06-01,Ellmau,1
1,2016-06-01,Ladis,1
2,2016-06-02,Hopfgarten im Brixental,1
3,2016-06-02,Kramsach,1
4,2016-06-02,Münster,1
5,2016-06-02,Nußdorf-Debant,1
6,2016-06-02,Pfons,1
7,2016-06-02,Schwaz,2
8,2016-06-02,St. Johann in Tirol,2
9,2016-06-02,Völs,1


In [6]:
# One row per (date, district) pair with the number of obituaries on that day.
df_by_day_by_district = (
    df.groupby(['date', 'district'])
    .size()
    .reset_index(name='count')
)
df_by_day_by_district.head(10)

Unnamed: 0,date,district,count
0,2016-06-01,Kufstein,1
1,2016-06-01,Landeck,1
2,2016-06-02,Innsbruck-Land,2
3,2016-06-02,Kitzbühel,3
4,2016-06-02,Kufstein,2
5,2016-06-02,Lienz,1
6,2016-06-02,Schwaz,2
7,2016-06-03,Imst,2
8,2016-06-03,Innsbruck-Stadt,2
9,2016-06-03,Kitzbühel,1


In [7]:
# Municipality names used to select the Zillertal subset of the obituary data.
zillertal = [
    "Aschau im Zillertal", "Brandberg", "Bruck am Ziller", "Finkenberg",
    "Fügen", "Fügenberg", "Gerlos", "Gerlosberg", "Hainzenberg",
    "Hart im Zillertal", "Hippach", "Kaltenbach", "Mayrhofen",
    "Ramsau im Zillertal", "Ried im Zillertal", "Rohrberg", "Schlitters",
    "Schwendau", "Strass im Zillertal", "Stumm", "Stummerberg", "Tux",
    "Uderns", "Zell am Ziller", "Zellberg",
]

# Keep only the rows whose municipality appears in the list above.
in_zillertal = df['municipaly'].isin(zillertal)
df_zillertal = df.loc[in_zillertal]
df_zillertal.head()

Unnamed: 0,date,year,week,municipaly,district,hash
0,2020-11-21,2020,47,Ramsau im Zillertal,Schwaz,1848114505
25,2020-11-19,2020,47,Brandberg,Schwaz,-862257137
31,2020-11-19,2020,47,Ried im Zillertal,Schwaz,-2009189618
34,2020-11-18,2020,47,Tux,Schwaz,1142640935
52,2020-11-18,2020,47,Mayrhofen,Schwaz,881520331


In [8]:
import time  # NOTE(review): `time` is not used in any cell visible here — confirm before removing.

# Continuous daily calendar for the observation window. The bounds are written
# in ISO 8601 (year-month-day) so they cannot be misread as month-first or
# day-first.
# NOTE(review): the raw data shown by df.head() contains rows dated 2020-11-21,
# which this range excludes — confirm that dropping that day is intended.
idx = pd.date_range('2016-06-01', '2020-11-20')

# Re-index the per-day counts onto the full calendar so that days without any
# obituary appear explicitly with a count of 0, then restore 'date' as a column.
df_count_by_day = df_count_by_day.set_index('date')
df_count_by_day = df_count_by_day.reindex(idx, fill_value=0)
df_count_by_day.index.name = 'date'
df_count_by_day = df_count_by_day.reset_index()

df_count_by_day.head()

Unnamed: 0,date,count
0,2016-06-01,2
1,2016-06-02,10
2,2016-06-03,11
3,2016-06-04,10
4,2016-06-05,10


In [9]:
import altair as alt
alt.data_transformers.disable_max_rows()

# Scatter of the raw daily counts, one point per row of df_count_by_day.
day = alt.Chart(df_count_by_day, width=900, height=200).properties(
    title='Todesfälle pro Tag mit 7-day moving average'
).mark_circle(
    color='lightblue',
    size=16
).encode(
    x='date:T',
    y='count:Q',
    tooltip=['date:T', 'count:Q']
)

# Trailing moving average drawn as a line on top of the points.
# frame=[-6, 0] covers the current row plus the six preceding rows, i.e. the
# seven-row window announced in the chart title (a frame of [-7, 0] would
# average eight rows). Each row is one calendar day once df_count_by_day has
# been re-indexed onto the daily date range.
ma = alt.Chart(df_count_by_day, width=900, height=200).mark_line(
    color='#336699',
    size=1.5
).transform_window(
    rolling_mean='mean(count)',
    frame=[-6, 0]
).encode(
    x='date:T',
    y='rolling_mean:Q',
    tooltip=['date:T', 'rolling_mean:Q']
)

(day + ma).interactive()

In [10]:
# Total obituaries per municipality, largest first.
df_by_location = (
    df.groupby(['municipaly'])
    .size()
    .reset_index(name='count')
    .sort_values('count', ascending=False)
)
df_by_location.head(10)

Unnamed: 0,municipaly,count
88,Innsbruck,2969
114,Kufstein,849
126,Lienz,670
261,Wattens,662
197,Schwaz,492
271,Wörgl,421
72,Hall in Tirol,380
110,Kitzbühel,356
215,St. Johann in Tirol,344
118,Landeck,326


In [11]:
# Total obituaries per district, largest first.
obituaries_per_district = df.groupby(['district']).size()
df_by_district = obituaries_per_district.reset_index(name='count').sort_values('count', ascending=False)
df_by_district.head(10)

Unnamed: 0,district,count
1,Innsbruck-Land,5103
4,Kufstein,3525
2,Innsbruck-Stadt,2969
8,Schwaz,2717
3,Kitzbühel,2312
6,Lienz,2143
5,Landeck,1570
0,Imst,1539
7,Reutte,999


In [12]:
alt.renderers.enable('html')
alt.data_transformers.disable_max_rows()

# Monthly bars: the daily counts are summed per calendar month of 'date'.
chartDistrictsBars = alt.Chart().mark_bar().encode(
    x=alt.X('yearmonth(date):T', title=None),
    y=alt.Y('sum(count):Q', title=None),
    tooltip=['yearmonth(date):T', 'sum(count):Q']
).properties(
    width=225,
    height=100,
)

# Horizontal rule at the average monthly total (sum of counts divided by the
# number of distinct months). It is defined here but not added to the layer
# that is rendered below.
chartDistrictsLine = alt.Chart().mark_rule(color='lightblue').encode(
    y='avg:Q',
    size=alt.SizeValue(1.5),
    tooltip=['avg:Q']
).transform_timeunit(
    yearmonth='yearmonth(date)'
).transform_aggregate(
    sum_deaths='sum(count)',
    sum_buckets='distinct(yearmonth)'
).transform_calculate(
    avg='datum.sum_deaths / datum.sum_buckets'
)

# One small chart per district, three per row. The facets are districts
# ("Bezirk"), so the title says "pro Bezirk" rather than "pro Bundesland".
chartDistricts = alt.layer(chartDistrictsBars, data=df_by_day_by_district).facet(
    facet=alt.Facet('district:N', title=None),
    columns=3,
    title={
      "text": ["Todesanzeigen Tirol pro Bezirk pro Monat seit Juni 2016"],
      "subtitle": [
          "Quelle: https://github.com/walterra/covid-19-tirol-ds"
      ],
      "color": "black",
      "subtitleColor": "gray"
    },
)


chartDistricts

In [13]:
# Horizontal bar chart of the first 20 rows of df_by_location, longest bar on top.
top_locations = df_by_location.head(20)

bars = alt.Chart(top_locations).mark_bar().encode(
    x=alt.X('sum(count):Q'),
    y=alt.Y('municipaly:N', sort='-x'),
)

# Value labels placed just past the end of each bar (dx shifts them 3px to the
# right so they do not sit on top of the bar).
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text('count:Q'),
)

alt.layer(bars, text)


In [14]:
# with open("../docs/data/vega_districts.json", "w") as jsonFile:
#     json.dump(json.loads(chartDistricts.to_json()), jsonFile, indent=4, sort_keys=True)

In [15]:
alt.renderers.enable('html')
alt.data_transformers.disable_max_rows()

def municipalyChart(district='Landeck'):
    """Build a faceted monthly bar chart of obituaries for one district.

    Rows of the notebook-level ``df`` whose ``district`` column equals
    ``district`` are counted per (date, district, municipality). Every
    municipality then gets its own small bar chart in which the counts are
    summed per calendar month.

    Parameters
    ----------
    district : str, default 'Landeck'
        District name to match against ``df['district']``; it is also
        inserted into the chart title.

    Returns
    -------
    The faceted Altair chart (six facets per row), one facet per municipality.
    """
    df_sm = df[df['district']==district]

    df_sm = df_sm.groupby(['date','district','municipaly']).size().reset_index(name='count')

    # Monthly bars; the data is attached at the layer level below so that the
    # facet operator can split it by municipality.
    bars = alt.Chart().mark_bar().encode(
        x=alt.X('yearmonth(date):T', title=None),
        y=alt.Y('sum(count):Q', title=None),
        tooltip=['yearmonth(date):T', 'sum(count):Q']
    ).properties(
        width=135,
        height=80,
    )

    return alt.layer(bars, data=df_sm).facet(
        facet=alt.Facet('municipaly:N', title=None),
        columns=6,
        padding=0,
        title={
          "text": ["Zeitverlauf Todesanzeigen im Bezirk "+district+" pro Gemeinde pro Monat"],
          "subtitle": [
              "Quelle: https://github.com/walterra/covid-19-tirol-ds"
          ],
          "color": "black",
          "subtitleColor": "gray"
        },
    )

municipalyChart('Landeck')

In [16]:
# districts = {
#     "imst": "Imst",
#     "innsbruck-land": "Innsbruck-Land",
#     "innsbruck-stadt": "Innsbruck-Stadt",
#     "kitzbuehel": "Kitzbühel",
#     "kufstein": "Kufstein",
#     "landeck": "Landeck",
#     "reutte": "Reutte",
#     "schwaz": "Schwaz",
#     "lienz": "Lienz"
# }

# for d in districts:
#     chart = municipalyChart(districts[d])
#     with open("../docs/data/vega_" + d + ".json", "w") as jsonFile:
#         json.dump(json.loads(chart.to_json()), jsonFile, indent=4, sort_keys=True)

In [17]:
alt.renderers.enable('html')
alt.data_transformers.disable_max_rows()

def municipalyChartZillertal():
    """Build a faceted monthly bar chart of obituaries for the Zillertal subset.

    The notebook-level ``df_zillertal`` frame is counted per
    (date, district, municipality). Every municipality then gets its own small
    bar chart in which the counts are summed per calendar month.

    Returns
    -------
    The faceted Altair chart (six facets per row), one facet per municipality.
    """
    df_sm = df_zillertal.groupby(['date','district','municipaly']).size().reset_index(name='count')

    # Monthly bars; the data is attached at the layer level below so that the
    # facet operator can split it by municipality.
    bars = alt.Chart().mark_bar().encode(
        x=alt.X('yearmonth(date):T', title=None),
        y=alt.Y('sum(count):Q', title=None),
        tooltip=['yearmonth(date):T', 'sum(count):Q']
    ).properties(
        width=135,
        height=80,
    )

    return alt.layer(bars, data=df_sm).facet(
        facet=alt.Facet('municipaly:N', title=None),
        columns=6,
        padding=0,
        title={
          # Title wording fixed: the stray "im" before "pro" has been dropped.
          "text": ["Zeitverlauf Todesanzeigen pro Zillertal-Gemeinde pro Monat"],
          "subtitle": [
              "Quelle: https://github.com/walterra/covid-19-tirol-ds"
          ],
          "color": "black",
          "subtitleColor": "gray"
        },
    )

municipalyChartZillertal()