# COVID-19 Stats -- Data by Robert Koch-Institut, processed by [NPGEO](https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0)

## Boilerplate

In [1]:
import pandas as pd
import qgrid

%matplotlib widget
import matplotlib.pyplot as plt


def complex_sum(df: pd.DataFrame) -> pd.Series:
    """Sum up cases, deaths and recoveries over the rows of ``df``.

    Implements the counting rule described at
    https://www.arcgis.com/home/item.html?id=f10774f1c63e40168479a1feb6c7ca74:
    a row's count contributes to a total only when the matching flag
    column holds 0 or 1; rows with any other flag value are left out.

    Args:
        df: Frame providing the count columns ``AnzahlFall``,
            ``AnzahlTodesfall`` and ``AnzahlGenesen`` together with the
            flag columns ``NeuerFall``, ``NeuerTodesfall`` and
            ``NeuGenesen``.

    Returns:
        A Series with the entries ``cases``, ``deaths`` and ``recovered``.
        All three are 0 when no row qualifies (including an empty frame).

    Raises:
        KeyError: If one of the required columns is missing from ``df``.
    """
    def h_sum(col: str, refcol: str) -> int:
        # Select the counted rows with a boolean mask and sum the column in
        # one vectorised step instead of visiting every row in Python.
        counted = df[refcol].isin([0, 1])
        # skipna=False: a missing count in a counted row yields NaN rather
        # than being silently ignored.
        return df.loc[counted, col].sum(skipna=False)

    return pd.Series({
        "cases": h_sum("AnzahlFall", "NeuerFall"),
        "deaths": h_sum("AnzahlTodesfall", "NeuerTodesfall"),
        "recovered": h_sum("AnzahlGenesen", "NeuGenesen"),
    })

## Load main data source

Data License: Robert Koch-Institut (RKI), [dl-de/by-2-0](https://www.govdata.de/dl-de/by-2-0).
Provided by [NPGEO](https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/dd4580c810204019a7b8eb3e0b329dd6_0).

In [2]:
# Read the RKI case table and turn its three textual timestamp columns into
# real datetimes. "Datenstand" uses a German-style format, while the other
# two columns share a slash-separated format.
rki_cases = pd.read_csv("data/RKI_COVID19.csv").assign(
    Datenstand=lambda raw: pd.to_datetime(
        raw["Datenstand"], format="%d.%m.%Y, %H:%M Uhr"
    ),
    Meldedatum=lambda raw: pd.to_datetime(
        raw["Meldedatum"], format="%Y/%m/%d %H:%M:%S"
    ),
    Refdatum=lambda raw: pd.to_datetime(
        raw["Refdatum"], format="%Y/%m/%d %H:%M:%S"
    ),
)

## Load county data
Used to get the number of residents and the area in km² of every county.
The dataset is taken from [NPGEO](https://npgeo-corona-npgeo-de.hub.arcgis.com/datasets/917fc37a709542548cc3be077a786c17_0).

In [3]:
# County table; its "county", "EWZ" and "KFL" columns are read further down
# to relate the per-county totals to inhabitants (EWZ) and km² (KFL).
rki_lkr = pd.read_csv("data/RKI_Corona_Landkreise.csv")

## Load state data
The destatis dataset is used to get the number of residents and the area in km² of every German state.
The dataset is extracted from [destatis](https://www.destatis.de/DE/Themen/Laender-Regionen/Regionales/Gemeindeverzeichnis/Administrativ/02-bundeslaender.xlsx?__blob=publicationFile).

In [4]:
# State table; its "Bundesland", "Flaeche" and "Einwohner" columns are read
# further down to relate the per-state totals to km² and inhabitants.
destatis = pd.read_csv("data/destatis_bundeslaender.csv")

## Overview States

In [5]:
# Totals per state, computed with the RKI counting rule.
cases = rki_cases.groupby("Bundesland").apply(complex_sum)

# Attach the "Flaeche" and "Einwohner" columns from the destatis table.
states = cases.merge(
    destatis[["Bundesland", "Flaeche", "Einwohner"]],
    on="Bundesland",
)

# Relate cases and deaths to km² and inhabitants, then drop the two helper
# columns so that only totals and ratios are displayed.
states = states.assign(**{
    "cases/km²": states["cases"] / states["Flaeche"],
    "cases/inhabitants": states["cases"] / states["Einwohner"],
    "deaths/km²": states["deaths"] / states["Flaeche"],
    "deaths/inhabitants": states["deaths"] / states["Einwohner"],
}).drop(columns=["Flaeche", "Einwohner"])

qgrid.show_grid(states, show_toolbar=True)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

## Overview Counties

In [6]:
# County name plus the two reference columns used as denominators below.
lkr = rki_lkr.loc[:, ["county", "EWZ", "KFL"]]

# Totals per county, computed with the RKI counting rule.
cum_lkr = rki_cases.groupby("Landkreis").apply(complex_sum)

# Match the county table's "county" column against the "Landkreis" group key.
counties = lkr.merge(cum_lkr, left_on="county", right_on="Landkreis")

# Relate cases and deaths to km² (KFL) and inhabitants (EWZ), then drop the
# two helper columns so that only totals and ratios are displayed.
counties = counties.assign(**{
    "cases/km²": counties["cases"] / counties["KFL"],
    "cases/inhabitants": counties["cases"] / counties["EWZ"],
    "deaths/km²": counties["deaths"] / counties["KFL"],
    "deaths/inhabitants": counties["deaths"] / counties["EWZ"],
}).drop(columns=["EWZ", "KFL"])

qgrid.show_grid(counties, show_toolbar=True)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

## Cases, recovered, deaths (all of Germany)

In [7]:
# Date column that defines the day a row is counted on; swap in the
# commented line to aggregate by the other date column instead.
datecol = "Meldedatum"
#datecol = "Refdatum"

# Work on a copy of the full table; the commented line limits it to one state.
per_day = rki_cases.copy()
#per_day = rki_cases[rki_cases["Bundesland"] == "Bayern"].copy()

# Cut the timestamps down to whole days so that all rows of one day share a key.
per_day[datecol] = rki_cases[datecol].dt.floor("D")

# Daily totals in chronological order.
per_day = per_day.groupby(datecol).apply(complex_sum).sort_values(datecol)

# Running totals, added as cum_cases / cum_deaths / cum_recovered.
per_day = per_day.join(
    per_day[["cases", "deaths", "recovered"]].cumsum().add_prefix("cum_")
)
per_day.tail(5)

Unnamed: 0_level_0,cases,deaths,recovered,cum_cases,cum_deaths,cum_recovered
Meldedatum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-04-27,1124,32,122,157252,6544,128782
2020-04-28,1414,20,82,158666,6564,128864
2020-04-29,1384,2,75,160050,6566,128939
2020-04-30,1260,8,58,161310,6574,128997
2020-05-01,393,1,6,161703,6575,129003


In [8]:
import matplotlib.dates as mdates

fig, ax = plt.subplots(figsize=(10, 8))

# One curve per cumulative series, plotted against the date index.
ax.plot(per_day.index, per_day["cum_cases"], label="Cases")
ax.plot(per_day.index, per_day["cum_deaths"], label="Deaths")
ax.plot(per_day.index, per_day["cum_recovered"], label="Recovered")

# Major tick every fifth day, minor tick every day.
ax.xaxis.set_major_locator(mdates.DayLocator(interval=5))
ax.xaxis.set_minor_locator(mdates.DayLocator(interval=1))

# Slant the date labels so they do not overlap.
fig.autofmt_xdate()
ax.legend()
ax.grid()
plt.show()


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …