In [None]:
import pandas as pd

# The administrative code columns are read as text (no numeric parsing), so the
# codes stay exactly as they are written in the file.
dtype_mappings = dict.fromkeys(
    ("Land", "Regierungsbezirk", "Kreis", "Verbandsgemeinde", "Gemeinde"),
    str,
)

# Load the semicolon-separated election results and discard incomplete rows.
# NOTE(review): dropna() without `subset` removes every row that has a missing
# value in ANY column, not only in the code columns — confirm this is intended.
df_elec = pd.read_csv(
    "../../data/raw/election_2021/btw21_wbz_ergebnisse.csv",
    sep=";",
    dtype=dtype_mappings,
).dropna()

# Build fixed-width regional keys from the administrative code columns.
#
# Every component is zero-padded to its official width before concatenation, so
# a value stored without leading zeros (e.g. Land "1" instead of "01") cannot
# produce a shortened key that silently fails to match during the join on AGS.
# Padding is a no-op for values that already have the full width.
#
#   municipality_code = Land(2) + Regierungsbezirk(1) + Kreis(2)
#                       + Verbandsgemeinde(4) + Gemeinde(3)   -> 12 characters
#   AGS               = Land(2) + Regierungsbezirk(1) + Kreis(2)
#                       + Gemeinde(3)                         ->  8 characters
key_part_widths = {
    "Land": 2,
    "Regierungsbezirk": 1,
    "Kreis": 2,
    "Verbandsgemeinde": 4,
    "Gemeinde": 3,
}
key_parts = {column: df_elec[column].str.zfill(width) for column, width in key_part_widths.items()}

df_elec["municipality_code"] = (
    key_parts["Land"]
    + key_parts["Regierungsbezirk"]
    + key_parts["Kreis"]
    + key_parts["Verbandsgemeinde"]
    + key_parts["Gemeinde"]
)

# The AGS omits the Verbandsgemeinde component.
df_elec["AGS"] = key_parts["Land"] + key_parts["Regierungsbezirk"] + key_parts["Kreis"] + key_parts["Gemeinde"]

# Preview the first rows including the two new key columns.
df_elec.head(10)

## Load municipality data and compare unique municipality codes

In [None]:
from geoscore_de.data_flow.municipality import load_municipality_data

# Load the municipality reference table through the project's loader.
municipality_csv_path = "../../data/raw/municipalities_2022.csv"
df_muni = load_municipality_data(municipality_csv_path)

In [None]:
# Report how many distinct codes each data set contains.
# NOTE(review): this compares `municipality_code` with `MU_ID`, while the join
# below uses `AGS` — confirm both columns follow the same key layout.
n_election_codes = df_elec["municipality_code"].nunique()
print(f"Unique municipality codes in election data: {n_election_codes}")

n_municipality_codes = df_muni["MU_ID"].nunique()
print(f"Unique municipality codes in municipality data: {n_municipality_codes}")

In [None]:
# Outer-join election rows with the municipality table on the shared AGS key.
# `indicator=True` adds a `_merge` column telling whether each row was found in
# the election data only, the municipality data only, or in both.
df_merged = df_elec.merge(
    df_muni,
    on="AGS",
    how="outer",
    indicator=True,
)

In [None]:
# Keep one row per AGS, then count how many keys fall into each merge category.
(
    df_merged
    .drop_duplicates(subset="AGS")
    .loc[:, "_merge"]
    .value_counts()
)