In [None]:
import pandas as pd
import numpy as np


def mergeWithPopulation(df, population_path="../general_data/population.csv"):
    """Attach yearly population counts to a regional statistics table.

    A ``Jahr`` column is derived from the year of ``Zeit``, the ``AGS`` key is
    made numeric (the code ``"DG"`` becomes ``0``), and the population figures
    read from ``population_path`` are left-joined on ``AGS`` and ``Jahr``.

    Args:
        df: Table with a ``Zeit`` column that ``pd.to_datetime`` can parse and
            an ``AGS`` column. It is not modified; a copy is processed.
        population_path: CSV file providing the columns ``AGS``, ``Jahr`` and
            ``BEVSTD__Bevoelkerungsstand__Anzahl``.

    Returns:
        A new DataFrame holding the rows of ``df`` (with ``Zeit`` parsed and
        ``AGS`` numeric) plus the columns ``Jahr`` and
        ``BEVSTD__Bevoelkerungsstand__Anzahl``. The population value is
        missing for rows without a matching ``AGS``/``Jahr`` pair.
    """
    # Work on a copy: the caller often passes a filtered slice, and writing
    # into it would alter the caller's data and trigger pandas warnings.
    df = df.copy()
    df["Zeit"] = pd.to_datetime(df["Zeit"])
    df["Jahr"] = df["Zeit"].dt.year
    # Assign the replaced column back instead of calling
    # replace(inplace=True) on the selected column; the chained in-place form
    # does not update the frame when pandas Copy-on-Write is active.
    df["AGS"] = pd.to_numeric(df["AGS"].replace({"DG": 0}))

    population = pd.read_csv(population_path)
    population["AGS"] = pd.to_numeric(population["AGS"].replace({"DG": 0}))

    # add population numbers
    population_columns = ["AGS", "Jahr", "BEVSTD__Bevoelkerungsstand__Anzahl"]
    return pd.merge(df, population[population_columns], on=["AGS", "Jahr"], how="left")

def calcCarDensity(df, carcolumn, resultcolumn="PkW_pro_Tausend_Personen"):
    """Compute a vehicles-per-1000-inhabitants column.

    The vehicle count column and the population column
    ``BEVSTD__Bevoelkerungsstand__Anzahl`` are converted to numbers, with the
    placeholder strings ``"-"`` and ``"."`` treated as missing values. The
    helper column ``Bevoelkerung_in_Tausend`` (population / 1000) is added and
    the vehicle count is divided by it.

    Args:
        df: Table containing ``carcolumn`` and
            ``BEVSTD__Bevoelkerungsstand__Anzahl``. It is modified in place.
        carcolumn: Name of the column holding the vehicle counts.
        resultcolumn: Name of the column that receives the density.

    Returns:
        The same ``df`` object, with both input columns numeric and the
        columns ``Bevoelkerung_in_Tausend`` and ``resultcolumn`` added.
    """
    # name of population stays the same, because merged with same dataset
    population_column = "BEVSTD__Bevoelkerungsstand__Anzahl"
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
    placeholders = {"-": np.nan, ".": np.nan}

    # make numeric; the replaced column is assigned back because a chained
    # replace(inplace=True) does not reach the frame under Copy-on-Write.
    df[carcolumn] = pd.to_numeric(df[carcolumn].replace(placeholders))
    df[population_column] = pd.to_numeric(df[population_column].replace(placeholders))
    df["Bevoelkerung_in_Tausend"] = df[population_column] / 1000

    # divide cars by population
    df[resultcolumn] = df[carcolumn] / df["Bevoelkerung_in_Tausend"]

    return df

def createWideFormat(df, column, values):
    """Pivot a long table into one wide row per region (``AGS``) and year.

    For every distinct ``AGS`` the rows are pivoted so that each distinct
    entry of ``column`` becomes its own column filled from ``values``
    (``pd.pivot_table`` default aggregation), with ``Jahr`` as a regular
    column. Two columns are appended: ``ort_ags`` (the ``AGS`` value) and
    ``ort_name`` (the first ``1_Auspraegung_Label`` of that region, as str).

    Args:
        df: Table with the columns ``AGS``, ``Jahr``, ``1_Auspraegung_Label``,
            ``column`` and ``values``.
        column: Name of the column whose entries become the new columns.
        values: Name of the column providing the cell values.

    Returns:
        The per-region tables stacked in order of first appearance of each
        ``AGS``; every region keeps its own 0-based row index. An empty
        DataFrame is returned when ``df`` has no rows. Rows whose ``AGS`` is
        missing are skipped.
    """
    region_tables = []

    # sort=False keeps the regions in order of first appearance, like
    # unique(); groupby also leaves out rows with a missing AGS, which would
    # otherwise produce an empty slice and fail on .iloc[0] below.
    for ort, snippet in df.groupby("AGS", sort=False):
        temp = pd.pivot_table(snippet, index="Jahr", columns=column, values=values)
        temp = temp.reset_index(level=0)

        temp["ort_ags"] = ort
        temp["ort_name"] = str(snippet["1_Auspraegung_Label"].iloc[0])

        region_tables.append(temp)

    # pd.concat raises on an empty list, so handle the no-data case here.
    if not region_tables:
        return pd.DataFrame()

    # One concat at the end instead of growing a frame inside the loop, which
    # re-copied all previously collected rows on every iteration.
    return pd.concat(region_tables)
    
    

In [None]:
#car density by fuel type
#source: Regionalstatistik

df = pd.read_csv("data/car_fueltype.csv", sep=";", encoding="latin1")

#name the region key "AGS" (merge key for the population data) and the fuel type label "fueltype"
df = df.rename(columns={"1_Auspraegung_Code": "AGS", "2_Auspraegung_Label": "fueltype"})

#plug-in hybrids are already included in hybrid, so drop that sub-category
df = df.loc[df["fueltype"].ne("  darunter Plug-In-Hybrid")]

#merge with the population numbers, then compute cars per 1000 persons
df = calcCarDensity(
    mergeWithPopulation(df),
    "PKWBES__Personenkraftwagen_nach_Kraftstoffarten__Anzahl",
)

###IMPORTANT
#there is no 2021 population data at Landkreis level yet, so the 2020 data is used

#only 2020 data
df_2020 = df.loc[df["Jahr"] == 2020]
#df_2020.to_csv("data/car_density.csv")

#pivot table to wide format data
orte = df["AGS"].unique()

pivot_total = createWideFormat(df, "fueltype", "PkW_pro_Tausend_Personen")

#total car density: row-wise sum of the six columns that sit directly
#before the last two columns (ort_ags and ort_name from createWideFormat)
pivot_total["Gesamt"] = pivot_total.iloc[:, -8:-2].sum(axis="columns")

pivot_total.to_csv("data/car_density_wideformat.csv")
pivot_total

In [None]:
#vehicle density across all vehicle types
#source: Regionalstatistik

df2 = pd.read_csv("data/car_total.csv", sep=";", encoding="latin1")

#name the region key "AGS" (merge key for the population data) and the vehicle type label "cartype"
df2 = df2.rename(columns={"1_Auspraegung_Code": "AGS", "2_Auspraegung_Label": "cartype"})

#add population numbers, then compute vehicles per 1000 persons
df2 = calcCarDensity(
    mergeWithPopulation(df2),
    "VER012__Kraftfahrzeugbestand__Anzahl",
    "Fahrzeug_pro_Tausend_Personen",
)

#wide format: one column per vehicle type
pivot2 = createWideFormat(df2, "cartype", "Fahrzeug_pro_Tausend_Personen")
pivot2.to_csv("data/car_density_total_wideformat.csv")

#long format of the same data; the export below is currently disabled
melt = pivot2.melt(
    id_vars=["ort_name", "Jahr"],
    value_vars=['Insgesamt', 'Krafträder', 'Lkw', 'Pkw', 'Zugmaschinen'],
)
#melt['Jahr'] = melt["Jahr"].str.replace("year_", '')

#melt.to_csv("data/car_density_total_longformat.csv")
#melt
pivot2
