In [2]:
import pandas as pd
import numpy as np


def mergeWithPopulation(df, population=None):
    """Attach the population count of the matching region and year to each row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Zeit`` column that ``pd.to_datetime`` can parse and an
        ``AGS`` column with region keys. The key ``"DG"`` is mapped to 0, all
        other keys are converted with ``pd.to_numeric``.
    population : pandas.DataFrame, optional
        Table with the columns ``AGS``, ``Jahr`` and
        ``BEVSTD__Bevoelkerungsstand__Anzahl``. When omitted, it is read from
        ``../general_data/population.csv``.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``Zeit`` as datetime, a numeric ``AGS``, an added
        ``Jahr`` column and the population column joined on (``AGS``, ``Jahr``).
        Rows without a population match are kept (left join) and get NaN.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_numeric`` if an ``AGS`` key other than ``"DG"``
        is not numeric.
    """
    if population is None:
        population = pd.read_csv("../general_data/population.csv")

    def numeric_ags(keys):
        #replace "DG" with the string "0" so that to_numeric sees only numeric text;
        #assigning the result back avoids the chained inplace replace, which is
        #deprecated and has no effect under pandas Copy-on-Write
        return pd.to_numeric(keys.replace({"DG": "0"}))

    #assign() builds new frames, so neither the caller's df nor a passed-in
    #population table is modified
    zeit = pd.to_datetime(df["Zeit"])
    df = df.assign(Zeit=zeit, Jahr=zeit.dt.year, AGS=numeric_ags(df["AGS"]))
    population = population.assign(AGS=numeric_ags(population["AGS"]))

    #add population numbers
    population_columns = ["AGS", "Jahr", "BEVSTD__Bevoelkerungsstand__Anzahl"]
    return pd.merge(df, population[population_columns], on=["AGS", "Jahr"], how="left")

def calcCarDensity(df,carcolumn,resultcolumn = "PkW_pro_Tausend_Personen"):
    """Compute vehicles per 1,000 inhabitants.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``carcolumn`` and ``BEVSTD__Bevoelkerungsstand__Anzahl``.
        In both columns the placeholders ``"-"`` and ``"."`` are treated as
        missing values.
    carcolumn : str
        Name of the column holding the vehicle counts.
    resultcolumn : str, optional
        Name of the column that receives the density.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which both input columns are numeric, plus
        ``Bevoelkerung_in_Tausend`` (population / 1000) and ``resultcolumn``
        (vehicle count / ``Bevoelkerung_in_Tausend``). The input frame is not
        modified.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_numeric`` if a value other than the two
        placeholders cannot be parsed as a number.
    """
    #name of population stays the same, because merged with same dataset
    population_column = "BEVSTD__Bevoelkerungsstand__Anzahl"
    #np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0
    placeholders = {"-": np.nan, ".": np.nan}

    def as_number(values):
        #assign the result instead of using a chained inplace replace, which is
        #deprecated and has no effect under pandas Copy-on-Write
        return pd.to_numeric(values.replace(placeholders))

    #work on a copy so the caller's frame (possibly a filtered view) stays untouched
    df = df.copy()
    df[carcolumn] = as_number(df[carcolumn])
    df[population_column] = as_number(df[population_column])
    df["Bevoelkerung_in_Tausend"] = df[population_column] / 1000

    #divide cars by population
    df[resultcolumn] = df[carcolumn] / df["Bevoelkerung_in_Tausend"]

    return df

def createWideFormat(df,column, values, name_column="1_Auspraegung_Label"):
    """Pivot long-format data into one row per place and year.

    For every distinct ``AGS`` value the rows are pivoted with ``Jahr`` as the
    row key, the distinct entries of ``column`` as new columns and ``values``
    as cell content (``pd.pivot_table`` default aggregation). The per-place
    tables are then stacked.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``AGS``, ``Jahr``, ``column``, ``values`` and
        ``name_column``.
    column : str
        Column whose distinct entries become the new columns.
    values : str
        Column that supplies the cell values.
    name_column : str, optional
        Column from which the place name is taken (first row of each place).

    Returns
    -------
    pandas.DataFrame
        ``Jahr``, one column per entry of ``column``, ``ort_ags`` and
        ``ort_name``. The row index restarts for every place. An empty frame
        is returned when ``df`` has no rows with an ``AGS`` key.
    """
    frames = []

    #sort=False keeps the places in order of first appearance; rows whose AGS is
    #missing are skipped by groupby instead of crashing on an empty selection
    for ort, snippet in df.groupby("AGS", sort=False):
        temp = pd.pivot_table(snippet, index='Jahr',columns=column, values=values)
        temp = temp.reset_index(level=0)

        temp["ort_ags"] = ort
        temp["ort_name"] = str(snippet[name_column].iloc[0])

        frames.append(temp)

    #pd.concat raises on an empty list, so handle "no places" explicitly
    if not frames:
        return pd.DataFrame()

    #concatenate once instead of growing a frame inside the loop
    return pd.concat(frames)
    
    

In [3]:
#car density with fueltype
#source: Regionalstatistik

df = pd.read_csv("data/car_fueltype.csv",delimiter=";",encoding="latin1")

#rename the region key and the fuel type column to the names the helper functions expect
df = df.rename(columns={"1_Auspraegung_Code":"AGS","2_Auspraegung_Label":"fueltype"})

#drop plugin hybrid (because already included in hybrid);
#copy() so that later column assignments do not act on a filtered view
df = df[df.fueltype != "  darunter Plug-In-Hybrid"].copy()

#merge with population numbers
df = mergeWithPopulation(df)

#calc cars per 1,000 persons
df = calcCarDensity(df,"PKWBES__Personenkraftwagen_nach_Kraftstoffarten__Anzahl")

###IMPORTANT
#for 2021 there is no population data on Landkreis level yet, so only the 2020 data is used

#only 2020 data
df_2020 = df[df["Jahr"] == 2020]
#df_2020.to_csv("data/car_density.csv")

#unique region keys (not used further in this cell)
orte = df["AGS"].unique()

#pivot table to wide format data
pivot_total = createWideFormat(df,"fueltype","PkW_pro_Tausend_Personen")

#calc total car numbers: sum every fuel type column, selected by name rather than
#by position so the total stays correct if the set of fuel types changes
id_columns = ["Jahr","ort_ags","ort_name"]
pivot_total["Gesamt"] = pivot_total.drop(columns=id_columns).sum(axis=1)

pivot_total.to_csv("data/car_density_wideformat.csv")
pivot_total

fueltype,Jahr,Benzin,Diesel,Elektro,Gas (einschl. bivalent),Hybrid,sonstige Kraftstoffarten,ort_ags,ort_name,Gesamt
0,2020,378.385765,181.725409,1.642919,5.455713,7.715204,0.123204,0,Deutschland,575.048213
0,2020,368.892859,199.421823,1.692618,4.415511,6.144544,0.106154,1,Schleswig-Holstein,580.673509
0,2020,301.810216,182.867436,1.601174,3.146752,7.138568,0.166789,1001,"Flensburg, kreisfreie Stadt",496.730936
0,2020,296.551920,143.843699,1.427407,3.057571,6.719356,0.072992,1002,"Kiel, Landeshauptstadt, kreisfreie Stadt",451.672945
0,2020,312.000222,139.965531,1.436209,3.914828,5.225948,0.074127,1003,"Lübeck, Hansestadt, kreisfreie Stadt",462.616866
...,...,...,...,...,...,...,...,...,...,...
0,2020,406.250306,175.300326,0.567854,3.886860,5.257541,0.166440,16073,"Saalfeld-Rudolstadt, Kreis",591.429327
0,2020,403.786708,181.631569,0.712423,4.902434,7.100077,0.217349,16074,Saale-Holzland-Kreis,598.350560
0,2020,421.652100,203.988346,0.602773,4.319871,3.541290,0.226040,16075,Saale-Orla-Kreis,634.330420
0,2020,426.045848,187.611205,0.486200,4.499938,5.430960,0.186204,16076,"Greiz, Kreis",624.260355


In [19]:
#density of all vehicle types
#source: Regionalstatistik

#NOTE(review): the recorded run printed a warning for this read_csv call; if it is a
#DtypeWarning about mixed-type columns, consider passing explicit dtypes - TODO confirm
df2 = (
    pd.read_csv("data/car_total.csv",delimiter=";",encoding="latin1")
    #rename the region key and the vehicle type column to the names the helpers expect
    .rename(columns={"1_Auspraegung_Code":"AGS","2_Auspraegung_Label":"cartype"})
    #join population numbers, then derive vehicles per 1,000 persons
    .pipe(mergeWithPopulation)
    .pipe(calcCarDensity,"VER012__Kraftfahrzeugbestand__Anzahl","Fahrzeug_pro_Tausend_Personen")
)

#wide format: one row per place and year, one column per vehicle type
pivot2 = createWideFormat(df2,"cartype","Fahrzeug_pro_Tausend_Personen")
pivot2.to_csv("data/car_density_total_wideformat.csv")

#long format: one row per place, year and vehicle type
vehicle_columns = ['Insgesamt','Krafträder','Lkw','Pkw','Zugmaschinen']
melt = pd.melt(pivot2,id_vars=["ort_name","Jahr"],value_vars=vehicle_columns)
#melt['Jahr'] = melt["Jahr"].str.replace("year_", '')

#melt.to_csv("data/car_density_total_longformat.csv")
#melt
pivot2


  df2 = pd.read_csv("data/car_total.csv",delimiter=";",encoding="latin1")


cartype,Jahr,Insgesamt,Krafträder,Lkw,Pkw,Zugmaschinen,ort_ags,ort_name
0,1996,581.119334,28.096479,27.451124,493.822392,23.188756,0,Deutschland
1,1997,590.903287,30.883341,27.990158,500.201414,23.158941,0,Deutschland
2,1998,597.791697,33.633149,28.579320,503.758919,23.154378,0,Deutschland
3,1999,605.661153,36.599596,29.390334,507.728501,23.187177,0,Deutschland
4,2000,616.663672,38.640406,30.283029,515.724425,23.234448,0,Deutschland
...,...,...,...,...,...,...,...,...
20,2016,670.778434,45.542371,46.655823,556.791512,16.516205,16077,"Altenburger Land, Kreis"
21,2017,683.022614,47.093216,48.659680,565.151682,16.756757,16077,"Altenburger Land, Kreis"
22,2018,689.651346,48.691715,50.223041,567.600257,17.632437,16077,"Altenburger Land, Kreis"
23,2019,699.327688,50.339512,50.876467,574.206034,18.334769,16077,"Altenburger Land, Kreis"


In [20]:
#show the long-format table that was built with pd.melt from pivot2
melt

Unnamed: 0,ort_name,Jahr,cartype,value
0,Deutschland,1996,Insgesamt,581.119334
1,Deutschland,1997,Insgesamt,590.903287
2,Deutschland,1998,Insgesamt,597.791697
3,Deutschland,1999,Insgesamt,605.661153
4,Deutschland,2000,Insgesamt,616.663672
...,...,...,...,...
59000,"Altenburger Land, Kreis",2016,Zugmaschinen,16.516205
59001,"Altenburger Land, Kreis",2017,Zugmaschinen,16.756757
59002,"Altenburger Land, Kreis",2018,Zugmaschinen,17.632437
59003,"Altenburger Land, Kreis",2019,Zugmaschinen,18.334769


In [43]:
from dtaidistance import dtw


def get_most_different_cities(data=None):
    """Rank places by the DTW distance between their Pkw series and Germany's.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Wide-format table with the columns ``Jahr``, ``Pkw``, ``ort_ags`` and
        ``ort_name``. Defaults to the notebook-level ``pivot2``.

    Returns
    -------
    list of [distance, ort_ags, ort_name]
        One entry per distinct ``ort_ags`` (the reference series itself
        included), sorted by distance, largest first.
    """
    source = pivot2 if data is None else data
    pkw = source[['Jahr','Pkw','ort_ags','ort_name']]

    #ort_ags 0 is the reference series (the key "DG" is mapped to 0 when AGS is made numeric)
    deutschland = pkw.loc[pkw["ort_ags"] == 0, "Pkw"].to_numpy(dtype=float)

    distances = []

    for ags in pkw["ort_ags"].unique():
        rows = pkw[pkw["ort_ags"] == ags]
        #hand plain float arrays to DTW so element access does not depend on the pandas index
        ort = rows["Pkw"].to_numpy(dtype=float)
        distance = dtw.distance(deutschland, ort)

        distances.append([distance, ags, rows["ort_name"].iloc[0]])

    return sorted(distances, key=lambda x: x[0], reverse=True)

##this ranking is not really helpful, because it mostly reflects whether a time series is, in general, a lot higher / lower
# than the German average. Next, I will look at the gradient / derivative of the data instead.



In [130]:
#by default the total development is compared; call get_most_different_cities_by_slope(True)
#to restrict the comparison to the years after 2009

def get_most_different_cities_by_slope(use_recent_years = False, data = None):
    """Rank places by how much the slope of their Pkw series differs from Germany's.

    The slope of each series is taken with ``np.gradient`` and compared with
    the slope of the reference series (``ort_ags == 0``) via ``dtw.distance``.

    Parameters
    ----------
    use_recent_years : bool, optional
        If True, only rows with ``Jahr > 2009`` are used and places whose
        series has a different number of years than the reference are skipped.
    data : pandas.DataFrame, optional
        Wide-format table with the columns ``Jahr``, ``Pkw``, ``ort_ags`` and
        ``ort_name``. Defaults to the notebook-level ``pivot2``.

    Returns
    -------
    list of [distance, ort_ags, ort_name]
        Sorted by distance, largest first.

    Raises
    ------
    ValueError
        If the reference series has fewer than two data points.
    """
    first_excluded_year = 2009

    source = pivot2 if data is None else data
    pkw = source[['Jahr','Pkw','ort_ags','ort_name']]

    #restrict the year window once instead of repeating the filter per place
    if use_recent_years:
        window = pkw[pkw["Jahr"] > first_excluded_year]
    else:
        window = pkw

    deutschland = window.loc[window["ort_ags"] == 0, "Pkw"].to_numpy(dtype=float)
    if deutschland.size < 2:
        raise ValueError("reference series (ort_ags == 0) needs at least two data points")
    deutschland_slope = np.gradient(deutschland)

    distances_slope = []

    for ags in pkw["ort_ags"].unique():
        ort = window.loc[window["ort_ags"] == ags, "Pkw"].to_numpy(dtype=float)

        #skip places we only have old data of: their recent series is shorter than
        #the reference (compared to the reference length, not a hard-coded year count)
        if use_recent_years and ort.size != deutschland.size:
            continue
        #np.gradient needs at least two points
        if ort.size < 2:
            continue

        ort_slope = np.gradient(ort)
        distance = dtw.distance(deutschland_slope, ort_slope)

        name = pkw.loc[pkw["ort_ags"] == ags, "ort_name"].iloc[0]
        distances_slope.append([distance, ags, name])

    return sorted(distances_slope, key=lambda x: x[0], reverse=True)

def get_total_difference(data=None, start_year=1996, end_year=2020):
    """Rank places by the change of their Pkw value between two years.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Wide-format table with the columns ``Jahr``, ``Pkw``, ``ort_ags`` and
        ``ort_name``. Defaults to the notebook-level ``pivot2``.
    start_year, end_year : int, optional
        The two years that are compared.

    Returns
    -------
    list of [difference, ort_ags, ort_name]
        ``difference`` is the ``end_year`` value minus the ``start_year``
        value. Places that lack a row for either year, or whose difference is
        NaN, are left out. Sorted by difference, largest first.
    """
    #build the table here instead of relying on a leftover global `pkw`,
    #which does not exist on a fresh kernel
    source = pivot2 if data is None else data
    pkw = source[['Jahr','Pkw','ort_ags','ort_name']]

    differences = []

    for ags in pkw["ort_ags"].unique():
        rows = pkw[pkw["ort_ags"] == ags]
        ort_start = rows.loc[rows["Jahr"] == start_year, "Pkw"]
        ort_end = rows.loc[rows["Jahr"] == end_year, "Pkw"]

        if ort_start.size == 0 or ort_end.size == 0:
            continue

        diff = ort_end.iloc[0] - ort_start.iloc[0]
        #a NaN key would make the sort order below meaningless
        if pd.isna(diff):
            continue

        differences.append([diff, ags, rows["ort_name"].iloc[0]])

    return sorted(differences, key=lambda x: x[0], reverse=True)

    


In [131]:
#rank places by the difference between their 2020 and 1996 Pkw values (largest increase first)
get_total_difference()


[[558.0338251662442, 3103, '      Wolfsburg, kreisfreie Stadt'],
 [210.19831210253028, 6436, '      Main-Taunus-Kreis'],
 [182.24070297878154, 9161, '      Ingolstadt'],
 [176.52058462600735, 9279, '      Dingolfing-Landau, Landkreis'],
 [172.1216726995172, 5966, '      Olpe, Kreis'],
 [171.35337326415674, 10046, '      St. Wendel, Landkreis'],
 [169.51757913442293, 9374, '      Neustadt a.d.Waldnaab, Landkreis'],
 [168.05852654376025, 9371, '      Amberg-Sulzbach, Landkreis'],
 [167.1781077850087, 9674, '      Haßberge, Landkreis'],
 [164.10597989055475, 10044, '      Saarlouis, Landkreis'],
 [162.77055445690985, 9372, '      Cham, Landkreis'],
 [162.76933708699607, 9673, '      Rhön-Grabfeld, Landkreis'],
 [162.0508623322155, 9377, '      Tirschenreuth, Landkreis'],
 [158.46232434120952, 7340, '      Südwestpfalz, Landkreis'],
 [158.35041674507374, 9473, '      Coburg, Landkreis'],
 [154.00427401816103, 9272, '      Freyung-Grafenau, Landkreis'],
 [153.9241181000831, 9571, '      Ans

In [109]:
#rank places by the DTW distance between the gradient of their Pkw series and that of
#ort_ags 0 (Deutschland), using only the years after 2009
get_most_different_cities_by_slope(True)

[[100.53178242170591, 5366, '      Euskirchen, Kreis'],
 [86.16600373264441, 3103, '      Wolfsburg, kreisfreie Stadt'],
 [77.25558197971378, 9184, '      München, Landkreis'],
 [70.86738530019072, 6433, '      Groß-Gerau, Landkreis'],
 [65.62016404223293,
  6414,
  '      Wiesbaden, Landeshauptstadt, kreisfreie Stadt'],
 [58.113901282729195, 9362, '      Regensburg'],
 [36.301830813116815, 5314, '      Bonn, kreisfreie Stadt'],
 [33.58749493060507, 5515, '      Münster, kreisfreie Stadt'],
 [31.856070800471354, 9661, '      Aschaffenburg'],
 [30.673673424258006, 9763, '      Kempten (Allgäu)'],
 [27.995013069890245, 9279, '      Dingolfing-Landau, Landkreis'],
 [27.49358194866716, 6436, '      Main-Taunus-Kreis'],
 [26.664917434562255, 16054, '      Suhl, kreisfreie Stadt'],
 [26.23096250645361, 16055, '      Weimar, kreisfreie Stadt'],
 [26.169288072535622, 1001, '      Flensburg, kreisfreie Stadt'],
 [25.82480318367041, 9464, '      Hof'],
 [25.796314296291786, 5558, '      Coesfeld