In [9]:
### Download car count data and population data for Landkreise
### from regionalstatistik.de and compile into csv with Regions as rows
### and years as columns.

In [10]:
#pkw nach kraftstoffarten: https://www.regionalstatistik.de/genesis//online?operation=table&code=46251-02-01-4&bypass=true&levelindex=1&levelid=1658732979330#abreadcrumb
#kfz nach kfz-arten: https://www.regionalstatistik.de/genesis//online?operation=table&code=46251-01-02-4&bypass=true&levelindex=1&levelid=1658732979330#abreadcrumb
#population data: https://www-genesis.destatis.de/genesis//online?operation=table&code=12411-0015&bypass=true&levelindex=0&levelid=1657696365046#abreadcrumb
#Bundeslaender-level and Germany-wide rows were added manually to the population data for 1995 - 2021

In [11]:
# import necessary libraries

import pandas as pd
import numpy as np

In [12]:
#merge car data with population data
def mergeWithPopulation(df, population_path="../general_data/population.csv"):
    """Attach population counts to a car-statistics frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Zeit' column parseable by ``pd.to_datetime`` and an
        'AGS' column with the regional code. The input frame is not modified.
    population_path : str, optional
        Path of the semicolon-separated, latin1-encoded population CSV. It must
        provide the columns '1_Auspraegung_Code', 'Zeit' and
        'BEVSTD__Bevoelkerungsstand__Anzahl'.
        population data: https://www-genesis.destatis.de/genesis//online?operation=table&code=12411-0015&bypass=true&levelindex=0&levelid=1657696365046#abreadcrumb
        (Bundeslaender and Germany-wide rows were added manually for 1995 - 2021.)

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with a numeric 'AGS', a datetime 'Zeit', an added
        'Jahr' column and the left-joined
        'BEVSTD__Bevoelkerungsstand__Anzahl' column (NaN where no population
        row matches on AGS and Jahr).
    """
    # Work on a copy: callers may pass a filtered slice, and writing columns
    # into it would mutate (or warn about) the caller's data.
    df = df.copy()
    df['Zeit'] = pd.to_datetime(df['Zeit'])
    df['Jahr'] = df['Zeit'].dt.year
    # The code "DG" is mapped to 0 so that the AGS column becomes fully numeric.
    df["AGS"] = pd.to_numeric(df["AGS"].replace({"DG": 0}))

    population = pd.read_csv(population_path, delimiter=";", encoding="latin1")
    population = population.rename(columns={"1_Auspraegung_Code": "AGS"})
    population["AGS"] = pd.to_numeric(population["AGS"].replace({"DG": 0}))
    # regex=False is essential: as a regular expression "." matches every
    # character and would turn the whole date string into slashes.
    population["Zeit"] = population["Zeit"].str.replace(".", "/", regex=False)
    population['Zeit'] = pd.to_datetime(population['Zeit'], dayfirst=True)
    population['Jahr'] = population['Zeit'].dt.year

    #add population numbers
    population_columns = ['AGS', 'Jahr', 'BEVSTD__Bevoelkerungsstand__Anzahl']
    return pd.merge(df, population[population_columns], on=['AGS', 'Jahr'], how='left')

In [13]:
#calculate car density per 1000 people
def calcCarDensity(df,carcolumn,resultcolumn = "PkW_pro_Tausend_Personen"):
    """Compute vehicles per 1000 inhabitants.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``carcolumn`` and 'BEVSTD__Bevoelkerungsstand__Anzahl'.
        The input frame is not modified.
    carcolumn : str
        Name of the column holding the vehicle counts.
    resultcolumn : str, optional
        Name of the column that receives the density.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` in which both count columns are numeric (the
        placeholders "-" and "." become NaN), plus the added columns
        'Bevoelkerung_in_Tausend' and ``resultcolumn``.

    Raises
    ------
    ValueError
        From ``pd.to_numeric`` if a count column contains any other
        non-numeric text.
    """
    # Placeholder strings that stand for "no value" in the source tables.
    # np.nan is used because the np.NaN alias no longer exists in NumPy 2.
    missing_markers = {"-": np.nan, ".": np.nan}
    #name of population stays the same, because merged with same dataset
    population_column = "BEVSTD__Bevoelkerungsstand__Anzahl"

    # Copy so the caller's frame keeps its original (string) columns.
    df = df.copy()

    #make numeric
    df[carcolumn] = pd.to_numeric(df[carcolumn].replace(missing_markers))
    df[population_column] = pd.to_numeric(df[population_column].replace(missing_markers))
    df["Bevoelkerung_in_Tausend"] = df[population_column] / 1000

    #divide cars by population
    df[resultcolumn] = df[carcolumn] / df["Bevoelkerung_in_Tausend"]

    return df

In [14]:
#pivot table to wide format data
def createWideFormat(df,column, values):
    """Pivot a long frame into one row per region and year.

    For every distinct 'AGS' value (in order of first appearance) the rows of
    that region are pivoted so that the categories in ``column`` become
    columns holding ``values``; 'Jahr' is kept as a regular column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'AGS', 'Jahr', '1_Auspraegung_Label', ``column`` and
        ``values``.
    column : str
        Column whose categories become the new columns.
    values : str
        Column providing the cell values (aggregated with ``pivot_table``'s
        default when a region/year/category combination occurs repeatedly).

    Returns
    -------
    pandas.DataFrame
        The stacked per-region pivots with two extra columns: 'ort_ags' (the
        region code) and 'ort_name' (the first '1_Auspraegung_Label' found for
        that region). The row index restarts at 0 for every region. An empty
        frame is returned when ``df`` has no rows. Rows whose 'AGS' is missing
        are skipped.
    """
    region_frames = []

    # sort=False keeps the regions in the order in which they first appear.
    for ort, snippet in df.groupby("AGS", sort=False):
        temp = pd.pivot_table(snippet, index='Jahr', columns=column, values=values)
        temp = temp.reset_index(level=0)

        temp["ort_ags"] = ort
        temp["ort_name"] = str(snippet["1_Auspraegung_Label"].iloc[0])

        region_frames.append(temp)

    # pd.concat refuses an empty list, so handle "no regions" explicitly.
    if not region_frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(region_frames)

# 1. Car density by fuel type (source: Regionalstatistik)

In [15]:


df = pd.read_csv("data/car_fueltype.csv",delimiter=";",encoding="latin1")

#prepare AGS and year for merging with population
df = df.rename(columns={"1_Auspraegung_Code":"AGS","2_Auspraegung_Label":"fueltype"})

#drop plugin hybrid (because already included in hybrid)
#.copy() makes the filtered frame independent of the raw table, so later
#column assignments do not trigger chained-assignment problems
df = df[df.fueltype != "  darunter Plug-In-Hybrid"].copy()

#merge with population numbers
df = mergeWithPopulation(df)

#calc cars per person
df = calcCarDensity(df,"PKWBES__Personenkraftwagen_nach_Kraftstoffarten__Anzahl")

#pivot table to wide format data
pivot_total = createWideFormat(df,"fueltype","PkW_pro_Tausend_Personen")

#calc total car numbers
#select the fuel-type columns by name (everything except the identifier
#columns) instead of by position, so the total stays correct if the set of
#fuel types in the source table changes
identifier_columns = ["Jahr", "ort_ags", "ort_name"]
fueltype_columns = [col for col in pivot_total.columns if col not in identifier_columns]
pivot_total["Gesamt"] = pivot_total[fueltype_columns].sum(axis=1)

pivot_total.to_csv("data/car_density_wideformat.csv")
pivot_total

fueltype,Jahr,Benzin,Diesel,Elektro,Gas (einschl. bivalent),Hybrid,sonstige Kraftstoffarten,ort_ags,ort_name,Gesamt
0,2020,378.385765,181.725409,1.642919,5.455713,7.715204,0.123204,0,Deutschland,575.048213
1,2021,377.660093,180.930374,3.713283,5.163946,15.425209,0.121532,0,Deutschland,583.014437
0,2020,368.892859,199.421823,1.692618,4.415511,6.144544,0.106154,1,Schleswig-Holstein,580.673509
1,2021,367.834757,201.277547,4.110534,4.100609,12.212505,0.110198,1,Schleswig-Holstein,589.646151
0,2020,301.810216,182.867436,1.601174,3.146752,7.138568,0.166789,1001,"Flensburg, kreisfreie Stadt",496.730936
...,...,...,...,...,...,...,...,...,...,...
1,2021,420.485891,208.389219,1.556371,3.694799,8.224725,0.240415,16075,Saale-Orla-Kreis,642.591421
0,2020,426.045848,187.611205,0.486200,4.499938,5.430960,0.186204,16076,"Greiz, Kreis",624.260355
1,2021,422.384550,189.517388,1.435974,4.026971,10.540884,0.145679,16076,"Greiz, Kreis",628.051445
0,2020,429.172891,144.200733,0.633800,4.481869,3.644348,0.203721,16077,"Altenburger Land, Kreis",582.337362


In [18]:
#change year column to string to facilitate renaming columns after second pivot
pivot_total['Jahr'] = pivot_total['Jahr'].astype(str)

#add column for fossils
pivot_total['Fossils'] = pivot_total['Benzin']+pivot_total['Diesel']+pivot_total['Gas (einschl. bivalent)']

#pivot dataframe to have both energy types and years as columns
#the region name is part of the index rather than of the values: mixing a
#text column into the pivoted values would turn every density column into
#object dtype, and it would require dropping/renaming year-specific name
#columns by hand
density_columns = ["Gesamt",'Fossils',"Benzin","Diesel","Gas (einschl. bivalent)","Elektro","Hybrid","sonstige Kraftstoffarten"]
piv_df = pivot_total.pivot(index=['ort_ags','ort_name'],columns=['Jahr'],values=density_columns)

#collapse levels of column names and join energy type to year
piv_df.columns = piv_df.columns.map('_'.join)

#prepend all columns with indicator name
piv_df = piv_df.add_prefix('mobility_fueltype_cardensity_')

#reset index to recreate the regional id and name columns (done after the
#prefixing so that these two columns stay unprefixed)
piv_df = piv_df.reset_index()

#rename regional id and name and strip whitespace from Name column
piv_df = piv_df.rename(columns={'ort_name':'Name','ort_ags':'AGS'})
piv_df['Name'] = piv_df['Name'].str.strip()

#set AGS as index
piv_df = piv_df.set_index('AGS')

piv_df

Unnamed: 0_level_0,Name,mobility_fueltype_cardensity_Gesamt_2020,mobility_fueltype_cardensity_Gesamt_2021,mobility_fueltype_cardensity_Fossils_2020,mobility_fueltype_cardensity_Fossils_2021,mobility_fueltype_cardensity_Benzin_2020,mobility_fueltype_cardensity_Benzin_2021,mobility_fueltype_cardensity_Diesel_2020,mobility_fueltype_cardensity_Diesel_2021,mobility_fueltype_cardensity_Gas (einschl. bivalent)_2020,mobility_fueltype_cardensity_Gas (einschl. bivalent)_2021,mobility_fueltype_cardensity_Elektro_2020,mobility_fueltype_cardensity_Elektro_2021,mobility_fueltype_cardensity_Hybrid_2020,mobility_fueltype_cardensity_Hybrid_2021,mobility_fueltype_cardensity_sonstige Kraftstoffarten_2020,mobility_fueltype_cardensity_sonstige Kraftstoffarten_2021
AGS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,Deutschland,575.048213,583.014437,565.566887,563.754413,378.385765,377.660093,181.725409,180.930374,5.455713,5.163946,1.642919,3.713283,7.715204,15.425209,0.123204,0.121532
1,Schleswig-Holstein,580.673509,589.646151,572.730193,573.212914,368.892859,367.834757,199.421823,201.277547,4.415511,4.100609,1.692618,4.110534,6.144544,12.212505,0.106154,0.110198
2,Hamburg,435.469679,438.587113,423.919204,418.058886,285.898672,280.960228,134.985679,134.21614,3.034854,2.882517,1.83268,3.794631,9.638441,16.652687,0.079353,0.080909
3,Niedersachsen,602.37291,610.8099,594.660833,592.75503,381.300197,379.290176,206.136726,206.788911,7.223911,6.675943,1.560708,4.527951,6.047289,13.423768,0.10408,0.103151
4,Bremen,433.946451,443.255581,426.227339,427.677493,289.188832,290.324822,132.339406,132.852795,4.699102,4.499877,1.105671,2.230721,6.529634,13.261627,0.083808,0.08574
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16073,"Saalfeld-Rudolstadt, Kreis",591.429327,601.996652,585.437492,589.775079,406.250306,407.630065,175.300326,178.440908,3.88686,3.704107,0.567854,1.465796,5.257541,10.607216,0.16644,0.14856
16074,Saale-Holzland-Kreis,598.35056,603.880601,590.320711,589.070813,403.786708,401.064075,181.631569,183.328688,4.902434,4.678051,0.712423,1.74518,7.100077,12.882818,0.217349,0.18179
16075,Saale-Orla-Kreis,634.33042,642.591421,629.960317,632.56991,421.6521,420.485891,203.988346,208.389219,4.319871,3.694799,0.602773,1.556371,3.54129,8.224725,0.22604,0.240415
16076,"Greiz, Kreis",624.260355,628.051445,618.156991,615.928909,426.045848,422.38455,187.611205,189.517388,4.499938,4.026971,0.4862,1.435974,5.43096,10.540884,0.186204,0.145679


# 2. Vehicle density in total and by vehicle type (source: Regionalstatistik)

In [9]:
# Load the vehicle-stock table (one row per region, date and vehicle type).
df2 = pd.read_csv(
    "data/car_total.csv",
    delimiter=";",
    encoding="latin1",
    low_memory=False,
)

# Rename the region-code and vehicle-type columns to the names the helpers expect.
df2 = df2.rename(columns={"1_Auspraegung_Code":"AGS","2_Auspraegung_Label":"cartype"})

# Join the population figures, then derive vehicles per 1000 inhabitants.
df2 = calcCarDensity(
    mergeWithPopulation(df2),
    "VER012__Kraftfahrzeugbestand__Anzahl",
    "Fahrzeug_pro_Tausend_Personen",
)

# Wide format: one row per region and year, one column per vehicle type.
pivot2 = createWideFormat(df2,"cartype","Fahrzeug_pro_Tausend_Personen")
pivot2.to_csv("data/car_density_total_wideformat.csv")

# Long format: one row per region, year and vehicle type.
melt = pivot2.melt(
    id_vars=["ort_name","Jahr"],
    value_vars=['Insgesamt','Krafträder','Lkw','Pkw','Zugmaschinen'],
)
melt.to_csv("data/car_density_total_longformat.csv")

pivot2

cartype,Jahr,Insgesamt,Krafträder,Lkw,Pkw,Zugmaschinen,ort_ags,ort_name
0,1996,581.119334,28.096479,27.451124,493.822392,23.188756,0,Deutschland
1,1997,590.903287,30.883341,27.990158,500.201414,23.158941,0,Deutschland
2,1998,597.791697,33.633149,28.579320,503.758919,23.154378,0,Deutschland
3,1999,605.661153,36.599596,29.390334,507.728501,23.187177,0,Deutschland
4,2000,616.663672,38.640406,30.283029,515.724425,23.234448,0,Deutschland
...,...,...,...,...,...,...,...,...
20,2016,670.778434,45.542371,46.655823,556.791512,16.516205,16077,"Altenburger Land, Kreis"
21,2017,683.022614,47.093216,48.659680,565.151682,16.756757,16077,"Altenburger Land, Kreis"
22,2018,689.651346,48.691715,50.223041,567.600257,17.632437,16077,"Altenburger Land, Kreis"
23,2019,699.327688,50.339512,50.876467,574.206034,18.334769,16077,"Altenburger Land, Kreis"


In [10]:
# Years must be strings so they can be joined onto the vehicle-type names below.
pivot2['Jahr'] = pivot2['Jahr'].astype(str)

# Second pivot: one row per region, one column per (vehicle type, year) pair.
cartype_columns = ["Insgesamt","Krafträder","Lkw","Pkw","Zugmaschinen"]
piv2_df = pivot2.pivot(
    index=['ort_ags','ort_name'],
    columns=['Jahr'],
    values=cartype_columns,
)

# Flatten the two column levels into "<vehicle type>_<year>".
piv2_df.columns = piv2_df.columns.map('_'.join)

# Prefix the value columns with the indicator name, then turn the index levels
# back into columns, give them their final names, trim the region names and
# use the regional id as the index.
piv2_df = (
    piv2_df
    .add_prefix('mobility_cartype_density_')
    .reset_index()
    .rename(columns={'ort_name':'Name','ort_ags':'AGS'})
    .assign(Name=lambda frame: frame['Name'].str.strip())
    .set_index('AGS')
)

piv2_df

Unnamed: 0_level_0,Name,mobility_cartype_density_Insgesamt_1996,mobility_cartype_density_Insgesamt_1997,mobility_cartype_density_Insgesamt_1998,mobility_cartype_density_Insgesamt_1999,mobility_cartype_density_Insgesamt_2000,mobility_cartype_density_Insgesamt_2001,mobility_cartype_density_Insgesamt_2002,mobility_cartype_density_Insgesamt_2003,mobility_cartype_density_Insgesamt_2004,...,mobility_cartype_density_Zugmaschinen_2011,mobility_cartype_density_Zugmaschinen_2012,mobility_cartype_density_Zugmaschinen_2013,mobility_cartype_density_Zugmaschinen_2014,mobility_cartype_density_Zugmaschinen_2015,mobility_cartype_density_Zugmaschinen_2016,mobility_cartype_density_Zugmaschinen_2017,mobility_cartype_density_Zugmaschinen_2018,mobility_cartype_density_Zugmaschinen_2019,mobility_cartype_density_Zugmaschinen_2020
AGS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Deutschland,581.119334,590.903287,597.791697,605.661153,616.663672,636.670285,645.845338,650.124181,655.534696,...,24.787141,25.185999,25.415420,25.638253,25.690678,25.950704,26.214197,26.553877,26.902928,27.245315
1,Schleswig-Holstein,602.533354,610.879555,618.566067,624.799129,632.631254,651.501347,660.133989,662.550019,667.011694,...,25.317725,25.636631,25.790895,25.896334,25.891362,25.930576,26.065974,26.306723,26.618128,26.788509
2,Hamburg,470.865101,474.077142,477.280895,476.407184,514.628143,545.747331,549.493697,546.382151,553.602370,...,3.822634,3.806785,3.812541,3.874538,3.888312,3.908446,4.018936,4.140282,4.220862,4.243505
3,Niedersachsen,601.938952,611.435647,623.755759,633.801508,638.596991,659.641225,664.323614,666.640979,672.284112,...,29.192644,29.564873,29.830080,30.034731,30.072923,30.363902,30.734888,31.213232,31.695450,32.053918
4,Bremen,484.353099,490.252759,493.686046,493.806791,498.869325,511.258226,515.641189,512.091916,510.532815,...,4.793141,4.907037,4.744513,4.839187,4.829565,4.895743,4.866144,4.915181,5.045493,5.168130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16073,"Saalfeld-Rudolstadt, Kreis",539.886955,553.320902,567.930589,580.304824,596.560936,621.409762,633.901705,645.406927,656.410823,...,30.242992,31.831191,33.062272,34.374259,35.432566,35.978396,37.106028,38.201888,39.612787,40.973575
16074,Saale-Holzland-Kreis,576.753749,594.841317,615.768218,635.115661,650.139999,677.219391,687.444202,694.825102,710.757625,...,36.656943,38.644076,40.963798,42.302837,42.420867,43.371783,44.872876,45.032570,46.088005,47.490823
16075,Saale-Orla-Kreis,624.559539,637.782280,650.270722,663.836790,686.597290,712.508199,722.359663,729.030364,746.552947,...,50.739511,53.461242,55.119899,56.751964,57.841376,59.274908,60.612753,61.359252,62.742803,64.496685
16076,"Greiz, Kreis",576.863806,596.443638,612.167722,632.722362,652.568439,680.219502,691.708839,705.548117,718.795795,...,29.196661,31.017358,32.241330,33.447752,34.742963,36.152311,37.773860,39.171141,41.202078,42.651136


# 3. Save to file

In [11]:
# Write the fuel-type density table (regions as rows) to CSV.
piv_df.to_csv(path_or_buf="data/final_data_landkreis_mobility_fueltype.csv")

In [12]:
# Write the vehicle-type density table (regions as rows) to CSV.
piv2_df.to_csv(path_or_buf="data/final_data_landkreis_mobility_cartype.csv")