In [1]:
import pandas as pd

def make_column_numeric(se):
    """Convert a Series of German-formatted number strings to numeric values.

    German notation uses '.' as the thousands separator and ',' as the
    decimal mark, e.g. '1.000,5' stands for 1000.5.

    Parameters
    ----------
    se : pandas.Series
        Series of strings (it must support the ``.str`` accessor).

    Returns
    -------
    pandas.Series
        The values parsed by ``pd.to_numeric``.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_numeric`` when a cleaned value still cannot
        be parsed as a number.
    """
    # Both separators must be matched literally. As a regular expression,
    # '.' is a wildcard that matches every character, so regex=True would
    # delete the whole string instead of only the thousands separators.
    se = se.str.replace('.', '', regex=False)
    se = se.str.replace(',', '.', regex=False)
    se = pd.to_numeric(se)
    return se

# INKAR commuting indicators from the BBSR (https://www.inkar.de).
# The file is semicolon-separated and its first two rows are both header
# rows, so pandas reads the columns as a two-level index.
df = pd.read_csv("data/inkar_commuting.csv", sep=";", header=[0, 1])

# Flatten the two header levels into single "<level 0>_<level 1>" names.
df.columns = ["_".join(levels) for levels in df.columns]

# Give short names to the three columns whose second header level pandas
# labelled "Unnamed: N_level_1".
unnamed_to_short = {
    "Kennziffer_Unnamed: 0_level_1": "Kennziffer",
    "Raumeinheit_Unnamed: 1_level_1": "Raumeinheit",
    "Aggregat_Unnamed: 2_level_1": "Aggregated",
}
df = df.rename(columns=unnamed_to_short)


# Years whose indicator columns are converted and combined below.
years = ["2016", "2017", "2018", "2019"]

# Column-name prefixes (the year is appended) of the indicators that are
# converted from German number strings, in processing order.
text_prefixes = (
    "Einpendler_",
    "Auspendler_",
    "Pendlersaldo_",
    "Pendler mit Arbeitsweg 50 km und mehr_",
    "Pendler mit Arbeitsweg 150 km und mehr_",
    "Pendler mit Arbeitsweg 300 km und mehr_",
)

# (source indicator prefix, derived column prefix) pairs: each derived
# column is the source indicator multiplied by the employee column.
derived_prefixes = (
    ("Pendler mit Arbeitsweg 50 km und mehr_", "pendler_50km_gesamt_"),
    ("Pendler mit Arbeitsweg 150 km und mehr_", "pendler_150km_gesamt_"),
    ("Pendler mit Arbeitsweg 300 km und mehr_", "pendler_300km_gesamt_"),
)

for year in years:
    # Employees subject to social insurance, counted at place of residence.
    sv = "sozialversicherungspflichtig Beschäftigte am Wohnort_" + year
    df[sv] = make_column_numeric(df[sv])

    for prefix in text_prefixes:
        df[prefix + year] = make_column_numeric(df[prefix + year])

    # NOTE(review): presumably the distance indicators are shares of the
    # employees; if they are percentages, these products are 100 times a
    # head count - confirm the units against the INKAR metadata.
    for source_prefix, total_prefix in derived_prefixes:
        df[total_prefix + year] = df[source_prefix + year] * df[sv]

# The district key is called "AGS" in the coordinates file, so use the
# same name here to merge on it.
df = df.rename(columns={"Kennziffer": "AGS"})

# Centroid coordinates of the Kreise (made with QGIS); keep only the
# columns needed downstream.
geo = pd.read_csv("../general_data/kreise_geocoordinates.csv")
geo = geo.filter(items=["AGS", "xcoord", "ycoord", "GEN"])

# Inner join on AGS: rows whose AGS is missing from either frame are dropped.
df = pd.merge(df, geo, how="inner", on="AGS")

# Save the merged table, then show it as the cell output.
df.to_csv("data/inkar_commuting_output.csv")
df



Unnamed: 0,AGS,Raumeinheit,Aggregated,Einpendler_1997,Einpendler_1998,Einpendler_1999,Einpendler_2000,Einpendler_2001,Einpendler_2002,Einpendler_2003,...,pendler_300km_gesamt_2017,pendler_50km_gesamt_2018,pendler_150km_gesamt_2018,pendler_300km_gesamt_2018,pendler_50km_gesamt_2019,pendler_150km_gesamt_2019,pendler_300km_gesamt_2019,xcoord,ycoord,GEN
0,1001,"Flensburg, Stadt",kreisfreie Stadt,4748,4825,4887,4967,4974,4987,5021,...,112175.45,446649.30,256357.10,128020.50,463188.59,270220.28,147716.11,9.438752,54.785155,Flensburg
1,1002,"Kiel, Stadt",kreisfreie Stadt,4348,4444,4461,4515,4598,4623,4697,...,298002.36,1204712.08,420248.40,316316.00,1188370.44,418181.40,315244.44,10.131725,54.325195,Kiel
2,1003,"Lübeck, Stadt",kreisfreie Stadt,3683,3768,3879,3964,4038,4066,4136,...,230965.84,1182692.72,308322.30,235589.86,1181316.60,318539.30,250509.55,10.727704,53.873031,Lübeck
3,1004,"Neumünster, Stadt",kreisfreie Stadt,3883,3963,4066,4218,4400,4483,4616,...,103761.84,432184.97,126407.52,104754.38,439419.60,131165.55,110155.05,9.983916,54.081674,Neumünster
4,1051,Dithmarschen,Landkreis,1172,1163,1198,1264,1489,1280,1273,...,145986.33,799231.44,188193.88,142212.00,815608.64,196115.92,150371.76,9.107561,54.134110,Dithmarschen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,16073,Saalfeld-Rudolstadt,Landkreis,1148,1206,1180,1289,1313,1339,1400,...,123556.88,465587.33,206411.79,118190.80,443376.43,194563.53,116248.65,11.312354,50.640692,Saalfeld-Rudolstadt
397,16074,Saale-Holzland-Kreis,Landkreis,2647,2689,2844,2941,3008,3178,3335,...,124087.92,391555.65,193896.99,121741.32,380363.70,190181.85,117878.48,11.731716,50.904081,Saale-Holzland-Kreis
398,16075,Saale-Orla-Kreis,Landkreis,1583,1653,1819,1897,1992,1889,1969,...,93379.20,362650.08,152462.44,91681.20,357119.29,150187.18,88345.40,11.711224,50.580767,Saale-Orla-Kreis
399,16076,Greiz,Landkreis,2988,2975,2983,2940,2997,2959,3011,...,132436.02,463841.40,219653.82,130335.60,453674.04,212196.24,122830.44,12.074323,50.748627,Greiz


In [2]:
# AGS keys of Hamburg (2000) and the six surrounding Landkreise:
# Stade (3359), Harburg (3353), Herzogtum Lauenburg (1053),
# Stormarn (1062), Segeberg (1060) and Pinneberg (1056).
hamburg_ags = [2000, 3359, 3353, 1053, 1062, 1060, 1056]

# Take an independent copy of the matching rows so `df` is left untouched.
in_hamburg_region = df["AGS"].isin(hamburg_ags)
hamburg = df.loc[in_hamburg_region].copy()

# NOTE(review): "communiting" in the file name looks like a typo for
# "commuting"; kept as is because other code may read this path.
hamburg.to_csv("data/communiting_hamburg.csv")
hamburg

Unnamed: 0,AGS,Raumeinheit,Aggregated,Einpendler_1997,Einpendler_1998,Einpendler_1999,Einpendler_2000,Einpendler_2001,Einpendler_2002,Einpendler_2003,...,pendler_300km_gesamt_2017,pendler_50km_gesamt_2018,pendler_150km_gesamt_2018,pendler_300km_gesamt_2018,pendler_50km_gesamt_2019,pendler_150km_gesamt_2019,pendler_300km_gesamt_2019,xcoord,ycoord,GEN
5,1053,Herzogtum Lauenburg,Landkreis,3163,3256,3350,3476,3587,3582,3638,...,213355.8,1152574.48,314675.84,221905.84,1159388.16,318529.82,221914.14,10.602534,53.589533,Herzogtum Lauenburg
8,1056,Pinneberg,Landkreis,3052,3150,3305,3492,3510,3460,3507,...,407823.9,960465.2,543421.1,436000.65,984302.34,558970.65,452316.48,9.735885,53.719215,Pinneberg
12,1060,Segeberg,Landkreis,3759,3765,3837,3944,4007,3997,4032,...,375866.72,1343271.72,490981.33,396774.98,1396813.96,508241.06,416553.92,10.141374,53.920087,Segeberg
14,1062,Stormarn,Landkreis,4838,4957,4985,5098,5148,5139,5202,...,364608.7,1101496.5,490495.45,376580.0,1117396.8,506171.2,387746.24,10.331402,53.720971,Stormarn
15,2000,"Hamburg, Stadt",kreisfreie Stadt,3451,3488,3489,3561,3584,3573,3603,...,3830927.32,6756440.02,4838869.46,4022403.87,6924824.71,5003397.21,4181026.24,10.018949,53.546596,Hamburg
35,3353,Harburg,Landkreis,3215,3299,3377,3493,3574,3595,3666,...,393541.68,1106524.93,539570.36,410763.15,1122250.4,564236.8,430438.0,9.96355,53.315626,Harburg
41,3359,Stade,Landkreis,2013,2066,2138,2163,2195,2223,2296,...,239434.66,964504.71,345216.69,251434.17,981735.22,355941.1,256608.7,9.416825,53.577931,Stade
