In [29]:
### Download industry energy consumption data for Landkreise
### from regionalstatistik.de and compile into csv with Regions as rows
### and years as columns.

In [30]:
###https://www.regionalstatistik.de/genesis/online?operation=table&code=43531-01-02-4-B&levelindex=0&levelid=1658896529187#astructure
###download data for all regions and all years as "flat"

In [31]:
#import necessary libraries

import pandas as pd
import numpy as np

In [32]:
#helper function pivot original table
def createWideFormat(df,column, values):
    #pivot table to wide format data
    orte = df["AGS"].unique()

    pivot_total = pd.DataFrame()

    for ort in orte:
            snippet = df[df["AGS"] == ort]

            temp = pd.pivot_table(snippet, index='Jahr',columns=column, values=values)
            temp = temp.reset_index(level=0)

            temp["ort_ags"] = ort
            temp["ort_name"]= str(snippet["1_Auspraegung_Label"].iloc[0]).lstrip()

            pivot_total = pd.concat([pivot_total, temp])
            
    return pivot_total

In [33]:
#load data from csv
ie_df = pd.read_csv("data/industry.csv",delimiter=";",encoding="latin1")

ie_df

Unnamed: 0,Statistik_Code,Statistik_Label,Zeit_Code,Zeit_Label,Zeit,1_Merkmal_Code,1_Merkmal_Label,1_Auspraegung_Code,1_Auspraegung_Label,2_Merkmal_Code,2_Merkmal_Label,2_Auspraegung_Code,2_Auspraegung_Label,VBR001__Energieverbrauch_(einschl._nichtenergetischem_V.)__Tsd._MJ
0,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2020,KREISE,Kreise und kreisfreie Städte,DG,Deutschland,ENRNW1,Energieträger,ENRGTRNW0,insgesamt,3747109390
1,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2020,KREISE,Kreise und kreisfreie Städte,DG,Deutschland,ENRNW1,Energieträger,ENRGTRNW1,Kohle,583625959
2,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2020,KREISE,Kreise und kreisfreie Städte,DG,Deutschland,ENRNW1,Energieträger,ENRGTRNW2,Heizöl,61319731
3,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2020,KREISE,Kreise und kreisfreie Städte,DG,Deutschland,ENRNW1,Energieträger,ENRGTRNW3,Erdgas,1168708514
4,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2020,KREISE,Kreise und kreisfreie Städte,DG,Deutschland,ENRNW1,Energieträger,ENRGTRNW4,Erneuerbare Energien,153606273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77467,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2003,KREISE,Kreise und kreisfreie Städte,16077,"Altenburger Land, Kreis",ENRNW1,Energieträger,ENRGTRNW3,Erdgas,240813
77468,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2003,KREISE,Kreise und kreisfreie Städte,16077,"Altenburger Land, Kreis",ENRNW1,Energieträger,ENRGTRNW4,Erneuerbare Energien,-
77469,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2003,KREISE,Kreise und kreisfreie Städte,16077,"Altenburger Land, Kreis",ENRNW1,Energieträger,ENRGTRNW5,Strom,430856
77470,43531,Jahreserhebung ü. die Energieverwendung der Be...,JAHR,Jahr,2003,KREISE,Kreise und kreisfreie Städte,16077,"Altenburger Land, Kreis",ENRNW1,Energieträger,ENRGTRNW6,Wärme,.


In [34]:
#rename AGS and year columns for merging with population
ie_df.rename({"1_Auspraegung_Code":"AGS","2_Auspraegung_Label":"Energietyp","Zeit":"Jahr","VBR001__Energieverbrauch_(einschl._nichtenergetischem_V.)__Tsd._MJ":"Energieverbrauch"},axis=1,inplace=True)

#set empty cells to nan and convert to numeric
ie_df["Energieverbrauch"] = ie_df["Energieverbrauch"].replace({'-':0,".":np.NaN})
ie_df["Energieverbrauch"] = pd.to_numeric(ie_df["Energieverbrauch"])

#create wide format data
pivot = createWideFormat(ie_df,"Energietyp","Energieverbrauch")
pivot["Anteil_Erneuerbar"] = pivot["Erneuerbare Energien"] / pivot["insgesamt"] * 100
pivot

Energietyp,Jahr,Erdgas,Erneuerbare Energien,Heizöl,Kohle,Sonstige Energieträger,Strom,Wärme,insgesamt,ort_ags,ort_name,Anteil_Erneuerbar
0,2003,1.013851e+09,59425545.0,285107207.0,660462603.0,418927983.0,854459979.0,112208059.0,3.404443e+09,DG,Deutschland,1.745529
1,2004,1.035524e+09,84931606.0,269248048.0,838349544.0,443621664.0,878001673.0,110378173.0,3.660054e+09,DG,Deutschland,2.320501
2,2005,1.042020e+09,101389083.0,332050567.0,715638646.0,400588448.0,891448655.0,118128163.0,3.601264e+09,DG,Deutschland,2.815375
3,2006,1.038921e+09,101034069.0,315453665.0,749290655.0,602259640.0,893774767.0,153365492.0,3.854099e+09,DG,Deutschland,2.621470
4,2007,1.067395e+09,140056029.0,279773855.0,832459933.0,827447483.0,919395218.0,156783228.0,4.223310e+09,DG,Deutschland,3.316262
...,...,...,...,...,...,...,...,...,...,...,...,...
13,2016,3.669870e+05,,43101.0,,4026.0,664679.0,11674.0,1.102775e+06,16077,"Altenburger Land, Kreis",
14,2017,3.626500e+05,,41589.0,,4059.0,684864.0,13311.0,1.120508e+06,16077,"Altenburger Land, Kreis",
15,2018,3.470580e+05,0.0,40064.0,,,661013.0,12766.0,1.079857e+06,16077,"Altenburger Land, Kreis",0.000000
16,2019,3.655850e+05,0.0,52241.0,,,622395.0,27840.0,1.085825e+06,16077,"Altenburger Land, Kreis",0.000000


In [35]:
#correct AGS for Deutschland
pivot['ort_ags']=pivot['ort_ags'].replace('DG',0)

#change year column to string to facilitate renaming columns after second pivot
pivot['Jahr'] = pivot['Jahr'].astype(str)

In [36]:
#pivot data to include years and energy types as columns
piv_df = pivot.pivot(index=["ort_ags","ort_name"],columns="Jahr",values=['Erdgas', 'Erneuerbare Energien', 'Heizöl', 'Kohle',
       'Sonstige Energieträger', 'Strom', 'Wärme', 'insgesamt', 'Anteil_Erneuerbar'])

#collapse levels of column names and join energy type to year
piv_df.columns = piv_df.columns.map('_'.join)

#prepend all columns with indicator name
piv_df=piv_df.add_prefix('energy_industry_consumption_')

#reset index to recreate AGS and Name columns
piv_df.reset_index(inplace=True)

#rename regional id and name and strip whitespace from Name column
piv_df.rename(columns={'ort_name':'Name','ort_ags':'AGS'},inplace=True)
piv_df['Name']=piv_df['Name'].str.strip()
piv_df['AGS']=pd.to_numeric(piv_df['AGS'])

#set AGS as index
piv_df.set_index('AGS',inplace=True)
piv_df.sort_index(inplace=True)

piv_df

Unnamed: 0_level_0,Name,energy_industry_consumption_Erdgas_2003,energy_industry_consumption_Erdgas_2004,energy_industry_consumption_Erdgas_2005,energy_industry_consumption_Erdgas_2006,energy_industry_consumption_Erdgas_2007,energy_industry_consumption_Erdgas_2008,energy_industry_consumption_Erdgas_2009,energy_industry_consumption_Erdgas_2010,energy_industry_consumption_Erdgas_2011,...,energy_industry_consumption_Anteil_Erneuerbar_2011,energy_industry_consumption_Anteil_Erneuerbar_2012,energy_industry_consumption_Anteil_Erneuerbar_2013,energy_industry_consumption_Anteil_Erneuerbar_2014,energy_industry_consumption_Anteil_Erneuerbar_2015,energy_industry_consumption_Anteil_Erneuerbar_2016,energy_industry_consumption_Anteil_Erneuerbar_2017,energy_industry_consumption_Anteil_Erneuerbar_2018,energy_industry_consumption_Anteil_Erneuerbar_2019,energy_industry_consumption_Anteil_Erneuerbar_2020
AGS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Deutschland,1.013851e+09,1.035524e+09,1.042020e+09,1.038921e+09,1.067395e+09,1.063796e+09,961175019.0,1.083953e+09,1.075943e+09,...,3.430804,3.279284,3.169453,3.274449,3.203685,3.318351,3.189882,3.795123,3.970612,4.099327
1,Schleswig-Holstein,1.671949e+07,1.595202e+07,1.649519e+07,1.624940e+07,1.728819e+07,1.759618e+07,17799405.0,2.159016e+07,2.329601e+07,...,3.893604,3.965060,,,,,0.930357,3.680699,3.474135,
2,Hamburg,2.083285e+07,2.269981e+07,1.969420e+07,2.205925e+07,2.176798e+07,2.145118e+07,18353140.0,2.125804e+07,1.908914e+07,...,0.022723,0.037820,,0.041547,0.044498,0.040096,0.041179,0.036517,0.036706,0.033292
3,Niedersachsen,1.382405e+08,1.406649e+08,1.494298e+08,1.391713e+08,1.378556e+08,1.382421e+08,127744815.0,1.380326e+08,1.378759e+08,...,2.809387,2.289350,1.514954,1.596101,1.737446,1.622609,1.615980,1.964327,1.882437,1.913769
4,Bremen,1.379781e+07,1.342768e+07,1.190057e+07,1.350348e+07,1.351306e+07,1.355372e+07,11043752.0,1.261753e+07,1.202208e+07,...,0.003861,0.003986,0.003382,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11008008,Berlin-Neukölln,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,...,,,,,,,,,,
11009009,Berlin-Treptow-Köpenick,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,...,,,,,,,,,,
11010010,Berlin-Marzahn-Hellersdorf,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,...,,,,,,,,,,
11011011,Berlin-Lichtenberg,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.0,0.000000e+00,0.000000e+00,...,,,,,,,,,,


In [37]:
#save dataframe to file
piv_df.to_csv("data/final_data_landkreis_energy_industryenergy.csv")

In [39]:
## DATASNIPPET FOR TESTING

barnim = pivot[pivot.ort_ags == "12060"]
barnim.drop(["ort_ags","ort_name"],axis= 1,inplace=True)

melt = pd.melt(barnim,id_vars=["Jahr"])
melt.to_csv("data/snippet/energy_industry_barnim.csv")
melt

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  barnim.drop(["ort_ags","ort_name"],axis= 1,inplace=True)


Unnamed: 0,Jahr,Energietyp,value
0,2003,Erdgas,426279.0
1,2004,Erdgas,439554.0
2,2005,Erdgas,402347.0
3,2006,Erdgas,378409.0
4,2007,Erdgas,392469.0
...,...,...,...
157,2016,Anteil_Erneuerbar,
158,2017,Anteil_Erneuerbar,
159,2018,Anteil_Erneuerbar,
160,2019,Anteil_Erneuerbar,
