In [None]:
######################

# Download "data/mastr/powerProduction_alldata.csv" from our Google Drive (the file is too large for git)

#####################

import xml.etree.ElementTree as Xet
import pandas as pd
import numpy as np

# The input CSV is not part of the repository: please download it from our
# Google Drive before running this cell.
raw_csv_path = "data/mastr/powerProduction_alldata.csv"
df_org = pd.read_csv(raw_csv_path)
df_org

In [None]:
# Keep only producers that are currently producing (betriebsstatus 35) and
# that are registered in Germany (land 84).
# The raw frame is filtered first and only the result is copied: this avoids
# duplicating the complete raw data set, and the explicit copy makes `df` an
# independent frame, so the column assignments further down do not raise
# SettingWithCopyWarning.
is_producing = df_org["betriebsstatus"] == 35
is_in_germany = df_org["land"] == 84
df = df_org[is_producing & is_in_germany].copy()

# District names affected by the "Kreisstadt" double-naming problem.
# Borken is deliberately excluded here (it is renamed separately below).
kreisstaedte = [
    "Ansbach", "Aschaffenburg", "Augsburg", "Bamberg", "Bayreuth", "Coburg",
    "Fürth", "Heilbronn", "Hof", "Kaiserslautern", "Karlsruhe", "Kassel",
    "Landshut", "Leipzig", "München", "Oldenburg", "Osnabrück", "Passau",
    "Regensburg", "Rosenheim", "Rostock", "Schweinfurt", "Würzburg",
]

def check_city(row):
    """Prefix the district name with "Kreis " unless it equals the municipality.

    Args:
        row: A row (pandas Series) with the entries "landkreis" and "gemeinde".

    Returns:
        The same row object. Its "landkreis" entry is rewritten in place to
        "Kreis <name>" when the district and municipality names differ, and
        is left as it is when they are equal.
    """
    district_name = row["landkreis"]

    # District and municipality carry the same name: nothing to rename.
    if district_name == row["gemeinde"]:
        return row

    row["landkreis"] = "Kreis " + district_name
    return row

# Rows whose district is one of the double-named Kreisstädte but whose
# municipality has a different name are renamed to "Kreis <district>".
# This is a vectorized, masked assignment that touches only the landkreis
# column. A row-wise apply() followed by DataFrame.update() would rewrite
# every column of the matching rows, which is slow on a frame of this size
# and can change column dtypes along the way.
needs_kreis_prefix = df["landkreis"].isin(kreisstaedte) & (df["landkreis"] != df["gemeinde"])
df.loc[needs_kreis_prefix, "landkreis"] = "Kreis " + df.loc[needs_kreis_prefix, "landkreis"]


# Rename districts so that they match the Datawrapper map (Germany districts 2021).
# (Disabled attempt: replacing 'Eisenach' with 'Eifelkreis Bitburg-Prüm'.)
landkreis_names = df["landkreis"].str.replace('Borken', 'Kreis Borken')

# Correct mistakes in the data set itself: 'Osterode am Harz' is renamed to
# 'Göttingen', and every 'Göttingen' row gets the district id 3159.
landkreis_names = landkreis_names.str.replace('Osterode am Harz', 'Göttingen')
df["landkreis"] = landkreis_names
df.loc[df["landkreis"] == "Göttingen", "landkreis_id"] = 3159
#TODO: göttingen is not really working

# Offshore wind parks: wind units with a North Sea / Baltic Sea value get a
# pseudo district name and the district id 0.
is_wind = df["energy"] == "wind"

in_nordsee = (df["wind_nordsee"] > 0) & is_wind
df.loc[in_nordsee, "landkreis"] = "offshoreNordsee"
df.loc[in_nordsee, "landkreis_id"] = 0

in_ostsee = (df["wind_ostsee"] > 0) & is_wind
df.loc[in_ostsee, "landkreis"] = "offshoreOstsee"
df.loc[in_ostsee, "landkreis_id"] = 0

# Further wind units that are assigned to the Baltic Sea via their power plant id.
other_offshore = ["11WD8BALT3W----6","BALTICERZ"]
in_other_offshore = df["wind_kraftwerk"].isin(other_offshore) & is_wind
df.loc[in_other_offshore, "landkreis_id"] = 0
df.loc[in_other_offshore, "landkreis"] = "offshoreOstsee"

# Build the names of further offshore units that are identified by name only:
#   "HS A0" ... "HS R9"   (letters A-R, each with the digits 0-9)
#   "AL 00" ... "AL 59"   (number zero-padded to two digits)
# plus two irregularly named units.
# The names are generated with comprehensions so that no variable called
# `list` is created; such a variable would shadow the built-in `list` for
# every cell that runs afterwards in the same kernel.
other_offshore_einheit = [
    f"HS {letter}{digit}"
    for letter in "ABCDEFGHIJKLMNOPQR"
    for digit in range(10)
]
other_offshore_einheit += [f"AL {number:02d}" for number in range(60)]
other_offshore_einheit += ["HS I3a", "HS H3a"]

# Wind units with one of these names are assigned to the North Sea pseudo
# district with the district id 0.
is_named_offshore_unit = df["nameEinheit"].isin(other_offshore_einheit) & (df["energy"] == "wind")
df.loc[is_named_offshore_unit, "landkreis_id"] = 0
df.loc[is_named_offshore_unit, "landkreis"] = "offshoreNordsee"

# Drop two small units that come without a district.
weird_things = ["Jan Wilkens Bioenergie","Pflanzenöl BHKW"]
is_weird = df["nameEinheit"].isin(weird_things)
df = df.loc[~is_weird]

# Store the district ids as integers.
df = df.assign(landkreis_id=df["landkreis_id"].astype(int))


In [None]:
# Write every unit that has a latitude to a CSV file so the points can be
# loaded into QGIS.
has_latitude = df["breitengrad"].notna()
geo = df.loc[has_latitude].copy()
geo.to_csv("data/mastr/powerproduction_germany_geo.csv")

In [None]:
# List of units located outside of Germany (this file was created with QGIS).
geo_to_remove = pd.read_csv("data/mastr/powerProductionOutsideGermany.csv")

# Offshore cluster units must be kept, so take them off the remove list.
geo_to_remove = geo_to_remove[~geo_to_remove["landkreis"].isin(["offshoreNordsee", "offshoreOstsee"])]

# Drop data points outside of Germany.
# A unit is dropped only when its (laengengrad, breitengrad) PAIR is on the
# remove list. Testing the two columns independently with isin() would also
# drop units whose longitude matches one listed point while their latitude
# matches a different listed point.
coordinate_columns = ["laengengrad", "breitengrad"]
outside_coordinates = geo_to_remove[coordinate_columns].drop_duplicates()
# Left merge against unique pairs keeps one output row per df row, in df order,
# so the resulting flags can be applied to df positionally.
matched = df[coordinate_columns].merge(
    outside_coordinates, on=coordinate_columns, how="left", indicator=True
)
is_outside = (matched["_merge"] == "both").to_numpy()

df_germany = df[~is_outside]
df_germany.to_csv("data/mastr/powerProductionGermany.csv")


In [None]:
# Power (bruttoleistung) of the different energy producers per location,
# aggregated with a pivot table.
# First show which energy categories occur in the Germany-only data.
energy_types = df_germany["energy"].unique()
print(energy_types)


def createEnergyPerLandkreis(isBrutto, data=None):
    """Sum the power per district, split by energy type.

    Args:
        isBrutto: If truthy, the 'bruttoleistung' column is aggregated,
            otherwise the 'nettonennleistung' column.
        data: Unit-level DataFrame with the columns 'energy', 'landkreis',
            'landkreis_id' and the selected power column. When omitted, the
            notebook-level ``df_germany`` frame is used.

    Returns:
        DataFrame with one row per (landkreis, landkreis_id) combination and
        the columns 'landkreis_id' (int), 'landkreis', one column per energy
        type (combinations without data are 0), 'renewable_total',
        'not_renewable_total' and 'percRenewableinRegion' (share of
        renewables in percent; NaN when both totals are 0).
    """
    if data is None:
        data = df_germany

    value = 'bruttoleistung' if isBrutto else "nettonennleistung"

    # aggfunc is passed as the string "sum": recent pandas versions emit a
    # FutureWarning when a NumPy callable such as np.sum is passed instead.
    energyTotal = pd.pivot_table(
        data,
        index='energy',
        columns=['landkreis', "landkreis_id"],
        values=value,
        aggfunc="sum",
        margins=False,
    )

    # After the transpose the rows are (landkreis, landkreis_id) pairs and the
    # columns are the energy types. 'landkreis' is moved into a column here,
    # 'landkreis_id' stays as the index until the end of the function.
    energyTotal = energyTotal.rename_axis(None).transpose().reset_index(level=0).fillna(0)

    renewable_types = ['solar', 'wasser', 'wind', 'biomasse', "geoSolarthermieGrubenKlaerschlammDruckentspannung"]
    not_renewable_types = ['verbrennung', 'kernkraft']

    # An energy type that does not occur in the data has no column yet; add it
    # with 0 so the sums below do not fail with a KeyError.
    for energy_type in renewable_types + not_renewable_types:
        if energy_type not in energyTotal.columns:
            energyTotal[energy_type] = 0.0

    # sum up different energy types
    energyTotal["renewable_total"] = energyTotal[renewable_types].sum(axis=1)
    energyTotal["not_renewable_total"] = energyTotal[not_renewable_types].sum(axis=1)

    # percentage of renewables in landkreis
    total = energyTotal["not_renewable_total"] + energyTotal["renewable_total"]
    energyTotal["percRenewableinRegion"] = energyTotal["renewable_total"] / total * 100

    # Move 'landkreis_id' from the index into a regular integer column.
    energyTotal = energyTotal.reset_index(level=0)
    energyTotal["landkreis_id"] = energyTotal["landkreis_id"].astype(int)

    return energyTotal


# Power per district based on 'bruttoleistung'.
brutto = createEnergyPerLandkreis(True)
brutto.to_csv("data/mastr/powerPerLandkreis_brutto.csv")

# Power per district based on 'nettonennleistung'. isBrutto has to be False
# here; with True the "netto" table and CSV would just repeat the brutto values.
# reset_index adds an extra "index" column; it is kept so that the layout of
# the netto table and CSV stays as it was.
netto = createEnergyPerLandkreis(False).reset_index(level=0)
netto.to_csv("data/mastr/powerPerLandkreis_netto.csv")
netto

#print(netto.landkreis.unique())
#print(netto[netto.landkreis == "offshoreNordsee"])



In [None]:
# Area size data set.
area = pd.read_csv("../general_data/area.csv", delimiter=";")

# Drop unimportant columns and prepare the data set.
# The columns to remove are passed via the `columns` keyword: passing the axis
# positionally (drop(labels, 1)) raises a TypeError in pandas >= 2.0.
needed_columns = ['Schlüssel-nummer', 'Kreis/Landkreis', "Fläche\ninkm2", "Bevölkerunginsgesamt"]
area = area.drop(columns=area.columns.difference(needed_columns))
area = area.rename(columns={"Fläche\ninkm2": 'area'})

# The area values use a decimal comma; replace it with a dot (literal
# replacement, no regex) before converting to numbers.
area['area'] = pd.to_numeric(area['area'].str.replace(',', '.', regex=False))
print(area)

# Merge data sets; the outer join keeps the offshore rows, which have no
# matching entry in the area table.
df2 = pd.merge(area, netto, how="outer", left_on="Schlüssel-nummer", right_on="landkreis_id")

# Calculate renewable energy per km2.
df2["renewable_pro_qkm"] = df2["renewable_total"] / df2["area"]
df2["solar_pro_qkm"] = df2["solar"] / df2["area"]
df2["wind_pro_qkm"] = df2["wind"] / df2["area"]

df2.to_csv("data/mastr/powerPerLandkreis_sqm_netto.csv")


In [None]:
# Just some random testing and analyzing of the per-area table.
df2 = pd.read_csv("data/mastr/powerPerLandkreis_sqm_netto.csv")

# Calculate renewable energy per capita.
population = df2["Bevölkerunginsgesamt"]
df2["renewable_per_capita"] = df2["renewable_total"] / population

# Show the row with the highest wind value per km2.
top_wind_label = df2["wind_pro_qkm"].idxmax()
df2.loc[top_wind_label]