In [None]:
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np

# All registered power-generation units from Barnim. The export is split into
# two files because the download page restricts how much can be exported at once.
# Source: https://www.marktstammdatenregister.de/MaStR/Einheit/Einheiten/OeffentlicheEinheitenuebersicht
df_solar = pd.read_csv("data/Stromerzeuger_solar.csv", delimiter=';')
df_rest = pd.read_csv("data/Stromerzeuger_rest.csv", delimiter=';')
frames = [df_solar, df_rest]

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# each file contributes its own 0-based labels and the index contains
# duplicates, which makes label-based selection ambiguous.
df_org = pd.concat(frames, ignore_index=True)
df_org

In [None]:
# Build the cleaned working frame from df_org. rename() returns a new frame,
# so df_org itself stays untouched and this cell can be re-run safely.
df = df_org.rename(columns={
    'Betriebs-Status': 'betriebsstatus',
    'Energieträger': 'energietraeger',
    'Bruttoleistung der Einheit': 'bruttoleistung',
    'Ort': 'ort',
    'Inbetriebnahmedatum der Einheit': 'datum_inbetriebnahme',
})

# Keep only units that are currently operating. The other units carry no
# shutdown date, so they are excluded for now.
# .copy() makes the filtered frame independent, so the column assignments below
# do not raise SettingWithCopyWarning.
df = df[df.betriebsstatus == "In Betrieb"].copy()

# Make bruttoleistung numeric: the values are strings that use a decimal comma.
df['bruttoleistung'] = pd.to_numeric(
    df['bruttoleistung'].str.replace(',', '.', regex=False)
)

# Convert the commissioning date to datetime.
# NOTE(review): dayfirst=True assumes the export uses German day-first dates
# (dd.mm.yyyy), consistent with the ';' delimiter and decimal commas above.
# Confirm against the CSV.
df['datum_inbetriebnahme'] = pd.to_datetime(df['datum_inbetriebnahme'], dayfirst=True)

# Drop the implausibly old entries (the data contains a 1921 date). Rows whose
# date is missing (NaT) fail the comparison and are dropped as well.
df = df[df.datum_inbetriebnahme > '1989-01-01'].copy()

# Extract the commissioning year for the per-year aggregations.
df['year'] = df['datum_inbetriebnahme'].dt.year

df


In [None]:
# Gross power (bruttoleistung) of the different energy carriers per location.
# margins=True adds an 'All' row/column holding the totals. The string 'sum' is
# used instead of np.sum, which newer pandas versions deprecate as an aggfunc.
pivot_table_df = pd.pivot_table(
    df,
    index='energietraeger',
    columns='ort',
    values='bruttoleistung',
    aggfunc='sum',
    margins=True
)

# Sort the carriers by their total, then flip the table so that every location
# becomes one row with one column per energy carrier.
pivot_table_df = (
    pivot_table_df
    .sort_values(by=['All'], ascending=False)
    .rename_axis(None)
    .transpose()
    .reset_index(level=0)
    .fillna(0)
)
# The 'All' margin is a total, not a location, so it is removed from the rows.
pivot_table_df = pivot_table_df[pivot_table_df.ort != "All"]

# Energy carriers to stack in the chart. Only carriers that actually occur in
# the data are passed to plotly, so a carrier missing from the export does not
# make the plot fail.
energy_carriers = ['Solare Strahlungsenergie','Speicher','Wind','Erdgas','Biomasse','andere Gase','Mineralölprodukte','Wasser']
plotted_carriers = [carrier for carrier in energy_carriers if carrier in pivot_table_df.columns]

fig = px.bar(pivot_table_df, x="ort", y=plotted_carriers, title="Energy Production per Category in Barnim")
fig.show()

In [None]:
# How much gross power was newly commissioned each year, per energy carrier?
# Only bruttoleistung is summed: summing the whole frame would also try to add
# up the datetime column (a TypeError on pandas >= 2.0) and the string columns.
year_df = (
    df.groupby(['year', 'energietraeger'], as_index=False)['bruttoleistung']
    .sum()
)

fig2 = px.line(year_df, x="year", y="bruttoleistung",color="energietraeger",markers=True, title="New Energy productions per year, using gross power production in Barnim")
fig2.show()

# Accumulated sum of gross power per energy carrier. groupby sorts its keys, so
# the rows of each carrier are in ascending year order and cumsum runs
# chronologically.
year_df["brutto_total_year"] = year_df.groupby("energietraeger")["bruttoleistung"].cumsum()

fig2 = px.line(year_df, x="year", y="brutto_total_year",color="energietraeger",markers=True, title="New Energy productions per year, using the accumulated gross power production in Barnim")
fig2.show()


In [None]:
# Plotly's built-in gapminder sample, used as a reference example for px.line.
# It gets its own name instead of reusing `df`, so running this cell does not
# overwrite the cleaned power-unit frame that the earlier cells depend on.
gapminder_oceania = px.data.gapminder().query("continent == 'Oceania'")
print(gapminder_oceania)
fig = px.line(gapminder_oceania, x='year', y='lifeExp', color='country', symbol="country")
fig.show()