In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

In [None]:
# Display limits for frames rendered in the notebook: at most 20 rows, up to 500 columns.
pd.options.display.max_rows = 20
pd.options.display.max_columns = 500

In [None]:
def _load_rte_folder(folder):
    """Read every tab-separated export found directly inside ``folder``.

    Entries are visited in sorted name order because ``os.listdir`` returns
    them in arbitrary order, which would make the row order of the
    concatenated frame vary between runs. Anything that is not a regular file
    (for example a checkpoint directory) is skipped instead of being handed
    to ``read_csv``.

    Returns a list holding one DataFrame per file.
    """
    frames = []
    for file in sorted(os.listdir(folder)):
        path = os.path.join(folder, file)
        if not os.path.isfile(path):
            continue
        frames.append(
            pd.read_csv(path, encoding='latin-1', on_bad_lines='skip', sep='\t',
                        na_values=['ND'], index_col=False)
        )
    return frames


annuel_dfs = _load_rte_folder(r'./data_rte/Annuel/')
other_dfs = _load_rte_folder(r'./data_rte/Autre/')

# One frame per source folder; pd.concat raises ValueError if a folder held no file.
annuel_df = pd.concat(annuel_dfs)
region_df = pd.concat(other_dfs)



In [None]:
# Show the frame concatenated from the 'Annuel' folder.
annuel_df

### Modifying the global data frame

In [None]:
# Index the frame by a timestamp parsed from the textual 'Date' and 'Heures'
# columns, then add 'Production' as the sum of the eight generation columns
# (a missing value in any of them makes 'Production' missing for that row).
annuel_df = (
    annuel_df
    .assign(datetime=pd.to_datetime(annuel_df['Date'] + ' ' + annuel_df['Heures']))
    .set_index('datetime')
    .assign(
        Production=lambda d: (
            d.Fioul + d.Charbon + d.Gaz + d.Nucléaire
            + d.Eolien + d.Solaire + d.Hydraulique + d.Bioénergies
        )
    )
)


In [None]:
# Sum of 'Consommation' for each distinct value of the textual 'Date' column, drawn as a line.
test = annuel_df['Consommation'].groupby(annuel_df['Date']).sum()
test.plot()

In [None]:
# Monthly mean of 'Consommation'.
# The previous version dropped every row with a missing value in *any* column
# and then used asfreq(), which keeps only the row stamped exactly at each
# month end (or a gap when that row is absent) instead of summarising the month.
test2 = annuel_df[['Consommation']].resample('M').mean()
test2.Consommation.plot()

In [None]:
# Show the frame again, now indexed by 'datetime' and carrying the 'Production' column.
annuel_df

In [None]:
# Histogram over time of each generation column (plus 'Pompage').
filieres = [
    'Fioul', 'Charbon', 'Gaz', 'Nucléaire', 'Eolien',
    'Solaire', 'Hydraulique', 'Pompage', 'Bioénergies',
]
fig = px.histogram(annuel_df, x=annuel_df.index, y=filieres)
fig.show()

In [None]:
# Overlay production and consumption; consumption is drawn semi-transparent on top.
for colonne, opacite in (('Production', 1), ('Consommation', 0.6)):
    annuel_df[colonne].dropna().plot(label=colonne, alpha=opacite)
plt.legend()
plt.show()

In [None]:
# True where production exceeds consumption.
# Both columns are filtered together so the two sides of the comparison share
# one index; dropping missing values from each column separately makes pandas
# raise when the surviving timestamps differ.
prod_conso = annuel_df[['Production', 'Consommation']].dropna()
test = prod_conso['Production'] > prod_conso['Consommation']
test


In [None]:
# Fraction of timestamps where the comparison is True (mean of a boolean Series).
test.mean()

In [None]:
# Same share expressed as a percentage; the vectorised mean replaces the
# element-by-element Python loop that counted the True values.
test.mean() * 100

In [None]:
# 'Somme_ech': sum of the five 'Ech. comm.' (commercial exchange) columns.
annuel_df = annuel_df.assign(
    Somme_ech=lambda d: (
        d['Ech. comm. Angleterre']
        + d['Ech. comm. Espagne']
        + d['Ech. comm. Italie']
        + d['Ech. comm. Suisse']
        + d['Ech. comm. Allemagne-Belgique']
    )
)

In [None]:
# Production, consumption and summed exchanges on one set of axes.
courbes = (
    ('Production', {'label': 'Production', 'alpha': 1}),
    ('Consommation', {'label': 'Consommation', 'alpha': 0.6}),
    ('Somme_ech', {'label': 'Somme échange'}),
)
for colonne, options in courbes:
    annuel_df[colonne].dropna().plot(**options)
plt.legend()
plt.show()

        Les sources de base sont le solaire et l'éolien.
        Les sources de pointe sont tout le reste.

In [None]:
# Index the regional frame by a timestamp parsed from 'Date' + 'Heures', in chronological order.
region_df = (
    region_df
    .assign(datetime=pd.to_datetime(region_df['Date'] + ' ' + region_df['Heures']))
    .set_index('datetime')
    .sort_index()
)

In [None]:
# Keep only the region label and its consumption reading.
region_df_consommation = region_df.loc[:, ['Périmètre', 'Consommation']]

In [None]:
# One column per region, one row per timestamp (mean of duplicates).
# Pivoting region_df directly, rather than overwriting the cell's own input,
# lets this cell be re-run without failing on an already-pivoted frame.
region_df_consommation = region_df.pivot_table(index='datetime', columns='Périmètre', values='Consommation')

In [None]:
# Line plot with one curve per column of the pivoted consumption frame.
region_df_consommation.plot()

In [None]:
# Per-region lookup tables keyed by the region label.
# NOTE(review): the source and reference year of these figures are not recorded here.

# Number of inhabitants per region.
populations = {
    'Auvergne-Rhône-Alpes': 8_114_416,
    'Bourgogne-Franche-Comté': 2_829_343,
    'Bretagne': 3_384_301,
    'Centre-Val de Loire': 2_570_470,
    'Grand-Est': 5_495_238,
    'Hauts-de-France': 6_011_407,
    'Ile-de-France': 12_278_210,
    'Normandie': 3_328_364,
    'Nouvelle-Aquitaine': 5_960_994,
    'Occitanie': 6_124_997,
    'PACA': 5_638_035,
    'Pays-de-la-Loire': 3_868_828,
}

# Surface area per region (presumably km², as the later chart label suggests).
superficie_regions = {
    'Auvergne-Rhône-Alpes': 69_708,
    'Bourgogne-Franche-Comté': 47_731,
    'Bretagne': 27_208,
    'Centre-Val de Loire': 39_150,
    'Grand-Est': 57_906,
    'Hauts-de-France': 31_811,
    'Ile-de-France': 12_011,
    'Normandie': 29_907,
    'Nouvelle-Aquitaine': 84_175,
    'Occitanie': 72_498,
    'PACA': 31_813,
    'Pays-de-la-Loire': 32_082,
}

In [None]:
# Total 'Consommation' of each region divided by its population.
# Only rows whose 'Périmètre' is a key of `populations` are kept, so the
# disclaimer line found in the exports, or any other unexpected label, can no
# longer end up in the division and raise KeyError.
region_df_filtre = region_df[region_df['Périmètre'].isin(list(populations))]
region_df_somme = region_df_filtre.groupby(['Périmètre'])['Consommation'].sum()

resultats_par_habitant = {
    region: consommation / populations[region]
    for region, consommation in region_df_somme.items()
}

resultats_par_habitant

In [None]:
# Total 'Consommation' of each region divided by its surface area.
# Only rows whose 'Périmètre' is a key of `superficie_regions` are kept, so the
# disclaimer line found in the exports, or any other unexpected label, can no
# longer end up in the division and raise KeyError.
region_df_filtre = region_df[region_df['Périmètre'].isin(list(superficie_regions))]
region_df_somme = region_df_filtre.groupby(['Périmètre'])['Consommation'].sum()

resultats_par_superficie = {
    region: consommation / superficie_regions[region]
    for region, consommation in region_df_somme.items()
}

resultats_par_superficie

In [None]:
# Two bar series per region (per km² and per inhabitant) on a shared
# logarithmic y axis.
consumption_per_km2 = go.Bar(
    name='Consommation par km²',
    x=[*resultats_par_superficie],
    y=[*resultats_par_superficie.values()],
)
consumption_per_capita = go.Bar(
    name='Consommation par habitant',
    x=[*resultats_par_habitant],
    y=[*resultats_par_habitant.values()],
)

# Log scale, since the two series are plotted against the same axis.
layout = go.Layout(
    title='Consommation énergétique par région',
    yaxis={'title': 'Consommation énergétique', 'type': 'log'},
)

fig = go.Figure(data=[consumption_per_km2, consumption_per_capita], layout=layout)
fig.show()

In [None]:
# Turn the "-" placeholder into 0 and make the generation columns numeric.
# The match is on the exact cell value: with regex=True every string that
# merely *contains* a hyphen, such as a negative reading stored as text,
# was overwritten by 0 as well.
for colonne in ['Nucléaire', 'Eolien', 'Solaire', 'Hydraulique', 'Bioénergies']:
    region_df[colonne] = pd.to_numeric(region_df[colonne].replace('-', 0))

In [None]:
# Cast the cleaned generation columns to float. 'Nucléaire' is included because
# it goes through the same placeholder cleaning and is summed with 'Thermique'
# afterwards; left as an object column it would make that sum object-typed too.
region_df = region_df.astype({
    'Nucléaire': 'float',
    'Eolien': 'float',
    'Bioénergies': 'float',
    'Solaire': 'float',
    'Hydraulique': 'float',
})

In [None]:
# Drop incomplete rows, then derive the aggregated production columns.
# NOTE(review): dropna() without `subset` removes a row as soon as ANY column
# is missing, not only the columns used below — confirm this is intended.
region_df = region_df.dropna().assign(
    NonRenouvelable=lambda d: d.Thermique + d.Nucléaire,
    Renouvelable=lambda d: d.Eolien + d.Solaire + d.Hydraulique + d.Bioénergies,
    # Renewable total plus thermal, i.e. everything except nuclear.
    Production_sans_nucléaire=lambda d: d.Renouvelable + d.Thermique,
)

In [None]:
# Total non-renewable production per region, smallest first.
region_df.groupby('Périmètre')['NonRenouvelable'].sum().sort_values()

In [None]:
# Non-renewable production summed per (year, month).
# Only the plotted column is aggregated; summing the whole frame also
# "summed" the text columns just to throw the result away.
region_df.groupby([region_df.index.year, region_df.index.month])[['NonRenouvelable']].sum().plot(y='NonRenouvelable')

In [None]:
# Total nuclear production per region, smallest first.
region_df.groupby('Périmètre')['Nucléaire'].sum().sort_values()

In [None]:
# Total renewable production per region, smallest first.
region_df.groupby('Périmètre')['Renouvelable'].sum().sort_values()

In [None]:
# Renewable production summed per (year, month).
# Only the plotted column is aggregated; summing the whole frame also
# "summed" the text columns just to throw the result away.
region_df.groupby([region_df.index.year, region_df.index.month])[['Renouvelable']].sum().plot(y='Renouvelable')

In [None]:
# Show the regional frame with the derived production columns.
region_df

In [None]:
# Total production = renewable + non-renewable; then show its largest single reading.
region_df = region_df.assign(Production=lambda d: d.Renouvelable + d.NonRenouvelable)
region_df.Production.max()

In [None]:
# Highest 'Production' reading for each (year, month name, day, region) combination.
cles_de_groupe = [
    region_df.index.year,
    region_df.index.month_name(),
    region_df.index.day,
    region_df.Périmètre,
]
region_df.groupby(cles_de_groupe)['Production'].max()

In [None]:
# Per region and per year: maximum of every column, plus 'Max', the sum of the
# five per-technology yearly maxima (nuclear excluded). This is the sum of the
# individual maxima, not the maximum of the summed production.
mirouni = (
    region_df
    .groupby(['Périmètre'])
    .resample('Y')
    .max()
    .assign(Max=lambda d: d.Thermique + d.Eolien + d.Solaire + d.Hydraulique + d.Bioénergies)
)

In [None]:
# Yearly mean of non-nuclear production per region, as a percentage of the 'Max' column of `mirouni`.
moyenne_hors_nucleaire = (
    region_df.groupby(['Périmètre']).resample('Y')['Production_sans_nucléaire'].mean()
)
shrek = moyenne_hors_nucleaire / mirouni['Max'] * 100

In [None]:
# Move the innermost index level (the yearly timestamps) into columns: one row per region.
shrek = shrek.unstack(level=-1)

In [None]:
# One curve per region across the years; the legend is anchored outside the axes.
shrek.transpose().plot()
plt.legend(bbox_to_anchor=(1.1, 1.05))

In [None]:
# Timestamp x region tables of consumption and production, then their gap
# (positive where a region produces more than it consumes).
df_conso, df_prod = (
    region_df.pivot_table(index='datetime', columns='Périmètre', values=colonne)
    for colonne in ('Consommation', 'Production')
)
df_diff = df_prod.sub(df_conso)

In [None]:
# Monthly view of the production/consumption gap, one line per region.
# NOTE(review): on a downsample, ffill() keeps the last reading available at
# each month-end stamp rather than a monthly aggregate — confirm this is intended.
df_diff_mensuel = df_diff.resample('M').ffill()
px.line(df_diff_mensuel)

In [None]:
# Mean 'Ech. physiques' per region, smallest first.
region_df.groupby('Périmètre')['Ech. physiques'].agg('mean').sort_values()


In [None]:
def positif_or_negatif(nbre):
    """Return "positif" when ``nbre`` is strictly greater than 0, else "negatif".

    Zero, and any value for which ``nbre > 0`` is false, yields "negatif".
    """
    if nbre > 0:
        return "positif"
    return "negatif"


positif_or_negatif(4)

In [None]:

# The same classification written as a lambda expression.
positif_or_negatif_lambda = lambda nbre: "negatif" if not nbre > 0 else "positif"

nbre = 8
positif_or_negatif_lambda(nbre)


In [None]:
# Displays NumPy's `min` function object; `np` must already be imported for this cell to run.
np.min

In [None]:
# Per region and per year: extremes, mean, sign of the mean and standard
# deviation of 'Ech. physiques'.
# The reductions are requested by name: pandas maps NumPy callables such as
# np.std to its own implementations today and warns that this will change, so
# the string names keep the current results (sample standard deviation).
# The sign column reuses positif_or_negatif(), defined earlier in the notebook.
(
    region_df.groupby(['Périmètre', region_df.index.year])['Ech. physiques']
    .agg(
        min='min',
        max='max',
        mean='mean',
        signe=lambda g: positif_or_negatif(g.mean()),
        std='std',
    )
)

In [None]:
# Daily view of the production/consumption gap and its distribution per region.
# NOTE(review): on a downsample, ffill() keeps the last reading available at
# each day boundary rather than a daily aggregate — confirm this is intended.
daily_diff = df_diff.resample('D').ffill()
px.histogram(daily_diff)

In [None]:
# Sample variance of the daily gap per region, smallest first.
daily_diff.var(ddof=1).sort_values(ascending=True)

In [None]:
# Mean daily gap per region, smallest first.
daily_diff.mean().sort_values(ascending=True)