In [1]:
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
from calendar import monthrange
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import numpy as np
import math

In [2]:
df = pd.read_csv("https://jedha-final-project-jrat.s3.amazonaws.com/datameteo_france_1950-2022_clean_03.csv")

In [3]:
df_sample = df[df["region"]=="CENTRE VAL DE LOIRE"]

In [4]:
df_stations_old = df_sample.groupby(["NUM_POSTE"]).max()
print(len(df_stations_old))

34


In [5]:
# filter out station qui n'ont pas de données récentes (2022)
last_year = df_sample["Year"].max()
mask = df_stations_old["Year"]!=last_year
df_stations_old = df_stations_old[mask]
stations_old = df_stations_old.index.to_list()
df_recent = df_sample[~df_sample["NUM_POSTE"].isin(stations_old)]

In [6]:
print(len(df_recent.groupby(["NUM_POSTE"]).max()))

28


In [7]:
# check continuité des données sur 7 ans

stations = df_recent["NUM_POSTE"].unique()
to_drop = []

for num_station in stations:
    df_station = df_recent[df_recent["NUM_POSTE"]==num_station]

    for year in range(last_year,last_year-7,-1):
        nb_missing_month = 0
        try:
            df_station_year = df_station[df_station["Year"]==year]
            nb_missing_month = 12 - len(df_station_year["Month"].unique().tolist())
            if nb_missing_month > 0:
                to_drop.append(num_station)
                print(f"Drop station N°{num_station}")
                print(f"{nb_missing_month} manquants en {year}\n\n")
            else:
                print(f"Station N°{num_station} has {df_station_year["Month"].count()} months in {year}")

        except Exception as e:
            to_drop.append(num_station)
            print(e)
            print()
            print(f"Drop station N°{num_station}")
            print(f"{year} est manquante\n\n")

    print()

Station N°18015003 has 12 months in 2022
Station N°18015003 has 12 months in 2021
Station N°18015003 has 12 months in 2020
Station N°18015003 has 12 months in 2019
Station N°18015003 has 12 months in 2018
Station N°18015003 has 12 months in 2017
Station N°18015003 has 12 months in 2016

Station N°18033001 has 12 months in 2022
Station N°18033001 has 12 months in 2021
Station N°18033001 has 12 months in 2020
Station N°18033001 has 12 months in 2019
Station N°18033001 has 12 months in 2018
Station N°18033001 has 12 months in 2017
Station N°18033001 has 12 months in 2016

Station N°18092001 has 12 months in 2022
Station N°18092001 has 12 months in 2021
Station N°18092001 has 12 months in 2020
Station N°18092001 has 12 months in 2019
Station N°18092001 has 12 months in 2018
Station N°18092001 has 12 months in 2017
Station N°18092001 has 12 months in 2016

Station N°18125004 has 12 months in 2022
Station N°18125004 has 12 months in 2021
Station N°18125004 has 12 months in 2020
Station N°181

In [8]:
df_recent = df_recent[~df_recent["NUM_POSTE"].isin(to_drop)]

In [9]:
print(len(df_recent.groupby(["NUM_POSTE"]).max()))

27


In [10]:
# PREPROCESSING DATE

df_recent["AAAAMM"] = pd.to_datetime(df_recent["AAAAMM"])

def last_day_of_month(aaaamm):
    """
    prend une date format AAAAMM et retourne une string format AAAA-MM-DD avec DD = dernier jour du mois
    """
    year = aaaamm.year
    month = aaaamm.month
    last_day = monthrange(year, month)[1]
    return f"{year}-{month:02d}-{last_day:02d}"

df_recent["date"] = df_recent["AAAAMM"].apply(last_day_of_month)
df_recent["date"] = pd.to_datetime(df_recent["date"])


In [11]:
to_keep = ["NUM_POSTE","NOM_USUEL","LAT","LON","ALTI","Year","Month","vent_speed_inst_moy_mensu","departement_num","departement_name","region","date"]
to_drop = [col for col in df_sample if col not in to_keep]

In [12]:
df_recent = df_recent.drop(to_drop,axis=1)

In [13]:
def clean_continuity(dataframe):

    """
    INPUT = un dataframe, le nom de sa variable date
    --> parcours la série d'année en année en commençant par la plus récente
    --> si année en cours < 12 mois, drop l'année et toutes les années antérieures
    --> si "trou" entre année en cours et année suivante, drop N-1 et toutes les années antérieures 
    --> transforme en série temporelle à frequence mensuelle
    
    OUTPUT = série temporelle clean
    """

    print(f"Cleaning de la station {dataframe["NUM_POSTE"].unique().tolist()} ...")

    #--> check continuité des années : stop_year = N si N < 12 mois, N-1 si "trou" entre N et N-1
    years= df_station["Year"].unique().tolist()
    years.sort(reverse=True)

    month_unique = []
    stop_year = 0

    for index, year in enumerate(years):
        df_station_year = df_station[df_station["Year"] == year]
        month_unique = df_station_year["Month"].unique().tolist()
        if len(month_unique)!=12:
            stop_year = year
            print(f"Année : {year}. Continuité rompue.")
            print(f"Mois: {len(month_unique)} / 12.")
            break
        else:
            try:
                if year - years[index+1] != 1:
                    stop_year = years[index+1]
                    print(f"Année : {years[index+1]}. Continuité rompue.")
                    print(f"Année manquante : {years[index+1]}")
                    break
            except IndexError:
                print("Continuité garantie")
                pass

    print("...Terminé.")

    #--> drop
    if stop_year > 0:
        dataframe = dataframe[dataframe["Year"] > stop_year]

        print(f"Données antérieures à {stop_year+1} supprimées.")
    else:
        print(f"Aucune rupture de continuité constatée, toutes les données ont été conservées.")
    

    #--> transforme en série temporelle à frequence mensuelle
    dataframe.set_index("date", inplace=True)
    dataframe = dataframe.asfreq("ME")

    #--> check NA
    check = "NOT OK" if dataframe.isna().any().any() else "OK"
    print(f"Check valeurs manquantes : {check}")

    return dataframe
    

In [14]:
def get_wind_forecast(time, data):

    """
    INPUT =  nombre de mois (time) à prédire et le set de données

    --> prédit la force du vent à horizon "time"
    
    OUTPUT =  le df de test, les prédictions, la MAE et la MAPE
    """
    # calcule date de split 
    date_limite = data.index.max() - pd.DateOffset(months=time)

    # split train / test
    train = data[data.index <= date_limite]
    test = data[data.index > date_limite]

    # split variables exogènes / target
    to_keep = ["LON","LAT","ALTI"]

    var_exog_train = train[to_keep]
    y_train = train["vent_speed_inst_moy_mensu"]

    var_exog_test = test[to_keep]
    y_test = test["vent_speed_inst_moy_mensu"]

    # training
    model = SARIMAX(y_train, exog=var_exog_train, order=(1, 1, 1), seasonal_order=(2, 1, 2, 12))
    sarima_model = model.fit(maxiter=1000)

    # prédictions
    forecast = sarima_model.get_forecast(steps = time, exog = var_exog_test)
    predicted_values = forecast.predicted_mean

    # évaluation
    mae = mean_absolute_error(y_test, predicted_values)
    mape = mean_absolute_percentage_error(y_test, predicted_values)

    # rendu
    return test, predicted_values, mae, mape


In [15]:
# créé un dictionnaire contenant un dataframe par numéro de station
# clean continuité des dates sur le dataframe + transforme en série temporelle (clean_continuity)
# prédit (get_wind_forecast) et enregistre les prédictions horizon, la MAE et la MAPE générale
# enregistre le dataframe clean et le dataframe d'évaluation des prédictions

stations = df_recent["NUM_POSTE"].unique()
horizon = 84

dict_stations = {}

for station in stations:
    
    df_station = df_recent[df_recent["NUM_POSTE"]==station]
    df_station = clean_continuity(df_station)

    df_station_eval, predictions, mae, mape = get_wind_forecast(horizon,df_station)
    df_station_eval[f"pred_{horizon}_mois"] = predictions
    df_station_eval["mae"] = mae
    df_station_eval["mape"] = mape

    dict_stations[station] = [df_station,df_station_eval]




Cleaning de la station [18015003] ...
Année : 2006. Continuité rompue.
Mois: 10 / 12.
...Terminé.
Données antérieures à 2007 supprimées.
Check valeurs manquantes : OK


  warn('Non-invertible starting seasonal moving average'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape
  warn('Too few observations to es

Cleaning de la station [18033001] ...
Année : 2009. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 2010 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [18092001] ...
Continuité garantie
...Terminé.
Aucune rupture de continuité constatée, toutes les données ont été conservées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape
  warn('Non-invertible starting MA parameters found.'


Cleaning de la station [18125004] ...
Année : 2003. Continuité rompue.
Mois: 3 / 12.
...Terminé.
Données antérieures à 2004 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [18172003] ...
Année : 2004. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 2005 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [18175003] ...
Année : 2003. Continuité rompue.
Mois: 3 / 12.
...Terminé.
Données antérieures à 2004 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [18187004] ...
Continuité garantie
...Terminé.
Aucune rupture de continuité constatée, toutes les données ont été conservées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [28070001] ...
Continuité garantie
...Terminé.
Aucune rupture de continuité constatée, toutes les données ont été conservées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [28198001] ...
Continuité garantie
...Terminé.
Aucune rupture de continuité constatée, toutes les données ont été conservées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [28206001] ...
Continuité garantie
...Terminé.
Aucune rupture de continuité constatée, toutes les données ont été conservées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape
  warn('Non-invertible starting MA parameters found.'
  warn('Non-invertible starting seaso

Cleaning de la station [28407001] ...
Année : 2006. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 2007 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [36063001] ...
Année : 1981. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 1982 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [36127002] ...
Année : 2001. Continuité rompue.
Mois: 7 / 12.
...Terminé.
Données antérieures à 2002 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [36173002] ...
Année : 2001. Continuité rompue.
Mois: 7 / 12.
...Terminé.
Données antérieures à 2002 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [37107001] ...
Année : 2001. Continuité rompue.
Mois: 8 / 12.
...Terminé.
Données antérieures à 2002 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [37179001] ...
Année : 1964. Continuité rompue.
Mois: 8 / 12.
...Terminé.
Données antérieures à 1965 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [37192001] ...
Année : 2001. Continuité rompue.
Mois: 5 / 12.
...Terminé.
Données antérieures à 2002 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [37213003] ...
Année : 2002. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 2003 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [37240001] ...
Année : 2001. Continuité rompue.
Mois: 4 / 12.
...Terminé.
Données antérieures à 2002 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [37242002] ...
Année : 2001. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 2002 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape
  warn('Non-invertible starting MA parameters found.'
  warn('Too few observations to estim

Cleaning de la station [41053001] ...
Année : 2010. Continuité rompue.
Mois: 5 / 12.
...Terminé.
Données antérieures à 2011 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [41097001] ...
Année : 1975. Continuité rompue.
Mois: 10 / 12.
...Terminé.
Données antérieures à 1976 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape
  warn('Too few observations to estimate starting parameters%s.'
A value is trying to be se

Cleaning de la station [41152001] ...
Année : 2011. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 2012 supprimées.
Check valeurs manquantes : OK
Cleaning de la station [41281001] ...
Continuité garantie
...Terminé.
Aucune rupture de continuité constatée, toutes les données ont été conservées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [45004001] ...
Année : 2004. Continuité rompue.
Mois: 6 / 12.
...Terminé.
Données antérieures à 2005 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


Cleaning de la station [45055001] ...
Continuité garantie
...Terminé.
Aucune rupture de continuité constatée, toutes les données ont été conservées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape
  warn('Too few observations to estimate starting parameters%s.'


Cleaning de la station [45340002] ...
Année : 2013. Continuité rompue.
Mois: 11 / 12.
...Terminé.
Données antérieures à 2014 supprimées.
Check valeurs manquantes : OK


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval[f"pred_{horizon}_mois"] = predictions
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mae"] = mae
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_station_eval["mape"] = mape


In [16]:
# 2ème station de la liste, dataframe initial
dict_stations[stations[1]][0]

Unnamed: 0_level_0,NUM_POSTE,NOM_USUEL,LAT,LON,ALTI,Year,Month,vent_speed_inst_moy_mensu,departement_num,departement_name,region
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2010-01-31,18033001,BOURGES,47.059167,2.359833,161,2010,1,10.8,18,Cher,CENTRE VAL DE LOIRE
2010-02-28,18033001,BOURGES,47.059167,2.359833,161,2010,2,14.8,18,Cher,CENTRE VAL DE LOIRE
2010-03-31,18033001,BOURGES,47.059167,2.359833,161,2010,3,14.0,18,Cher,CENTRE VAL DE LOIRE
2010-04-30,18033001,BOURGES,47.059167,2.359833,161,2010,4,11.9,18,Cher,CENTRE VAL DE LOIRE
2010-05-31,18033001,BOURGES,47.059167,2.359833,161,2010,5,10.4,18,Cher,CENTRE VAL DE LOIRE
...,...,...,...,...,...,...,...,...,...,...,...
2022-08-31,18033001,BOURGES,47.059167,2.359833,161,2022,8,9.0,18,Cher,CENTRE VAL DE LOIRE
2022-09-30,18033001,BOURGES,47.059167,2.359833,161,2022,9,9.4,18,Cher,CENTRE VAL DE LOIRE
2022-10-31,18033001,BOURGES,47.059167,2.359833,161,2022,10,10.4,18,Cher,CENTRE VAL DE LOIRE
2022-11-30,18033001,BOURGES,47.059167,2.359833,161,2022,11,11.5,18,Cher,CENTRE VAL DE LOIRE


In [17]:
# 2ème station de la liste, dataframe d'évaluation des prédictions
dict_stations[stations[1]][1]

Unnamed: 0_level_0,NUM_POSTE,NOM_USUEL,LAT,LON,ALTI,Year,Month,vent_speed_inst_moy_mensu,departement_num,departement_name,region,pred_84_mois,mae,mape
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2016-01-31,18033001,BOURGES,47.059167,2.359833,161,2016,1,13.3,18,Cher,CENTRE VAL DE LOIRE,11.117927,1.338485,0.119533
2016-02-29,18033001,BOURGES,47.059167,2.359833,161,2016,2,15.5,18,Cher,CENTRE VAL DE LOIRE,10.051032,1.338485,0.119533
2016-03-31,18033001,BOURGES,47.059167,2.359833,161,2016,3,13.3,18,Cher,CENTRE VAL DE LOIRE,12.265202,1.338485,0.119533
2016-04-30,18033001,BOURGES,47.059167,2.359833,161,2016,4,9.7,18,Cher,CENTRE VAL DE LOIRE,12.205785,1.338485,0.119533
2016-05-31,18033001,BOURGES,47.059167,2.359833,161,2016,5,10.8,18,Cher,CENTRE VAL DE LOIRE,9.424749,1.338485,0.119533
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-31,18033001,BOURGES,47.059167,2.359833,161,2022,8,9.0,18,Cher,CENTRE VAL DE LOIRE,9.786335,1.338485,0.119533
2022-09-30,18033001,BOURGES,47.059167,2.359833,161,2022,9,9.4,18,Cher,CENTRE VAL DE LOIRE,9.790804,1.338485,0.119533
2022-10-31,18033001,BOURGES,47.059167,2.359833,161,2022,10,10.4,18,Cher,CENTRE VAL DE LOIRE,10.057476,1.338485,0.119533
2022-11-30,18033001,BOURGES,47.059167,2.359833,161,2022,11,11.5,18,Cher,CENTRE VAL DE LOIRE,11.271272,1.338485,0.119533


In [18]:
# construction du dataframe de comparaison inter station

num_stations = []
nb_month_available = []
lat = []
lon = []
alti = []
true_val = []
pred_val = []
mae = []
mape = []

for key, value in dict_stations.items():
    num_stations.append(key)
    nb_month_available.append(len(value[0]))
    date_pred = value[1].index.max()

    data = value[1][value[1].index==date_pred]

    lat.append(data.loc[date_pred,"LAT"])
    lon.append(data.loc[date_pred,"LON"])
    alti.append(data.loc[date_pred,"ALTI"])
    true_val.append(data.loc[date_pred,"vent_speed_inst_moy_mensu"])
    pred_val.append(data.loc[date_pred,f"pred_{horizon}_mois"])
    mae.append(data.loc[date_pred,"mae"])
    mape.append(data.loc[date_pred,"mape"])

data = {
    "Num_station": num_stations,
    "Nb_mois_dispo": nb_month_available,
    "Latitude": lat,
    "Longitude": lon,
    "Altitude": alti,
    "Reel_2022-12": true_val,
    "Pred_2022-12": pred_val,
    "MAE": mae,
    "MAPE": mape,
}

df_eval_global = pd.DataFrame(data)
df_eval_global


Unnamed: 0,Num_station,Nb_mois_dispo,Latitude,Longitude,Altitude,Reel_2022-12,Pred_2022-12,MAE,MAPE
0,18015003,192,47.4995,2.427333,176,12.2,10.495685,1.268922,0.123526
1,18033001,156,47.059167,2.359833,161,12.2,12.689031,1.338485,0.119533
2,18092001,864,47.052167,2.642167,175,14.0,13.577998,1.408494,0.106141
3,18125004,228,47.457833,2.893167,139,9.0,9.55824,1.024129,0.110783
4,18172003,216,46.731,2.467333,165,7.9,7.76985,0.903406,0.122759
5,18175003,228,46.925333,2.803333,221,13.0,12.612996,1.318223,0.108555
6,18187004,204,46.425333,2.2365,462,13.3,13.852448,1.254891,0.115795
7,28070001,876,48.4605,1.501167,155,10.8,10.857489,1.020316,0.111426
8,28198001,828,48.061333,1.376333,126,16.2,15.288429,1.628754,0.120113
9,28206001,204,48.706833,1.173167,171,15.5,15.188268,1.679108,0.112663
