# RNN : LSTM


In [1]:
# import
import os
import pickle

import numpy as np

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Input
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error


# Reproducibility: seed NumPy's global random generator.
np.random.seed(42)

# Seed TensorFlow as well. TensorFlow is imported under its own alias so that
# the standard-library module name `random` is not shadowed by
# `tensorflow.random` for the rest of the notebook.
import tensorflow as tf
tf.random.set_seed(42)

In [2]:
# Choose here which window size this run of the notebook uses, so the code
# does not have to be duplicated for each window size.
taille_fenetre_to_run = "courte"
assert taille_fenetre_to_run in ("courte", "moyenne", "longue")

In [3]:
# Load the pickled dataset that matches the selected window size.
# The three supported sizes map onto files named Data/donnees_<size>.pkl, so
# the path is built from the setting instead of repeating one branch per size.
# The context manager closes the file handle once the data is loaded.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(f"Data/donnees_{taille_fenetre_to_run}.pkl", "rb") as fichier_donnees:
    data = pickle.load(fichier_donnees)

In [4]:
# Display the keys available in the loaded data.
data.keys()

dict_keys(['X_np_label', 'X_np_binary', 'y_np', 'X_df_label', 'X_df_binary', 'y_df'])

In [5]:
# Keep the two entries used in the rest of the notebook: the "X_np_binary"
# inputs and the "y_np" targets passed to model.fit / model.predict below.
X_np_binary = data["X_np_binary"]
y_np = data["y_np"]

In [6]:
# Check that every value of X_np_binary lies between 0 and 1 (inclusive).
# One vectorized comparison replaces the element-by-element Python loops.
# NaN entries still fail the check, because both comparisons are False for NaN.
assert ((X_np_binary >= 0) & (X_np_binary <= 1)).all(), "X_np_binary contains values outside [0, 1]"

In [7]:
# Best: -0.709300 using
# {'batch_size': 32,
# 'epochs': 100,
# 'model__activation': 'tanh',      OK
# 'model__dropout_rate': 0.2,       OK
# 'model__learning_rate': 0.001,    OK
# 'model__units': 100}              OK
# (the defaults of create_lstm below use these model__* values)

def create_lstm(input_shape, units=100, dropout_rate=0.2, activation='tanh', learning_rate=0.001):
    """Build and compile a single-layer LSTM regression model.

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        units: Number of LSTM units. A larger value lets the model capture
            more complex relationships, at a higher computational cost.
        dropout_rate: Rate of the Dropout layer placed after the LSTM to
            limit overfitting.
        activation: Activation function of the LSTM layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A compiled keras Sequential model with a single linear output unit,
        trained with the mean-squared-error loss.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input layer rather than
        # the `input_shape=` keyword on the first layer, which Keras warns
        # against for Sequential models.
        Input(shape=input_shape),
        LSTM(units, activation=activation),
        # Regularisation against overfitting.
        Dropout(dropout_rate),
        # Single output value for the regression target.
        Dense(1),
    ])

    # Adam optimizer; MSE is the usual loss for a regression problem.
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')

    return model

In [8]:
# Stop training when val_loss has not improved for 10 epochs, and restore the
# weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [9]:
# Shape of one input sample: axes 1 and 2 of X_np_binary (axis 0 indexes samples).
# Presumably (timesteps, features) — TODO confirm against the data preparation.
input_shape = (X_np_binary.shape[1], X_np_binary.shape[2])

In [10]:
# K-fold cross-validation: for each fold, train a fresh LSTM on the training
# split, score it on the held-out split and save the fitted model.
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# One entry per fold for each metric.
mse_scores = []
mae_scores = []
mape_scores = []
rmse_scores = []

# Make sure the output folder exists before training, so that a missing
# folder cannot make the save at the end of a fold fail.
os.makedirs("Models", exist_ok=True)

for fold, (train_index, test_index) in enumerate(kf.split(X_np_binary)):
    print(f"Running fold {fold+1}/{n_splits}")

    X_train, X_test = X_np_binary[train_index], X_np_binary[test_index]
    y_train, y_test = y_np[train_index], y_np[test_index]

    # New, untrained model for every fold.
    model = create_lstm(input_shape)

    # validation_split holds out part of the training split; its loss is the
    # val_loss monitored by the early-stopping callback.
    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=1)

    y_pred = model.predict(X_test)

    # Metrics on the held-out split of this fold.
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    mse_scores.append(mse)
    mae_scores.append(mae)
    mape_scores.append(mape)
    rmse_scores.append(rmse)

    # File names use the 0-based fold index.
    model.save(f"Models/lstm_{taille_fenetre_to_run}_fold_{fold}.h5")

Running fold 1/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 7805.7812 - val_loss: 5867.4536
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 5393.7212 - val_loss: 4473.4458
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 4105.5840 - val_loss: 3390.8867
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3085.0278 - val_loss: 2535.5161
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2301.6843 - val_loss: 1865.2052
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1678.8624 - val_loss: 1347.5916
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 1219.8511 - val_loss: 954.9217
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 854.8320 - val_loss: 663.9099
Epoch 9



Running fold 2/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 7639.4937 - val_loss: 5667.8257
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 5238.1392 - val_loss: 4337.2656
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 4007.6123 - val_loss: 3302.9451
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3029.3948 - val_loss: 2481.8752
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2268.8020 - val_loss: 1835.5758
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1679.9873 - val_loss: 1333.5179
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1203.6948 - val_loss: 944.2063
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 847.7623 - val_loss: 650.7162
Epoch 9



Running fold 3/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 7717.4380 - val_loss: 5639.4741
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 5194.4775 - val_loss: 4321.5171
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3976.0176 - val_loss: 3289.9558
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3009.8711 - val_loss: 2470.6711
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2245.1938 - val_loss: 1826.0819
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1646.8713 - val_loss: 1326.3270
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1194.0077 - val_loss: 945.7886
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 857.8385 - val_loss: 661.5778
Epoch 9



Running fold 4/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 7822.6694 - val_loss: 5837.7202
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 5360.0366 - val_loss: 4473.4468
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 4103.4932 - val_loss: 3402.9744
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3100.2764 - val_loss: 2540.5168
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2297.8843 - val_loss: 1869.9211
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1686.6085 - val_loss: 1352.7417
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1214.8275 - val_loss: 960.7973
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 856.5770 - val_loss: 669.6556
Epoch 9



Running fold 5/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 7628.7607 - val_loss: 5612.1001
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 5172.1572 - val_loss: 4279.9307
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3936.7747 - val_loss: 3239.1826
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 2963.0725 - val_loss: 2416.5493
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2200.5564 - val_loss: 1773.0751
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1610.3837 - val_loss: 1276.5818
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1146.4720 - val_loss: 902.0467
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 806.3494 - val_loss: 624.5433
Epoch 9



In [11]:
# Average of each metric over the cross-validation folds, one per line.
print(
    f"Mean MSE: {np.mean(mse_scores)}",
    f"Mean MAE: {np.mean(mae_scores)}",
    f"Mean MAPE: {np.mean(mape_scores)}",
    f"Mean RMSE: {np.mean(rmse_scores)}",
    sep="\n",
)

Mean MSE: 11.870763217873408
Mean MAE: 1.9956375462284703
Mean MAPE: 0.021634746712863435
Mean RMSE: 2.6237389783724985


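Le MSE mesure l'erreur quadratique moyenne entre les prédictions et les vraies valeurs. Plus cette valeur est faible, mieux c'est. Ici, la moyenne d'environ 11.87 semble élevée : elle est tirée vers le haut par les folds 3 et 4 (MSE ≈ 30.6 et ≈ 26.8), alors que les trois autres folds restent sous 1.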

Le MAE mesure l'erreur absolue moyenne, ce qui est plus interprétable que le MSE. Une erreur moyenne d'environ 2.00 indique que les prédictions diffèrent en moyenne de 2 unités des vraies valeurs (soit environ 2 points de pourcentage de SOH).

Le MAPE est une mesure relative de l'erreur. La valeur affichée (0.0216) est une fraction, soit environ 2.2 % : une erreur moyenne de cet ordre est raisonnable.

Le RMSE est la racine carrée du MSE et est plus sensible aux grandes erreurs. Une valeur moyenne de 2.62 (moyenne des RMSE calculés par fold) peut être acceptable.

In [12]:
# Per-fold values of each metric, one metric per line.
print(
    f'MSE scores: {mse_scores}',
    f'MAE scores: {mae_scores}',
    f'MAPE scores: {mape_scores}',
    f'RMSE scores: {rmse_scores}',
    sep="\n",
)

MSE scores: [np.float64(0.5709382880709342), np.float64(0.8516909114378138), np.float64(30.61238400228017), np.float64(26.78272998943851), np.float64(0.5360728981396189)]
MAE scores: [np.float64(0.5770080248327217), np.float64(0.5785638739704879), np.float64(4.1996797498149885), np.float64(4.044178182891168), np.float64(0.5787578996329859)]
MAPE scores: [np.float64(0.006117431009605447), np.float64(0.006257490702833886), np.float64(0.045918142590441476), np.float64(0.043762963749580726), np.float64(0.006117705511855646)]
RMSE scores: [np.float64(0.7556045844692408), np.float64(0.9228710156017546), np.float64(5.532845922514034), np.float64(5.175203376625745), np.float64(0.7321699926517194)]


Les scores sont très variables entre les différentes itérations de validation croisée :
- Les folds 1, 2 et 5 ont les scores d'erreur les plus faibles (MSE < 1), donc la meilleure performance ; les folds 3 et 4 sont nettement moins bons (MSE ≈ 30.6 et ≈ 26.8).

Les faibles scores dans certaines itérations montrent que le modèle a très bien fonctionné sur ces splits. Cependant, les scores élevés dans d'autres itérations peuvent indiquer un surapprentissage ou une distribution très variable des données entre les splits.

In [13]:
# Rebuild the architecture and load the weights saved for fold index 0.
# The file name follows the window size selected at the top of the notebook,
# so the weights always match the input shape of the data currently loaded.
model = create_lstm(input_shape)
model.load_weights(f"Models/lstm_{taille_fenetre_to_run}_fold_0.h5")


  super().__init__(**kwargs)


In [14]:
# Print the layer-by-layer summary of the reloaded model.
model.summary()

In [15]:
# NOTE(review): this import sits in the middle of the notebook; the other
# imports are grouped in the first cell.
import shap

def model_predict_zoe(inputs):
    """Return the predictions of the global `model` for `inputs`."""
    return model.predict(inputs)    

# Sub-sample to use as background (currently disabled):
# X_background = X_np_binary[:100]

# The full dataset is passed both as the DeepExplainer background and as the
# set of samples to explain.
# NOTE(review): the output recorded for this cell ends with
# "LookupError: gradient registry has no entry for: shap_TensorListStack",
# so the explanation did not complete in that run. model_predict_zoe is not
# used by the explainer below.
explainer = shap.DeepExplainer(model, X_np_binary)

shap_values = explainer.shap_values(X_np_binary)

shap.summary_plot(shap_values, X_np_binary, plot_type="bar")



  from .autonotebook import tqdm as notebook_tqdm
Expected: keras_tensor_35
Received: inputs=['Tensor(shape=(4711, 50, 25))']
Expected: keras_tensor_35
Received: inputs=['Tensor(shape=(9422, 50, 25))']


StagingError: in user code:

    File "/Users/zoemarquis/Documents/projet_industrie/projet_batteries/mon_venv/lib/python3.11/site-packages/shap/explainers/_deep/deep_tf.py", line 269, in grad_graph  *
        x_grad = tape.gradient(out, shap_rAnD)

    LookupError: gradient registry has no entry for: shap_TensorListStack


In [None]:
# Predict on the first sample only; the 0:1 slice keeps the leading sample axis.
model_predict_zoe(X_np_binary[0:1])