# RNN : LSTM


In [2]:
# import
import pickle 

import numpy as np

from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam

from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error


# reproductibility
np.random.seed(42)

In [3]:
## ICI ON RÈGLE QUELLE TAILLE DE FENETRE ON VEUT UTILISER (POUR NE PAS DUPLIQUER LE CODE)
taille_fenetre_to_run = "courte"
assert taille_fenetre_to_run in ["courte", "moyenne", "longue"]

In [4]:
if taille_fenetre_to_run == "courte":
    data = pickle.load(open("Data/donnees_courte.pkl", "rb"))
elif taille_fenetre_to_run == "moyenne":
    data = pickle.load(open("Data/donnees_moyenne.pkl", "rb"))
else:
    data = pickle.load(open("Data/donnees_longue.pkl", "rb"))

In [5]:
data.keys()

dict_keys(['X_np_label', 'X_np_binary', 'y_np', 'X_df_label', 'X_df_binary', 'y_df'])

In [6]:
X_np_binary = data["X_np_binary"]
y_np = data["y_np"]

In [7]:
# Vérifier que toutes les données de X_np_binary sont entre 0 et 1
for i in range(X_np_binary.shape[0]):
    for j in range(X_np_binary.shape[1]):
        for k in range(X_np_binary.shape[2]):
            assert X_np_binary[i, j, k] >= 0 and X_np_binary[i, j, k] <= 1

In [8]:
# Best: -0.709300 using 
# {'batch_size': 32, 
# 'epochs': 100, 
# 'model__activation': 'tanh',      OK 
# 'model__dropout_rate': 0.2,       OK
# 'model__learning_rate': 0.001,    OK 
# 'model__units': 100}              OK

def create_lstm(input_shape, units=100, dropout_rate=0.2, activation = 'tanh', learning_rate = 0.001):

    # pour ajouter des couches 
    model = Sequential()

    # units : 100, Plus ce nombre est élevé, plus le modèle peut capturer de relations complexes dans les données, mais cela augmente aussi le coût computationnel.
    # activation : tanh, fonction d'activation tanh (classique dans les LSTM)
    model.add(LSTM(units, input_shape=input_shape, activation=activation))

    # éviter surapprentissage
    model.add(Dropout(dropout_rate)) 

    # output pour un problème de régresssion 
    model.add(Dense(1)) 

    # optimizer adam 
    # mse : typique pour un problème de régression
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse') # mse pour un problème de régression ?

    return model

In [8]:
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [11]:
input_shape = (X_np_binary.shape[1], X_np_binary.shape[2])

In [9]:
n_splits = 5
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

mse_scores = []
mae_scores = []
mape_scores = []
rmse_scores = []

for fold, (train_index, test_index) in enumerate(kf.split(X_np_binary)):
    print(f"Running fold {fold+1}/{n_splits}")

    X_train, X_test = X_np_binary[train_index], X_np_binary[test_index]
    y_train, y_test = y_np[train_index], y_np[test_index]

    model = create_lstm(input_shape)

    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping], verbose=1)

    y_pred = model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    mse_scores.append(mse)
    mae_scores.append(mae)
    mape_scores.append(mape)
    rmse_scores.append(rmse)

    model.save(f"Models/lstm_{taille_fenetre_to_run}_fold_{fold}.h5")

Running fold 1/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 7768.9116 - val_loss: 5688.1040
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 5222.0361 - val_loss: 4320.4121
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3960.5803 - val_loss: 3260.4734
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2973.0520 - val_loss: 2426.2209
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 2210.8979 - val_loss: 1775.6436
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 1618.7881 - val_loss: 1275.5603
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1134.2811 - val_loss: 899.2449
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 802.4296 - val_loss: 620.1045
Epoch 9



Running fold 2/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 7868.6187 - val_loss: 5752.7114
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 5319.8560 - val_loss: 4356.9653
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3988.3579 - val_loss: 3285.9517
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3027.4768 - val_loss: 2443.4927
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2232.0032 - val_loss: 1788.4131
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1633.6123 - val_loss: 1284.8589
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1162.8999 - val_loss: 905.8093
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 821.3018 - val_loss: 625.0649
Epoch 9



Running fold 3/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 7837.0132 - val_loss: 5666.3623
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 5220.9951 - val_loss: 4315.7974
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 3972.9695 - val_loss: 3266.5479
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2995.6301 - val_loss: 2440.6775
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2205.0889 - val_loss: 1794.4038
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1619.0435 - val_loss: 1295.6797
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1174.0315 - val_loss: 918.4347
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 825.9845 - val_loss: 638.7399
Epoch 9



Running fold 4/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 7665.1484 - val_loss: 5561.7466
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 5088.2900 - val_loss: 4226.7324
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 3879.2083 - val_loss: 3192.1270
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 2915.2725 - val_loss: 2376.6609
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 2158.9646 - val_loss: 1739.4679
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1560.1409 - val_loss: 1249.5383
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 1124.3025 - val_loss: 880.4761
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 792.7291 - val_loss: 608.5574
Epoch 9



Running fold 5/5
Epoch 1/100


  super().__init__(**kwargs)


[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 7536.6646 - val_loss: 5584.0737
Epoch 2/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 5164.1948 - val_loss: 4280.0825
Epoch 3/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 3935.4871 - val_loss: 3258.5825
Epoch 4/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 2999.9712 - val_loss: 2446.4937
Epoch 5/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 2215.8206 - val_loss: 1807.6272
Epoch 6/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 1661.2756 - val_loss: 1311.6682
Epoch 7/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 1188.8081 - val_loss: 932.8416
Epoch 8/100
[1m95/95[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 845.1750 - val_loss: 638.3076
Epoch 9



In [10]:
print(f"Mean MSE: {np.mean(mse_scores)}")
print(f"Mean MAE: {np.mean(mae_scores)}")
print(f"Mean MAPE: {np.mean(mape_scores)}")
print(f"Mean RMSE: {np.mean(rmse_scores)}")

Mean MSE: 11.919453542890642
Mean MAE: 2.0110767887014207
Mean MAPE: 0.021831802856710235
Mean RMSE: 2.673205450672733


Le MSE mesure l'erreur quadratique moyenne entre les prédictions et les vraies valeurs. Plus cette valeur est faible, mieux c'est. Ici, une moyenne de 17.42 semble élevée.

Le MAE mesure l'erreur absolue moyenne, ce qui est plus interprétable que le MSE. Une erreur moyenne d'environ 2.76 indique que les prédictions diffèrent en moyenne de 2.76 unités des vraies valeurs. (donc 2.76 (%) de SOH)

Le MAPE est une mesure relative exprimée en pourcentage. Une erreur moyenne de 3 % est raisonnable.

Le RMSE est la racine carrée du MSE et est plus sensible aux grandes erreurs. Une valeur moyenne de 3.56 peut être acceptable.

In [11]:
print(f'MSE scores: {mse_scores}')
print(f'MAE scores: {mae_scores}')
print(f'MAPE scores: {mape_scores}')
print(f'RMSE scores: {rmse_scores}')

MSE scores: [np.float64(26.597082252429743), np.float64(0.8193451960954169), np.float64(30.61211589784589), np.float64(0.747410964805562), np.float64(0.8213134032765983)]
MAE scores: [np.float64(3.9833029514553964), np.float64(0.5417766842590688), np.float64(4.19941724802085), np.float64(0.6480027163495111), np.float64(0.6828843434222767)]
MAPE scores: [np.float64(0.04320626818868852), np.float64(0.005845712246289326), np.float64(0.04591571631439033), np.float64(0.006937015007134113), np.float64(0.007254302527048901)]
RMSE scores: [np.float64(5.157235912039486), np.float64(0.9051768866334452), np.float64(5.5328216940224895), np.float64(0.8645293313737609), np.float64(0.906263429294484)]


Les scores sont très variables entre les différentes itérations de validation croisée :
- Les deux derniers folds ont des scores plus faibles : donc meilleure performance

Les faibles scores dans certaines itérations montrent que votre modèle a très bien fonctionné sur ces splits. Cependant, les scores élevés dans d'autres itérations indiquent un surapprentissage ou une distribution très variable des données entre les splits.

In [12]:
# load un modèle 
model = create_lstm(input_shape)
model.load_weights("Models/lstm_courte_fold_0.h5")


  super().__init__(**kwargs)


In [13]:
model.summary()

In [15]:
import shap

def model_predict_zoe(inputs):
    return model.predict(inputs)    

# sous échantillon 
# X_background = X_np_binary[:100]

explainer = shap.DeepExplainer(model, X_np_binary)

shap_values = explainer.shap_values(X_np_binary)

shap.summary_plot(shap_values, X_np_binary, plot_type="bar")



Expected: keras_tensor
Received: inputs=['Tensor(shape=(4711, 50, 25))']
Expected: keras_tensor
Received: inputs=['Tensor(shape=(9422, 50, 25))']


StagingError: in user code:

    File "/Users/zoemarquis/Documents/projet_industrie/projet_batteries/mon_venv/lib/python3.11/site-packages/shap/explainers/_deep/deep_tf.py", line 269, in grad_graph  *
        x_grad = tape.gradient(out, shap_rAnD)

    LookupError: gradient registry has no entry for: shap_TensorListStack


In [20]:
model_predict_zoe(X_np_binary[0:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step


array([[95.89427]], dtype=float32)