# RNN 

In [2]:
# import

import matplotlib.pyplot as plt


import numpy as np
np.random.seed(42)

import random
random.seed(42)

import pickle

import tensorflow as tf
tf.random.set_seed(42)

# Force deterministic op implementations (disables non-deterministic cuDNN kernels) for reproducibility
tf.config.experimental.enable_op_determinism()

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense 


from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error, root_mean_squared_error
from sklearn.model_selection import KFold 


In [3]:
# Sliding-window configuration used to pick the dataset file: window size 20
# and stride 5 ("20w_5s"), as determined in the window-size search.
taille_fenetre_to_run, taille_stride_to_run = 20, 5

In [4]:
# Load the dataset pickle matching the configured window size and stride.
# A context manager closes the file handle as soon as loading finishes.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open(f"Data/donnees_3D_{taille_fenetre_to_run}w_{taille_stride_to_run}s.pkl", "rb") as data_file:
    data = pickle.load(data_file)

In [5]:
# Display the keys available in the loaded dataset dictionary.
data.keys()

dict_keys(['X_np_3D', 'y_np', 'features_names_3D', 'idx_explicabilite', 'X_3D_explicabilite', 'y_explicabilite'])

In [6]:
# Model inputs and regression targets used for cross-validation.
X_3D, y_np = data["X_np_3D"], data["y_np"]

In [7]:
# Samples stored under the explainability keys (presumably set aside for the
# explainability step — confirm against the data-preparation notebook).
# NOTE(review): the variable name says "binary" but the value comes from the
# "X_3D_explicabilite" key — confirm the name is intentional.
X_binary_explicabilite, y_explicabilite = (
    data["X_3D_explicabilite"],
    data["y_explicabilite"],
)

In [8]:
# Feature names stored alongside the 3D dataset.
features_3D = data["features_names_3D"]

In [9]:
# Inspect the shape of the explainability inputs
# (presumably samples x window steps x features — TODO confirm axis order).
X_binary_explicabilite.shape

(73, 20, 25)

In [10]:
# Cross-validation setup: shuffled K-fold with a fixed seed.
n_splits = 5
kf = KFold(shuffle=True, random_state=42, n_splits=n_splits)

# Per-fold score accumulators; the training loop appends one value per fold.
mse_scores, mae_scores, mape_scores, rmse_scores = [], [], [], []

# Création d'un modèle RNN
def create_lstm(input_shape):
    """Build and compile a single-layer LSTM regression model.

    Args:
        input_shape: Shape of one input sample without the batch dimension
            (the training loop passes ``X_train.shape[1:]``).

    Returns:
        A compiled ``Sequential`` model made of an LSTM layer with 64 units
        (tanh activation, last output only) followed by a one-unit ``Dense``
        layer, optimised with Adam on mean squared error and tracking MAE.
    """
    model = Sequential([
        # Declare the input with an explicit Input object rather than passing
        # `input_shape` to the first layer, which Keras warns against.
        tf.keras.Input(shape=input_shape),
        LSTM(64, activation='tanh', return_sequences=False),
        Dense(1),  # single output value for regression
    ])
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model

# Train and evaluate one model per cross-validation fold, recording the
# held-out metrics and saving each fold's model to disk.
for fold, (train_index, test_index) in enumerate(kf.split(X_3D)):
    print(f"Fold {fold+1}/{n_splits}")
    X_train, X_test = X_3D[train_index], X_3D[test_index]
    y_train, y_test = y_np[train_index], y_np[test_index]

    # Build a fresh model for this fold; the input shape drops the sample axis.
    model = create_lstm(X_train.shape[1:])

    # Training
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

    # Predictions on the held-out fold
    y_pred = model.predict(X_test)

    # Score computation
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    mse_scores.append(mse)
    mae_scores.append(mae)
    mape_scores.append(mape)
    rmse_scores.append(rmse)

    print(f"MSE: {mse:.2f}")
    print(f"MAE: {mae:.2f}")
    print(f"MAPE: {mape:.2f}")
    print(f"RMSE: {rmse:.2f}")
    print()

    # Save the model. The file name uses the 0-based fold index (the printed
    # fold number above is 1-based).
    # NOTE(review): pickling Keras models is fragile across versions; consider
    # model.save(...) together with the matching loader.
    filename = f'Models/LSTM_{taille_fenetre_to_run}_fold_{fold}.sav'
    # Context manager guarantees the file is flushed and closed every fold.
    with open(filename, 'wb') as model_file:
        pickle.dump(model, model_file)


Fold 1/5
Epoch 1/10


  super().__init__(**kwargs)
2025-01-20 16:00:50.322094: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-01-20 16:00:50.322621: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func

[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 6654.6558 - mae: 80.9354
Epoch 2/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 2633.0334 - mae: 50.8164
Epoch 3/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 917.9117 - mae: 29.6691
Epoch 4/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 252.4751 - mae: 15.0366
Epoch 5/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 62.5286 - mae: 7.3061
Epoch 6/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 27.1167 - mae: 4.5220
Epoch 7/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 23.2753 - mae: 3.8132
Epoch 8/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 23.0453 - mae: 3.6792
Epoch 9/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

2025-01-20 16:01:03.386621: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 16:01:03.386896: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
MSE: 22.47
MAE: 3.66
MAPE: 0.04
RMSE: 4.74

Fold 2/5
Epoch 1/10


  super().__init__(**kwargs)


[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 6734.9839 - mae: 81.4393
Epoch 2/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 2774.3743 - mae: 52.1879
Epoch 3/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 998.9250 - mae: 30.9970 
Epoch 4/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 288.3467 - mae: 16.1415
Epoch 5/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 72.7805 - mae: 7.8867
Epoch 6/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 28.8358 - mae: 4.7385
Epoch 7/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 23.4936 - mae: 3.8759
Epoch 8/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 23.1390 - mae: 3.6994
Epoch 9/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

2025-01-20 16:01:16.206209: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 16:01:16.206484: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
MSE: 22.62
MAE: 3.61
MAPE: 0.04
RMSE: 4.76

Fold 3/5
Epoch 1/10


  super().__init__(**kwargs)


[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 6640.8940 - mae: 80.8381
Epoch 2/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 2634.1028 - mae: 50.8316
Epoch 3/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 929.4710 - mae: 29.8657
Epoch 4/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 259.5247 - mae: 15.2620
Epoch 5/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 64.2701 - mae: 7.3952
Epoch 6/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 26.5104 - mae: 4.4600
Epoch 7/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 23.0740 - mae: 3.7748
Epoch 8/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 22.9275 - mae: 3.6614
Epoch 9/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

2025-01-20 16:01:29.104420: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 16:01:29.104702: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
MSE: 23.10
MAE: 3.65
MAPE: 0.04
RMSE: 4.81

Fold 4/5
Epoch 1/10


  super().__init__(**kwargs)


[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 6688.3682 - mae: 81.1202
Epoch 2/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 2628.9241 - mae: 50.7791
Epoch 3/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 914.6214 - mae: 29.6175
Epoch 4/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 250.4572 - mae: 14.9681
Epoch 5/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 61.5962 - mae: 7.2601
Epoch 6/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 26.8364 - mae: 4.4892
Epoch 7/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 23.2644 - mae: 3.7777
Epoch 8/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 23.1070 - mae: 3.6453
Epoch 9/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

2025-01-20 16:01:42.805189: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 16:01:42.805483: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
MSE: 24.73
MAE: 3.72
MAPE: 0.04
RMSE: 4.97

Fold 5/5
Epoch 1/10


  super().__init__(**kwargs)


[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 6594.7808 - mae: 80.5217
Epoch 2/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 2547.9824 - mae: 49.9803
Epoch 3/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 874.0343 - mae: 28.9309
Epoch 4/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 235.4957 - mae: 14.4848
Epoch 5/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 58.1133 - mae: 7.0296
Epoch 6/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 26.5575 - mae: 4.4481
Epoch 7/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 23.4743 - mae: 3.8144
Epoch 8/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 23.3525 - mae: 3.6984
Epoch 9/10
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s

2025-01-20 16:01:56.340702: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 16:01:56.341014: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
MSE: 23.26
MAE: 3.68
MAPE: 0.04
RMSE: 4.82



In [11]:
# Report the mean of each metric across the cross-validation folds.
for metric_label, fold_scores in (
    ("MSE", mse_scores),
    ("MAE", mae_scores),
    ("MAPE", mape_scores),
    ("RMSE", rmse_scores),
):
    print(f"Mean {metric_label}: {np.mean(fold_scores)}")

Mean MSE: 23.23629182879008
Mean MAE: 3.6646785192918054
Mean MAPE: 0.03953752336623448
Mean RMSE: 4.819699606420233


In [12]:
# Show the raw per-fold values of each metric.
for metric_label, fold_scores in (
    ("MSE", mse_scores),
    ("MAE", mae_scores),
    ("MAPE", mape_scores),
    ("RMSE", rmse_scores),
):
    print(f'{metric_label} scores: {fold_scores}')

MSE scores: [np.float64(22.47146431017031), np.float64(22.621796034146207), np.float64(23.096778457750325), np.float64(24.72811831979565), np.float64(23.263302022087895)]
MAE scores: [np.float64(3.663624748676801), np.float64(3.605135303428861), np.float64(3.6537715853952357), np.float64(3.724927811530433), np.float64(3.675933147427696)]
MAPE scores: [np.float64(0.039369011045377995), np.float64(0.038824601604248464), np.float64(0.03940556998293323), np.float64(0.04043031031764325), np.float64(0.03965812388096946)]
RMSE scores: [np.float64(4.740407610129145), np.float64(4.756237592272511), np.float64(4.8059107833739825), np.float64(4.972737507630546), np.float64(4.823204538694984)]


In [13]:
# Pick the fold with the lowest held-out MSE (0-based index).
best_fold = np.asarray(mse_scores).argmin()
print(f"Best fold: {best_fold}")

Best fold: 0


In [14]:
# Save results: per-fold scores, their means, and the index of the best fold.

results = {
    "model_name": "LSTM",
    "mse": mse_scores,
    "mae": mae_scores,
    "mape": mape_scores,
    "rmse": rmse_scores,
    "best_fold": best_fold,
    "mean_mse": np.mean(mse_scores),
    "mean_mae": np.mean(mae_scores),
    "mean_mape": np.mean(mape_scores),
    "mean_rmse": np.mean(rmse_scores),
}

# Plain string (the path has no placeholders) and a context manager so the
# file is flushed and closed as soon as the dump completes.
with open("Data_to_share/LSTM_results.pkl", "wb") as results_file:
    pickle.dump(results, results_file)

In [15]:
# Reload the model saved for the best fold; the context manager closes the
# file handle once loading finishes.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open(f'Models/LSTM_{taille_fenetre_to_run}_fold_{best_fold}.sav', 'rb') as best_model_file:
    lstm_best = pickle.load(best_model_file)

In [16]:
# TODO: add explainability for the 3D (windowed) inputs, e.g. with TimeSHAP:
# https://github.com/feedzai/timeshap