# Dans ce notebook on a essayé d'implémenter timeshap pour l'explicabilité, on a laisser les outputs d'erreur.
# Nous n'avons pas réussi à trouver une implémentation fonctionnelle, entre les différentes versions des bibliotheques de librairie.
# D'après les `issues` ouvert dans le github de timeshap, c'est un problème connu.

In [1]:
# import

import matplotlib.pyplot as plt

import numpy as np
np.random.seed(42)

import random
random.seed(42)

import pickle

import tensorflow as tf
tf.random.set_seed(42)

# Désactiver cuDNN non déterministes
tf.config.experimental.enable_op_determinism()

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error, root_mean_squared_error
from sklearn.model_selection import KFold 


In [2]:
taille_fenetre_to_run = 20
taille_stride_to_run = 5
# 20w_5s déterminer dans search window size 

In [3]:
data = pickle.load(open(f"Data/donnees_3D_{taille_fenetre_to_run}w_{taille_stride_to_run}s.pkl", "rb"))

In [4]:
data.keys()

dict_keys(['X_np_3D', 'y_np', 'features_names_3D', 'idx_explicabilite', 'X_3D_explicabilite', 'y_explicabilite'])

In [5]:
X_3D = data["X_np_3D"]
y_np = data["y_np"]

In [6]:
X_binary_explicabilite = data["X_3D_explicabilite"]
y_explicabilite = data["y_explicabilite"]

In [7]:
features_3D = data["features_names_3D"]

In [8]:
X_binary_explicabilite.shape

(73, 20, 25)

In [9]:

# Création d'un modèle CNN
def create_cnn(input_shape):
    model = Sequential()

    model.add(Conv1D(64, 3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(2))

    model.add(Conv1D(128, 3, activation='relu'))
    model.add(MaxPooling1D(2))

    model.add(Conv1D(256, 3, activation='relu'))
    # model.add(MaxPooling1D(2))

    # aplatir pour passer à la couche dense
    model.add(Flatten())

    # couches fully connected
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    
    # dropout pour éviter l'overfitting
    model.add(Dropout(0.5))

    # couche de sortie
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    return model

In [10]:
# Paramètres pour la validation croisée
n_splits = 5
kf = KFold(n_splits=n_splits, random_state=42, shuffle=True)

# Listes pour stocker les scores
mse_scores = []
mae_scores = []
mape_scores = []
rmse_scores = []

early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

for fold, (train_index, test_index) in enumerate(kf.split(X_3D)):
    print(f"Fold {fold+1}/{n_splits}")
    X_train, X_test = X_3D[train_index], X_3D[test_index]
    y_train, y_test = y_np[train_index], y_np[test_index]
    
    # Création du modèle
    model = create_cnn(X_train.shape[1:]) 
    
    # Entraînement
    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test),
                callbacks=[early_stopping])
    
    # Prédictions
    y_pred = model.predict(X_test) #.flatten()
    
    # Calcul des scores
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    
    mse_scores.append(mse)
    mae_scores.append(mae)
    mape_scores.append(mape)
    rmse_scores.append(rmse)
    
    print(f"MSE: {mse:.6f}")
    print(f"MAE: {mae:.6f}")
    print(f"MAPE: {mape:.6f}")
    print(f"RMSE: {rmse:.6f}")
    print()


    # save the model to disk
    filename = f'Models/CNN_{taille_fenetre_to_run}_fold_{fold}.sav'
    pickle.dump(model, open(filename, 'wb'))

Fold 1/5
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-01-20 22:22:22.278388: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_15}}
2025-01-20 22:22:22.278671: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, othe

[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1544.9926 - mae: 28.3396 - val_loss: 132.9887 - val_mae: 11.2747
Epoch 2/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 390.8233 - mae: 15.7880 - val_loss: 141.9386 - val_mae: 11.6991
Epoch 3/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 398.8218 - mae: 15.9247 - val_loss: 228.5044 - val_mae: 15.0098
Epoch 4/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 396.5743 - mae: 15.9439 - val_loss: 9.2561 - val_mae: 2.3737
Epoch 5/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 402.3390 - mae: 15.9975 - val_loss: 7.7317 - val_mae: 2.3840
Epoch 6/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 385.6768 - mae: 15.6832 - val_loss: 4.8611 - val_mae: 1.8175
Epoch 7/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

2025-01-20 22:22:32.447486: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 22:22:32.447755: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1443.4440 - mae: 26.9678 - val_loss: 35.3514 - val_mae: 5.2893
Epoch 2/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 351.7726 - mae: 14.9762 - val_loss: 89.5854 - val_mae: 9.1812
Epoch 3/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 352.0012 - mae: 14.9892 - val_loss: 50.0521 - val_mae: 6.8437
Epoch 4/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 332.9706 - mae: 14.6652 - val_loss: 10.0156 - val_mae: 2.5289
Epoch 5/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 344.9373 - mae: 14.7887 - val_loss: 9.6987 - val_mae: 2.7060
Epoch 6/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 334.4589 - mae: 14.6204 - val_loss: 66.7158 - val_mae: 7.9109
Epoch 7/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-01-20 22:22:47.034525: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 22:22:47.034829: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1508.0939 - mae: 27.7547 - val_loss: 31.4877 - val_mae: 5.1085
Epoch 2/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 393.2734 - mae: 15.8923 - val_loss: 4.5999 - val_mae: 1.7580
Epoch 3/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 387.4924 - mae: 15.8093 - val_loss: 86.0527 - val_mae: 9.0916
Epoch 4/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 379.6147 - mae: 15.6292 - val_loss: 73.7397 - val_mae: 8.3960
Epoch 5/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 381.4380 - mae: 15.6069 - val_loss: 35.6779 - val_mae: 5.6472
Epoch 6/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 365.1543 - mae: 15.3155 - val_loss: 16.1614 - val_mae: 3.4572
Epoch 7/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

2025-01-20 22:22:54.109299: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 22:22:54.109578: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 1500.1678 - mae: 27.4542 - val_loss: 51.2229 - val_mae: 6.5779
Epoch 2/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 394.2157 - mae: 15.8361 - val_loss: 56.5196 - val_mae: 7.0808
Epoch 3/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 397.0757 - mae: 16.0404 - val_loss: 19.6946 - val_mae: 4.1239
Epoch 4/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 395.2805 - mae: 15.8979 - val_loss: 47.4908 - val_mae: 6.5703
Epoch 5/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 370.0595 - mae: 15.3440 - val_loss: 26.3287 - val_mae: 4.6489
Epoch 6/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 378.6520 - mae: 15.5521 - val_loss: 20.2091 - val_mae: 4.2121
Epoch 7/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

2025-01-20 22:23:14.630958: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 22:23:14.631248: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1530.7133 - mae: 27.9441 - val_loss: 45.0077 - val_mae: 6.2169
Epoch 2/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 391.8818 - mae: 15.8226 - val_loss: 7.4092 - val_mae: 2.2432
Epoch 3/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 373.9333 - mae: 15.5181 - val_loss: 14.1648 - val_mae: 3.1751
Epoch 4/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 363.0003 - mae: 15.2237 - val_loss: 17.8818 - val_mae: 3.9039
Epoch 5/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 352.8416 - mae: 15.0191 - val_loss: 5.5630 - val_mae: 1.9590
Epoch 6/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 373.6718 - mae: 15.4270 - val_loss: 52.6317 - val_mae: 7.0019
Epoch 7/100
[1m476/476[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

2025-01-20 22:23:24.331288: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),min=0; attr=output_types:list(type),min=1; attr=output_shapes:list(shape),min=1; attr=use_inter_op_parallelism:bool,default=true; attr=preserve_cardinality:bool,default=false; attr=force_synchronous:bool,default=false; attr=metadata:string,default=""> This may be expected if your graph generating binary is newer  than this binary. Unknown attributes will be ignored. NodeDef: {{node ParallelMapDatasetV2/_14}}
2025-01-20 22:23:24.331574: E tensorflow/core/framework/node_def_util.cc:676] NodeDef mentions attribute use_unbounded_threadpool which is not in the op definition: Op<name=MapDataset; signature=input_dataset:variant, other_arguments: -> handle:variant; attr=f:func; attr=Targuments:list(type),

In [11]:
print(f"Mean MSE: {np.mean(mse_scores)}")
print(f"Mean MAE: {np.mean(mae_scores)}")
print(f"Mean MAPE: {np.mean(mape_scores)}")
print(f"Mean RMSE: {np.mean(rmse_scores)}")

Mean MSE: 3.6742642884552454
Mean MAE: 1.5187631295628132
Mean MAPE: 0.015837294984187827
Mean RMSE: 1.8577245934576971


In [12]:
print(f'MSE scores: {mse_scores}')
print(f'MAE scores: {mae_scores}')
print(f'MAPE scores: {mape_scores}')
print(f'RMSE scores: {rmse_scores}')

MSE scores: [np.float64(4.861077942861903), np.float64(1.9175137157621525), np.float64(4.5999189221420655), np.float64(1.4298307891327402), np.float64(5.562980072377364)]
MAE scores: [np.float64(1.8174724969128386), np.float64(1.070477202834004), np.float64(1.7579558323759805), np.float64(0.9889107461048446), np.float64(1.9589993695863992)]
MAPE scores: [np.float64(0.019057943350099818), np.float64(0.011174007209482507), np.float64(0.01829628811571141), np.float64(0.010346587063994385), np.float64(0.020311649181651008)]
RMSE scores: [np.float64(2.204785237355762), np.float64(1.3847431948784412), np.float64(2.1447421574963426), np.float64(1.1957553215991723), np.float64(2.358597055958767)]


In [13]:
# get meilleur fold
best_fold = np.argmin(mse_scores)
print(f"Best fold: {best_fold}")

Best fold: 3


In [14]:
# save results

results = {
    "model_name": "CNN",
    "mse": mse_scores,
    "mae": mae_scores,
    "mape": mape_scores,
    "rmse": rmse_scores,
    "best_fold": best_fold,
    "mean_mse": np.mean(mse_scores),
    "mean_mae": np.mean(mae_scores),
    "mean_mape": np.mean(mape_scores),
    "mean_rmse": np.mean(rmse_scores),
}

pickle.dump(results, open(f"Data_to_share/CNN_results.pkl", "wb"))

In [15]:
cnn_best = pickle.load(open(f'Models/CNN_{taille_fenetre_to_run}_fold_{best_fold}.sav', 'rb'))

In [16]:
# TODO explicabilité CNN 
# https://github.com/feedzai/timeshap

In [17]:
import timeshap as ts

# model entry point

f = lambda x: cnn_best.predict(x)

In [18]:
# créer un dict pour les noms des features
features_dict = {i: features_3D[i] for i in range(len(features_3D))}

In [20]:
# gloabl report

from timeshap.explainer import global_report

pruning_dict = {'tol': [0.05, 0.075]}
event_dict = {'feature_names': features_dict}
report = global_report(f, X_binary_explicabilite, pruning_dict, event_dict,features_dict )
report

Assuming all features are model features
No path to persist pruning data provided.
No random seed provided for event-level explanations. Using default: 42
No nsamples provided for event-level explanations. Using default: 32000
No path to persist event explanations provided.
No path to persist feature explanations provided.
Calculating pruning algorithm
No time col provided, assuming dataset is ordered ascendingly by date


AttributeError: 'NoneType' object has no attribute 'shape'

In [None]:
stop 

In [52]:
"""Plots local feature explanations

    Parameters
    ----------
    feat_data: pd.DataFrame
        Feature explanations

    top_x_feats: int
        The number of feature to display.

    plot_features: dict
        Dict containing mapping between model features and display features
    """

# get feature importance
feat_imp = ts.feature_importance(X_binary_explicabilite, f)

# get feature explanations
feat_data = ts.feature_explanation(X_binary_explicabilite, f)



<function __main__.<lambda>(x)>

In [None]:
# plot global feat
from timeshap.plot  import plot_global_feat
plot_global_feat(f, X_binary_explicabilite, y_explicabilite, features_3D, title="CNN Global Feature Importance", save_path="Figures/CNN_global_feat_importance.png")

TypeError: 'function' object is not subscriptable

In [49]:
# plot_event_heatmap
from timeshap.plot import plot_feat_barplot

plot_features = {"feature_names": features_3D, "feature_importance": X_binary_explicabilite}

plot_feat_barplot(f, 10, plot_features)

TypeError: 'function' object is not subscriptable

In [42]:
# feat_explain_all

from timeshap.explainer import feat_explain_all

feat_explain_all(f, X_binary_explicabilite, y_explicabilite, features_3D)


AttributeError: 'numpy.ndarray' object has no attribute 'get'

In [6]:
# Baseline event 

from timeshap.explainer import global_report

global_report(f, X_binary_explicabilite, y_explicabilite, features_3D, n_samples=1000, n_features=10, n_shap=1000, n_permutations=1000, n_jobs=1, verbose=1)




NameError: name 'X_binary_explicabilite' is not defined