In [1]:
import os
import random
import pandas as pd
import numpy as np
import tensorflow as tf
import shap
from tensorflow.keras import Model
from sklearn.model_selection import GroupKFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

2024-07-21 21:19:02.919182: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-21 21:19:02.942263: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-21 21:19:02.949270: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-21 21:19:02.975412: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
SEED = 17
def set_seed(seed=SEED):
    random.seed(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.keras.utils.set_random_seed(seed)
    tf.config.experimental.enable_op_determinism()

set_seed(SEED)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
tf.get_logger().setLevel('ERROR')

In [3]:
train = pd.read_csv('inputs/tpm_train.csv', index_col=0)
label_train = pd.read_csv('inputs/label_train.csv')
test = pd.read_csv('inputs/tpm_test.csv', index_col=0)
label_test = pd.read_csv('inputs/label_test.csv')

selected_features = pd.read_csv("inputs/selected_features_tpm_300.csv", header=None).iloc[1:, 0].tolist()
X_selected_test = test[selected_features]
X_selected_train = train[selected_features]

label_test.set_index('sample', inplace=True)
label_train.set_index('sample', inplace=True)

data_test = X_selected_test.join(label_test[['label', 'subject', 'batch']])
data_train = X_selected_train.join(label_train[['label', 'subject', 'batch']])

In [4]:
def euclidean_distance_squared(vectors):
    x, y = vectors
    return tf.reduce_sum(tf.square(x - y), axis=1, keepdims=True)

def soft_triplet_loss(margin):
    def loss(y_true, y_pred):
        positive_distance = y_pred[:, 0]
        negative_distance = y_pred[:, 1]
        return tf.reduce_mean(tf.nn.softplus(positive_distance - negative_distance + margin))
    return loss

best_siamese_model = tf.keras.models.load_model('best_siamese_model.keras', custom_objects={'soft_triplet_loss': soft_triplet_loss, 'euclidean_distance_squared': euclidean_distance_squared})

base_network = best_siamese_model.get_layer(best_siamese_model.layers[3].name)

In [5]:
feature_extractor = Model(inputs=base_network.input, outputs=base_network.output)

train_features = feature_extractor.predict(data_train[selected_features], verbose=0)
test_features = feature_extractor.predict(data_test[selected_features], verbose=0)

train_labels = data_train['label'].values
test_labels = data_test['label'].values

In [6]:
clf = LogisticRegression()
clf.fit(train_features, train_labels)
test_predictions = clf.predict(test_features)

accuracy = accuracy_score(test_labels, test_predictions)
print(f'Validation accuracy: {accuracy}')

conf_matrix = confusion_matrix(test_labels, test_predictions)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(test_labels, test_predictions)
print('Classification Report:')
print(class_report)

Validation accuracy: 0.82
Confusion Matrix:
[[18  7]
 [ 2 23]]
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.72      0.80        25
           1       0.77      0.92      0.84        25

    accuracy                           0.82        50
   macro avg       0.83      0.82      0.82        50
weighted avg       0.83      0.82      0.82        50



In [7]:
set_seed(SEED)
def custom_grid_search_lr(params_grid, data_train, groups, folds=5):
    kf = GroupKFold(n_splits=folds)
    best_params = None
    best_score = -np.inf

    for params in params_grid:
        fold_scores = []

        for train_index, val_index in kf.split(data_train, groups=groups):
            train_subset = data_train.iloc[train_index]
            val_subset = data_train.iloc[val_index]

            train_features = train_subset[selected_features].values
            train_labels = train_subset['label'].values

            val_features = val_subset[selected_features].values
            val_labels = val_subset['label'].values

            if params['penalty'] == 'elasticnet':
                lr = LogisticRegression(C=params['C'], penalty=params['penalty'], solver=params['solver'], l1_ratio=params['l1_ratio'], max_iter=10000)
            else:
                lr = LogisticRegression(C=params['C'], penalty=params['penalty'], solver=params['solver'], max_iter=10000)

            lr.fit(train_features, train_labels)
            val_predictions = lr.predict(val_features)

            accuracy = accuracy_score(val_labels, val_predictions)
            fold_scores.append(accuracy)
        
        mean_score = np.mean(fold_scores)
        print(f"Mean accuracy for params {params}: {mean_score:.4f}")

        if mean_score > best_score:
            best_score = mean_score
            best_params = params

    return best_params, best_score

param_grid_lr = []
incompatible_combinations = {
    'l1': ['newton-cg', 'sag', 'lbfgs'],
    'elasticnet': ['newton-cg', 'sag', 'liblinear', 'lbfgs']
}

C_values = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
penalty_types = ['l1', 'l2', 'elasticnet']
solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
l1_ratios = [0.1, 0.3, 0.5, 0.7, 0.9]

for C in C_values:
    for penalty in penalty_types:
        for solver in solvers:
            if penalty in incompatible_combinations and solver in incompatible_combinations[penalty]:
                continue
            
            if penalty == 'elasticnet':
                for l1_ratio in l1_ratios:
                    param_grid_lr.append({'C': C, 'penalty': penalty, 'solver': solver, 'l1_ratio': l1_ratio})
            else:
                param_grid_lr.append({'C': C, 'penalty': penalty, 'solver': solver})

groups = data_train['subject'].values

best_params_lr, best_score_lr = custom_grid_search_lr(param_grid_lr, data_train, groups)

print(f"Best Params (LR): {best_params_lr}")
print(f"Best Mean Accuracy: {best_score_lr:.4f}")

Mean accuracy for params {'C': 0.0001, 'penalty': 'l1', 'solver': 'liblinear'}: 0.5000
Mean accuracy for params {'C': 0.0001, 'penalty': 'l1', 'solver': 'saga'}: 0.5000
Mean accuracy for params {'C': 0.0001, 'penalty': 'l2', 'solver': 'newton-cg'}: 0.7783
Mean accuracy for params {'C': 0.0001, 'penalty': 'l2', 'solver': 'lbfgs'}: 0.7783
Mean accuracy for params {'C': 0.0001, 'penalty': 'l2', 'solver': 'liblinear'}: 0.7654
Mean accuracy for params {'C': 0.0001, 'penalty': 'l2', 'solver': 'sag'}: 0.7654
Mean accuracy for params {'C': 0.0001, 'penalty': 'l2', 'solver': 'saga'}: 0.7654
Mean accuracy for params {'C': 0.0001, 'penalty': 'elasticnet', 'solver': 'saga', 'l1_ratio': 0.1}: 0.5000
Mean accuracy for params {'C': 0.0001, 'penalty': 'elasticnet', 'solver': 'saga', 'l1_ratio': 0.3}: 0.5000
Mean accuracy for params {'C': 0.0001, 'penalty': 'elasticnet', 'solver': 'saga', 'l1_ratio': 0.5}: 0.5000
Mean accuracy for params {'C': 0.0001, 'penalty': 'elasticnet', 'solver': 'saga', 'l1_rati

In [7]:
best_params_lr = {'C': 10000, 'penalty': 'l1', 'solver': 'liblinear'}

In [8]:
best_lr = LogisticRegression(**best_params_lr)
best_lr.fit(train_features, train_labels)
test_predictions = best_lr.predict(test_features)

test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test accuracy: {test_accuracy}')

conf_matrix = confusion_matrix(test_labels, test_predictions)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(test_labels, test_predictions)
print('Classification Report:')
print(class_report)

Test accuracy: 0.9
Confusion Matrix:
[[22  3]
 [ 2 23]]
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.88      0.90        25
           1       0.88      0.92      0.90        25

    accuracy                           0.90        50
   macro avg       0.90      0.90      0.90        50
weighted avg       0.90      0.90      0.90        50



In [9]:
rf_clf = RandomForestClassifier(random_state=SEED)
rf_clf.fit(train_features, train_labels)
test_predictions = rf_clf.predict(test_features)

accuracy = accuracy_score(test_labels, test_predictions)
print(f'Validation accuracy: {accuracy}')

conf_matrix = confusion_matrix(test_labels, test_predictions)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(test_labels, test_predictions)
print('Classification Report:')
print(class_report)

Validation accuracy: 0.86
Confusion Matrix:
[[21  4]
 [ 3 22]]
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.84      0.86        25
           1       0.85      0.88      0.86        25

    accuracy                           0.86        50
   macro avg       0.86      0.86      0.86        50
weighted avg       0.86      0.86      0.86        50



In [10]:
set_seed(SEED)
def custom_grid_search_rf(params_grid, data_train, groups, folds=5):
    kf = GroupKFold(n_splits=folds)
    best_params = None
    best_score = -np.inf

    for params in params_grid:
        fold_scores = []

        for train_index, val_index in kf.split(data_train, groups=groups):
            train_subset = data_train.iloc[train_index]
            val_subset = data_train.iloc[val_index]
            
            train_features = train_subset[selected_features]
            train_labels = train_subset['label'].values
            
            val_features = val_subset[selected_features]
            val_labels = val_subset['label'].values

            rf = RandomForestClassifier(n_estimators=params['n_estimators'], max_depth=params.get('max_depth', None),
                                        min_samples_split=params.get('min_samples_split', 2),
                                        min_samples_leaf=params.get('min_samples_leaf', 1),
                                        max_features=params.get('max_features', 'sqrt'), random_state=SEED)
            rf.fit(train_features, train_labels)

            val_predictions = rf.predict(val_features)

            accuracy = accuracy_score(val_labels, val_predictions)
            fold_scores.append(accuracy)
        
        mean_score = np.mean(fold_scores)
        print(f"Mean accuracy for params {params}: {mean_score:.4f}")

        if mean_score > best_score:
            best_score = mean_score
            best_params = params

    return best_params, best_score

param_grid_rf = [
    {'n_estimators': n_estimators, 'max_depth': max_depth, 'min_samples_split': min_samples_split,
     'min_samples_leaf': min_samples_leaf, 'max_features': max_features}
    for n_estimators in [100, 500, 1000]
    for max_depth in [None, 10, 20, 50]
    for min_samples_split in [2, 5]
    for min_samples_leaf in [1, 2]
    for max_features in ['sqrt', 'log2']
]

groups = data_train['subject'].values

best_params_rf, best_score_rf = custom_grid_search_rf(param_grid_rf, data_train, groups)

print(f"Best Params (RF): {best_params_rf}")
print(f"Best Mean Accuracy: {best_score_rf:.4f}")

Mean accuracy for params {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt'}: 0.8221
Mean accuracy for params {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'log2'}: 0.7846
Mean accuracy for params {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt'}: 0.8283
Mean accuracy for params {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'log2'}: 0.7908
Mean accuracy for params {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'sqrt'}: 0.8092
Mean accuracy for params {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 1, 'max_features': 'log2'}: 0.7779
Mean accuracy for params {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_features': '

In [10]:
best_params_rf = {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 2, 'min_samples_leaf': 2, 'max_features': 'sqrt'}

In [11]:
best_rf = RandomForestClassifier(**best_params_rf, random_state=SEED)
best_rf.fit(train_features, train_labels)
test_predictions = best_rf.predict(test_features)

test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test accuracy: {test_accuracy}')

conf_matrix = confusion_matrix(test_labels, test_predictions)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(test_labels, test_predictions)
print('Classification Report:')
print(class_report)

Test accuracy: 0.86
Confusion Matrix:
[[21  4]
 [ 3 22]]
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.84      0.86        25
           1       0.85      0.88      0.86        25

    accuracy                           0.86        50
   macro avg       0.86      0.86      0.86        50
weighted avg       0.86      0.86      0.86        50



In [12]:
svm_clf = SVC(probability=True, random_state=SEED)
svm_clf.fit(train_features, train_labels)
test_predictions = svm_clf.predict(test_features)

accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test accuracy: {accuracy}')

conf_matrix = confusion_matrix(test_labels, test_predictions)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(test_labels, test_predictions)
print('Classification Report:')
print(class_report)

Test accuracy: 0.86
Confusion Matrix:
[[20  5]
 [ 2 23]]
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.80      0.85        25
           1       0.82      0.92      0.87        25

    accuracy                           0.86        50
   macro avg       0.87      0.86      0.86        50
weighted avg       0.87      0.86      0.86        50



In [13]:
set_seed(SEED)
def custom_grid_search_svm(params_grid, data_train, groups, folds=5):
    kf = GroupKFold(n_splits=folds)
    best_params = None
    best_score = -np.inf

    for params in params_grid:
        fold_scores = []

        for train_index, val_index in kf.split(data_train, groups=groups):
            train_subset = data_train.iloc[train_index]
            val_subset = data_train.iloc[val_index]
            
            train_features = train_subset[selected_features]
            train_labels = train_subset['label'].values
            
            val_features = val_subset[selected_features]
            val_labels = val_subset['label'].values

            svm = SVC(C=params['C'], gamma=params.get('gamma', 'scale'), kernel=params['kernel'],
                      degree=params.get('degree', 3), coef0=params.get('coef0', 0.0), probability=True, random_state=SEED)
            svm.fit(train_features, train_labels)

            val_predictions = svm.predict(val_features)

            accuracy = accuracy_score(val_labels, val_predictions)
            fold_scores.append(accuracy)
        
        mean_score = np.mean(fold_scores)
        print(f"Mean accuracy for params {params}: {mean_score:.4f}")

        if mean_score > best_score:
            best_score = mean_score
            best_params = params

    return best_params, best_score

param_grid_svm = [
    {'C': C, 'gamma': gamma, 'kernel': 'rbf'}
    for C in [0.1, 1, 10, 100, 1000]
    for gamma in ['scale', 0.1, 0.01, 0.001]
] + [
    {'C': C, 'gamma': gamma, 'kernel': 'sigmoid', 'coef0': coef0}
    for C in [0.1, 1, 10, 100, 1000]
    for gamma in ['scale', 0.1, 0.01, 0.001]
    for coef0 in [0, 0.1, 0.5, 1]
] + [
    {'C': C, 'gamma': gamma, 'kernel': 'poly', 'degree': degree, 'coef0': coef0}
    for C in [0.1, 1, 10, 100, 1000]
    for gamma in ['scale', 0.1, 0.01, 0.001]
    for degree in [2, 3, 4]
    for coef0 in [0, 0.1, 0.5, 1]
] + [
    {'C': C, 'kernel': 'linear'}
    for C in [0.1, 1, 10, 100, 1000]
]

groups = data_train['subject'].values

best_params_svm, best_score_svm = custom_grid_search_svm(param_grid_svm, data_train, groups)

print(f"Best Params (SVM): {best_params_svm}")
print(f"Best Mean Accuracy: {best_score_svm:.4f}")

Mean accuracy for params {'C': 0.1, 'gamma': 'scale', 'kernel': 'rbf'}: 0.7333
Mean accuracy for params {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}: 0.5129
Mean accuracy for params {'C': 0.1, 'gamma': 0.01, 'kernel': 'rbf'}: 0.7471
Mean accuracy for params {'C': 0.1, 'gamma': 0.001, 'kernel': 'rbf'}: 0.7717
Mean accuracy for params {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}: 0.8475
Mean accuracy for params {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}: 0.5254
Mean accuracy for params {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}: 0.8033
Mean accuracy for params {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}: 0.8600
Mean accuracy for params {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}: 0.9113
Mean accuracy for params {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}: 0.5379
Mean accuracy for params {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}: 0.8033
Mean accuracy for params {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}: 0.9050
Mean accuracy for params {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}: 0.9046
Mean accurac

In [13]:
best_params_svm = {'C': 10, 'gamma': 'scale', 'kernel': 'rbf'}

In [14]:
best_svm = SVC(**best_params_svm, probability=True, random_state=SEED)
best_svm.fit(train_features, train_labels)
test_predictions = best_svm.predict(test_features)

test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test accuracy: {test_accuracy}')

conf_matrix = confusion_matrix(test_labels, test_predictions)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(test_labels, test_predictions)
print('Classification Report:')
print(class_report)

Test accuracy: 0.88
Confusion Matrix:
[[21  4]
 [ 2 23]]
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.84      0.88        25
           1       0.85      0.92      0.88        25

    accuracy                           0.88        50
   macro avg       0.88      0.88      0.88        50
weighted avg       0.88      0.88      0.88        50



In [15]:
class CombinedModel:
    def __init__(self, feature_extractor, classifier):
        self.feature_extractor = feature_extractor
        self.classifier = classifier
    
    def predict(self, data):
        features = self.feature_extractor.predict(data)
        return self.classifier.predict(features)
    
    def predict_proba(self, data):
        features = self.feature_extractor.predict(data)
        proba = self.classifier.predict_proba(features)
        return proba[:, 1]

combined_model = CombinedModel(feature_extractor, best_lr)

test_predictions = combined_model.predict(data_test[selected_features])
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test accuracy: {test_accuracy}')

conf_matrix = confusion_matrix(test_labels, test_predictions)
print('Confusion Matrix:')
print(conf_matrix)

class_report = classification_report(test_labels, test_predictions)
print('Classification Report:')
print(class_report)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Test accuracy: 0.9
Confusion Matrix:
[[22  3]
 [ 2 23]]
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.88      0.90        25
           1       0.88      0.92      0.90        25

    accuracy                           0.90        50
   macro avg       0.90      0.90      0.90        50
weighted avg       0.90      0.90      0.90        50



In [16]:
train_predictions = combined_model.predict(data_train[selected_features])

correct_train_indices = np.where(train_predictions == train_labels)[0]
correct_train_data = data_train.iloc[correct_train_indices]
correct_train_labels = train_labels[correct_train_indices]

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


In [17]:
test_predictions = combined_model.predict(data_test[selected_features])

correct_test_indices = np.where(test_predictions == test_labels)[0]
correct_test_data = data_test.iloc[correct_test_indices]
correct_test_labels = test_labels[correct_test_indices]

incorrect_test_indices = np.where(test_predictions != test_labels)[0]
incorrect_test_data = data_test.iloc[incorrect_test_indices]
incorrect_test_labels = test_labels[incorrect_test_indices]

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 


In [38]:
set_seed(SEED)

background_data = correct_train_data[selected_features].values

explainer = shap.KernelExplainer(combined_model.predict_proba, background_data)

shap_values_correct = explainer.shap_values(correct_test_data[selected_features].values)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 


Using 158 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


  0%|          | 0/45 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m14862/14862[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m14862/14862[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m14862/14862[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m14862/14862[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m14862/14862[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m14862/14862[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m14862/14862

In [34]:
shap_values_df = pd.DataFrame(shap_values_correct, columns=correct_test_data_renamed.columns)

csv_file_path = './shap_values_correct.csv'
shap_values_df.to_csv(csv_file_path, index=False)