In [None]:
!pip install statsmodels
!pip install hyperas
!pip install hyperopt

In [None]:
import gc

import h5py
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from keras.utils import to_categorical
from tensorflow.keras import regularizers
from keras import backend as K

from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import Session

In [None]:
def data():
    """Load the preprocessed 3D volumes and build one stratified train/validation split.

    Gives back, in this order, the class-balanced training volumes, their one-hot
    labels, the validation volumes and their one-hot labels.

    NOTE(review) hyperas copies this body textually into a generated script, so the
    body is kept flat and the final return statement must stay on a single line.
    """
    ## Import data from hdf5 file
    PATH_3D_H5 = 'Images/preprocessed_data_128_112_40_outcome.hdf5'
    with h5py.File(PATH_3D_H5, 'r') as h5:
        print('H5-file: ', list(h5.keys()))

        X = h5["X"][:]
        Y_pat = h5["Y_outcome"][:]

    N_FOLDS = 5
    fold = 4  ## which of the N_FOLDS cross-validation splits to build

    ## get indices of bad (label 1) and good (label 0) outcome cases
    bad_outcome_idx = np.where(Y_pat == 1)[0]
    good_outcome_idx = np.where(Y_pat == 0)[0]

    ## shuffle indices, the fixed seed keeps the folds identical across runs
    np.random.seed(2021)
    np.random.shuffle(bad_outcome_idx)
    np.random.shuffle(good_outcome_idx)

    ## split the indices of each class into N_FOLDS parts (stratified folds)
    splits_bad_outcome = np.array_split(bad_outcome_idx, N_FOLDS)
    splits_good_outcome = np.array_split(good_outcome_idx, N_FOLDS)

    ## fold k is the test part, fold k+1 (wrapping around) the validation part
    ## and the remaining parts form the training set
    test_fold = fold
    valid_fold = (fold + 1) % N_FOLDS
    held_out = (test_fold, valid_fold)

    valid_idx = np.concatenate((splits_bad_outcome[valid_fold], splits_good_outcome[valid_fold]), axis=None)

    ## select the training parts explicitly, np.delete on the list of parts fails
    ## on recent NumPy versions when the parts have unequal lengths
    train_bad_outcome = np.concatenate([part for i, part in enumerate(splits_bad_outcome) if i not in held_out])
    train_good_outcome = np.concatenate([part for i, part in enumerate(splits_good_outcome) if i not in held_out])
    train_idx = np.concatenate((train_good_outcome, train_bad_outcome), axis=None)

    ## the test part is deliberately not materialised here, it is not used by the search
    X_train = X[train_idx]
    X_valid = X[valid_idx]

    ## Labels to categorical, num_classes keeps the one-hot width at 2 in every split
    Y_train = to_categorical(Y_pat[train_idx], num_classes=2)
    Y_valid = to_categorical(Y_pat[valid_idx], num_classes=2)

    ### Create balanced dataset by drawing bad-outcome cases with replacement
    ### until there are as many of them as good-outcome cases
    bool_train_labels = Y_train[:, 1] != 0
    pos_idx = np.flatnonzero(bool_train_labels)
    neg_idx = np.flatnonzero(~bool_train_labels)

    choices = np.random.choice(np.arange(len(pos_idx)), len(neg_idx))
    resampled_idx = np.concatenate([pos_idx[choices], neg_idx])

    ## shuffle once on the index level so the volumes are only copied a single time
    order = np.arange(len(resampled_idx))
    np.random.shuffle(order)
    resampled_idx = resampled_idx[order]
    X_train_balanced = X_train[resampled_idx]
    Y_train_balanced = Y_train[resampled_idx]

    print(X_train_balanced.shape, Y_train_balanced.shape)

    return X_train_balanced, Y_train_balanced, X_valid, Y_valid

In [None]:
def model(X_train, Y_train, X_valid, Y_valid):
    """hyperas model template for a 3D CNN with five conv blocks and one or two dense layers.

    Builds the network from the sampled hyperparameters, trains it for 50 epochs
    and reports the lowest validation loss seen in any epoch as the objective.
    The result dict carries the loss that hyperopt minimizes, the trial status
    and the trained model.

    NOTE(review) hyperas rewrites this function into its own objective and inlines
    the body of data() next to it, so the arrays are looked up by the names that
    data() assigns (X_train_balanced, Y_train_balanced, X_valid, Y_valid) and not
    through the parameters. Calling this function directly outside hyperas fails
    unless X_train_balanced and Y_train_balanced exist as globals.
    The double-brace markers are hyperas search-space templates and must stay as they are.
    """
    num_classes = 2
    model = keras.Sequential()
    model.add(layers.Convolution3D({{choice([8, 16, 32])}}, kernel_size=3, activation="relu", 
                                   batch_input_shape=(None,128,112,40,1), padding = 'same'))
    model.add(layers.MaxPooling3D(pool_size = 2))
    model.add(layers.BatchNormalization(center = True, scale = True))
    
    model.add(layers.Convolution3D({{choice([16, 32, 64])}}, kernel_size=3, activation="relu", padding = 'same'))
    model.add(layers.MaxPooling3D(pool_size = 2))
    model.add(layers.BatchNormalization(center = True, scale = True))
    
    model.add(layers.Convolution3D({{choice([16, 32, 64])}}, kernel_size=3, activation="relu", padding = 'same'))
    model.add(layers.MaxPooling3D(pool_size = 2))
    model.add(layers.BatchNormalization(center = True, scale = True))
    
    model.add(layers.Convolution3D({{choice([32, 64, 128])}}, kernel_size=3, activation="relu", padding = 'same'))
    model.add(layers.MaxPooling3D(pool_size = 2))
    model.add(layers.BatchNormalization(center = True, scale = True))
    
    model.add(layers.Convolution3D({{choice([32, 64, 128])}}, kernel_size=3, activation="relu", padding = 'same'))
    model.add(layers.MaxPooling3D(pool_size = 2))
    model.add(layers.BatchNormalization(center = True, scale = True))
    
    model.add(layers.Flatten())
    model.add(layers.Dense({{choice([32, 64, 128])}}, activation="relu"))
    model.add(layers.Dropout({{uniform(0, 0.6)}}))
    
    # If we choose 'two', add an additional second dense layer
    if {{choice(['one', 'two'])}} == 'two':
        model.add(layers.Dense({{choice([32, 64, 128])}}, activation="relu"))
        model.add(layers.Dropout({{uniform(0, 0.6)}}))
    
    model.add(layers.Dense(num_classes, activation='softmax'))

    #compile model
    metrics = [
      keras.metrics.BinaryAccuracy(name='accuracy'),
      keras.metrics.AUC(name='auc')]

    model.compile(loss='categorical_crossentropy',
                  optimizer=keras.optimizers.Adam(learning_rate =  0.00001),
                  metrics=metrics)
    
    # train on the balanced training set, validate on the untouched validation fold
    result = model.fit(X_train_balanced, Y_train_balanced,
                        validation_data=(X_valid, Y_valid), 
                        batch_size=4,
                        verbose=2,
                        epochs=50)
    
    #get the lowest validation loss of the training epochs
    val_loss = np.amin(result.history['val_loss']) 
    print('Best validation loss of epoch:', val_loss)

    # hyperopt minimizes 'loss', so the validation loss is returned as is.
    # Negating it (as one would for an accuracy) would select the worst model.
    return {'loss': val_loss, 'status': STATUS_OK, 'model': model}

In [None]:
# Keep a handle on the Trials object so the loss of every trial stays
# inspectable after the search instead of being discarded with the call.
trials = Trials()

best_run, best_model = optim.minimize(model=model,
                                      data=data,
                                      max_evals=50,
                                      algo=tpe.suggest,
                                      notebook_name="Modell_Outcome_Prognose_Hyperparameter_Optimierung", # Without this it can't find the notebook!
                                      trials=trials,
                                      eval_space=True)  # best_run holds the chosen values, not indices into the choice lists

In [None]:
# Display the best hyperparameter assignment returned by optim.minimize.
best_run

In [None]:
# Print the layer-by-layer summary of the model returned alongside best_run.
best_model.summary()