# Load Libraries

In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 3686323946287165312
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 2249929524
locality {
  bus_id: 1
  links {
  }
}
incarnation: 13274804549571815497
physical_device_desc: "device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5"
xla_global_id: 416903419
]


In [2]:
MRI_Type = 'T1w'
Img_Size = 128 # 정사각형 한 변의 픽셀값 입력

In [3]:
import os
import glob

# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 환경변수 설정 구문은 일단 생략

import pandas as pd
import numpy as np
from pathlib import Path

import random
from tqdm.notebook import tqdm
import pydicom # Handle MRI images

import cv2  # OpenCV - https://docs.opencv.org/master/d6/d00/tutorial_py_root.html

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn import model_selection

import tensorflow as tf
from tensorflow import keras

In [4]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers
from tensorflow.keras.initializers import RandomUniform

# Load Dataset

In [5]:
# data_dir = Path('./')

mri_types = ["FLAIR", "T1w", "T2w", "T1wCE"]
excluded_images = [109, 123, 709] # Bad images

train_df = pd.read_csv("./train_labels.csv")
test_df = pd.read_csv("./sample_submission.csv")
sample_submission = pd.read_csv("./sample_submission.csv")

train_df = train_df[~train_df.BraTS21ID.isin(excluded_images)].reset_index(drop=True)
train_df

Unnamed: 0,BraTS21ID,MGMT_value
0,0,1
1,2,1
2,3,0
3,5,1
4,6,1
...,...,...
577,1005,1
578,1007,1
579,1008,1
580,1009,0


# Utility Functions

In [6]:
def load_dicom(path, size = 224):
    ''' 
    Reads a DICOM image, standardizes so that the pixel values are between 0 and 1, then rescales to 0 and 255
    
    Not super sure if this kind of scaling is appropriate, but everyone seems to do it. 
    '''
    dicom = pydicom.read_file(path)
    data = dicom.pixel_array
    # transform data into black and white scale / grayscale
#     data = data - np.min(data)
    if np.max(data) != 0:
        data = data / np.max(data)
    data = (data * 255).astype(np.uint8)
    return cv2.resize(data, (size, size))

In [7]:
def get_all_image_paths(brats21id, image_type, folder='train'): 
    '''
    Returns an arry of all the images of a particular type for a particular patient ID
    '''
    assert(image_type in mri_types)
    
    patient_path = os.path.join(
        "./%s/" % folder, 
        str(brats21id).zfill(5),
    )

    paths = sorted(
        glob.glob(os.path.join(patient_path, image_type, "*")), 
        key=lambda x: int(x[:-4].split("-")[-1]),
    )
    
    num_images = len(paths)
    
    start = int(num_images * 0.25)
    end = int(num_images * 0.75)

    interval = 3
    
    if num_images < 10: 
        interval = 1
    
    return np.array(paths[start:end:interval])

In [8]:
def get_all_images(brats21id, image_type, folder='train', size=225):
    return [load_dicom(path, size) for path in get_all_image_paths(brats21id, image_type, folder)]

In [9]:
def get_all_data_for_train(image_type, image_size=32):
    global train_df
    
    X = []
    y = []
    train_ids = []

    for i in tqdm(train_df.index):
        x = train_df.loc[i]
        images = get_all_images(int(x['BraTS21ID']), image_type, 'train', image_size)
        label = x['MGMT_value']

        X += images
        y += [label] * len(images)
        train_ids += [int(x['BraTS21ID'])] * len(images)
        assert(len(X) == len(y))
    return np.array(X), np.array(y), np.array(train_ids)

In [10]:
def get_all_data_for_test(image_type, image_size=32):
    global test_df
    
    X = []
    test_ids = []

    for i in tqdm(test_df.index):
        x = test_df.loc[i]
        images = get_all_images(int(x['BraTS21ID']), image_type, 'test', image_size)
        X += images
        test_ids += [int(x['BraTS21ID'])] * len(images)

    return np.array(X), np.array(test_ids)

# Load all Images

X - contains all the images for each patient 

trainidt - trainidt is a mask vector into X, y for training.  There's a patient id/BraTS21ID corresponding to each image (e.g. (0, 0, 0, 0, 2,2, 3,3,3,3,3,...) )

testidt - testidt is a mask vector into X_test for testing

In [11]:
X, y, trainidt = get_all_data_for_train(MRI_Type, image_size=Img_Size)
# X_test, testidt = get_all_data_for_test('T1wCE', image_size=32) # 어차피 안 쓸 테스트 데이터

  0%|          | 0/582 [00:00<?, ?it/s]

In [12]:
X.shape # 32 * 32 이미지 16196장

(12463, 128, 128)

In [13]:
X[0].shape

(128, 128)

In [14]:
y.shape

(12463,)

In [15]:
y[0:10] # 각 이미지의 라벨값

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [16]:
trainidt.shape

(12463,)

In [17]:
trainidt[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [18]:
trainidt[1000:1010]

array([44, 44, 44, 44, 44, 44, 44, 44, 44, 44])

# Train/Validation Split

In [19]:
# 다른 방식으로 split 필요

# X_train, X_valid, y_train, y_valid, trainidt_train, trainidt_valid = train_test_split(X, y, trainidt, test_size=0.2, random_state=13)

In [20]:
train_df = pd.read_csv('./train_df.csv')
train_df

Unnamed: 0,BraTS21ID,MGMT_value
0,185,1
1,816,1
2,707,1
3,683,0
4,6,1
...,...,...
463,356,0
464,89,1
465,217,0
466,834,0


In [21]:
test_df = pd.read_csv('./test_df.csv')
test_df

Unnamed: 0,BraTS21ID,MGMT_value
0,107,1
1,753,0
2,303,1
3,106,1
4,171,1
...,...,...
112,703,0
113,21,0
114,444,0
115,95,0


In [22]:
len(list(train_df['BraTS21ID']) + list(test_df['BraTS21ID']))

585

In [23]:
len(set(list(train_df['BraTS21ID']) + list(test_df['BraTS21ID']))) # 잘 나뉘어진 것으로 확인 완료

585

In [24]:
len(X)

12463

In [25]:
train_ids = list(train_df['BraTS21ID'])
test_ids = list(test_df['BraTS21ID'])

In [26]:
len(train_ids), len(test_ids)

(468, 117)

In [27]:
# X_train, X_valid, y_train, y_valid, trainidt_train, trainidt_valid 

X_train, X_valid, y_train, y_valid, trainidt_train, trainidt_valid  = [], [], [], [], [], []

for i in tqdm(range(len(X))):
    if trainidt[i] in train_ids:
        X_train.append(X[i])
        y_train.append(y[i])
        trainidt_train.append(trainidt[i])
    else:
        X_valid.append(X[i])
        y_valid.append(y[i])
        trainidt_valid.append(trainidt[i])
        
X_train = np.array(X_train, dtype='uint8')
X_valid = np.array(X_valid, dtype='uint8')
y_train = np.array(y_train, dtype='int64')
y_valid = np.array(y_valid, dtype='int64')
trainidt_train = np.array(trainidt_train)
trainidt_valid = np.array(trainidt_valid)

  0%|          | 0/12463 [00:00<?, ?it/s]

In [28]:
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape, trainidt_train.shape, trainidt_valid.shape

((10011, 128, 128), (2452, 128, 128), (10011,), (2452,), (10011,), (2452,))

In [29]:
X_train[0]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [30]:
X_valid[0]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [31]:
y_train

array([1, 1, 1, ..., 0, 0, 0], dtype=int64)

In [32]:
y_valid

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [33]:
trainidt_train

array([   0,    0,    0, ..., 1010, 1010, 1010])

In [34]:
trainidt_valid

array([   3,    3,    3, ..., 1009, 1009, 1009])

## Adding a Dimension

In [35]:
X_train = tf.expand_dims(X_train, axis=-1)
X_valid = tf.expand_dims(X_valid, axis=-1)
X_train.shape

TensorShape([10011, 128, 128, 1])

## One-hot encode labels

In [36]:
y_train = to_categorical(y_train)
y_valid = to_categorical(y_valid)

# Tunable Model

Using the SIREN activation layer. Refer to https://vsitzmann.github.io/siren/ for more details.

In [37]:
class SineDenseLayer(keras.layers.Layer):
    # See paper sec. 3.2, final paragraph, and supplement Sec. 1.5 for discussion of omega_0.
    
    # If is_first=True, omega_0 is a frequency factor which simply multiplies the activations before the 
    # nonlinearity. Different signals may require different omega_0 in the first layer - this is a 
    # hyperparameter.
    
    # If is_first=False, then the weights will be divided by omega_0 so as to keep the magnitude of 
    # activations constant, but boost gradients to the weight matrix (see supplement Sec. 1.5)
    
    def __init__(self, features,
                 is_first=False, omega_0=30):
        super().__init__()
        self.omega_0 = omega_0
        self.is_first = is_first
        
        self.features = features
        
        if self.is_first:
            initializer = RandomUniform(-1 / self.features, 1 / self.features)   
            self.linear = keras.layers.Dense(features, kernel_initializer=initializer)
    
        else:
            initializer = RandomUniform(-np.sqrt(6 / self.features) / self.omega_0, np.sqrt(6 / self.features) / self.omega_0)
            self.linear = keras.layers.Dense(features, kernel_initializer=initializer)
     

    def call(self, input):
        return tf.math.sin(self.omega_0 * self.linear(input))
    
#     def forward_with_intermediate(self, input): 
#         # For visualization of activation distributions
#         intermediate = self.omega_0 * self.linear(input)
#         return tf.math.sin(intermediate), intermediate

In [38]:
class SineConvLayer(keras.layers.Layer):
    # See paper sec. 3.2, final paragraph, and supplement Sec. 1.5 for discussion of omega_0.
    
    # If is_first=True, omega_0 is a frequency factor which simply multiplies the activations before the 
    # nonlinearity. Different signals may require different omega_0 in the first layer - this is a 
    # hyperparameter.
    
    # If is_first=False, then the weights will be divided by omega_0 so as to keep the magnitude of 
    # activations constant, but boost gradients to the weight matrix (see supplement Sec. 1.5)
    
    def __init__(self, features, kernel_size,
                 is_first=False, omega_0=30):
        super().__init__()
        self.omega_0 = omega_0
        self.is_first = is_first
        
        self.features = features
        
        if self.is_first:
            initializer = RandomUniform(-1 / self.features, 1 / self.features)            
            self.conv = keras.layers.Conv2D(features, kernel_size, kernel_initializer=initializer)
            
        else:
            initializer = RandomUniform(-np.sqrt(6 / self.features) / self.omega_0, np.sqrt(6 / self.features) / self.omega_0)
            self.conv = keras.layers.Conv2D(features, kernel_size, kernel_initializer=initializer)
            

    def call(self, input):
        return tf.math.sin(self.omega_0 * self.conv(input))
    
#     def forward_with_intermediate(self, input): 
#         # For visualization of activation distributions
#         intermediate = self.omega_0 * self.linear(input)
#         return tf.math.sin(intermediate), intermediate

In [39]:
import keras_tuner as kt

In [40]:
def make_model(hp):
    inputs = keras.Input(shape=X_train.shape[1:])
    
    x = keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(inputs)

#     num_block = hp.Int('num_block', min_value=2, max_value=5, step=1)
#     num_filters = hp.Int('num_filters', min_value=32, max_value=128, step=32)
    
#     x = keras.layers.Conv2D(64, kernel_size=(4, 4), activation="relu", name="Conv_1")(x)
    x = keras.layers.Conv2D(filters=hp.Int('units_Conv_1_' + str(0),
                                            min_value=64,
                                            max_value=256,
                                            step=32),
                            kernel_size=(4, 4),
                            activation="relu", 
                            name="Conv_1")(x)

    x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

#     x = keras.layers.Conv2D(32, kernel_size=(2, 2), activation="relu", name="Conv_2")(x)
    x = keras.layers.Conv2D(filters=hp.Int('units_conv2_' + str(1),
                                            min_value=16,
                                            max_value=128,
                                            step=16),
                            kernel_size=(2, 2),
                            activation="relu",
                            name="Conv_2")(x)

    x = keras.layers.MaxPool2D(pool_size=(1, 1))(x)
    
#     for i in range(num_block):
#         x = keras.layers.Conv2D(num_filters, 
#                                 kernel_size=(4, 4),
#                                 activation="relu",
#                                 )(x)
    
#         x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

#     x = keras.layers.Conv2D(32, kernel_size=(2, 2), activation="relu", name="Conv_2")(x)
#     x = keras.layers.MaxPool2D(pool_size=(1, 1))(x)

#     h = keras.layers.Dropout(0.1)(h)
    x = layers.Dropout(
        hp.Float('dense_dropout', min_value=0., max_value=0.7)
    )(x)
    x = keras.layers.Flatten()(x)
#     reduction_type = hp.Choice('reduction_type', ['flatten', 'avg'])
#     if reduction_type == 'flatten':
#         x = layers.Flatten()(x)
#     else:
#         x = layers.GlobalAveragePooling2D()(x)
        
#     x = keras.layers.Dense(32, activation="relu")(x)
    x = layers.Dense(
        units=hp.Int('num_dense_units', min_value=16, max_value=64, step=8),
        activation='relu'
    )(x)

    outputs = keras.layers.Dense(2, activation="softmax")(x)

    model = keras.Model(inputs, outputs)

    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')

    model.compile(
        loss="categorical_crossentropy", optimizer="adam", metrics=[roc_auc]
    )
    model.summary()
    return model

# Augmentation

- https://www.tensorflow.org/guide/keras/preprocessing_layers
- https://keras.io/examples/vision/image_classification_from_scratch/

In [41]:
def make_model_augmented(hp):
    input_shape = (Img_Size, Img_Size, 1)
    classes = 10

    # Create a data augmentation stage with horizontal flipping, rotations, zooms
#     data_augmentation = keras.Sequential(
#         [
#             layers.experimental.preprocessing.RandomFlip("horizontal"),
#             layers.experimental.preprocessing.RandomRotation(0.1),
#             layers.experimental.preprocessing.RandomZoom(0.1),
#         ]
#     )
    
    data_augmentation = keras.Sequential(
    [
        layers.experimental.preprocessing.RandomFlip("horizontal"),
        layers.experimental.preprocessing.RandomRotation(0.1),
    ]
)

    shape=X_train.shape[1:]
    print(f"shape={shape}") # shape=(32, 32, 1)
    
    inputs = keras.Input(shape=input_shape)
    x = data_augmentation(inputs)

    x = keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(x)
#     x = layers.experimental.preprocessing.RandomFlip("horizontal")(x),
#     x = layers.experimental.preprocessing.RandomRotation(0.1)(x),
#     x = layers.experimental.preprocessing.RandomZoom(
#         height_factor = 0.2,
#         width_factor = -0.3,
#         fill_mode = "constant",
#         interpolation = "bilinear",
#         seed = 42
#     )(x),
#     num_block = hp.Int('num_block', min_value=2, max_value=5, step=1)
#     num_filters = hp.Int('num_filters', min_value=32, max_value=128, step=32)
    
#     x = keras.layers.Conv2D(64, kernel_size=(4, 4), activation="relu", name="Conv_1")(x)
    x = keras.layers.Conv2D(filters=hp.Int('units_Conv_1_' + str(0),
                                            min_value=64,
                                            max_value=256,
                                            step=32),
                            kernel_size=(4, 4),
                            activation="relu", 
                            name="Conv_1")(x)

    x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

#     x = keras.layers.Conv2D(32, kernel_size=(2, 2), activation="relu", name="Conv_2")(x)
    x = keras.layers.Conv2D(filters=hp.Int('units_conv2_' + str(1),
                                            min_value=16,
                                            max_value=128,
                                            step=16),
                            kernel_size=(2, 2),
                            activation="relu",
                            name="Conv_2")(x)

    x = keras.layers.MaxPool2D(pool_size=(1, 1))(x)
    
#     for i in range(num_block):
#         x = keras.layers.Conv2D(num_filters, 
#                                 kernel_size=(4, 4),
#                                 activation="relu",
#                                 )(x)
    
#         x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

#     x = keras.layers.Conv2D(32, kernel_size=(2, 2), activation="relu", name="Conv_2")(x)
#     x = keras.layers.MaxPool2D(pool_size=(1, 1))(x)

#     h = keras.layers.Dropout(0.1)(h)
    x = layers.Dropout(
        hp.Float('dense_dropout', min_value=0., max_value=0.7)
    )(x)
    x = keras.layers.Flatten()(x)
#     reduction_type = hp.Choice('reduction_type', ['flatten', 'avg'])
#     if reduction_type == 'flatten':
#         x = layers.Flatten()(x)
#     else:
#         x = layers.GlobalAveragePooling2D()(x)
        
#     x = keras.layers.Dense(32, activation="relu")(x)
    x = layers.Dense(
        units=hp.Int('num_dense_units', min_value=16, max_value=64, step=8),
        activation='relu'
    )(x)

    outputs = keras.layers.Dense(2, activation="softmax")(x)

    model = keras.Model(inputs, outputs)

    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')

    model.compile(
        loss="categorical_crossentropy", optimizer="adam", metrics=[roc_auc]
    )
    model.summary()
    return model

In [42]:
import keras_tuner as kt


def make_model_siren(hp):
    inputs = keras.Input(shape=X_train.shape[1:])
    
    x = keras.layers.experimental.preprocessing.Rescaling(1.0 / 255)(inputs)

    x = SineConvLayer(features=hp.Int('features_conv_1', min_value=64, max_value=256, step=32),
                      kernel_size=hp.Int('kernel_conv_1', min_value=2, max_value=7, step=1),
                      is_first=True, 
                      omega_0=hp.Int('omega_0_conv_1', min_value=10, max_value=50, step=5))(x)
    
    x = keras.layers.MaxPool2D(pool_size=(2, 2))(x)

    x = SineConvLayer(features=hp.Int('features_conv_2', min_value=16, max_value=128, step=16),
                      kernel_size=hp.Int('kernel_conv_2', min_value=2, max_value=7, step=1),
                      is_first=False, 
                      omega_0=hp.Int('omega_0_conv_2', min_value=10, max_value=50, step=5))(x)

    x = keras.layers.MaxPool2D(pool_size=(1, 1))(x)
    
    x = layers.Dropout(
        hp.Float('dense_dropout', min_value=0., max_value=0.7)
    )(x)
    x = keras.layers.Flatten()(x)
    x = SineDenseLayer(features=hp.Int('features_dense_1', min_value=64, max_value=256, step=32),
                      is_first=False, 
                      omega_0=hp.Int('omega_0_dense_1', min_value=10, max_value=50, step=5))(x)

    outputs = keras.layers.Dense(2, activation="softmax")(x)

    model = keras.Model(inputs, outputs)

    roc_auc = tf.keras.metrics.AUC(name='roc_auc', curve='ROC')

    model.compile(
        loss="categorical_crossentropy", optimizer="adam", metrics=[roc_auc]
    )
    model.summary()
    return model

# Hyperparameter Search

In [43]:
tuner = kt.tuners.BayesianOptimization(
#     make_model_siren,
#     make_model,
    make_model_augmented,
    objective='val_loss',
    max_trials=5,  # Set to 5 to run quicker, but need 100+ for good results
    overwrite=True)

callbacks=[keras.callbacks.EarlyStopping(monitor='val_roc_acc', mode='max', patience=3, baseline=0.9)]

tuner.search(X_train, y_train, validation_split=0.2, callbacks=callbacks, verbose=1, epochs=20)

Trial 5 Complete [00h 15m 55s]
val_loss: 0.6797730922698975

Best val_loss So Far: 0.6750637888908386
Total elapsed time: 01h 09m 21s
INFO:tensorflow:Oracle triggered exit


# Find the best epoch value

In [44]:
best_hp = tuner.get_best_hyperparameters()[0]
best_model = make_model(best_hp)

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 128, 128, 1)]     0         
                                                                 
 rescaling_1 (Rescaling)     (None, 128, 128, 1)       0         
                                                                 
 Conv_1 (Conv2D)             (None, 125, 125, 192)     3264      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 62, 62, 192)      0         
 2D)                                                             
                                                                 
 Conv_2 (Conv2D)             (None, 61, 61, 96)        73824     
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 61, 61, 96)       0         
 2D)                                                       

# Save Model

In [45]:
best_model.save(f"KerasTuner_best_model_{MRI_Type}")



INFO:tensorflow:Assets written to: KerasTuner_best_model_FLAIR\assets


INFO:tensorflow:Assets written to: KerasTuner_best_model_FLAIR\assets


In [46]:
history = best_model.fit(X_train, y_train, validation_split=0.2, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# Predictions on Validation Set

In [47]:
y_pred = best_model.predict(X_valid)

pred = np.argmax(y_pred, axis=1)

result = pd.DataFrame(trainidt_valid)
result[1] = pred

result.columns = ["BraTS21ID", "MGMT_value"]
result2 = result.groupby("BraTS21ID", as_index=False).mean()
result2



Unnamed: 0,BraTS21ID,MGMT_value
0,3,0.772727
1,9,0.575000
2,21,0.604651
3,22,0.837209
4,25,0.909091
...,...,...
111,823,0.200000
112,837,0.300000
113,1007,0.142857
114,1008,0.600000


In [48]:
result2 = result2.merge(test_df, on="BraTS21ID")
result2

Unnamed: 0,BraTS21ID,MGMT_value_x,MGMT_value_y
0,3,0.772727,0
1,9,0.575000,0
2,21,0.604651,0
3,22,0.837209,0
4,25,0.909091,1
...,...,...,...
111,823,0.200000,1
112,837,0.300000,0
113,1007,0.142857,1
114,1008,0.600000,1


In [49]:
auc = roc_auc_score(
    result2.MGMT_value_y,
    result2.MGMT_value_x,
)
print(f"Validation AUC={auc}")

Validation AUC=0.5386904761904762


In [50]:
# result3 = result2[:]
# result3["0.5 Pred"] = round(result3["MGMT_value_x"])
# result3

In [51]:
# result3.info()

In [52]:
# result3['0.5 Pred'] = result3['0.5 Pred'].astype('int64')
# result3.info()

In [53]:
# result3

In [54]:
# result3.loc[2, "MGMT_value_y"]

In [55]:
# confusion_matrix05 = [[0, 0], [0, 0]]

# for i in range(len(result3)):
#     confusion_matrix05[result3.loc[i, "MGMT_value_y"]][result3.loc[i, "0.5 Pred"]] += 1
        
# confusion_matrix05

In [56]:
def get_confusion_matrix(threshold=0.5):

    confusion_matrix = [[0, 0], [0, 0]]

    for i in range(len(result2)):
        tmp = 1 if result2.loc[i, "MGMT_value_x"] > threshold else 0
        confusion_matrix[result2.loc[i, "MGMT_value_y"]][tmp] += 1

    return confusion_matrix

In [57]:
def get_acc_recall(arr):
    acc = sum((arr[0][0], arr[1][1]))/sum((sum(arr[0]), sum(arr[1])))
    recall = arr[1][1] / sum(arr[1])
    print(f"Acc: {acc} \t Recall: {recall}")

In [58]:
result05 = get_confusion_matrix(threshold=0.5)
result05

[[30, 26], [33, 27]]

In [59]:
get_acc_recall(result05)

Acc: 0.49137931034482757 	 Recall: 0.45


In [60]:
result06 = get_confusion_matrix(threshold=0.3)
result06

[[21, 35], [14, 46]]

In [61]:
get_acc_recall(result06)

Acc: 0.5775862068965517 	 Recall: 0.7666666666666667


In [62]:
file_name = "KerasTunable_" + MRI_Type + "_" + str(Img_Size) + "x" + str(Img_Size) + ".csv"
file_name

'KerasTunable_T1w_128x128.csv'

In [63]:
result2.to_csv(file_name)