In [28]:
# NOTE(review): machine-specific absolute path. The import cell appends os.getcwd()
# to sys.path and the data/model paths below are relative, so this directory must be
# the project root; adjust it (or make it configurable) before running elsewhere.
%cd '/mnt/c/Users/malin/Documents/Facultate/honours/UMCG/ICU_Augment_and_Detect'

/mnt/c/Users/malin/Documents/Facultate/honours/UMCG/ICU_Augment_and_Detect


In [42]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
import matplotlib.pyplot as plt
import cv2
import os
import sys
import numpy as np
import pickle
import sklearn.metrics
import random
from sklearn.model_selection import GridSearchCV

sys.path.append(os.getcwd())
from augment.face_org import *

In [44]:
def _load_images(folder, ftype, image_size, label, exclude=None):
    """Read, convert and resize every image in `folder` whose name contains `ftype`.

    Args:
        folder: Directory to scan (not recursive).
        ftype: Substring a file name must contain to be loaded.
        image_size: Side length (pixels) of the square the image is resized to.
        label: Label vector attached to every image loaded from this folder.
        exclude: Optional substring; file names containing it are skipped.

    Returns:
        A pair of lists `(images, labels)` with one int32 array per loaded image.
    """
    images = []
    # os.listdir order is platform dependent; sorting keeps the loaded order stable.
    for filename in sorted(os.listdir(folder)):
        full_path = os.path.join(folder, str(filename))
        if ftype not in filename or not os.path.isfile(full_path):
            continue
        if exclude is not None and exclude in filename:
            continue
        image = cv2.imread(full_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # instead of raising; skip it rather than crash inside cvtColor.
            print("[WARN] Could not read image %s, skipping" % full_path)
            continue
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, dsize=(
            image_size, image_size), interpolation=cv2.INTER_CUBIC)
        images.append(np.asarray(image, dtype=np.int32))
    labels = [np.asarray(label, dtype=np.int32) for _ in images]
    return images, labels


def load_data(folder_sick, folder_healthy, image_size, ftype):
    """Load the healthy and sick images of one facial feature.

    Healthy images come first and are labelled `[0, 1]`; sick images follow and
    are labelled `[1, 0]`. Healthy files whose name contains "n2" are skipped.

    Args:
        folder_sick: Directory with images of sick subjects.
        folder_healthy: Directory with images of healthy subjects.
        image_size: Side length (pixels) every image is resized to.
        ftype: Substring selecting which files to load (e.g. "mouth").

    Returns:
        `(data, labels)`: `data` is a float64 array of RGB images scaled by
        1/255, `labels` is an int32 array of the matching label vectors.
    """
    healthy_images, healthy_labels = _load_images(
        folder_healthy, ftype, image_size, [0, 1], exclude="n2")
    sick_images, sick_labels = _load_images(
        folder_sick, ftype, image_size, [1, 0])

    data = healthy_images + sick_images
    labels = healthy_labels + sick_labels
    return np.asarray(data, dtype=np.float64) / 255, np.asarray(labels, dtype=np.int32)


def load_shuffled_data(folder_sick, folder_healthy, image_size, ftype):
    """Load images and labels via `load_data` and return them in a random order.

    One permutation drawn from NumPy's global RNG is applied to both arrays,
    so every image stays paired with its label.
    """
    images, targets = load_data(folder_sick, folder_healthy, image_size, ftype)
    order = np.random.permutation(len(images))
    shuffled_images = images[order]
    shuffled_targets = targets[order]
    return shuffled_images, shuffled_targets


def make_model(image_size=128, feature="mouth"):
    """Build and compile the CNN used for one facial feature.

    The network stacks five 3x3 convolutions whose filter counts are derived
    from `image_size` (image_size, /2, /4, /8, /16), each followed by batch
    normalisation, then two dense layers with dropout and a single sigmoid unit.

    Args:
        image_size: Side length of the square RGB input; also sets the filter
            counts. Defaults to 128, the value previously hard-coded.
        feature: Name appended to every layer name. Defaults to "mouth", the
            value previously hard-coded.

    Returns:
        A compiled `tf.keras` Sequential model (Adam, binary cross-entropy).
    """
    suffix = str(feature)
    model = models.Sequential()

    model.add(layers.Conv2D(image_size, (3, 3), padding="same", activation='relu',
                            input_shape=(image_size, image_size, 3),
                            name="input_" + suffix))

    model.add(layers.BatchNormalization(name="batch1_" + suffix))
    model.add(layers.Conv2D(image_size // 2, (3, 3),
                            activation='relu', name="conv1_" + suffix))
    model.add(layers.BatchNormalization(name="batch2_" + suffix))
    model.add(layers.MaxPooling2D((2, 2), name="max1_" + suffix))

    model.add(layers.Conv2D(image_size // 4, (3, 3),
                            activation='relu', name="conv2_" + suffix))
    model.add(layers.BatchNormalization(name="batch3_" + suffix))
    model.add(layers.MaxPooling2D((2, 2), name="max2_" + suffix))

    # Layer names below keep their original (non-consecutive) numbering so that
    # anything keyed on layer names keeps matching.
    model.add(layers.Conv2D(image_size // 8, (3, 3),
                            activation='relu', name="conv5_" + suffix))
    model.add(layers.BatchNormalization(name="batch6_" + suffix))
    model.add(layers.MaxPooling2D((2, 2), name="max3_" + suffix))

    model.add(layers.Conv2D(image_size // 16, (3, 3),
                            activation='relu', name="conv6_" + suffix))
    model.add(layers.BatchNormalization(name="batch7_" + suffix))
    model.add(layers.AveragePooling2D((2, 2), name="avg1_" + suffix))

    model.add(layers.Flatten(name="flatten_" + suffix))
    model.add(layers.Dense(48, activation='relu',
                           name="dense1_" + suffix))
    model.add(layers.Dropout(0.3, name="dropout1_" + suffix))

    model.add(layers.Dense(16, activation='relu',
                           name="dense2_" + suffix))
    model.add(layers.Dropout(0.5, name="dropout2_" + suffix))

    # NOTE(review): the output is a single sigmoid unit, whereas load_data
    # produces two-column one-hot labels; targets need to be reduced to one
    # 0/1 column before fitting this model.
    model.add(layers.Dense(1, activation='sigmoid',
                           name="dense3_" + suffix))

    # The original metrics list was closed with a stray extra "]", which made
    # the whole cell a SyntaxError.
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss="binary_crossentropy",
                  metrics=['accuracy',
                           tf.keras.metrics.AUC(),
                           tf.keras.metrics.FalseNegatives(),
                           tf.keras.metrics.FalsePositives(),
                           tf.keras.metrics.TruePositives(),
                           tf.keras.metrics.TrueNegatives()])

    return model


def load_data_eyes(image_folder_sick, image_folder_healthy, image_size):
    """Load the shuffled eye images (files whose name contains "_right").

    The arrays returned by `load_shuffled_data` are permuted once more with a
    fresh draw from NumPy's global RNG before being returned.
    """
    eye_images, eye_labels = load_shuffled_data(
        image_folder_sick, image_folder_healthy, image_size, "_right")
    order = np.random.permutation(len(eye_images))
    return eye_images[order], eye_labels[order]


def save_history(save_path, history, feature):
    """Pickle `history.history` to `<save_path><feature>/history.pickle`.

    `save_path` is concatenated as-is, so it must already end with a path
    separator, and the `<feature>` directory must already exist.
    """
    target = save_path + str(feature) + "/history.pickle"
    with open(target, 'wb') as handle:
        pickle.dump(history.history, handle)


def plot_roc(feature, saved_model, test_images, test_labels):
    """Plot the ROC curve of `saved_model` on the test set and save it.

    The curve is computed from the model's continuous scores. Thresholding the
    predictions first (e.g. with argmax) would leave a single operating point
    and a meaningless AUC.

    Args:
        feature: Feature name; used in the output file name.
        saved_model: Object with a `predict(test_images)` method.
        test_images: Inputs passed to `saved_model.predict`.
        test_labels: One-hot rows (class = argmax) or a flat 0/1 vector.

    Side effects:
        Saves the figure to "data/plots/<feature>_auc.png" and opens a new
        empty figure for subsequent plots.
    """
    pred = np.asarray(saved_model.predict(test_images))
    labels = np.asarray(test_labels)

    if labels.ndim > 1 and labels.shape[1] > 1:
        true_classes = labels.argmax(axis=1)
    else:
        true_classes = labels.ravel()

    if pred.ndim > 1 and pred.shape[1] > 1:
        # Column 1 is the score of class index 1, the class roc_curve treats
        # as positive for 0/1 targets.
        scores = pred[:, 1]
    else:
        # Single sigmoid output: presumably the probability of class index 1;
        # verify against how the model was trained.
        scores = pred.ravel()

    fpr, tpr, threshold = sklearn.metrics.roc_curve(true_classes, scores)
    roc_auc = sklearn.metrics.auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.savefig("data/plots/" + str(feature) + "_auc.png")
    plt.figure()


def plot_acc(feature, history):
    """Plot training and validation accuracy per epoch and save the figure.

    Reads the 'accuracy' and 'val_accuracy' series from `history.history`,
    writes "data/plots/<feature>_accuracy.png" and then opens a new empty
    figure.
    """
    curves = (
        ('accuracy', "Training accuracy"),
        ('val_accuracy', "Validation accuracy"),
    )
    for key, legend_label in curves:
        plt.plot(history.history[key], label=legend_label)
    plt.legend()

    epoch_count = len(history.history["accuracy"])
    plt.ylim((0.3, 1.05))
    plt.xlim((0, epoch_count))

    plt.xlabel('Training Epochs')
    plt.ylabel('Accuracy')
    plt.title("Accuracy of the " + str(feature) + " CNN")

    output_file = "data/plots/" + str(feature) + "_accuracy.png"
    plt.savefig(output_file)
    plt.figure()

def plot_validation(model, feature, validation, test_labels):
    """Show up to ten validation images with predicted and real class.

    Accuracy is the fraction of samples whose predicted class (argmax of the
    prediction row) equals the real class (argmax of the label row).

    Args:
        model: Object with a `predict(validation)` method returning one score
            row per sample.
        feature: Feature name; used in the title and the output file name.
        validation: Indexable collection of images to predict and display.
        test_labels: One label row per image (class = argmax of the row).

    Side effects:
        Saves the grid to "data/plots/predictions_<feature>.png" and opens a
        new empty figure.
    """
    pred = np.asarray(model.predict(validation))
    labels = np.asarray(test_labels)

    # Compare class indices: comparing the raw rows with `==` yields an array,
    # which cannot be used as an `if` condition.
    predicted_classes = pred.argmax(axis=1) if len(pred) else np.array([], dtype=int)
    real_classes = labels.argmax(axis=1) if len(labels) else np.array([], dtype=int)
    if len(pred):
        acc = float(np.mean(predicted_classes == real_classes))
    else:
        acc = 0.0  # avoid dividing by zero on an empty validation set

    plt.figure(figsize=(10, 10))

    # Never index past the available samples when fewer than ten exist.
    for i in range(min(10, len(pred))):
        plt.subplot(3, 4, i+1)
        plt.xticks([])
        plt.yticks([])
        plt.grid(False)
        plt.imshow(validation[i], cmap=plt.cm.binary)
        result = predicted_classes[i]
        real = real_classes[i]
        # NOTE(review): the "* 7" scaling of the displayed score is kept from
        # the original code; its purpose is not evident here - confirm or drop.
        plt.xlabel("%d (%.3f), real: %d" % (result, pred[i][result] * 7, real))

    # suptitle labels the whole grid; a separate plt.title before the subplots
    # would only create an extra full-figure axes behind them.
    plt.suptitle("Results " + feature + " model accuracy = " + str(acc))
    plt.savefig("data/plots/predictions_" + feature + ".png")
    plt.figure()

In [52]:
# --- Configuration -----------------------------------------------------------
image_folder_sick = 'data/parsed/training/sick'
image_folder_healthy = 'data/parsed/training/healthy'
image_folder_val_sick = 'data/parsed/validation/sick'
image_folder_val_healthy = 'data/parsed/validation/healthy'
image_folder_tune_sick = 'data/parsed/tuning/sick'
image_folder_tune_healthy = 'data/parsed/tuning/healthy'

save_path = 'categorization/model_saves/'
image_size = 128
face_features = ["mouth", "face", "skin", "eyes"]

feature = "mouth"

# Seed the RNGs so the shuffles (np.random.permutation in the loaders) and the
# weight initialisation are repeatable between runs of this cell.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("[INFO] Training %s" % (feature))

# --- Data --------------------------------------------------------------------
# The grid search is fitted on the "tuning" split; the "validation" split is
# loaded as the held-out test set.
if feature == "eyes":
    test_images, test_labels = load_data_eyes(
        image_folder_val_sick, image_folder_val_healthy, image_size)
    train_images, train_labels = load_data_eyes(
        image_folder_tune_sick, image_folder_tune_healthy, image_size)

else:
    test_images, test_labels = load_shuffled_data(
        image_folder_val_sick, image_folder_val_healthy, image_size, feature)
    train_images, train_labels = load_shuffled_data(
        image_folder_tune_sick, image_folder_tune_healthy, image_size, feature)

# make_model ends in a single sigmoid unit and the "roc_auc" scorer expects
# binary targets, so collapse the one-hot label rows to class indices
# (0 = sick, 1 = healthy, following the label layout in load_data).
train_targets = train_labels.argmax(axis=1)

# --- Grid search over the batch size -------------------------------------------
model = KerasClassifier(build_fn=make_model, verbose=1)

batch_size = [1, 2, 4, 8]

param_grid = dict(batch_size=batch_size)

# NOTE(review): n_jobs=-1 trains several Keras models in parallel worker
# processes; if that hangs or exhausts memory, fall back to n_jobs=1.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, scoring="roc_auc")
grid_result = grid.fit(train_images, train_targets, epochs=10)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))


[INFO] Training mouth


ValueError: 'roc_aucy' is not a valid scoring value. Use sorted(sklearn.metrics.SCORERS.keys()) to get valid options.