In [None]:
# source: https://machinelearningmastery.com/display-deep-learning-model-training-history-in-keras/
import matplotlib.pyplot as plt
import seaborn as sns


def plot_history(history, titles):
    """
    Plots the loss curves (figure 1) followed by the accuracy curves (figure 2).
    :param history: the training history, forwarded unchanged to plot_loss and plot_accuracy
    :param titles: indexable - titles[0] is the loss plot title, titles[1] the accuracy plot title
    """
    plot_loss(history, fig_num=1, title=titles[0])
    plot_accuracy(history, fig_num=2, title=titles[1])


def plot_loss(history, fig_num, title):
    """
    Draws the per-epoch 'loss' and 'val_loss' series on one figure and shows it.
    :param history: the training history; its `history` mapping must hold 'loss' and 'val_loss'
    :param fig_num: integer - the plot figure index
    :param title: the title placed above the plot
    """
    plt.figure(fig_num)
    curves = history.history
    # first curve drawn is the training loss, second one the validation loss
    for series_key in ('loss', 'val_loss'):
        plt.plot(curves[series_key])
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title(title)
    # legend entries are matched to the curves in drawing order
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


def plot_accuracy(history, fig_num, title):
    """
    Draws the per-epoch 'accuracy' and 'val_accuracy' series on one figure and shows it.
    :param history: the training history; its `history` mapping must hold 'accuracy' and 'val_accuracy'
    :param fig_num: integer - the plot figure index
    :param title: the title placed above the plot
    """
    plt.figure(fig_num)
    curves = history.history
    # first curve drawn is the training accuracy, second one the validation accuracy
    for series_key in ('accuracy', 'val_accuracy'):
        plt.plot(curves[series_key])
    plt.xlabel('epoch')
    plt.ylabel('accuracy')
    plt.title(title)
    # legend entries are matched to the curves in drawing order
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


In [None]:
from sklearn.metrics import roc_auc_score, classification_report, accuracy_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


def evaluate_metrics(y, y_hat):
    """
    Prints the accuracy, the ROC AUC and the classification report of the predictions,
    then draws the confusion matrix with the class labels 0 and 1.
    :param y: the ground truth values
    :param y_hat: the predicted values
    :return: None - the results are only printed / plotted
    """
    print(f"Accuracy: {accuracy_score(y, y_hat)}")
    print(f"AUC: {roc_auc_score(y, y_hat)}")
    print(classification_report(y, y_hat))
    # values_format='d' renders the cell counts as plain integers
    matrix_display = ConfusionMatrixDisplay(confusion_matrix(y, y_hat), display_labels=[0, 1])
    matrix_display.plot(values_format='d')


def evaluate_accuracy(y, y_hat):
    """
    Computes the accuracy score of the predictions.
    :param y: the ground truth values
    :param y_hat: the predicted values
    :return: the value returned by sklearn's accuracy_score(y, y_hat)
    """
    return accuracy_score(y, y_hat)


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from shutil import copyfile
import os
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import model_from_json

def convert_illness_level(x):
    """
    Maps a raw rating to the binary label that is stored in the "healthy" column.
    :param x: number - the raw rating value
    :return: 1 when x is greater than 2.5 and not equal to 6, otherwise 0
    """
    # a rating of exactly 6 is mapped to 0 although it exceeds 2.5
    # (presumably a special code rather than a real score - TODO confirm)
    if x != 6 and x > 2.5:
        return 1
    return 0  # sick


def load_real_labels(csv_path="/content/drive/MyDrive/resarchMethods/rating.csv"):
    """
    Reads the ratings file and derives the binary "healthy" label for every rated image.
    Rows whose rating is -1 are dropped before the label is computed.
    :param csv_path: String - path of the ratings CSV; it must contain the columns "image" and "rating"
    :return: DataFrame - the columns "image" and "healthy" of the remaining rows
    """
    ratings = pd.read_csv(csv_path)
    # .copy() detaches the filtered rows from `ratings`, so adding a column below
    # does not raise pandas' SettingWithCopyWarning
    rated = ratings[ratings.rating != -1].copy()
    rated["healthy"] = rated.rating.apply(convert_illness_level)
    return rated[["image", "healthy"]]


def split_sets(df):
    """
    Splits the image names and their labels into a train part (70%) and a test part.
    The split uses random_state=1, so it is the same on every run.
    :param df: DataFrame - must contain the columns "image" and "healthy"
    :return: train names, test names, train labels, test labels
    """
    names = df["image"]
    labels = df["healthy"]
    train_names, test_names, train_labels, test_labels = train_test_split(
        names, labels, train_size=0.7, random_state=1)
    return train_names, test_names, train_labels, test_labels


def basic_preprocess(image):
    """
    Pass-through preprocessing step: the given image is handed back untouched.
    :param image: the image (or batch of images) to "preprocess"
    :return: the very same object that was passed in
    """
    # The RGB conversion and the 1/255 scaling that used to live here are disabled:
    #   image = np.array(image.convert('RGB'), dtype='float32')
    #   image = image / 255
    # (the ImageDataGenerator instances of this notebook are configured with rescale=1/255)
    return image


def load_and_preprocess(img_path, preprocess, target_size=(256, 256)):
    """
    Load an image from the given img_path, resize it and preprocess it.
    :param img_path: String - a path to an image
    :param preprocess: function - a function for image preprocessing; it receives the image as an
                       array with one extra leading axis of length 1
    :param target_size: tuple - the size passed to Keras' load_img for resizing, defaults to (256, 256)
    :return: preprocessed image - whatever `preprocess` returns
    """
    # load the input image using the Keras helper utility while ensuring
    # the image is resized to `target_size`
    image = load_img(img_path, target_size=target_size)
    image = img_to_array(image)
    # add a leading batch axis so the image can be passed through the network
    image = np.expand_dims(image, axis=0)
    return preprocess(image)


def load_images_from_path(img_folder_path, preprocess):
    """
    Loads and preprocesses every entry of a given folder.
    Returns the preprocessed images and their file names, aligned position by position.
    :param img_folder_path: String - a path to a folder that contains images.
    :param preprocess: function - a function for image preprocessing
    :return: images, images_names - the images stacked along the first axis (np.vstack) and the
             list of their file names
    """
    images = []
    images_names = []
    # os.listdir returns the entries in arbitrary order; sorting keeps the order stable between runs
    for img_name in sorted(os.listdir(img_folder_path)):
        full_img_path = os.path.join(img_folder_path, img_name)
        images.append(load_and_preprocess(full_img_path, preprocess))
        images_names.append(img_name)
    # load_and_preprocess adds a leading axis of length 1 to every image,
    # so stacking along the first axis yields one row per image
    return np.vstack(images), images_names


def set_from_names(names_in_set, samples, samples_names):
    """
    Collects the samples that belong to the given names.
    :param names_in_set: iterable - the names of the samples to pick, in the desired output order
    :param samples: indexable - all the samples, aligned position by position with samples_names
    :param samples_names: iterable - the (hashable) name of every sample
    :return: list - for every requested name, the sample at the first position of that name
    :raises ValueError: if a requested name does not appear in samples_names
    """
    # Build the name -> position lookup once instead of scanning samples_names for every name.
    # setdefault keeps the FIRST position of a repeated name, like list.index does.
    first_index_of = {}
    for position, name in enumerate(samples_names):
        first_index_of.setdefault(name, position)

    samples_in_set = []
    for sample_name in names_in_set:
        if sample_name not in first_index_of:
            # same exception type list.index raises for a missing element
            raise ValueError(f"{sample_name!r} is not in samples_names")
        samples_in_set.append(samples[first_index_of[sample_name]])
    return samples_in_set


# Load ground truth labels and samples names
df_labels = load_real_labels()

# Split sets and keep only the underlying arrays of the four parts
X_train_names, X_test_names, y_train, y_test = (
    part.values for part in split_sets(df_labels))

# Load entire datasets
src = '/content/drive/MyDrive/resarchMethods/labeled_data'
images, images_names = load_images_from_path(img_folder_path=src, preprocess=basic_preprocess)

# split samples: pick the images of every set by their file names
X_train = np.array(set_from_names(X_train_names, images, images_names))
X_test = np.array(set_from_names(X_test_names, images, images_names))

# Pre processing and data augmentation
data_generator_train = ImageDataGenerator(
    # normalization - the same three settings are used by the test generator
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True,
    # random augmentation - applied to the training data only
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
data_generator_test = ImageDataGenerator(
    rescale=1. / 255,
    featurewise_center=True,
    featurewise_std_normalization=True,
)
# compute quantities required for featurewise normalization (std, mean);
# both generators are fitted on the TRAINING images
data_generator_train.fit(X_train)
data_generator_test.fit(X_train)

# models
# Loading the pre-trained models weights (ImageNet, without the classification top)
pre_trained_resnet_model = ResNet50(weights="imagenet", include_top=False, input_shape=(256, 256, 3))

pre_trained_inception_model = InceptionV3(weights="imagenet", include_top=False, input_shape=(256, 256, 3))

# Adapting the model: freeze every layer of both pre-trained networks
for layer in pre_trained_inception_model.layers + pre_trained_resnet_model.layers:
    layer.trainable = False

# The new classification heads are attached to the output of these layers
last_layer_inception_model = pre_trained_inception_model.get_layer('mixed10')
last_output_inception_model = last_layer_inception_model.output
print('inception: last layer output shape:', last_layer_inception_model.output_shape)

last_layer_resnet_model = pre_trained_resnet_model.get_layer('conv5_block3_out')
last_output_resnet_model = last_layer_resnet_model.output
print('resnet_model: last layer output shape:', last_layer_resnet_model.output_shape)


def transfer_model_init(pre_trained_model, last_output_layer, learning_rate):
    """
    Builds and compiles a binary classifier on top of a pre-trained network.
    The head is: Flatten -> Dense(2048, relu) -> Dense(1024, relu) -> Dense(1, sigmoid).
    :param pre_trained_model: the pre-trained Keras model; its input becomes the input of the new model
    :param last_output_layer: the output tensor of the pre-trained model the head is attached to
    :param learning_rate: the learning rate of the Adam optimizer
    :return: the compiled model (binary cross-entropy loss, accuracy metric)
    """
    # Flatten the feature maps, then stack the two hidden fully connected layers
    head = layers.Flatten()(last_output_layer)
    for hidden_units in (2048, 1024):
        head = layers.Dense(hidden_units, activation='relu')(head)
    # A single sigmoid unit produces the class probability
    prediction = layers.Dense(1, activation='sigmoid')(head)

    # Configure and compile the model
    model = Model(pre_trained_model.input, prediction)
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model


# transfer_inception_model = transfer_model_init(pre_trained_inception_model, last_output_inception_model,
#                                                learning_rate=0.0001)

# transfer_resnet_model = transfer_model_init(pre_trained_resnet_model, last_output_resnet_model, learning_rate=0.0001)

# print(f"len(x_test_transformed): {len(x_test_transformed)}")
# resnet_test_y_hat = transfer_resnet_model.predict(x_test_transformed)

# Normalize the test images with the test generator (which was fitted on X_train above).
# NOTE(review): the Keras documentation describes ImageDataGenerator.standardize as working
# in place, so X_test itself presumably holds the normalized values after this line -
# confirm before reusing X_test as raw pixel data.
x_test_transformed = data_generator_test.standardize(X_test)


# fits the model on batches with real-time data augmentation:


def train_model(epochs, batch_size, learning_rate, pre_trained_model, last_output,
                iterations):  # inception_training_history
    """
    Trains `iterations` freshly initialised transfer models and keeps the two that reach the
    highest accuracy on the test set.
    Reads the module-level X_train, y_train, x_test_transformed, y_test and data_generator_train.
    NOTE(review): the test set is used both for model selection and for the reported accuracy,
    so the returned accuracy is an optimistic estimate.
    :param epochs: integer - number of epochs of every training run
    :param batch_size: integer - batch size of the augmented training batches
    :param learning_rate: the learning rate handed to transfer_model_init
    :param pre_trained_model: the pre-trained model the classifier is built on
    :param last_output: the output tensor of the pre-trained model the head is attached to
    :param iterations: integer - how many models are trained
    :return: best_model, best_model_history, best_test_accuracy, second_best_model
             (the models/history are None when no run beat an accuracy of 0; second_best_model is
             None when fewer than two runs qualified)
    """
    # steps_per_epoch has to be a whole number; round up so the last partial batch is included
    steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

    best_model = None
    best_model_history = None
    best_test_accuracy = 0
    second_best_model = None
    second_best_accuracy = None
    for _ in range(iterations):
        model = transfer_model_init(pre_trained_model, last_output,
                                    learning_rate=learning_rate)
        # fits the model on batches with real-time data augmentation
        history = model.fit(
            data_generator_train.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=steps_per_epoch, epochs=epochs,
            validation_data=(x_test_transformed, y_test))

        # threshold the sigmoid outputs at 0.5 to get hard 0/1 predictions
        test_y_hat = model.predict(x_test_transformed)
        test_y_hat = [0 if x < 0.5 else 1 for x in test_y_hat]
        test_accuracy = evaluate_accuracy(y_test, test_y_hat)

        if test_accuracy > best_test_accuracy:
            # the previous best (if any) is demoted to second best
            if best_model is not None:
                second_best_model, second_best_accuracy = best_model, best_test_accuracy
            best_model = model
            best_model_history = history
            best_test_accuracy = test_accuracy
        elif best_model is not None and (second_best_accuracy is None
                                         or test_accuracy > second_best_accuracy):
            # not a new best, but better than the current runner-up
            second_best_model, second_best_accuracy = model, test_accuracy
    return best_model, best_model_history, best_test_accuracy, second_best_model


def save_model(model, path, num):
    """
    Saves a model as two files inside the folder `path`:
    model_<num>.json (architecture) and model_weights_<num>.h5 (weights).
    The folder is created when it does not exist yet.
    :param model: the model to save; must provide to_json() and save_weights(file_path)
    :param path: String - the folder the files are written to
    :param num: the identifier that is embedded in both file names
    """
    os.makedirs(path, exist_ok=True)
    # serialize model to JSON
    # os.path.join instead of a hard-coded "\\": a backslash is not a separator outside Windows
    with open(os.path.join(path, "model_{}.json".format(num)), "w") as json_file:
        json_file.write(model.to_json())
    # serialize weights to HDF5
    model.save_weights(os.path.join(path, "model_weights_{}.h5".format(num)))


def _resolve_model_file(path, file_name):
    """Returns the file inside `path`, or the legacy '<path>\\<file_name>' file if only that exists."""
    joined = os.path.join(path, file_name)
    legacy = path + "\\" + file_name
    if not os.path.exists(joined) and os.path.exists(legacy):
        return legacy
    return joined


def load_model(path, num):
    """
    Loads a model that was stored by save_model and compiles it
    (binary cross-entropy loss, default Adam optimizer, accuracy metric).
    Files written with the former backslash-concatenated names are still found.
    :param path: String - the folder the model was saved to
    :param num: the identifier that was used when saving
    :return: the compiled model with its weights loaded
    """
    # load json and create model; `with` closes the file even when reading fails
    json_path = _resolve_model_file(path, "model_{}.json".format(num))
    with open(json_path, 'r') as json_file:
        model = model_from_json(json_file.read())
    # load weights into new model
    model.load_weights(_resolve_model_file(path, "model_weights_{}.h5".format(num)))
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(),
                  metrics=['accuracy'])
    return model


# InceptionV3 has to be trained in the same run: its history and its model are used by the
# RESULTS section below and by the user study (leaving this commented out ends in a NameError).
transfer_inception_model, inception_training_history, inception_best_test_accuracy, second_best_transfer_inception_model = train_model(
    epochs=10, batch_size=32, learning_rate=0.0001, pre_trained_model=pre_trained_inception_model,
    last_output=last_output_inception_model, iterations=5)
print(f"inception_best_test_accuracy: {inception_best_test_accuracy}\n")
# Saving the InceptionV3 models stays disabled so that previously stored files are not overwritten:
# save_model(model=transfer_inception_model,
#            path="/content/drive/MyDrive/resarchMethods/trainedModels/transfer_inception",
#            num=10)
# save_model(model=second_best_transfer_inception_model,
#            path="/content/drive/MyDrive/resarchMethods/trainedModels/transfer_inception",
#            num=11)

# Train ResNet50 and store the best model
transfer_resnet_model, resnet_training_history, resnet_best_test_accuracy, second_best_transfer_resnet_model = train_model(
    epochs=5, batch_size=16, learning_rate=0.0001, pre_trained_model=pre_trained_resnet_model,
    last_output=last_output_resnet_model, iterations=2)
print(f"resnet_best_test_accuracy: {resnet_best_test_accuracy}\n")
save_model(model=transfer_resnet_model,
           path="/content/drive/MyDrive/resarchMethods/trainedModels/transfer_resnet",
           num=100)

# save_model(model=second_best_transfer_resnet_model,
#            path="/content/drive/MyDrive/resarchMethods/trainedModels/transfer_resnet",
#            num=11)

# RESULTS
plot_history(inception_training_history, ["InceptionV3 Loss", "InceptionV3 Accuracy"])
plot_history(resnet_training_history, ["ResNet50 Loss", "ResNet50 Accuracy"])

# hard 0/1 predictions: sigmoid outputs are thresholded at 0.5
resnet_test_y_hat = transfer_resnet_model.predict(x_test_transformed)
resnet_test_y_hat = [0 if x < 0.5 else 1 for x in resnet_test_y_hat]
print("ResNet50 results:")
evaluate_metrics(y_test, resnet_test_y_hat)

inception_test_y_hat = transfer_inception_model.predict(x_test_transformed)
inception_test_y_hat = [0 if x < 0.5 else 1 for x in inception_test_y_hat]
print("InceptionV3 results:")
evaluate_metrics(y_test, inception_test_y_hat)

# USER STUDY
# File names of the 48 images used in the user study; every name is looked up in
# images_names further below, so each one has to exist in the loaded image folder.
# NOTE(review): "20200924_175935_color_0_340_101_626_412_zssr_X4.00X4.00.jpg" is listed twice
# (20th and 40th entry) - confirm that the repetition is intended.
X_user_test_names = ["20200916_171401_color_0_269_7_550_323_zssr_X4.00X4.00.jpg",
                     "20200916_171639_color_0_365_31_699_297_zssr_X4.00X4.00.jpg",
                     "20200903_183547_color_0_638_256_828_403_zssr_X4.00X4.00.jpg",
                     "20200903_184103_color_0_289_250_473_422_zssr_X4.00X4.00.jpg",
                     "20200909_181531_color_0_502_31_703_285_zssr_X4.00X4.00.jpg",
                     "20200916_171959_color_0_200_58_583_307_zssr_X4.00X4.00.jpg",
                     "20200909_182356_color_0_466_32_649_199_zssr_X4.00X4.00.jpg",
                     "20200916_173050_color_0_332_17_670_342_zssr_X4.00X4.00.jpg",
                     "20200916_175634_color_0_311_39_669_328_zssr_X4.00X4.00.jpg",
                     "20200909_183002_color_0_471_295_640_463_zssr_X4.00X4.00.jpg",
                     "20200909_183524_color_0_128_124_384_385_zssr_X4.00X4.00.jpg",
                     "20200909_183925_color_0_576_38_740_228_zssr_X4.00X4.00.jpg",
                     "20200916_182123_color_0_335_102_748_376_zssr_X4.00X4.00.jpg",
                     "20200924_171334_color_0_417_177_653_390_zssr_X4.00X4.00.jpg",
                     "20200924_171405_color_0_345_149_537_380_zssr_X4.00X4.00.jpg",
                     "20200909_185532_color_0_538_127_836_431_zssr_X4.00X4.00.jpg",
                     "20200909_185756_color_0_230_13_418_156_zssr_X4.00X4.00.jpg",
                     "20200924_171412_color_0_232_160_531_381_zssr_X4.00X4.00.jpg",
                     "20200916_174842_color_0_363_194_734_441_zssr_X4.00X4.00.jpg",
                     "20200924_175935_color_0_340_101_626_412_zssr_X4.00X4.00.jpg",
                     "20200924_180124_color_0_345_170_702_407_zssr_X4.00X4.00.jpg",
                     "20200916_174948_color_0_297_96_735_339_zssr_X4.00X4.00.jpg",
                     "20200924_171726_color_0_330_266_594_469_zssr_X4.00X4.00.jpg",
                     "20200924_181100_color_0_298_163_659_400_zssr_X4.00X4.00.jpg",
                     "20200924_175552_color_0_522_19_760_232_zssr_X4.00X4.00.jpg",
                     "20200924_183303_color_0_353_98_691_430_zssr_X4.00X4.00.jpg",
                     "20200903_181929_color_0_153_182_371_455_zssr_X4.00X4.00.jpg",
                     "20200916_172836_color_0_343_120_640_417_zssr_X4.00X4.00.jpg",
                     "20200903_181824_color_0_587_226_818_398_zssr_X4.00X4.00.jpg",
                     "20200916_173040_color_0_355_169_660_415_zssr_X4.00X4.00.jpg",
                     "20200903_184822_color_0_112_229_328_411_zssr_X4.00X4.00.jpg",
                     "20200924_171743_color_0_323_90_680_468_zssr_X4.00X4.00.jpg",
                     "20200924_173933_color_0_343_211_662_466_zssr_X4.00X4.00.jpg",
                     "20200909_182350_color_0_484_18_674_215_zssr_X4.00X4.00.jpg",
                     "20200924_174418_color_0_377_117_650_361_zssr_X4.00X4.00.jpg",
                     "20200924_175450_color_0_387_247_665_467_zssr_X4.00X4.00.jpg",
                     "20200909_183653_color_0_605_289_761_469_zssr_X4.00X4.00.jpg",
                     "20200909_184634_color_0_496_85_635_215_zssr_X4.00X4.00.jpg",
                     "20200909_184742_color_0_455_151_624_283_zssr_X4.00X4.00.jpg",
                     "20200924_175935_color_0_340_101_626_412_zssr_X4.00X4.00.jpg",
                     "20200924_172008_color_0_321_105_739_466_zssr_X4.00X4.00.jpg",
                     "20200903_185411_color_0_60_238_146_323_zssr_X4.00X4.00.jpg",
                     "20200909_180753_color_0_428_280_565_397_zssr_X4.00X4.00.jpg",
                     "20200924_173000_color_0_347_200_609_452_zssr_X4.00X4.00.jpg",
                     "20200909_181849_color_0_618_185_832_418_zssr_X4.00X4.00.jpg",
                     "20200909_182055_color_0_307_199_496_443_zssr_X4.00X4.00.jpg",
                     "20200924_173805_color_0_196_62_495_274_zssr_X4.00X4.00.jpg",
                     "20200903_185154_color_0_677_266_833_389_zssr_X4.00X4.00.jpg"]

# The 48 reference labels of the user study; passed as the ground truth to evaluate_metrics below.
# presumably aligned position by position with X_user_test_names - verify against the study data
y_user_test = [0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
               0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1]

# Pick the user-study images by file name and normalize them with the test generator
X_user_test = np.array(
    [images[images_names.index(sample_name)] for sample_name in X_user_test_names])
X_user_test = data_generator_test.standardize(X_user_test)

# Model predictions: sigmoid outputs thresholded at 0.5
y_hat_inception_user_study = [
    0 if score < 0.5 else 1 for score in transfer_inception_model.predict(X_user_test)]

# Answers given by the users for the same images
y_hat_users_user_study = [
    0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0,
    0, 1, 1, 0,
    1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1,
]

# USER STUDY EVALUATION
print("InceptionV3 user study results:")
evaluate_metrics(y_user_test, y_hat_inception_user_study)

print("Users user study results:")
evaluate_metrics(y_user_test, y_hat_users_user_study)

### Statistical tests

# Model to model


# r = set_from_names(["sn1", "sn3"], ["s1", "s2", "s3"], ["sn1","sn2","sn3"])

# dst_0 = 'D:\Documents\Studies\Documents for higher education\Courses\Year 4 Semester 1\שיטות מחקר\עבודות להגשה\\user study\\0'
# dst_1 = 'D:\Documents\Studies\Documents for higher education\Courses\Year 4 Semester 1\שיטות מחקר\עבודות להגשה\\user study\\1'

# limit_0 = 27
# limit_1 = 33
# count_0 = 0
# count_1 = 0
# for idx, image_name in enumerate(X_test.values):
#     full_file_path = os.path.join(src, image_name)
#
#     if y_test.values[idx] == 0 and count_0 < limit_0:
#         dst_file_path = os.path.join(dst_0, image_name)
#         copyfile(full_file_path, dst_file_path)
#         count_0 += 1
#     elif y_test.values[idx] == 1 and count_1 < limit_1:
#         dst_file_path = os.path.join(dst_1, image_name)
#         copyfile(full_file_path, dst_file_path)
#         count_1 += 1
#
#     if count_0 + count_1 == 60:
#         break


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
inception: last layer output shape: (None, 6, 6, 2048)
resnet_model: last layer output shape: (None, 8, 8, 2048)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
resnet_best_test_accuracy: 0.7888446215139442



NameError: ignored

### Statistical tests

In [None]:
from statsmodels.stats.contingency_tables import mcnemar
import numpy as np

# Toy example: ground truth and the predictions of two classifiers (a and b)
labels = [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0]
pred_a = [0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
pred_b = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

# Boolean masks: True where the classifier predicted the label correctly
correct_a = np.equal(labels, pred_a)
correct_b = np.equal(labels, pred_b)

# Agreement counts; first letter refers to classifier a, second to classifier b (y = correct, n = wrong)
count_y_y = int(np.count_nonzero(correct_a & correct_b))
count_y_n = int(np.count_nonzero(correct_a & ~correct_b))
count_n_y = int(np.count_nonzero(~correct_a & correct_b))
count_n_n = int(np.count_nonzero(~correct_a & ~correct_b))

# McNemar test on the 2x2 contingency table (rows: a correct / wrong, columns: b correct / wrong)
con_table = [[count_y_y, count_y_n], [count_n_y, count_n_n]]
# con_table = [[100,150], [100,20]]
result = mcnemar(con_table, exact=True)
print('statistic=%.3f, p-value=%.3f' % (result.statistic, result.pvalue))
# interpret the p-value
alpha = 0.05
print('Same proportions of errors (fail to reject H0)' if result.pvalue > alpha
      else 'Different proportions of errors (reject H0)')

from scipy.stats import wilcoxon

# One-sided Wilcoxon signed-rank test on the per-sample difference (b correct minus a correct)
d = correct_b.astype(int) - correct_a.astype(int)
w, p = wilcoxon(d, alternative='greater')
print(f"w: {w}, p-value: {p}")
