In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

IMG_WIDTH, IMG_HEIGHT = 224, 224

import tensorflow_hub as hub
from tensorflow.keras.applications.densenet import (
    DenseNet121,
    preprocess_input,
)
import pandas as pd
import numpy as np
import os
import IPython.display as display
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
from tensorflow.keras.layers import (
    Dense,
    GlobalAveragePooling2D,
    Conv2D,
    Flatten,
    GlobalMaxPooling2D,
    Dropout,
)

from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, Nadam
from datetime import datetime
from packaging import version
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import (
    TensorBoard,
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
)
import efficientnet.tfkeras as enet


gpus = tf.config.experimental.list_physical_devices("GPU")
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices("GPU")
        print(
            len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs",
        )
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)


def append_extension(fn):
    return (fn + ".jpg").zfill(7)


def ordered_logit(class_number):
    # zero portability
    target = np.zeros(4, dtype=int)
    target[: class_number - 2] = 1
    return target


DATADIR = r"./adult"
CSV_PATH = r"./adult/CastControls_ALP.xlsx"
response = pd.read_excel(CSV_PATH, sheet_name=0,)[["GreenID", "Grade"]].dropna(
    axis=0, subset=["Grade"]
)
response.Grade = response.Grade.astype("int")
response.GreenID = response.GreenID.astype("str").apply(append_extension)
response = response[response.Grade != 99]
response = pd.concat(
    [response, pd.DataFrame.from_dict(dict(response.Grade.apply(ordered_logit))).T,],
    axis=1,
)


# shuffle dataset
response = response.sample(frac=1)
seed = np.random.randint(30027)


def soft_acc(y_true, y_pred):
    return K.mean(K.equal(K.round(y_true), K.round(y_pred)))


def soft_acc_multi_output(y_true, y_pred):
    return K.mean(
        K.all(
            K.equal(
                K.cast(K.round(y_true), "int32"), K.cast(K.round(y_pred), "int32"),
            ),
            axis=1,
        )
    )

from tensorflow.keras import mixed_precision

policy = tf.keras.mixed_precision.experimental.Policy("mixed_float16")
mixed_precision.experimental.set_policy(policy)

1 Physical GPUs, 1 Logical GPUs


In [8]:
def generate_train_val_test(train_index, val_index, test_index):
    train_dataset = response.iloc[train_index]
    val_dataset = response.iloc[val_index]
    test_dataset = response.iloc[test_index]
    train_gen = ImageDataGenerator(
        rotation_range=5,
        fill_mode="reflect",
        horizontal_flip=True,
        rescale=1.0 / 255.0,
        zoom_range=0.1,
    )
    valid_test_gen = ImageDataGenerator(rescale=1.0 / 255.0,)

    train_set = train_gen.flow_from_dataframe(
        dataframe=train_dataset,
        directory=DATADIR,
        x_col="GreenID",
        target_size=(224, 224),
        color_mode="rgb",
        subset="training",
        shuffle=True,
        y_col=[0, 1, 2, 3,],
        class_mode="raw",
    )

    validation_set = valid_test_gen.flow_from_dataframe(
        dataframe=val_dataset,
        directory=DATADIR,
        x_col="GreenID",
        target_size=(224, 224),
        color_mode="rgb",
        subset="training",
        shuffle=False,
        batch_size=64,
        y_col=[0, 1, 2, 3,],
        class_mode="raw",
    )

    test_set = valid_test_gen.flow_from_dataframe(
        dataframe=test_dataset,
        directory=DATADIR,
        x_col="GreenID",
        target_size=(224, 224),
        color_mode="rgb",
        subset="training",
        shuffle=False,
        batch_size=64,
        y_col=[0, 1, 2, 3,],
        class_mode="raw",
    )
    return train_set, validation_set, test_set


from sklearn.model_selection import StratifiedKFold

kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
innerkf = StratifiedKFold(n_splits=2, shuffle=True, random_state=seed)
response = response.sample(frac=1.0)
early_stopping = EarlyStopping(
    monitor="val_loss", patience=21, restore_best_weights=True,
)
reduce_lr_plateau = ReduceLROnPlateau(monitor="val_loss", patience=7, factor=0.8)


def generate_base_model(fine_tune=None):
    i = tf.keras.layers.Input([224, 224, 3], dtype = tf.uint8)
    x = tf.cast(i, tf.float32)
    x = tf.keras.applications.vgg16.preprocess_input(x)
    conv_base = tf.keras.applications.VGG16(
    include_top=False, weights='imagenet', input_tensor=None, input_shape=None,
    pooling="avg", classes=5,)
    conv_base.trainable = True
    x = conv_base(x)
    x = Dropout(0.5)(x)
    preds = Dense(4, activation="sigmoid")(x)
    model = Model(inputs=i, outputs=preds)
                                                     
    model.trainable = True
#     for layer in model.layers[:fine_tune]:
#         layer.trainable = False
#     for layer in model.layers[fine_tune:]:
#         layer.trainable = True
                                                     
                                                     
    model.compile(
        optimizer=keras.optimizers.Nadam(),
        loss="binary_crossentropy",
        metrics=[soft_acc_multi_output],
    )

    return model


In [9]:
import gc
def stratified_cv(fine_tune_layer=None):
    acc_coef_scores = []
    raw_outputs = []
    for train_index, val_test_index in kf.split(
        np.zeros(len(response)), response["Grade"]
    ):
        val_index, test_index = next(
            innerkf.split(
                np.zeros(len(val_test_index)), response["Grade"].iloc[val_test_index]
            )
        )
        val_index, test_index = val_test_index[val_index], val_test_index[test_index]
        train_set, validation_set, test_set = generate_train_val_test(
            train_index, val_index, test_index
        )
        model = generate_base_model()

        _ = model.fit(
            x=train_set,
            epochs=100,
            validation_data=validation_set,
            callbacks=[early_stopping, reduce_lr_plateau],
#             verbose=0,
        )

        batch = next(test_set)
        true_labels = batch[1]
        predictions = model.predict(batch[0])
        acc = soft_acc_multi_output(predictions, true_labels).numpy()
        corr = np.corrcoef(np.sum(predictions, axis=1), np.sum(true_labels, axis=1))[0][
            1
        ]
        acc_coef_scores.append([acc, corr])
        raw_outputs.append([np.array(response.iloc[test_index].index), true_labels, predictions])
        del train_set, validation_set, test_set, _, model, batch, true_labels, predictions, acc, corr
        tf.keras.backend.clear_session()
        gc.collect()
    return acc_coef_scores, raw_outputs

In [10]:
acc_coef_scores, raw_outputs = stratified_cv()

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 451 validated image filenames.
Found 55 validated image filenames.
Found 57 validated image filenames.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 15 steps, validate for 1 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100


  c /= stddev[:, None]
  c /= stddev[None, :]
  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 450 validated image filenames.
Found 57 validated image filenames.
Found 56 validated image filenames.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 15 steps, validate for 1 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100


  c /= stddev[:, None]
  c /= stddev[None, :]
  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 450 validated image filenames.
Found 56 validated image filenames.
Found 57 validated image filenames.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 15 steps, validate for 1 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100


  c /= stddev[:, None]
  c /= stddev[None, :]
  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 450 validated image filenames.
Found 56 validated image filenames.
Found 57 validated image filenames.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 15 steps, validate for 1 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100


  c /= stddev[:, None]
  c /= stddev[None, :]
  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 451 validated image filenames.
Found 55 validated image filenames.
Found 57 validated image filenames.
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 15 steps, validate for 1 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100


  c /= stddev[:, None]
  c /= stddev[None, :]


NameError: name 'fine_tune_scores_acc_coef' is not defined

In [None]:
# maybe also include an untuned version for comparison?
# np.array([list(response.iloc[np.stack(np.array(raw_outputs)[0,:,0])[i]].index) for i in range(5)])

In [12]:
acc_coef = []
raw_outputs = []
acc_coef.append(acc_coef_scores)
raw_outputs.append(raw_outputs)

In [15]:
import numpy as np

print(len(acc_coef))


from sklearn.metrics import confusion_matrix


def show_confusion_matrix(raw_outputs):

    y_true = np.sum(raw_outputs[:, 1], axis=1)
    y_pred = np.sum(
        np.rint(raw_outputs[:, 2]), axis=1
    ).astype(int)
    return confusion_matrix(y_true, y_pred)

def show_matrix_percentage(confusion_matrix):
    return np.transpose(np.transpose(my_confusion_matrix) / np.sum(my_confusion_matrix, axis=1))

# total accuracy
def calculate_accuracy(my_confusion_matrix):
    return np.trace(my_confusion_matrix)/np.sum(my_confusion_matrix)

# max_acc_layer = np.argmax([calculate_accuracy(show_confusion_matrix(raw_outputs, i))  for i in range(len(acc_coef))])

my_confusion_matrix = show_confusion_matrix(raw_outputs)
print(my_confusion_matrix)
print("+++++++++++++++++++++++++++++++++")
print("+++++++++++++++++++++++++++++++++")
print(show_matrix_percentage(my_confusion_matrix))
print("+++++++++++++++++++++++++++++++++")
print("+++++++++++++++++++++++++++++++++")
print(calculate_accuracy(my_confusion_matrix)*100)

import matplotlib.pyplot as plt
plt.plot([i for i in range(len(acc_coef))],[calculate_accuracy(show_confusion_matrix(raw_outputs, i))  for i in range(len(acc_coef))])
print("+++++++++++++++++++++++++++++++++")
print("+++++++++++++++++++++++++++++++++")

trainable_sequence = np.array([227, 225, 217, 214, 210, 202, 199, 195, 187, 184, 180, 172, 169,
       167, 159, 156, 152, 144, 141, 137, 129, 126, 124, 116, 113, 109,
       101,  98,  94,  86,  83,  81,  73,  70,  66,  58,  55,  53,  45,
        42,  38,  30,  27,  25,  17,  14,  12,   4,   1])
# print(f"max accuracy with tuning from {trainable_sequence[max_acc_layer]} layers, or tune {233-trainable_sequence[max_acc_layer]} layers")
print([show_matrix_percentage(my_confusion_matrix)[i,i]*100 for i in range(5)])

1


TypeError: list indices must be integers or slices, not tuple

In [25]:
raw_outputs[0][0][0]

[[...]]

In [None]:
np.array(cvscores)[:, 1][:, 0][4].shape

In [None]:
np.array(cvscores)[:, 0]

In [None]:
from sklearn.metrics import confusion_matrix

confusion_matrix(
    y_true=K.sum(K.cast(K.round(cvscores[1][1][0]), "int32"), axis=1).numpy(),
    y_pred=K.sum(K.cast(K.round(cvscores[1][1][1]), "int32"), axis=1).numpy(),
)

In [None]:
mycsv = pd.DataFrame(
    np.hstack(
        np.array(
            [
                np.vstack(np.array(cvscores)[:, 1][:, 0]),
                np.vstack(np.array(cvscores)[:, 1][:, 1]),
            ]
        )
    )
)

In [None]:
np.allclose(mycsv[range(4, 8)].to_numpy(), np.vstack(np.array(cvscores)[:, 1][:, 1]))

In [None]:
mycsv.to_csv(
    "./stratified_cross_validation_results/effnet_multinomial.csv", index=False
)
# next time include which image?

In [None]:
mycsv = pd.read_csv("./stratified_cross_validation_results/effnet_multinomial.csv")
y_true = np.sum((mycsv[[str(i) for i in range(0, 4)]]).to_numpy(dtype=int), axis=1)
y_pred = np.sum(
    np.rint((mycsv[[str(i) for i in range(4, 8)]]).to_numpy()), axis=1
).astype(int)

In [None]:
from sklearn.metrics import confusion_matrix
my_confusion_matrix = confusion_matrix(y_true, y_pred,)
my_confusion_matrix

In [None]:
np.transpose(np.transpose(my_confusion_matrix) / np.sum(my_confusion_matrix, axis=1))

In [None]:
# coef
np.corrcoef(y_true, np.sum((mycsv[[str(i) for i in range(4, 8)]]).to_numpy(), axis=1))

In [None]:
# acc
sum(np.isclose(y_true, y_pred)) / len(y_pred)

In [None]:
# from tensorflow.keras.utils import plot_model

# plot_model(model, to_file="effnet.png", show_shapes=True)
# from IPython.display import Image

# Image(filename="effnet.png")

In [None]:
response = response.sample(frac=1.0)

test_set = valid_gen.flow_from_dataframe(
    dataframe=response,
    directory=DATADIR,
    x_col="GreenID",
    target_size=(224, 224),
    color_mode="rgb",
    subset="validation",
    shuffle=False,
    batch_size=56,
    y_col=[0, 1, 2, 3,],
    class_mode="raw",
    #     seed = seed
)

batch = next(test_set)
true_labels = batch[1]
predictions = model.predict(batch[0])

print(model.metrics_names)
print(model.evaluate(test_set, verbose=0))  # loss/accuracy

In [None]:
np.corrcoef(np.sum(predictions, axis=1), np.sum(true_labels, axis=1))

In [None]:
fig, ax = plt.subplots(dpi=80)
batch = next(test_set)

y_true = batch[1]
y_pred = model.predict(batch[0])
print(soft_acc_multi_output(y_true, y_pred))

# print examples from the validation set
for i in range(len(batch[1])):
    img = batch[0][i]
    label = batch[1][i]
    assert (label == y_true[i]).all()
    right = K.all(
        K.equal(K.cast(K.round(label), "int32"), K.cast(K.round(y_pred[i]), "int32"),)
    )
    plt.imshow(img)
    plt.show()
    print(f"true label: {label}; rounded pred: {y_pred[i]}; Correct: {right}")

In [None]:
list_ds = tf.data.Dataset.list_files(
    str("C:/Users/feroc/OneDrive - The University of Melbourne/Dataset/adult/*")
)


def decode_img(img):
    # convert the compressed string to a 3D uint8 tensor
    img = tf.image.decode_jpeg(img, channels=3)
    # Use `convert_image_dtype` to convert to floats in the [0,1] range.
    img = tf.image.convert_image_dtype(img, tf.float32)
    # resize the image to the desired size.
    return tf.image.resize(img, [IMG_WIDTH, IMG_HEIGHT])


def get_label(file_path):
    # convert the path to a list of path components
    image_id = tf.strings.split(file_path, os.path.sep)[-1]
    return response.loc[] 

list(list_ds.take(1).as_numpy_iterator())[0]
tf.strings.split(list(list_ds.take(1).as_numpy_iterator())[0],os.path.sep)[-1].numpy()