In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import datetime
import math
import numpy
import os
import pandas
import pathlib
import skimage.io
import skimage.transform
import sklearn.model_selection
import tensorflow
import time


from files import create_folder, save_fit_history, save_lossgraph, save_figs
from metrics import dice_coef, jaccard_distance
from model import evaluate, unet_model, get_loss_function

In [7]:
class CreateSequence(tensorflow.keras.utils.Sequence):
    """Keras ``Sequence`` that reads image/mask batches from disk on demand.

    Args:
        batch_size: Maximum number of samples per batch.
        img_size: Tuple with the spatial dimensions of every image; it is
            concatenated with the channel count to size the batch arrays.
        input_img_paths: Sliceable collection of path objects (each must offer
            ``.resolve()``) pointing at the input images.
        target_img_paths: Mask paths, aligned index-by-index with
            ``input_img_paths``.
    """

    def __init__(self, batch_size, img_size, input_img_paths, target_img_paths):
        # Newer Keras releases expect subclasses to run the base initialiser;
        # on older releases this call is a harmless no-op.
        super().__init__()
        self.batch_size = batch_size
        self.img_size = img_size
        self.input_img_paths = input_img_paths
        self.target_img_paths = target_img_paths

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceiling division: flooring here would silently drop the last
        # ``len(paths) % batch_size`` samples from every pass over the data.
        return (len(self.target_img_paths) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Return the ``(input, target)`` arrays that correspond to batch ``idx``.

        Both arrays are ``float32``. Inputs are allocated with 3 channels and
        targets with 1 channel; pixel values are divided by 255. The final
        batch may contain fewer than ``batch_size`` samples.
        """
        start = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[start : start + self.batch_size]
        batch_target_img_paths = self.target_img_paths[start : start + self.batch_size]

        # Size the arrays by the number of paths actually in this slice so a
        # short final batch is not padded with all-zero samples.
        x = numpy.zeros((len(batch_input_img_paths),) + self.img_size + (3,), dtype="float32")
        for j, path in enumerate(batch_input_img_paths):
            img = skimage.io.imread(path.resolve())
            img = numpy.float32(img/255)
            x[j] = img

        y = numpy.zeros((len(batch_target_img_paths),) + self.img_size + (1,), dtype="float32")
        for j, path in enumerate(batch_target_img_paths):
            mask = skimage.io.imread(path.resolve())
            # Append a channel axis; assumes the mask file decodes to a 2-D array — TODO confirm.
            mask = numpy.expand_dims(mask, 2)
            mask = numpy.float32(mask/255)
            y[j] = mask

        return x, y

# GPU

In [8]:
# Ask TensorFlow which GPUs are visible and enable memory growth on each one.
gpus = tensorflow.config.experimental.list_physical_devices("GPU")
try:
    # With no GPU present the list is empty and the loop body never runs.
    # Currently, memory growth needs to be the same across GPUs
    for gpu in gpus:
        print(f"GPU: {gpu.name}")
        tensorflow.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as error:
    # Report the problem and let the notebook continue.
    print(error)

GPU: /physical_device:GPU:0


In [9]:
# Experiment settings shared by the cells below; the whole mapping is also
# handed to unet_model() and dumped to cfg.csv.
cfg = dict(
    channel=3,
    batch_size=4,
    fold=5,                # number of KFold splits
    epochs=75,
    image_size=256,        # used for both spatial dimensions
    learning_rate=0.001,   # initial Adam learning rate
    random_state=1234,     # seed for KFold and train_test_split
    test_size=0.2,
    val_size=0.05,         # share of each training fold held out for validation
    path_dataset="dataset",
    path_out="out",
    loss_function="dice",
    data_augmentation=False,
)

# Sub-directories (relative to the dataset root) holding the images and masks.
_images_subdirs = ("IMAGEM_ORIGINAL", "CONVERTIDAS", "RGB", "256", "OUT")
_masks_subdirs = ("MASK", "BITMAP", "256", "OUT")
images_folder = os.path.join(cfg["path_dataset"], *_images_subdirs)
masks_folder = os.path.join(cfg["path_dataset"], *_masks_subdirs)

In [10]:
def _list_files(folder):
    """Return every regular file below ``folder`` (searched recursively), sorted by path."""
    return [entry for entry in sorted(pathlib.Path(folder).rglob("*")) if entry.is_file()]


# x holds the image paths, y the mask paths.
x = numpy.array(_list_files(images_folder))
y = numpy.array(_list_files(masks_folder))

# Images and masks are paired purely by their position in the sorted listings,
# so differing counts would mis-pair samples (or fail later while indexing).
if len(x) != len(y):
    raise ValueError(
        f"Found {len(x)} images in {images_folder!r} but {len(y)} masks in {masks_folder!r}; "
        "every image needs exactly one mask."
    )

kf = sklearn.model_selection.KFold(n_splits=cfg["fold"], shuffle=True, random_state=cfg["random_state"])
# data = CreateSequence(cfg["batch_size"], (cfg["image_size"], cfg["image_size"]), x, y)

In [11]:
# Cross-validated training run. Everything produced here is written below
# <path_out>/<timestamp>/<fold>/.
models = []
list_evaluate = []
current_datetime = datetime.datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
path = os.path.join(cfg["path_out"], current_datetime)
create_folder([path])

img_size = (cfg["image_size"], cfg["image_size"])

# The strategy does not depend on the fold, so build it once rather than on
# every iteration.
strategy = tensorflow.distribute.MirroredStrategy()

for fold, (train_index, test_index) in enumerate(kf.split(x)):
    # Reset Keras' global state *before* building this fold's model. Doing it
    # after compile() (as before) clears state the fresh model was built with.
    tensorflow.keras.backend.clear_session()

    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Carve the validation subset out of this fold's training portion.
    x_train, x_val, y_train, y_val = sklearn.model_selection.train_test_split(
        x_train, y_train, test_size=cfg["val_size"], random_state=cfg["random_state"]
    )

    train_data = CreateSequence(cfg["batch_size"], img_size, x_train, y_train)
    val_data = CreateSequence(cfg["batch_size"], img_size, x_val, y_val)
    test_data = CreateSequence(cfg["batch_size"], img_size, x_test, y_test)

    print(x_train.shape)
    print(x_val.shape)
    print(x_test.shape)
    print(x.shape)

    path_fold = os.path.join(path, str(fold))
    create_folder([path_fold])

    # Take the step count from the Sequence itself. A separately computed
    # ceil(n / batch_size) can exceed the number of batches the Sequence
    # delivers, and Keras interrupts training once the input runs out before
    # steps_per_epoch is reached.
    steps_per_epoch = len(train_data)
    reduce_learning_rate = tensorflow.keras.callbacks.ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, verbose=1)
    filename_model = os.path.join(path_fold, "unet.h5")
    checkpointer = tensorflow.keras.callbacks.ModelCheckpoint(filename_model, verbose=1, save_best_only=True)

    with strategy.scope():
        model = unet_model(cfg)
        adam_opt = tensorflow.keras.optimizers.Adam(learning_rate=cfg["learning_rate"])
        model.compile(
            optimizer=adam_opt,
            loss=get_loss_function(cfg["loss_function"]),
            metrics=[
                dice_coef,
                jaccard_distance,
                tensorflow.keras.metrics.Precision(),
                tensorflow.keras.metrics.Recall(),
            ],
        )

    start_time = time.time()
    fit = model.fit(
        train_data,
        steps_per_epoch=steps_per_epoch,
        epochs=cfg["epochs"],
        validation_data=val_data,
        callbacks=[checkpointer, reduce_learning_rate],
    )
    end_time = time.time() - start_time  # elapsed training time in seconds

    save_fit_history(fold, fit, path_fold)
    save_lossgraph(fold, fit, path_fold)
    # evaluate(train_data)
    # list_evaluate.append(evaluate(end_time, fold, model, x_train, x_val, x_test, y_train, y_val, y_test))

    # models.append(model)

    # model = tensorflow.keras.models.load_model("unet_rgb.h5", custom_objects = {"dice_loss": dice_loss, "dice_coef": dice_coef, "jaccard_distance": jaccard_distance })

    # save_figs(cfg, list_images_names, test_index, model, path_fold, x)

    # NOTE(review): this break stops after the first fold, so the remaining
    # KFold splits are never trained — confirm this is intentional.
    break
tensorflow.keras.backend.clear_session()

(285,)
(15,)
(75,)
(375,)
INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:l

2022-09-11 10:43:33.194367: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-11 10:43:33.194880: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-11 10:43:33.195031: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-11 10:43:33.195135: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Epoch 1/75


2022-09-11 10:43:34.141916: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_2"
op: "FlatMapDataset"
input: "TensorDataset/_1"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_flat_map_fn_2318"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\020FlatMapDataset:1"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
        dim {
          size: -1
        }
        dim {
          size: -1
        }
        dim {
          size: -1
        }
      }
      shape {
        dim {
          size: -1
        }
        dim {
          size: -1




2022-09-11 10:43:45.232987: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_2"
op: "FlatMapDataset"
input: "TensorDataset/_1"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_flat_map_fn_10706"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\021FlatMapDataset:23"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
        dim {
          size: -1
        }
        dim {
          size: -1
        }
        dim {
          size: -1
        }
      }
      shape {
        dim {
          size: -1
        }
        dim {
          size: -


Epoch 1: val_loss improved from inf to 0.64121, saving model to out/11-09-2022-10-43-33/0/unet.h5
out/11-09-2022-10-43-33/0/fold0-fit.pckl created
out/11-09-2022-10-43-33/0/fold0-lossgraph.png created


In [None]:
# Snapshot of the run settings, stored next to the results as cfg.csv.
run_settings = {
    "batch_size": cfg["batch_size"],
    "epochs": cfg["epochs"],
    "learning_rate": cfg["learning_rate"],
    "loss_function": cfg["loss_function"],
    "images": images_folder,
    "masks": masks_folder,
    "len_images": len(x),
    "len_masks": len(y),
    "channel": cfg["channel"],
    "image_size": cfg["image_size"],
    "fold": cfg["fold"],
    "test_size": cfg["test_size"],
    "val_size": cfg["val_size"],
    "random_state": cfg["random_state"],
    "path_dataset": cfg["path_dataset"],
    "path_out": cfg["path_out"],
    "data_augmentation": cfg["data_augmentation"],
}
columns = list(run_settings.keys())
data = list(run_settings.values())

# The labels become the row index, giving one setting per row in a single
# unnamed value column.
dataframe_cfg = pandas.DataFrame(data, index=columns)
dataframe_cfg.to_csv(os.path.join(path, "cfg.csv"), sep=";", na_rep=" ")
dataframe_cfg

In [None]:
def get_mean(key, list_evaluate):
    """Return, as a string, the mean of ``entry[key]`` over the dicts in ``list_evaluate``."""
    values = [fold_result[key] for fold_result in list_evaluate]
    average = numpy.mean(values)
    return str(average)

def get_std(key, list_evaluate):
    """Return, as a string, the standard deviation (``numpy.std``) of ``entry[key]`` over ``list_evaluate``."""
    values = [fold_result[key] for fold_result in list_evaluate]
    spread = numpy.std(values)
    return str(spread)

# Cross-fold summary: one row per metric and a mean/std column pair for each
# data split, saved as mean.csv in the run folder.
summary_metrics = ["loss", "dice", "jaccard", "precision", "recall"]
summary_columns = {}
for split in ("train", "val", "test"):
    summary_columns[f"mean_{split}"] = [
        get_mean(f"{metric}_{split}", list_evaluate) for metric in summary_metrics
    ]
    summary_columns[f"std_{split}"] = [
        get_std(f"{metric}_{split}", list_evaluate) for metric in summary_metrics
    ]

dataframe_mean = pandas.DataFrame(summary_columns, index=summary_metrics)
dataframe_mean.to_csv(os.path.join(path, "mean.csv"), sep=";", na_rep=" ")
dataframe_mean

In [None]:
# Write one metrics.csv per evaluated fold: a metric x split table followed by
# two extra rows ("fold", "time") in a separate "info" column.
fold_metric_names = ["loss", "dice", "jaccard", "precision", "recall"]

# Stays None when no fold has been evaluated, so the final display line below
# cannot raise NameError on an empty list_evaluate.
dataframe_fold = None

# The loop variable is deliberately not called `evaluate`: that name would
# overwrite the evaluate() function imported from `model`.
for fold_result in list_evaluate:
    filename_fold = os.path.join(path, str(fold_result["fold"]), "metrics.csv")

    dataframe_fold = pandas.DataFrame(
        {
            f"metrics_{split}": [fold_result[f"{metric}_{split}"] for metric in fold_metric_names]
            for split in ("train", "val", "test")
        },
        index=fold_metric_names,
    )

    # Use a list, not a set literal: a set is unordered (pandas refuses it)
    # and would collapse the two values into one if they happened to be equal.
    dataframe_fold_info = pandas.DataFrame(
        [fold_result["fold"], fold_result["time"]], index=["fold", "time"], columns=["info"]
    )

    dataframe_fold = pandas.concat([dataframe_fold, dataframe_fold_info], axis=0)

    dataframe_fold.to_csv(filename_fold, sep=";", na_rep=" ")

dataframe_fold