In [1]:
import csv
import cv2
import datetime
import ipynbname
import math
import numpy
import os
import pandas
import pathlib
import skimage.io
import skimage.transform
import sklearn.model_selection
import tensorflow
import time

from albumentations import (
    Compose, HorizontalFlip, ShiftScaleRotate, ElasticTransform,
    RandomBrightness, RandomContrast, RandomGamma
)

from metrics import dice_coef, jaccard_distance
from model import unet_model, get_loss_function
from AugmentationSequence import AugmentationSequence

2022-09-12 20:43:24.340538: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-12 20:43:24.505706: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2022-09-12 20:43:25.109186: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/xandao/miniconda3/lib/python3.8/site-packages/cv2/../../lib64:
2022-09-12 20:43:25.109246: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.s

# GPU configuration

In [2]:
# Ask TensorFlow which GPUs it can see and turn on memory growth for each one.
gpus = tensorflow.config.experimental.list_physical_devices("GPU")
try:
    # Memory growth has to be configured identically on every GPU, so the
    # setting is applied to all of them. An empty list simply skips the loop.
    for device in gpus:
        print(f"GPU: {device.name}")
        tensorflow.config.experimental.set_memory_growth(device, True)
except RuntimeError as error:
    # Report the problem instead of aborting the notebook.
    print(error)

GPU: /physical_device:GPU:0


2022-09-12 20:43:25.894225: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-12 20:43:25.949768: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-12 20:43:25.949955: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero


In [3]:
# Run configuration. The whole dict is also handed to unet_model(), so keys may
# be read there in addition to the uses noted below.
cfg = dict(
    channel=3,                # size of the last axis when the images are reshaped
    batch_size=4,             # batch size for the training sequence / steps per epoch
    fold=5,                   # only written to cfg.csv in the visible cells
    epochs=75,                # number of epochs passed to model.fit
    image_size=512,           # height and width used when reshaping images and masks
    learning_rate=0.001,      # initial learning rate of the Adam optimizer
    random_state=1234,        # seed for the train/validation split
    test_size=0.2,            # only written to cfg.csv in the visible cells
    val_size=0.05,            # fraction of samples held out as validation data
    path_dataset="dataset",   # root folder of the input data
    path_out="out",           # root folder for training outputs
    loss_function="dice",     # name resolved through get_loss_function
    data_augmentation=True,   # only written to cfg.csv in the visible cells
)

# Folders the loader reads the input images and their masks from.
images_folder = os.path.join(
    cfg["path_dataset"], "IMAGEM_ORIGINAL", "CONVERTIDAS", "RGB", "512", "OUT"
)
masks_folder = os.path.join(
    cfg["path_dataset"], "MASK", "BITMAP", "512", "OUT"
)

In [4]:
# Module-level accumulators filled by load_files(); the three lists stay
# index-aligned (mask, image and file stem of the same sample).
list_labels = []
list_images = []
list_images_names = []


def load_files():
    """Load every mask under ``masks_folder`` together with its matching image.

    For each mask file (visited in sorted path order) the image named
    ``<mask stem>.png`` is read from ``images_folder``. Both arrays are divided
    by 255 and stored as float32 (presumably 8-bit inputs, giving values in
    [0, 1] -- confirm against the dataset). Results are appended to the
    module-level lists ``list_labels``, ``list_images`` and
    ``list_images_names``; nothing is returned.

    Raises:
        Whatever ``skimage.io.imread`` raises when a mask or its matching
        image cannot be read (for example a missing ``.png``).
    """
    for file in sorted(pathlib.Path(masks_folder).rglob("*")):
        # rglob("*") yields directories as well as files; a directory cannot
        # be decoded as an image, so skip anything that is not a regular file.
        if not file.is_file():
            continue

        mask = skimage.io.imread(file.resolve())
        list_labels.append(numpy.float32(mask / 255))

        # The image shares the mask's stem and is looked up directly in
        # images_folder (no sub-directory structure is mirrored).
        image = skimage.io.imread(os.path.join(images_folder, f"{file.stem}.png"))
        list_images.append(numpy.float32(image / 255))

        list_images_names.append(str(file.stem))


load_files()
print(len(list_labels), len(list_images), len(list_images_names))

375 375 375


In [5]:
# Pack the loaded samples into 4-D arrays: one entry per sample, square
# image_size x image_size frames, `channel` planes for images and one for masks.
x = numpy.reshape(
    numpy.array(list_images),
    (len(list_images), cfg["image_size"], cfg["image_size"], cfg["channel"]),
)
y = numpy.reshape(
    numpy.array(list_labels),
    (len(list_labels), cfg["image_size"], cfg["image_size"], 1),
)

# Hold out cfg["val_size"] of the samples as validation data; the fixed seed
# keeps the split reproducible between runs.
x_train, x_val, y_train, y_val = sklearn.model_selection.train_test_split(
    x,
    y,
    test_size=cfg["val_size"],
    random_state=cfg["random_state"],
)

print(x.shape, y.shape)

(375, 512, 512, 3) (375, 512, 512, 1)


In [None]:
# --- Output location -------------------------------------------------------
# Each run writes into its own timestamped folder: <path_out>/train/<timestamp>.
current_datetime = datetime.datetime.now().strftime("%d-%m-%Y-%H-%M-%S")
path_model = os.path.join(cfg["path_out"], "train", current_datetime)
pathlib.Path(path_model).mkdir(parents=True, exist_ok=True)

# --- Training data ---------------------------------------------------------
# Augmentation pipeline handed to AugmentationSequence together with the
# training arrays.
# NOTE(review): cfg["data_augmentation"] is never consulted here, so the
# pipeline is always applied -- confirm whether the flag should gate it.
augment = Compose([
    HorizontalFlip(),
    ShiftScaleRotate(rotate_limit=45, border_mode=cv2.BORDER_CONSTANT),
    ElasticTransform(border_mode=cv2.BORDER_CONSTANT),
    RandomBrightness(),
    RandomContrast(),
    RandomGamma()
])
# Round up so a final, smaller batch is still counted as a step.
steps_per_epoch = math.ceil(x_train.shape[0] / cfg["batch_size"])
train_generator = AugmentationSequence(x_train, y_train, cfg["batch_size"], augment)

# --- Callbacks -------------------------------------------------------------
# Learning-rate schedule driven by the *training* loss (factor 0.5, patience 3).
reduce_learning_rate = tensorflow.keras.callbacks.ReduceLROnPlateau(monitor="loss", factor=0.5, patience=3, verbose=1)
# Only the best model is kept on disk (save_best_only=True); the training log
# shows the checkpoint tracking val_loss.
filename_model = os.path.join(path_model, "unet.h5")
checkpointer = tensorflow.keras.callbacks.ModelCheckpoint(filename_model, verbose=1, save_best_only=True)

# --- Model -----------------------------------------------------------------
# Reset Keras' global state BEFORE any model object exists. Doing this after
# the model has been built and compiled would reset state while the model that
# is about to be trained is already alive.
tensorflow.keras.backend.clear_session()
strategy = tensorflow.distribute.MirroredStrategy()

# Model, optimizer and metrics are created inside the strategy scope.
with strategy.scope():
    model = unet_model(cfg)
    adam_opt = tensorflow.keras.optimizers.Adam(learning_rate=cfg["learning_rate"])
    model.compile(optimizer=adam_opt, loss=get_loss_function(cfg["loss_function"]), metrics=[dice_coef, jaccard_distance, tensorflow.keras.metrics.Precision(), tensorflow.keras.metrics.Recall()])

# --- Training --------------------------------------------------------------
start_time = time.time()
fit = model.fit(train_generator, steps_per_epoch=steps_per_epoch, epochs=cfg["epochs"], validation_data=(x_val, y_val), callbacks=[checkpointer, reduce_learning_rate])
# Despite its name, end_time holds the elapsed training time in seconds; the
# name is kept because the run-summary cell reads it.
end_time = time.time() - start_time

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/

2022-09-12 20:43:29.971014: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-12 20:43:29.971625: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-12 20:43:29.971818: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-12 20:43:29.971926: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:980] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Epoch 1/75


2022-09-12 20:43:30.956782: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_2"
op: "FlatMapDataset"
input: "TensorDataset/_1"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_flat_map_fn_2408"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\020FlatMapDataset:1"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
        dim {
          size: -1
        }
        dim {
          size: -1
        }
        dim {
          size: -1
        }
      }
      shape {
        dim {
          size: -1
        }
        dim {
          size: -1


Epoch 1: val_loss improved from inf to 0.35091, saving model to out/train/12-09-2022-20-43-29/unet.h5
Epoch 2/75
Epoch 2: val_loss did not improve from 0.35091
Epoch 3/75
Epoch 3: val_loss did not improve from 0.35091
Epoch 4/75
Epoch 4: val_loss did not improve from 0.35091
Epoch 5/75
Epoch 5: val_loss improved from 0.35091 to 0.12408, saving model to out/train/12-09-2022-20-43-29/unet.h5
Epoch 6/75
Epoch 6: val_loss did not improve from 0.12408
Epoch 7/75
Epoch 7: val_loss improved from 0.12408 to 0.09972, saving model to out/train/12-09-2022-20-43-29/unet.h5
Epoch 8/75
Epoch 8: val_loss improved from 0.09972 to 0.06737, saving model to out/train/12-09-2022-20-43-29/unet.h5
Epoch 9/75
Epoch 9: val_loss did not improve from 0.06737
Epoch 10/75
Epoch 10: val_loss did not improve from 0.06737
Epoch 11/75
Epoch 11: val_loss did not improve from 0.06737
Epoch 12/75
Epoch 12: val_loss did not improve from 0.06737

Epoch 12: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.

In [None]:
def _format_duration(total_seconds):
    """Format a duration in seconds as zero-padded HH:MM:SS.

    Unlike time.strftime("%H:%M:%S", time.gmtime(...)), the hour field does not
    wrap back to 00 once the duration reaches 24 hours.
    """
    hours, remainder = divmod(int(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def _localize_float(value):
    """Render floats with a decimal comma; leave every other value untouched."""
    return str(value).replace(".", ",") if isinstance(value, float) else value


# Label/value pairs describing this run, in the order they are written to
# cfg.csv. Keeping each label next to its value prevents the two lists below
# from drifting out of sync.
run_summary = [
    ("batch_size", cfg["batch_size"]),
    ("epochs", cfg["epochs"]),
    ("learning_rate", cfg["learning_rate"]),
    ("loss_function", cfg["loss_function"]),
    ("images", images_folder),
    ("masks", masks_folder),
    ("len_images", len(list_images)),
    ("len_masks", len(list_labels)),
    ("channel", cfg["channel"]),
    ("image_size", cfg["image_size"]),
    ("fold", cfg["fold"]),
    ("test_size", cfg["test_size"]),
    ("val_size", cfg["val_size"]),
    ("random_state", cfg["random_state"]),
    ("path_dataset", cfg["path_dataset"]),
    ("path_out", cfg["path_out"]),
    ("data_augmentation", cfg["data_augmentation"]),
    ("filename_script", ipynbname.name()),
    # end_time holds the elapsed training time in seconds.
    ("time", _format_duration(end_time)),
]
columns = [label for label, _ in run_summary]
data = [value for _, value in run_summary]

# One row per setting: the labels form the index and the values the single
# column, so the CSV written below (header=False) is a two-column key;value
# file. Floats are converted to decimal-comma strings up front, which avoids
# DataFrame.applymap (deprecated in newer pandas) and gives the same frame.
dataframe_cfg = pandas.DataFrame([_localize_float(value) for value in data], index=columns)
dataframe_cfg.to_csv(os.path.join(path_model, "cfg.csv"), decimal=",", sep=";", na_rep=" ", header=False, quoting=csv.QUOTE_ALL)
dataframe_cfg