<a href="https://colab.research.google.com/github/zardemostoles/zardemostoles-uoc.edu/blob/master/TFG_Calculo_Tipo_Tono_Piel_ITA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Carga de módulos que se emplean en el notebook

In [1]:
# Se importan los módulos que se van a utilizar en el notebook

# Módulos básicos
import math
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

# Módulos de tratamiento de imágenes
import PIL
import cv2
from skimage import io, color

In [2]:
# Se comprueba la versión de TensorFlow
print(tf.__version__)
print(tf.keras.__version__)

# Preprocesamiento y entrenamiento de TensorFlow (prefetch) automático
AUTO = tf.data.AUTOTUNE

2.9.2
2.9.0


In [3]:
# Se monta el Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Definición de funciones



### Funciones de tratamiento de TFRecords y Datasets

In [4]:
# Función para convertir una imagen en un tensor, creando un tensor con las 
# dimensiones requeridas y normalizado.
# image: representación RGB de la imagen 
# size: dimensiones del tensor a los que se redimensiona (variable global)
def prepare_image(image_data):
    image = tf.image.decode_jpeg(image_data, channels=3)
    # Dimensiona a las dimensiones del tensor
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    # Normaliza los pixeles en el rango [0, 1]
    image = tf.cast(image, tf.float32) / 255.0 
    return image

# Función para extraer de un TFRecord la imagen, el nombre de la imagen y 
# el diagnóstico.
def read_labeled_tfrecord_with_file_name(example):
    LABELED_TFREC_FORMAT = {
        "image": tf.io.FixedLenFeature([], tf.string), 
        "image_name": tf.io.FixedLenFeature([], tf.string),  
        "patient_id": tf.io.FixedLenFeature([], tf.int64),
        "sex": tf.io.FixedLenFeature([], tf.int64),
        "age_approx": tf.io.FixedLenFeature([], tf.int64),
        "anatom_site_general_challenge": tf.io.FixedLenFeature([], tf.int64),
        "diagnosis": tf.io.FixedLenFeature([], tf.int64),
        "target": tf.io.FixedLenFeature([], tf.int64),
    }
    example = tf.io.parse_single_example(example, LABELED_TFREC_FORMAT)
    image = prepare_image(example['image'])
    image_name = example['image_name']
    target = tf.cast(example['target'], tf.int32)
    return image, image_name, target


# Función para filtrar los registros cuyo nombre de imágen no termine con
# '_downsample'
def filter_images(image, image_name, target):
  return not tf.strings.regex_full_match(image_name,"^((.*_downsampled))")

# Función para filtrar los registros con diagnóstico de melanoma
def filter_images_melanoma(image, image_name, target):
  return target == 1

# Función para filtrar los registros con diagnóstico de no melanoma
def filter_images_no_melanoma(image, image_name, target):
  return target == 0

# Función para suprimir el nombre del fichero de los registros
def remove_file_name(image, file, target):
  return image, target

# Función para cargar en un dataset los TFRecords de un conjunto de registros
def load_dataset(filenames):
    # Lecturas en paralelo
    dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    # Construye el dataset con el formato definido,
    # descartando los que sean 'downsample'
    dataset = dataset.map(read_labeled_tfrecord_with_file_name, 
                          num_parallel_calls=AUTO).filter(filter_images)
    # Devuekve un dataset con (image, image_name, target)
    return dataset

## Funciones para cálculo del tipo de tono de piel ITA

In [5]:
# Detecting Melanoma Fairly: Skin Tone Detection and Debiasing 
# for Skin Lesion Classification
# https://github.com/pbevan1/Detecting-Melanoma-Fairly/blob/main/preprocessing.py
# Hair removal for ITA calculation
def hair_remove(image):
    # Convert image to grayScale
    grayScale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Kernel for morphologyEx
    kernel = cv2.getStructuringElement(1, (17, 17))
    # Apply MORPH_BLACKHAT to grayScale image
    blackhat = cv2.morphologyEx(grayScale, cv2.MORPH_BLACKHAT, kernel)
    # Apply thresholding to blackhat
    _, threshold = cv2.threshold(blackhat, 10, 255, cv2.THRESH_BINARY)
    # Inpaint with original image and threshold image
    final_image = cv2.inpaint(image, threshold, 1, cv2.INPAINT_TELEA)
    return final_image

# Calculates Fitzpatrick skin type of an image using Kinyanjui et al.'s thresholds
def get_sample_ita_kin(img):
    ita_bnd_kin = -1
    try:
        rgb = img
        rgb = hair_remove(rgb)
        lab = color.rgb2lab(rgb)
        ita_lst = []
        ita_bnd_lst = []

        # Taking samples from different parts of the image
        L1 = lab[230:250, 115:135, 0].mean()
        b1 = lab[230:250, 115:135, 2].mean()

        L2 = lab[5:25, 115:135, 0].mean()
        b2 = lab[5:25, 115:135, 2].mean()

        L3 = lab[115:135, 5:25, 0].mean()
        b3 = lab[115:135, 5:25, 2].mean()

        L4 = lab[115:135, 230:250, 0].mean()
        b4 = lab[115:135, 230:250, 2].mean()

        L5 = lab[216:236, 216:236, 0].mean()
        b5 = lab[216:236, 216:236, 2].mean()

        L6 = lab[216:236, 20:40, 0].mean()
        b6 = lab[216:236, 20:40, 2].mean()

        L7 = lab[20:40, 20:40, 0].mean()
        b7 = lab[20:40, 20:40, 2].mean()

        L8 = lab[20:40, 216:236, 0].mean()
        b8 = lab[20:40, 216:236, 2].mean()

        L_lst = [L1, L2, L3, L4, L5, L6, L7, L8]
        b_lst = [b1, b2, b3, b4, b5, b6, b7, b8]

        # Calculating ITA values
        for L, b in zip(L_lst, b_lst):
            ita = math.atan((L - 50) / b) * (180 / math.pi)
            ita_lst.append(ita)

        # Using max ITA value (lightest)
        ita_max = max(ita_lst)

        # Getting skin shade band from ITA
        if ita_max > 55:
            ita_bnd_kin = 1
        if 41 < ita_max <= 55:
            ita_bnd_kin = 2
        if 28 < ita_max <= 41:
            ita_bnd_kin = 3
        if 19 < ita_max <= 28:
            ita_bnd_kin = 4
        if 10 < ita_max <= 19:
            ita_bnd_kin = 5
        if ita_max <= 10:
            ita_bnd_kin = 6
    except Exception:
        pass

    return ita_bnd_kin

### Funciones de visualización de imágenes

In [6]:
# Dots per Inch para mostrar imágenes en un monitor de resolución 1366x768
MY_DPI = 96

# Función que muestra en pantalla las imágenes contenidas en un dataset con
# su correspondiente diagnóstico (etiqueta)
def show_sample_images(thumb_size, ds):
  for img, file, target in iter(ds):
    plt.figure(figsize=(thumb_size/MY_DPI, thumb_size/MY_DPI), dpi=MY_DPI)
    file_name = file.numpy().decode('ascii')
    target_diag = "Melanoma" if target.numpy() else "No melanoma"
    image = np.clip(img.numpy() * 255, 0, 255).astype(np.uint8)
    image = hair_remove(image)
    print(get_sample_ita_kin(image))
    image = PIL.Image.fromarray(image)
    print("File: {} Size: {} Diagnóstico: {}".format(file_name, image.size,target_diag))
    plt.imshow(image)
    plt.show()
    print("\n")


# Función para mostrar un mosaico con imágenes
def show_mosaic_images(thumb_size, cols, rows, ds):
    mosaic = PIL.Image.new(mode='RGB', size=(thumb_size*cols + (cols-1), thumb_size*rows + (rows-1)))
    for idx, data in enumerate(iter(ds)):
        img, target = data[0], data[1]
        ix  = idx % cols
        iy  = idx // cols
        img = np.clip(img.numpy() * 255, 0, 255).astype(np.uint8)
        img = hair_remove(img)
        img = PIL.Image.fromarray(img)
        img = img.resize((thumb_size, thumb_size), resample=PIL.Image.BILINEAR)
        mosaic.paste(img, (ix*thumb_size + ix, 
                           iy*thumb_size + iy))
        if idx > cols*rows: break
    display(mosaic)

## Configuración del entorno hardware

In [7]:
# Detecta hardware, devuelve la estrategia de distribución apropiada el HW.
try:
    # TPU detectada 
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('TPU disponible ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    # default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()
    print("GPUs disponibles: ", len(tf.config.list_physical_devices('GPU')))

print("Réplicas:", strategy.num_replicas_in_sync)

# Configura el tamaño del batch
BATCH_SIZE = 16 * strategy.num_replicas_in_sync
print("Tamaño de batch:", BATCH_SIZE)

TPU disponible  grpc://10.107.29.162:8470
Réplicas: 8
Tamaño de batch: 128


## Carga de los ficheros de TFRecords en datasets

In [8]:
# Ruta a los ficheros de TFRecords en GCS
GCS_PATHS = {
  "256x256": "gs://kds-dffbfdcb6ca951891c5be0f6c65fef4772c9b97e418bf13d4744901b",
}

# Resoluciones que se van a utilizar para el entrenamiento del modelo
RESOLUTIONS = {
  "256x256": [256, 256]
}

# Resolución del imágenes modelo
MODEL_RESOLUTION = "256x256"
IMAGE_SIZE = RESOLUTIONS[MODEL_RESOLUTION]

# Carga los TFRecords 
dataset_files = {}
for resolution, path in GCS_PATHS.items():
    print("Resolución: {}, path: {}".format(resolution, path))
    dataset_files[resolution] = tf.io.gfile.glob(path + '/train*.tfrec')
    print("Ficheros TFRecord: ({})".format(len(dataset_files[resolution])))

Resolución: 256x256, path: gs://kds-dffbfdcb6ca951891c5be0f6c65fef4772c9b97e418bf13d4744901b
Ficheros TFRecord: (30)


In [9]:
# Carga de los conjuntos de datos desde los ficheros TFRecord
full_datasets = {}
for resolution, files in dataset_files.items():
  IMAGE_SIZE = RESOLUTIONS[MODEL_RESOLUTION]
  full_datasets[resolution] = load_dataset(files)
  print(resolution, full_datasets[resolution])


256x256 <FilterDataset element_spec=(TensorSpec(shape=(256, 256, 3), dtype=tf.float32, name=None), TensorSpec(shape=(), dtype=tf.string, name=None), TensorSpec(shape=(), dtype=tf.int32, name=None))>


## Cálculo de tono de piel ITA

In [10]:
# Prepara un fichero CSV con el nombre del fichero de fotografía, 
# y el tono de piel ITA
import csv  
!mv /content/drive/MyDrive/TFG/TFG_mejor_modelo_pred.csv /content/drive/MyDrive/TFG/TFG_tono_piel_ITA.csv.back
header = ['image_name', 'tipo_ITA']
with open('/content/drive/MyDrive/TFG/TFG_tono_piel_ITA.csv', 'w', encoding='UTF8') as f:
  writer = csv.writer(f)
  # Cabecera
  writer.writerow(header)
  idx = 0
  for img, file, target in iter(full_datasets[MODEL_RESOLUTION]):
    image = np.clip(img.numpy() * 255, 0, 255).astype(np.uint8)
    image = hair_remove(image)
    ITA = get_sample_ita_kin(image)
    writer.writerow([file.numpy().decode('ascii'), ITA])
    idx = idx + 1

mv: cannot stat '/content/drive/MyDrive/TFG/TFG_mejor_modelo_pred.csv': No such file or directory


  ita = math.atan((L - 50) / b) * (180 / math.pi)
Exception ignored in: <function Executor.__del__ at 0x7fac047420d0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/executor.py", line 46, in __del__
    self.wait()
  File "/usr/local/lib/python3.8/dist-packages/tensorflow/python/eager/executor.py", line 65, in wait
    pywrap_tfe.TFE_ExecutorWaitForAllPendingNodes(self._handle)
tensorflow.python.framework.errors_impl.OutOfRangeError: End of sequence


Este notebook está inspirado por:
- Detecting Melanoma Fairly: Skin Tone Detection and Debiasing for Skin Lesion Classification <br>
https://github.com/pbevan1/Detecting-Melanoma-Fairly/blob/main/preprocessing.py