# Setup

## Imports

In [2]:
import tensorflow as tf
from tensorflow.keras import backend as K

from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report

import tensorflow.keras as keras
from tensorflow.keras.metrics import BinaryAccuracy, AUC, Precision, Recall
from tensorflow.train import Feature, Features, Example
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, InputLayer
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import applications
from tensorflow.keras import callbacks
from tensorflow.keras.models import Sequential

import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd

import re
import math
import os
import sys
import warnings
import logging
import random

from datetime import datetime
from IPython.display import display 
from io import BytesIO
from PIL import Image
from skimage import io
from typing import List, Tuple

# local vs. colab
IS_LOCAL = False

if IS_LOCAL:
    # jupyter offline
    PATH_KAGGLE_MEL = './../kaggle_symlink_offline/melanoma/'
    REPO_ROOT = './../../'
    REPO_TEMP = REPO_ROOT + 'temp/'
    TFREC_DIR = REPO_ROOT + 'tim/kaggle_symlink_offline/melanoma/tfrecords/triple_stra_{}/'
else:
    # http://tiny.cc/8jjjsz
    # loading from Kaggle to save $$$ (vs. loading from my own GCS)
    KAGGLE_TFREC_GCS ={
        128: 'gs://kds-659708bf9143f303ebfd1c862eb9e842090662d9004190208d007cc9',
        192: 'gs://kds-f53b5775dce9868747163621afd1adc2815412f220130261292837a3',
        256: 'gs://kds-f64cfd42bcb769b2eeeecd53d5a52df83d43c19c1184989ed762e30f',
        384: 'gs://kds-e73569ee9d44308363027e79908294593e80b1e12e18e57ef065397c',
        512: 'gs://kds-4f5e437bc05e29f3e95419fa289ea3a6b01ac2fefcb772ca07cc3b5f',
        768: 'gs://kds-49b793da52a884d00e33c11613f3f24261d8e53e1b8c16de8c868509'}

    # colab online
    PATH_KAGGLE_MEL = '/content/gdrive/My Drive/Kaggle/melanoma/'
    REPO_ROOT = '/content/gdrive/My Drive/melanoma/'
    REPO_TEMP = REPO_ROOT + 'temp/'
    
    # mount google drive only on colab
    from google.colab import drive
    drive.mount('/content/gdrive')

warnings.filterwarnings("ignore") # suppress warning messages
plt.style.use('ggplot')

PROCESSOR = "GPU" if IS_LOCAL else "TPU"
SEED = 207 # used for creating k-fold

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Initializing TPU

In [3]:
if PROCESSOR == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU: ', tpu.master())
    except ValueError:
        print("Error: Unable to connect to TPU...")
        tpu = None

    if tpu:
        try:
            print("Initializing TPU...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized!")
        except _:
            print("Error: Failed to initialize TPU...")
    else:
        PROCESSOR = "GPU"

if PROCESSOR != "TPU":
    print("Using default strategy for CPU/GPU...")
    strategy = tf.distribute.get_strategy()

if PROCESSOR == "GPU":
    print("# of GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
            
# https://tinyurl.com/yao4obsb
# A single Cloud TPU device consists of four chips, each of which has two TPU cores. 
# Therefore, for efficient utilization of Cloud TPU, a program should make use of 
# each of the eight cores.
REPLICAS = strategy.num_replicas_in_sync
print(f'# of replicatas in sync: {REPLICAS}')

connecting to TPU...
Running on TPU:  grpc://10.121.33.2:8470
Initializing TPU...
INFO:tensorflow:Initializing the TPU system: grpc://10.121.33.2:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.121.33.2:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


TPU initialized!
# of replicatas in sync: 8


## Utility Functions

In [31]:
def read_labeled_tfrecord(example: Example) -> Tuple['tf.string', 'tf.int64']:
    """Extract image & label from the tfrecord.
    """
    tfrec_format = {
        'image'                        : tf.io.FixedLenFeature([], tf.string),
        'image_name'                   : tf.io.FixedLenFeature([], tf.string),
        'patient_id'                   : tf.io.FixedLenFeature([], tf.int64),
        'sex'                          : tf.io.FixedLenFeature([], tf.int64),
        'age_approx'                   : tf.io.FixedLenFeature([], tf.int64),
        'anatom_site_general_challenge': tf.io.FixedLenFeature([], tf.int64),
        'diagnosis'                    : tf.io.FixedLenFeature([], tf.int64),
        'target'                       : tf.io.FixedLenFeature([], tf.int64)
    }           
    example = tf.io.parse_single_example(example, tfrec_format)
    return example['image'], example['target']

def read_unlabeled_tfrecord(example: Example, 
                            return_img_name: bool) -> Tuple['tf.string', 'tf.int64']:
    """Label is unavailable, extract image & image name from the tfrecord.
    """
    tfrec_format = {
        'image'                        : tf.io.FixedLenFeature([], tf.string),
        'image_name'                   : tf.io.FixedLenFeature([], tf.string),
    }
    example = tf.io.parse_single_example(example, tfrec_format)
    if return_img_name:
        return example['image'], example['image_name']
    else:
        return example['image'], 0

def aug_image(img, augment=True, dim=256):
    """Apply random transformation.
    """
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32) / 255.0
    
    if augment:
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_hue(img, 0.01)
        img = tf.image.random_saturation(img, 0.7, 1.3)
        img = tf.image.random_contrast(img, 0.8, 1.2)
        img = tf.image.random_brightness(img, 0.1)
        img = tf.image.random_flip_up_down(img)
        img = tf.clip_by_value(img, 0.0, 1.0)
                      
    img = tf.reshape(img, [dim,dim, 3])
            
    return img

def count_examples(file_names: List[str]):
    """Note that the name of each tfrecord file is sufixed with the number of 
    images included.
    """
    n = [int(re.compile(r"-([0-9]*)\.").search(f).group(1)) 
         for f in file_names]
    return np.sum(n)
                  
def get_dataset(tfrec_files: List[str], 
                dim: int,
                batch_size: int = 16,
                augment: bool = False, 
                shuffle: bool = False, 
                repeat: bool = False, 
                labeled: bool = True, 
                return_img_names: bool = True,
                drop_remainder: bool = False,
                replicas: int = strategy.num_replicas_in_sync) -> Tuple[tf.data.TFRecordDataset, int]:
    """Return a TFRecordDataset by loading tfrecord files.
    Args:
        tfrec_files (List[str]): List of paths to the tfrecord files.
    Returns:
        [0](TFRecordDataset): Two possible column combinations... 
            - 'image' & 'target' for training dataset
            - 'image' & 'image_name' for test dataset
        [1](int): Number of steps to complete an epoch.
    """
    
    AUTO = tf.data.experimental.AUTOTUNE
    ds = tf.data.TFRecordDataset(tfrec_files, 
                                 num_parallel_reads=AUTO)
    ds = ds.cache()
    
    if repeat:
        ds = ds.repeat()
    
    if shuffle: 
        ds = ds.shuffle(buffer_size=1024) # 1024 to optimize TPU performance
        opt = tf.data.Options()
        opt.experimental_deterministic = False # ensure a true random shuffle
        ds = ds.with_options(opt)
    
    if labeled: 
        # extract image & label
        # ds inlcudes just one column of examples
        ds = ds.map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    else:
        # extract image & image name
        # ds inlcudes just one column of examples
        map_ = lambda example: read_unlabeled_tfrecord(example, return_img_names)
        ds = ds.map(map_, num_parallel_calls=AUTO)      
    
    # transform image
    # ds has two columns: "image (intf.string)" and "label or image name"
    map_ = lambda img, _: (aug_image(img, augment=augment, dim=dim), _)
    ds = ds.map(map_, num_parallel_calls=AUTO)
    
    # https://tinyurl.com/yao4obsb
    # A single Cloud TPU device consists of four chips, each of which has two TPU cores. 
    # Therefore, for efficient utilization of Cloud TPU, a program should make use of 
    # each of the eight cores.
    #
    # https://tinyurl.com/y99kjyh5
    # Model processing performance
    # For optimum memory usage, use the largest batch size that will fit in memory. 
    # Each TPU core uses a 128 x 128 memory cell matrix for processing. In general, 
    # your batch sized should be evenly divisible by 128 to most effectively use the TPU memory.
    #
    # https://tinyurl.com/yawn2acn
    # Batch Size Too Small
    # The batch size of any model should always be at least 64 (8 per TPU core) 
    # because TPU always pads the tensors to this size. The ideal batch size when 
    # training on the TPU is 1024 (128 per TPU core), since this eliminates inefficiencies 
    # related to memory transfer and padding.
    #
    # https://tinyurl.com/y9nojpa2
    # Minimal requirement: A multiple of 8!
    if PROCESSOR == 'TPU':
        if batch_size < 64:
            # better
            print('Warning: Batch size {} is smaller than 64...'.format(batch_size))
        if batch_size % 8 > 0:
            # min requirement
            print('Error: Batch size {} is not a multiple of 8...'.format(batch_size))
    ds = ds.batch(batch_size, drop_remainder=drop_remainder) 
    
    num_images = count_examples(tfrec_files)
    steps = num_images // batch_size
    if num_images % batch_size > 0:
        # require one more step to loop through the entire dataset
        steps += 1
    
    # From tf doc (https://tinyurl.com/yavczqkr):
    # Most dataset input pipelines should end with a call to prefetch. This allows 
    # later elements to be prepared while the current element is being processed. This 
    # often improves latency and throughput, at the cost of using additional memory to 
    # store prefetched elements.
    ds = ds.prefetch(AUTO)
    
    return ds, steps

def build_logistics(dim: int, 
                    sig_thd: float = 0.5,
                    lr: float = 0.001,
                    label_smoothing: float = 0.05) -> Sequential:

    loss_func = tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing)
    
    m = Sequential()
    m.add(InputLayer(input_shape=(dim, dim, 3)))
    m.add(Flatten())
    m.add(Dense(1, activation='sigmoid')) 
    
    # compile model
    m.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr), 
              loss=loss_func, 
              metrics=[AUC(),
                       BinaryAccuracy(threshold=sig_thd),
                       Recall(thresholds=sig_thd)])
    return m

# Logistic Regression Using Keras

## Training

In [34]:
BATCH_SIZE = 64
EPOCH = 5
IMG_SIZE = 512 # 128, 192, 256, 384, 512, 768, 1024

SIGMOID_THD = 0.2
DEV_SET_FILE_COUNT = 3
LABEL_SMOOTHING = 0.05

LEARN_RATE = 0.001
SIGMOID_THD = 0.2

# prep datasets
if IS_LOCAL:
    TFREC_TEST_SET = tf.io.gfile.glob(TFREC_DIR.format(IMG_SIZE) + 'train*.tfrec')
else:
    TFREC_TRAIN_SET = tf.io.gfile.glob(KAGGLE_TFREC_GCS[IMG_SIZE] + '/train*.tfrec')
random.shuffle(TFREC_TRAIN_SET)
TFREC_DEV_SET = TFREC_TRAIN_SET[:DEV_SET_FILE_COUNT]
TFREC_TRAIN_SET = TFREC_TRAIN_SET[DEV_SET_FILE_COUNT:]

SAVE_DIR = REPO_ROOT + 'tim/cnn/_saves/cnn_efn/'

# --------------------------------------------------------------------------
# BUILD MODEL

print('Building model...')
keras.backend.clear_session() 
with strategy.scope():
    model = build_logistics(dim=IMG_SIZE,
                            sig_thd=SIGMOID_THD,
                            lr=LEARN_RATE,
                            label_smoothing = LABEL_SMOOTHING)

# create save dir
st = datetime.utcnow().strftime('logi_utc_%m%d_%H%M%S')
save_dir = SAVE_DIR + st
os.mkdir(save_dir)
save_dir += '/'
        
# --------------------------------------------------------------------------
# TRAINING

print('Training...')
ds_train, steps_train = get_dataset(TFREC_TRAIN_SET, 
                                    augment=False, 
                                    shuffle=True, 
                                    repeat=True,
                                    dim=IMG_SIZE, 
                                    batch_size=BATCH_SIZE)
history = model.fit(ds_train, 
                    epochs=EPOCH, 
                    steps_per_epoch=steps_train,
                    verbose=1)

Building model...
Training...
Epoch 1/5




Epoch 2/5




Epoch 3/5




Epoch 4/5




Epoch 5/5






## Predicting

In [36]:
print('\nPredicting...')
# perform TTA
ds_valid, steps_valid = get_dataset(TFREC_DEV_SET,
                                    labeled=False,
                                    return_img_names=False,
                                    augment=False,
                                    repeat=True,
                                    shuffle=False,
                                    dim=IMG_SIZE,
                                    batch_size=BATCH_SIZE)

preds = model.predict(ds_valid,
                        steps=steps_valid,
                        verbose=1)

# cut off the "reminders" in the last batch 
ct_valid = count_examples(TFREC_DEV_SET)
preds = preds[:ct_valid,] 

# get oof probabilities
preds_reshape = preds
preds_avg = np.mean(preds_reshape, axis=1)

# get oof targets
ds_targets, _ = get_dataset(TFREC_DEV_SET, 
                            augment=False, 
                            repeat=False, 
                            labeled=True, 
                            dim=IMG_SIZE,
                            batch_size=BATCH_SIZE)
targets = [target.numpy() for img, target in iter(ds_targets.unbatch())]
targets = np.array(targets)

# print oof auc
auc_no_tta = np.max(history.history['auc'])
print('AUC =', auc_no_tta)

# print report
pred_labels = np.zeros(preds_avg.shape[0])
pred_labels[preds_avg >= SIGMOID_THD] = 1
report = classification_report(targets, pred_labels, output_dict=True)
print(classification_report(targets, pred_labels))


Predicting...
AUC = 0.5126039981842041
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      6426
           1       0.00      0.00      0.00       116

    accuracy                           0.98      6542
   macro avg       0.49      0.50      0.50      6542
weighted avg       0.96      0.98      0.97      6542



## Testing Different Sigmoid Thresholds 

In [37]:
THDS = [0.1, 0.2, 0.5]

for thd in THDS:
    pred_labels = np.zeros(preds_avg.shape[0])
    pred_labels[preds_avg >= thd] = 1
    report = classification_report(targets, pred_labels, output_dict=True)
    print("\nSigmoid THD = {:.2f}:".format(thd))
    print(classification_report(targets, pred_labels))


Sigmoid THD = 0.10:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      6426
           1       0.00      0.00      0.00       116

    accuracy                           0.98      6542
   macro avg       0.49      0.50      0.50      6542
weighted avg       0.96      0.98      0.97      6542


Sigmoid THD = 0.20:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      6426
           1       0.00      0.00      0.00       116

    accuracy                           0.98      6542
   macro avg       0.49      0.50      0.50      6542
weighted avg       0.96      0.98      0.97      6542


Sigmoid THD = 0.50:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      6426
           1       0.00      0.00      0.00       116

    accuracy                           0.98      6542
   macro avg       0.49      0.50      0.50      6542
weighted av