Thanks to https://www.kaggle.com/xhlulu/ranzcr-efficientnet-tpu-training

In [None]:
!pip install efficientnet

In [None]:
!pip install tf_clahe

In [None]:
import os

import tf_clahe
import efficientnet.tfkeras as efn
import random, re, math
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from sklearn.metrics import accuracy_score, roc_auc_score


import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.applications.efficientnet import EfficientNetB7 as Efnb7
import tensorflow.keras.backend as K

import cv2
import albumentations as alb

from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GroupKFold

In [None]:
def auto_select_accelerator():
    """Return a ``tf.distribute`` strategy, preferring a TPU when one is reachable.

    The TPU cluster is resolved, connected to and initialised inside a single
    ``try``; if any of those steps raises ``ValueError`` the strategy returned
    by ``tf.distribute.get_strategy()`` is used instead. The replica count of
    the chosen strategy is printed in both cases.

    Returns:
        The selected distribution strategy object.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        selected = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # No TPU could be set up: fall back to whatever strategy is current.
        selected = tf.distribute.get_strategy()

    print(f"Running on {selected.num_replicas_in_sync} replicas")
    return selected


def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg', aug=False):
    """Build a function that loads one image file into a float32 tensor.

    The returned function is meant to be passed to ``tf.data.Dataset.map``.
    It reads the file, decodes it with 3 channels, scales pixel values to
    ``[0, 1]`` by dividing by 255 and resizes to ``target_size``.

    Args:
        with_labels: if true, the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes ``path`` and
            returns ``image``.
        target_size: size passed to ``tf.image.resize``.
        ext: image format, one of ``'png'``, ``'jpg'`` or ``'jpeg'``
            (case-sensitive).
        aug: accepted for backward compatibility. It currently has no effect:
            every decode-time augmentation it used to guard is disabled.

    Returns:
        The decoding function described above.

    Raises:
        ValueError: if ``ext`` is not supported. This is raised here, when the
            decoder is built, instead of later while the dataset is traced.
    """
    # Fail fast: validate the extension once, up front, rather than on the
    # first call made from inside Dataset.map.
    if ext not in ('png', 'jpg', 'jpeg'):
        raise ValueError("Image extension not supported")

    def decode(path):
        file_bytes = tf.io.read_file(path)

        if ext == 'png':
            img = tf.image.decode_png(file_bytes, channels=3)
        else:
            img = tf.image.decode_jpeg(file_bytes, channels=3)

        # Convert to float and normalise before resizing.
        img = tf.cast(img, tf.float32) / 255.0
        img = tf.image.resize(img, target_size)

        return img

    def decode_with_labels(path, label):
        return decode(path), label

    return decode_with_labels if with_labels else decode


def build_augmenter(with_labels=True):
    """Build a random-flip augmentation function for ``tf.data.Dataset.map``.

    Args:
        with_labels: if true, the returned function takes ``(img, label)`` and
            returns ``(augmented_img, label)``; otherwise it takes and returns
            only the image.

    Returns:
        A function applying a random vertical flip followed by a random
        horizontal flip to the image.
    """
    def _random_flips(img):
        flipped = tf.image.random_flip_up_down(img)
        return tf.image.random_flip_left_right(flipped)

    if not with_labels:
        return _random_flips

    def _random_flips_with_label(img, label):
        # The label passes through untouched.
        return _random_flips(img), label

    return _random_flips_with_label


def build_dataset(paths, labels=None, bsize=128, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=2048, 
                  cache_dir=""):
    """Assemble a batched, prefetched ``tf.data`` pipeline from image paths.

    Stages, in order: slice the inputs -> decode -> (cache) -> (augment) ->
    (repeat) -> (shuffle) -> batch -> prefetch. Caching sits after decoding
    and before augmentation, so augmentations are re-applied on every pass
    over the cached images.

    Args:
        paths: image paths, one element per sample.
        labels: optional labels aligned with ``paths``. When given, elements
            are ``(path, label)`` pairs and the default decoder/augmenter are
            built in their label-aware form.
        bsize: batch size.
        cache: whether to cache the decoded elements.
        decode_fn: decoding function; defaults to ``build_decoder(...)``.
        augment_fn: augmentation function; defaults to ``build_augmenter(...)``.
        augment: whether to map ``augment_fn`` over the dataset.
        repeat: whether to repeat the dataset indefinitely.
        shuffle: shuffle buffer size; a falsy value disables shuffling.
        cache_dir: passed to ``Dataset.cache``; when non-empty and ``cache`` is
            exactly ``True`` the directory is created first.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    has_labels = labels is not None

    if cache_dir != "" and cache is True:
        os.makedirs(cache_dir, exist_ok=True)

    if decode_fn is None:
        decode_fn = build_decoder(has_labels)

    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    autotune = tf.data.experimental.AUTOTUNE
    source = (paths, labels) if has_labels else paths

    pipeline = tf.data.Dataset.from_tensor_slices(source)
    pipeline = pipeline.map(decode_fn, num_parallel_calls=autotune)

    if cache:
        pipeline = pipeline.cache(cache_dir)

    if augment:
        pipeline = pipeline.map(augment_fn, num_parallel_calls=autotune)

    if repeat:
        pipeline = pipeline.repeat()

    # Shuffling happens on individual samples, before batching.
    if shuffle:
        pipeline = pipeline.shuffle(shuffle)

    return pipeline.batch(bsize).prefetch(autotune)

In [None]:
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Return the 3x3 matrix that transforms pixel indices.

    The result is ``(rotation @ shear) @ (zoom @ shift)``.

    Args:
        rotation: rotation angle in degrees.
        shear: shear angle in degrees.
        height_zoom: zoom factor along the height; its reciprocal is used.
        width_zoom: zoom factor along the width; its reciprocal is used.
        height_shift: shift along the height.
        width_shift: shift along the width.

    All arguments are concatenated with shape-``[1]`` float32 constants and
    reshaped to 3x3, so each is expected to be a shape-``[1]`` float32 tensor.

    Returns:
        A ``[3, 3]`` float32 tensor.
    """
    def _as_3x3(entries):
        # `entries` holds nine shape-[1] tensors in row-major order.
        return tf.reshape(tf.concat(entries, axis=0), [3, 3])

    # Degrees -> radians.
    rotation = math.pi * rotation / 180.
    shear = math.pi * shear / 180.

    cos_rot = tf.math.cos(rotation)
    sin_rot = tf.math.sin(rotation)
    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    rotation_matrix = _as_3x3([
        cos_rot, sin_rot, zero,
        -sin_rot, cos_rot, zero,
        zero, zero, one,
    ])

    cos_shear = tf.math.cos(shear)
    sin_shear = tf.math.sin(shear)
    shear_matrix = _as_3x3([
        one, sin_shear, zero,
        zero, cos_shear, zero,
        zero, zero, one,
    ])

    zoom_matrix = _as_3x3([
        one / height_zoom, zero, zero,
        zero, one / width_zoom, zero,
        zero, zero, one,
    ])

    shift_matrix = _as_3x3([
        one, zero, height_shift,
        zero, one, width_shift,
        zero, zero, one,
    ])

    rot_shear = K.dot(rotation_matrix, shear_matrix)
    zoom_shift = K.dot(zoom_matrix, shift_matrix)
    return K.dot(rot_shear, zoom_shift)

In [None]:
def transform(image, label):
    """Randomly rotate, shear, zoom and shift a single square image.

    Every output pixel position is mapped through a random matrix from
    ``get_mat`` to a source position, and the source pixel is copied over
    (integer indices, no interpolation).

    Args:
        image: one image (not a batch) indexed as ``[row, col, channel]``. The
            result is reshaped to ``[DIM, DIM, 3]`` with ``DIM = IMAGE_SIZE[0]``,
            so the image is expected to have that size.
        label: returned unchanged.

    Returns:
        ``(transformed_image, label)``.
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    DIM = IMAGE_SIZE[0]
    XDIM = DIM%2 #fix for size 331
    
    # Random parameters, each a shape-[1] tensor drawn from a scaled normal:
    # rotation and shear are passed to get_mat as degrees, zooms are centred
    # on 1.0, shifts are centred on 0.
    rot = 15. * tf.random.normal([1],dtype='float32')
    shr = 5. * tf.random.normal([1],dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    w_zoom = 1.0 + tf.random.normal([1],dtype='float32')/10.
    h_shift = 16. * tf.random.normal([1],dtype='float32') 
    w_shift = 16. * tf.random.normal([1],dtype='float32') 
  
    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image middle; z is the constant 1 of
    # homogeneous coordinates so the 3x3 matrix can also apply the shift.
    x = tf.repeat( tf.range(DIM//2,-DIM//2,-1), DIM )
    y = tf.tile( tf.range(-DIM//2,DIM//2),[DIM] )
    z = tf.ones([DIM*DIM],dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m,tf.cast(idx,dtype='float32'))
    # Cast back to integer indices, then clip so the row/column indices
    # computed below stay inside the image.
    idx2 = K.cast(idx2,dtype='int32')
    idx2 = K.clip(idx2,-DIM//2+XDIM+1,DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert the centred coordinates back to [row, col] array indices.
    idx3 = tf.stack( [DIM//2-idx2[0,], DIM//2-1+idx2[1,]] )
    d = tf.gather_nd(image,tf.transpose(idx3))
        
    return tf.reshape(d,[DIM,DIM,3]),label

In [None]:
def batch_mixup(images, labels, PROBABILITY=1.0, batch_size=0):
    """Blend each image of a batch with a randomly chosen image of the same batch.

    For every sample a partner index is drawn uniformly from the batch and a
    mixing ratio ``a`` is drawn uniformly from ``[0, 1)``. Image and label
    become ``(1 - a) * original + a * partner``. Samples that do not pass the
    ``PROBABILITY`` draw get ``a = 0`` and are left unchanged.

    Args:
        images: batch of images with the batch on axis 0 and three further axes.
        labels: rank-1 class indices (one-hot encoded here with 4 classes) or
            already per-class label rows.
        PROBABILITY: per-sample probability of applying mixup. A value below 0
            disables mixup for every sample.
        batch_size: number of samples in the batch; ``0`` means use the global
            ``BATCH_SIZE``.

    Returns:
        ``(new_images, new_labels)`` with mixed images and soft labels.
    """
    CLASSES = 4

    if batch_size == 0:
        batch_size = BATCH_SIZE

    # 1.0 where mixup is applied, 0.0 where the sample is kept as is.
    # shape = [batch_size]
    do_mixup = tf.cast(tf.random.uniform([batch_size], 0, 1) <= PROBABILITY, tf.float32)

    # Random partner image for every sample of the batch.
    # shape = [batch_size]
    new_image_indices = tf.cast(tf.random.uniform([batch_size], 0, batch_size), tf.int32)

    # Weight of the partner image (uniform, i.e. a beta distribution with
    # alpha=1.0); forced to 0 where mixup is disabled.
    # shape = [batch_size]
    a = tf.random.uniform([batch_size], 0, 1) * do_mixup

    # Broadcast the per-sample weight over the three image axes.
    image_weight = a[:, tf.newaxis, tf.newaxis, tf.newaxis]
    new_images = (1 - image_weight) * images + image_weight * tf.gather(images, new_image_indices)

    # Make labels
    if len(labels.shape) == 1:
        labels = tf.one_hot(labels, CLASSES)
    label_weight = a[:, tf.newaxis]
    new_labels = (1 - label_weight) * labels + label_weight * tf.gather(labels, new_image_indices)

    return new_images, new_labels

In [None]:
def batch_cutmix(images, labels, PROBABILITY=1.0, batch_size=0):
    """Paste a random box from another image of the batch into each image (CutMix).

    For every sample a partner image, a box centre and a box width are drawn
    at random. The box is clipped to the image, the partner's pixels replace
    the original ones inside it, and the labels are mixed in proportion to the
    fraction of pixels that were actually replaced.

    Two defects of the previous version are fixed here:
      * the box used inclusive upper bounds, so it was one pixel too large and
        a single pixel was still replaced when cutmix was disabled (width 0);
      * the label weight used the unclipped ``width ** 2`` instead of the area
        really pasted after clipping at the image border.

    Args:
        images: batch of shape ``[batch_size, DIM, DIM, 3]`` with
            ``DIM = IMAGE_SIZE[0]``.
        labels: rank-1 class indices (one-hot encoded here with 4 classes) or
            already per-class label rows.
        PROBABILITY: per-sample probability of applying cutmix.
        batch_size: number of samples in the batch; ``0`` means use the global
            ``BATCH_SIZE``.

    Returns:
        ``(new_images, new_labels)`` with patched images and soft labels.
    """
    DIM = IMAGE_SIZE[0]
    CLASSES = 4

    if batch_size == 0:
        batch_size = BATCH_SIZE

    # 1 where cutmix is applied, 0 where the sample is kept as is.
    # shape = [batch_size]
    do_cutmix = tf.cast(tf.random.uniform([batch_size], 0, 1) <= PROBABILITY, tf.int32)

    # Random partner image for every sample of the batch.
    # shape = [batch_size]
    new_image_indices = tf.cast(tf.random.uniform([batch_size], 0, batch_size), tf.int32)

    # Random centre of the pasted box.
    # shape = [batch_size]
    new_x = tf.cast(tf.random.uniform([batch_size], 0, DIM), tf.int32)
    new_y = tf.cast(tf.random.uniform([batch_size], 0, DIM), tf.int32)

    # Random box width (uniform b, i.e. a beta distribution with alpha=1.0);
    # forced to 0 where cutmix is disabled.
    # shape = [batch_size]
    b = tf.random.uniform([batch_size], 0, 1)
    new_width = tf.cast(DIM * tf.math.sqrt(1 - b), tf.int32) * do_cutmix

    # Box limits clipped to the image: rows [y0, y1), columns [x0, x1).
    # shape = [batch_size]
    half_width = new_width // 2
    new_y0 = tf.math.maximum(0, new_y - half_width)
    new_y1 = tf.math.minimum(DIM, new_y + half_width)
    new_x0 = tf.math.maximum(0, new_x - half_width)
    new_x1 = tf.math.minimum(DIM, new_x + half_width)

    # shape = [batch_size, DIM]
    target = tf.broadcast_to(tf.range(DIM), shape=(batch_size, DIM))

    # Half-open intervals: a zero-width box selects no pixel at all.
    # shape = [batch_size, DIM]
    mask_y = tf.math.logical_and(new_y0[:, tf.newaxis] <= target, target < new_y1[:, tf.newaxis])
    mask_x = tf.math.logical_and(new_x0[:, tf.newaxis] <= target, target < new_x1[:, tf.newaxis])

    # 1.0 inside the box, 0.0 outside.
    # shape = [batch_size, DIM, DIM]
    mask = tf.cast(tf.math.logical_and(mask_y[:, :, tf.newaxis], mask_x[:, tf.newaxis, :]), tf.float32)

    # Fraction of each image that is replaced, measured on the clipped box.
    # shape = [batch_size]
    a = tf.reduce_sum(mask, axis=[1, 2]) / float(DIM * DIM)

    # All components are of shape [batch_size, DIM, DIM, 3]
    mask = tf.broadcast_to(mask[:, :, :, tf.newaxis], [batch_size, DIM, DIM, 3])
    new_images = images * (1 - mask) + tf.gather(images, new_image_indices) * mask

    # Make labels
    if len(labels.shape) == 1:
        labels = tf.one_hot(labels, CLASSES)

    label_weight = a[:, tf.newaxis]
    new_labels = (1 - label_weight) * labels + label_weight * tf.gather(labels, new_image_indices)

    return new_images, new_labels

In [None]:
def transform2(image, label):
    """Apply either cutmix or mixup to every sample of a batch.

    Both ``batch_cutmix`` and ``batch_mixup`` are computed for the whole
    batch; then, independently for each sample, the cutmix result is kept with
    probability ``SWITCH`` and the mixup result otherwise.

    The selection is done with one vectorised draw instead of a Python loop
    over ``AUG_BATCH`` samples, which was unrolled into that many graph ops.

    Args:
        image: batch of images, ``[batch, DIM, DIM, 3]`` with
            ``DIM = IMAGE_SIZE[0]``.
        label: labels of the batch (class indices or per-class rows).

    Returns:
        ``(images, labels)`` with static shapes ``[AUG_BATCH, DIM, DIM, 3]``
        and ``[AUG_BATCH, CLASSES]``.
    """
    DIM = IMAGE_SIZE[0]
    CLASSES = 4
    SWITCH = 0.5
    CUTMIX_PROB = 0.7
    # A negative probability can never pass the uniform [0, 1) draw inside
    # batch_mixup, so mixup is disabled and its branch returns the inputs
    # unchanged (with one-hot labels).
    MIXUP_PROB = -1.0

    cut_images, cut_labels = batch_cutmix(image, label, CUTMIX_PROB)
    mix_images, mix_labels = batch_mixup(image, label, MIXUP_PROB)

    # 1.0 -> keep the cutmix sample, 0.0 -> keep the mixup sample.
    # shape = [AUG_BATCH]
    use_cutmix = tf.cast(tf.random.uniform([AUG_BATCH], 0, 1) <= SWITCH, tf.float32)
    image_pick = use_cutmix[:, tf.newaxis, tf.newaxis, tf.newaxis]
    label_pick = use_cutmix[:, tf.newaxis]

    # Only the first AUG_BATCH samples are used, as in the per-sample version.
    images = image_pick * cut_images[:AUG_BATCH] + (1 - image_pick) * mix_images[:AUG_BATCH]
    labels = label_pick * cut_labels[:AUG_BATCH] + (1 - label_pick) * mix_labels[:AUG_BATCH]

    # RESHAPE HACK SO TPU COMPILER KNOWS SHAPE OF OUTPUT TENSOR
    image4 = tf.reshape(images, (AUG_BATCH, DIM, DIM, 3))
    label4 = tf.reshape(labels, (AUG_BATCH, CLASSES))
    return image4, label4

In [None]:
# Dataset configuration: original study-level labels plus pseudo-labelled rows.
# Running this cell (re)defines COMPETITION_NAME, DATASET, strategy,
# BATCH_SIZE, GCS_DS_PATH, load_dir, df and label_cols for the cells below.
#COMPETITION_NAME = "siimcovid19-512-img-png-600-study-png"
COMPETITION_NAME = "617-pseudo-labelling"

# DATASET selects how image paths are built in the training loop.
DATASET=1
strategy = auto_select_accelerator()
# Global batch size: 4 samples per replica.
BATCH_SIZE = 4 * strategy.num_replicas_in_sync
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
train_df = pd.read_csv('../input/siim-covid19-detection/train_study_level.csv')
ohe_hot_df = pd.read_csv('../input/617-pseudo-labelling/617_Pseudo_Labelling.csv')
# Stack both tables; ignore_index gives the result a fresh 0..n-1 index.
df=pd.concat([train_df, ohe_hot_df], ignore_index=True)

# Columns at positions 1..4 are used as the label columns.
label_cols = df.columns[1:5]

# Assign every row to one of 5 folds, grouped by id so that rows sharing an id
# land in the same fold.
gkf  = GroupKFold(n_splits = 5)
df['fold'] = -1
for fold, (train_idx, val_idx) in enumerate(gkf.split(df, groups = df.id.tolist())):
    # val_idx holds row positions; using them with .loc relies on df having
    # the default 0..n-1 index.
    df.loc[val_idx, 'fold'] = fold

In [None]:
# Dataset configuration: 1024px PNG dataset with the original study-level labels.
# Running this cell (re)defines COMPETITION_NAME, DATASET, strategy,
# BATCH_SIZE, GCS_DS_PATH, load_dir, df and label_cols for the cells below.
COMPETITION_NAME = "siimcovid19-1024-img-png-1024-study-png"

# DATASET selects how image paths are built in the training loop.
DATASET=1
strategy = auto_select_accelerator()
# Global batch size: 4 samples per replica.
BATCH_SIZE = strategy.num_replicas_in_sync * 4
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
df = pd.read_csv('../input/siim-covid19-detection/train_study_level.csv')
# Columns at positions 1..4 are used as the label columns.
label_cols = df.columns[1:5]

# Assign every row to one of 5 folds, grouped by id so that rows sharing an id
# land in the same fold.
gkf  = GroupKFold(n_splits = 5)
df['fold'] = -1
for fold, (train_idx, val_idx) in enumerate(gkf.split(df, groups = df.id.tolist())):
    # val_idx holds row positions; using them with .loc relies on df having
    # the default 0..n-1 index produced by read_csv.
    df.loc[val_idx, 'fold'] = fold

In [None]:
# Notebook display: show the GCS path resolved by the dataset cell that ran last.
GCS_DS_PATH

In [None]:
# Dataset configuration: 600px JPG dataset described by its own df.csv.
# Running this cell (re)defines COMPETITION_NAME, DATASET, strategy,
# BATCH_SIZE, GCS_DS_PATH, load_dir, df and label_cols for the cells below.
COMPETITION_NAME = "siimcovid19-600-study-only-jpg-extend"
# DATASET selects how image paths are built in the training loop.
DATASET=2
strategy = auto_select_accelerator()
# Global batch size: 4 samples per replica.
BATCH_SIZE = 4 * strategy.num_replicas_in_sync
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
# The study-level table is only read to obtain the label column names
# (columns at positions 1..4).
label_df = pd.read_csv('../input/siim-covid19-detection/train_study_level.csv')
label_cols = label_df.columns[1:5]

df = pd.read_csv('../input/siimcovid19-600-study-only-jpg-extend/df.csv').drop(['Unnamed: 0'], axis=1)
# Strip the ".dcm.jpg" suffix from every id in one vectorised, literal
# (non-regex) replacement. The former row-by-row `df['id'].iloc[i] = ...` was a
# chained assignment, which pandas warns about and which does not modify `df`
# when copy-on-write is enabled.
df['id'] = df['id'].str.replace(".dcm.jpg", "", regex=False)
# NOTE(review): unlike the other dataset cells, no 'fold' column is created
# here, yet the training loop filters on df['fold'] -- presumably df.csv
# already provides it; confirm.

In [None]:
# Learning rate schedule for TPU, GPU and CPU.
# Using an LR ramp up because fine-tuning a pre-trained model.
# Starting with a high LR would break the pre-trained weights.

# Candidate square image sizes; IMS is the index of the one in use
# (presumably the native EfficientNet B0..B7 resolutions -- TODO confirm).
IMSIZE = (224, 240, 260, 300, 380, 456, 528, 600)
IMS = 7
# Batch size assumed by transform2 (cutmix/mixup); kept equal to BATCH_SIZE.
AUG_BATCH = BATCH_SIZE
# [height, width] read by transform / batch_mixup / batch_cutmix / transform2.
IMAGE_SIZE = [IMSIZE[IMS], IMSIZE[IMS]]
# NOTE: the training loop below calls fit() with epochs=100 and does not pass
# lr_callback, so EPOCHS and the LR_* values only drive the schedule plot.
EPOCHS=20
# Schedule shape used by lrfn: linear ramp from LR_START to LR_MAX over
# LR_RAMPUP_EPOCHS, hold for LR_SUSTAIN_EPOCHS, then exponential decay
# (factor LR_EXP_DECAY per epoch) towards LR_MIN.
LR_START =  0.00001
# The peak learning rate scales with the number of replicas.
LR_MAX =  0.00005 * strategy.num_replicas_in_sync
LR_MIN = 0.00001
LR_RAMPUP_EPOCHS = 5
LR_SUSTAIN_EPOCHS = 0
LR_EXP_DECAY = .8

def lrfn(epoch):
    """Return the learning rate for the given epoch index.

    Three phases, driven by the module-level ``LR_*`` constants:
      1. linear ramp from ``LR_START`` to ``LR_MAX`` while
         ``epoch < LR_RAMPUP_EPOCHS``;
      2. constant ``LR_MAX`` for the next ``LR_SUSTAIN_EPOCHS`` epochs;
      3. exponential decay towards ``LR_MIN`` afterwards.
    """
    if epoch < LR_RAMPUP_EPOCHS:
        slope = (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS
        return slope * epoch + LR_START

    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX

    # Number of epochs spent in the decay phase so far.
    decay_steps = epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS
    return (LR_MAX - LR_MIN) * LR_EXP_DECAY**decay_steps + LR_MIN
    
# Keras callback wrapping lrfn.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = True)

# Plot the schedule over at least 20 epochs and print its start, peak and end.
rng = list(range(max(20, EPOCHS)))
y = list(map(lrfn, rng))
plt.plot(rng, y)
print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(y[0], max(y), y[-1]))

In [None]:
%%time

# Out-of-fold results, one entry per fold, concatenated after the loop.
all_labels = []
all_prob = []
all_pred = []

# Cross-validation: for each of the 5 folds a fresh model is built, trained on
# the other folds and evaluated on the held-out one.
for i in range(5):

    # Rows of the held-out fold and of the remaining folds.
    fold_valid_df = df[df['fold'] == i]
    fold_train_df = df[df['fold'] != i]

    # Image location and format depend on which dataset cell was run; the
    # decoders below use the same extension as the paths.
    if DATASET==1:
        image_ext = 'png'
        valid_paths = GCS_DS_PATH + '/study/' + fold_valid_df['id'] + '.png' #"/train/"
        train_paths = GCS_DS_PATH + '/study/' + fold_train_df['id'] + '.png' #"/train/"

    elif DATASET==2:
        image_ext = 'jpg'
        valid_paths = GCS_DS_PATH + '/test/' + fold_valid_df['id'] + '.jpg' #"/train/"
        train_paths = GCS_DS_PATH + '/test/' + fold_train_df['id'] + '.jpg' #"/train/"

    valid_labels = tf.cast(fold_valid_df[label_cols].values, tf.float32)
    train_labels = tf.cast(fold_train_df[label_cols].values, tf.float32)

    decoder = build_decoder(with_labels=True,
                            target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext=image_ext, aug=True)

    val_decoder = build_decoder(with_labels=True,
                            target_size=(IMSIZE[IMS], IMSIZE[IMS]), ext=image_ext, aug=False)

    # Not used in this cell; kept because it is a notebook-level name.
    test_decoder = build_decoder(with_labels=False, target_size=(IMSIZE[IMS], IMSIZE[IMS]),ext=image_ext)

    # Training data uses build_dataset's defaults: cached, flip-augmented,
    # repeated and shuffled.
    train_dataset = build_dataset(
        train_paths, train_labels, bsize=BATCH_SIZE, decode_fn=decoder
    )

    # Validation data is a single ordered pass so that predictions line up
    # with valid_labels.
    valid_dataset = build_dataset(
        valid_paths, valid_labels, bsize=BATCH_SIZE, decode_fn=val_decoder,
        repeat=False, shuffle=False, augment=False
    )

    # Number of output units: the label width, or 1 for rank-1 labels.
    # (Explicit rank check instead of a bare `except:`.)
    n_labels = train_labels.shape[1] if len(train_labels.shape) > 1 else 1

    with strategy.scope():
        input_tensor = tf.keras.layers.Input(shape=(IMSIZE[IMS], IMSIZE[IMS], 3))
        base_model = efn.EfficientNetB7(input_tensor=input_tensor,
                                        weights='noisy-student',
                                        include_top=False)
        bm_output = base_model.output

        # Classification head: pooled features -> dropout -> softmax.
        x = tf.keras.layers.GlobalAveragePooling2D()(bm_output)
        x = tf.keras.layers.Dropout(0.5)(x)
        output = tf.keras.layers.Dense(n_labels, activation='softmax', dtype='float32')(x)

        model = tf.keras.models.Model(inputs=input_tensor, outputs=output)

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
            loss='categorical_crossentropy',
            metrics=['categorical_accuracy', tf.keras.metrics.AUC(name='auc', multi_label=True)]
        )

    # The training dataset repeats forever, so the epoch length is set here.
    steps_per_epoch = train_paths.shape[0] // BATCH_SIZE

    # save_best_only=False with a fixed file name: model{i}.h5 is overwritten
    # on every save.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f'model{i}.h5', save_best_only=False,
        monitor='val_loss', mode='min') # val_categorical_accuracy, max

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=15, verbose=1, mode='min'
    )

    lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", patience=5, min_lr=1e-6, mode='min')

    print(f'######## Start of {i+1} Fold ########')
    history = model.fit(
        train_dataset,
        #epochs=EPOCHS,
        epochs=100,
        verbose=2,                        # lr_callback, early_stopping
        #callbacks=[checkpoint, lr_callback],
        callbacks=[checkpoint, lr_reducer, early_stopping],
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_dataset)

    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(f'history{i}.csv')
    print(f'######## End of {i+1} Fold ########')

    # Out-of-fold predictions come from the in-memory model as it is when
    # fit() returns; no checkpoint is reloaded.
    prob = model.predict(valid_dataset, verbose=1)
    all_labels.append(np.argmax(valid_labels, axis=-1))
    all_prob.append(prob)
    all_pred.append(np.argmax(prob, axis=-1))

# Stack the per-fold results into single arrays covering every validated row.
cm_correct_labels = np.concatenate(all_labels)
cm_probabilities = np.concatenate(all_prob)
cm_predictions = np.concatenate(all_pred)

In [None]:
# Out-of-fold metrics over the concatenated predictions of all folds.
# F1, precision and recall are macro-averaged over the n_labels classes.
# cmat = confusion_matrix(cm_correct_labels, cm_predictions, labels=range(n_labels))
_macro_kwargs = {'labels': range(n_labels), 'average': 'macro'}
score = f1_score(cm_correct_labels, cm_predictions, **_macro_kwargs)
precision = precision_score(cm_correct_labels, cm_predictions, **_macro_kwargs)
recall = recall_score(cm_correct_labels, cm_predictions, **_macro_kwargs)
acc = accuracy_score(cm_correct_labels, cm_predictions)
# display_confusion_matrix(cmat, score, precision, recall)
print('accuracy: {:.3f}, f1 score: {:.3f}, precision: {:.3f}, recall: {:.3f}'.format(acc, score, precision, recall))
print()