<a href="https://colab.research.google.com/github/utsavnandi/Kaggle-SIIM-ISIC-Melanoma-Classification/blob/master/TPU_SIIM_ISIC_Melanoma_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## One-time


In [1]:
# import os
# assert os.environ['COLAB_TPU_ADDR']
# VERSION = "nightly"  #@param ["1.5" , "20200516", "nightly"]
# !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
# !python pytorch-xla-env-setup.py --version $VERSION # --apt-packages libomp5 libopenblas-dev


In [2]:
# %%time
# !pip uninstall kaggle -y
# !pip install kaggle==1.5.6 -q
# !pip install -U catalyst -q
# !pip install -U git+https://github.com/albu/albumentations -q
# !pip install -U git+https://github.com/rwightman/pytorch-image-models -q
# !pip install -U git+https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer -q

# !mkdir ~/.kaggle/
# !cp ./kaggle.json  ~/.kaggle/kaggle.json
# !chmod 600 ~/.kaggle/kaggle.json
# !kaggle datasets download -d shonenkov/melanoma-merged-external-data-512x512-jpeg
# !unzip melanoma-merged-external-data-512x512-jpeg.zip -d ./data/
# !rm melanoma-merged-external-data-512x512-jpeg.zip
# !kaggle competitions download siim-isic-melanoma-classification -f sample_submission.csv
# !kaggle competitions download siim-isic-melanoma-classification -f test.csv
# !kaggle competitions download siim-isic-melanoma-classification -f train.csv
# !unzip train.csv -d ./data/
# !mv ./test.csv ./data/
# !mv ./sample_submission.csv ./data/
# !rm train.csv.zip
# !mkdir ./logs/

## Setup

In [3]:
import gc
import time
import datetime
import random
import warnings
warnings.simplefilter("ignore")
#os.environ['XLA_USE_BF16'] = "0"

import numpy as np
import cv2
import pandas as pd

#from google.colab import auth
#from google.cloud import storage

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms, models

import torch_xla
import torch_xla.core.xla_model as xm
import torch_xla.utils.serialization as xser
import torch_xla.debug.metrics as met
import torch_xla.distributed.parallel_loader as pl
import torch_xla.distributed.xla_multiprocessing as xmp
import torch_xla.utils.utils as xu
from torch.utils.data.distributed import DistributedSampler

from ranger import Ranger
from catalyst.data.sampler import DistributedSamplerWrapper, BalanceClassSampler
import timm

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    xm.set_rng_state(seed, device=xm.xla_device())



numpy.ufunc size changed, may indicate binary incompatibility. Expected 192 from C header, got 216 from PyObject



In [4]:
DATA_DIR = '/content/data/'

In [5]:
df_train = pd.read_csv(DATA_DIR+'folds.csv')
df_test = pd.read_csv(DATA_DIR+'test.csv').rename(columns={'image_name':'image_id'})
sample_submission = pd.read_csv(DATA_DIR+'sample_submission.csv')

In [6]:
df_train['fold'].value_counts()

0    12219
1    12106
2    12072
4    12048
3    12042
Name: fold, dtype: int64

In [7]:
fold_no = 1
X_train = df_train[df_train['fold'] != fold_no][[col for col in df_train.columns if col != 'target']]
y_train = df_train[df_train['fold'] != fold_no][[col for col in df_train.columns if col == 'target']]
X_val = df_train[df_train['fold'] == fold_no][[col for col in df_train.columns if col != 'target']]
y_val = df_train[df_train['fold'] == fold_no][[col for col in df_train.columns if col == 'target']]

In [8]:
print('X_train', X_train.shape)
print('y_train', y_train.shape)
print('X_val', X_val.shape)
print('y_val', y_val.shape)

X_train (48381, 8)
y_train (48381, 1)
X_val (12106, 8)
y_val (12106, 1)


In [9]:
print('Train target distribution: ')
print(y_train['target'].value_counts())
print('Val target distribution: ')
print(y_val['target'].value_counts())

Train target distribution: 
0    43997
1     4384
Name: target, dtype: int64
Val target distribution: 
0    11011
1     1095
Name: target, dtype: int64


##  Dataset

In [10]:
class MelanomaDataset(Dataset):

    def __init__(self, df, labels, istrain=False, transforms=None):
        super().__init__()
        self.image_id = df['image_id'].values
        self.transforms = transforms
        self.labels = labels.values
        self.neg_indices = np.where(self.labels==0)[0]
        self.pos_indices = np.where(self.labels==1)[0]
        self.istrain = istrain

    def __len__(self):
        return len(self.image_id)

    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()
        
        image, target = self.load_image(index)
        
        if not self.istrain:
            if self.transforms:
                image = self.transforms(image=image)['image']
                return image, target

        if np.random.random() < 0.5:
            image, target = self.cutmix(image, target)

        if self.transforms:
            image = self.transforms(image=image)['image']

        return image, target

    def load_image(self, index):
        if torch.is_tensor(index):
            index = index.tolist()
        image_name = DATA_DIR + f'512x512-dataset-melanoma/512x512-dataset-melanoma/{self.image_id[index]}.jpg'
        image = cv2.imread(image_name, cv2.IMREAD_COLOR).astype(np.uint8)
        target = self.labels[index].astype(np.float32)
        return image, target

    def cutmix(self, data, target, alpha=1):
        rand_index = self.get_rand_index()
        random_image, random_target = self.load_image(rand_index)
        lam = np.random.beta(alpha, alpha)
        bbx1, bby1, bbx2, bby2 = self.rand_bbox(data.shape, lam)
        data[:, bbx1:bbx2, bby1:bby2] = random_image[ :, bbx1:bbx2, bby1:bby2]
        lam = 1 - ((bbx2 - bbx1) * (bby2 - bby1) / (data.shape[-1] * data.shape[-2]))
        new_target = lam * target + (1-lam) * random_target
        return data, new_target

    def mixup(self, data, target, alpha=1):
        rand_index = self.get_rand_index()
        random_image, random_target = self.load_image(rand_index)
        lam = np.random.beta(alpha, alpha)
        data = data * lam + random_image * (1 - lam)
        new_target = lam * target + (1-lam) * random_target
        return data, new_target

    def get_rand_index(self):
        if random.random()>0.5:
            rand_index = np.random.choice(self.pos_indices)
        else:
            rand_index = np.random.choice(self.neg_indices)
        return rand_index

    def rand_bbox(self, size, lam):
        W = size[1]
        H = size[2]
        cut_rat = np.sqrt(1. - lam)
        cut_w = np.int(W * cut_rat)
        cut_h = np.int(H * cut_rat)
        cx = np.random.randint(W)
        cy = np.random.randint(H)
        bbx1 = np.clip(cx - cut_w // 2, 0, W)
        bby1 = np.clip(cy - cut_h // 2, 0, H)
        bbx2 = np.clip(cx + cut_w // 2, 0, W)
        bby2 = np.clip(cy + cut_h // 2, 0, H)
        return bbx1, bby1, bbx2, bby2

def get_datasets():
    datasets = {}
    datasets['train'] = MelanomaDataset(
        X_train, y_train, istrain=True, transforms=get_train_transforms()
    )
    datasets['valid'] = MelanomaDataset(
        X_val, y_val, istrain=False, transforms=get_valid_transforms()
    )
    return datasets


## Augmentations

In [11]:
#%%writefile augmentations.txt
# Reference IMG_SIZE
# B0    - 224
# B1    - 240
# B2    - 260
# B3    - 300
# B4    - 380
# B5    - 456
# B6    - 520
# B7    - 600
# B8    - 672
# L2 NS - 475
# L2    - 800

# Transforms
IMG_SIZE = 224
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

def get_train_transforms(p=1.0):
    return A.Compose([
        A.Resize(IMG_SIZE, IMG_SIZE, interpolation=2, always_apply=True, p=1),
        A.RandomResizedCrop(
            IMG_SIZE, IMG_SIZE, scale=(0.8, 1.2), interpolation=2, p=0.33
        ),
        A.Flip(p=0.5),
        A.Transpose(p=0.25),
        A.OneOf([
            A.MedianBlur(blur_limit=3, p=0.5),
            A.Blur(blur_limit=3, p=0.5),
        ], p=0.5),
        A.ShiftScaleRotate(
            interpolation=2,
            shift_limit=0.0625, scale_limit=0.15, 
            rotate_limit=15, p=0.3
        ),
        A.OneOf([
            A.OpticalDistortion(p=0.3),
            A.GridDistortion(p=.1),
            A.IAAPiecewiseAffine(p=0.3),
        ], p=0.5),
        A.OneOf([
            A.CLAHE(clip_limit=2),
            A.IAASharpen(),
            A.IAAEmboss(),
            A.RandomBrightnessContrast(),            
        ], p=0.5),
        A.HueSaturationValue(
            hue_shift_limit=20, sat_shift_limit=30, 
            val_shift_limit=20, p=0.33
        ),
        A.MultiplicativeNoise(
            multiplier=[0.75, 1.25], 
            elementwise=True, p=0.33
        ),
        A.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ToTensorV2(p=1.0),
    ], p=p)

def get_valid_transforms():
    return A.Compose([
        A.Resize(IMG_SIZE, IMG_SIZE, interpolation=2, always_apply=True, p=1),
        A.Normalize(mean, std, max_pixel_value=255.0, always_apply=True),
        ToTensorV2(p=1.0),
    ])


## Model

In [12]:
def gem(x, p=3, eps=1e-6):
    return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)

class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM,self).__init__()
        self.p = Parameter(torch.ones(1)*p)
        self.eps = eps
    def forward(self, x):
        return gem(x, p=self.p, eps=self.eps)       
    def __repr__(self):
        return self.__class__.__name__ + '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + ', ' + 'eps=' + str(self.eps) + ')'


In [13]:
class EfficientNet(nn.Module):

    def __init__(self, name='tf_efficientnet_b0_ns'):
        super().__init__()
        self.model = timm.create_model(name, pretrained=True)
        self.model.global_pool = GeM()
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(in_features, 1)

    def forward(self, x):
        return self.model(x)

class SEResNext50_32x4d(nn.Module):

    def __init__(self):
        super().__init__()
        self.model = timm.create_model('gluon_seresnext50_32x4d', pretrained=True)
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, 1)

    def forward(self, x):
        return self.model(x)

class SEResNext101_32x4d(nn.Module):

    def __init__(self):
        super().__init__()
        self.model = timm.create_model('gluon_seresnext101_32x4d', pretrained=True)
        in_features = self.model.fc.in_features
        self.model.fc = nn.Linear(in_features, 1)

    def forward(self, x):
        return self.model(x)


## Custom Losses

In [14]:
class FocalLoss(nn.Module):
    def __init__(self, alpha=1, gamma=2, logits=True, reduce=True):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.logits = logits
        self.reduce = reduce

    def forward(self, inputs, targets):
        if self.logits:
            BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        else:
            BCE_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduce:
            return torch.mean(F_loss)
        else:
            return F_loss

def ohem_loss(cls_pred, cls_target, rate):
    ohem_cls_loss = F.binary_cross_entropy_with_logits(cls_pred, cls_target, reduction='none')
    batch_size = cls_pred.size(0)
    sorted_ohem_loss, idx = torch.sort(ohem_cls_loss, descending=True)
    keep_num = min(sorted_ohem_loss.size()[0], int(batch_size*rate))
    if keep_num < sorted_ohem_loss.size()[0]:
        keep_idx_cuda = idx[:keep_num]
        ohem_cls_loss = ohem_cls_loss[keep_idx_cuda]
    cls_loss = ohem_cls_loss.sum() / keep_num
    return cls_loss

def bce_criterion(y_pred, y_true):
    return nn.BCEWithLogitsLoss()(y_pred, y_true)

def focal_criterion(y_pred, y_true):
    return FocalLoss(alpha=(43997/4384))(y_pred, y_true)


## Metric

In [15]:
class RocAucMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.y_true = np.array([0,1])
        self.y_pred = np.array([0.5,0.5])
        self.score = 0

    def update(self, y_true, y_pred):
        y_true = y_true.cpu().numpy()
        y_pred = torch.flatten(torch.sigmoid(y_pred)).data.cpu().numpy()
        self.y_true = np.append(self.y_true, y_true)
        self.y_pred = np.append(self.y_pred, y_pred)
        self.score = roc_auc_score(self.y_true, self.y_pred)

    @property
    def avg(self):
        return self.score


## Train script

In [16]:
# optimizer = Ranger(
#     model.parameters(),
#     lr=FLAGS['learning_rate'] * xm.xrt_world_size(), 
#     alpha=0.5, k=6, N_sma_threshhold=5,
#     weight_decay=FLAGS['weight_decay']
# )

In [17]:
def train_model(data, fold_no):
    seed_everything(43)

    def get_datasets(data):
        X_train, y_train, X_val, y_val = data
        datasets = {}
        datasets['train'] = MelanomaDataset(
            X_train, y_train, istrain=True, transforms=get_train_transforms()
        )
        datasets['valid'] = MelanomaDataset(
            X_val, y_val, istrain=False, transforms=get_valid_transforms()
        )
        return datasets

    datasets = SERIAL_EXEC.run(
        lambda: get_datasets(data)
    )

    labels_vcount = y_train['target'].value_counts()
    class_counts = [labels_vcount[0].astype(np.float32), labels_vcount[1].astype(np.float32)]
    num_samples = sum(class_counts)
    class_weights = [num_samples/class_counts[i] for i in range(len(class_counts))]
    weights = [class_weights[y_train['target'].values[i]] for i in range(int(num_samples))]
    wrsampler = WeightedRandomSampler(
        torch.DoubleTensor(weights), int(num_samples)
    )
    #BalanceClassSampler(labels=y_train['target'].values, mode="downsampling"),

    train_sampler = DistributedSamplerWrapper(
        sampler=wrsampler,
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=True
    )
    validation_sampler = DistributedSampler(
        datasets['valid'],
        num_replicas=xm.xrt_world_size(),
        rank=xm.get_ordinal(),
        shuffle=False
    )
    train_loader = DataLoader(
        datasets['train'],
        batch_size=FLAGS['batch_size'], 
        num_workers=FLAGS['num_workers'],
        sampler=train_sampler,
        drop_last=True,
    )
    val_loader = DataLoader(
        datasets['valid'],
        batch_size=FLAGS['batch_size'],
        num_workers=FLAGS['num_workers'],
        sampler=validation_sampler,
        drop_last=True
    )

    device = xm.xla_device()
    model = WRAPPED_MODEL.to(device)

    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=FLAGS['learning_rate'] * xm.xrt_world_size(),
        weight_decay=FLAGS['weight_decay']
    )

    criterion = bce_criterion
 
    def train_one_epoch(loader):
        model.train()
        running_loss = 0
        max_idx = 0
        xm.master_print('-'*40)
        xm.master_print('Step\t|\tTime')
        xm.master_print('-'*40)
        for idx, (images, targets) in enumerate(loader):
            optimizer.zero_grad()
            y_pred = model(images.float())
            loss = criterion(y_pred, targets)
            running_loss += float(loss)
            loss.backward()
            xm.optimizer_step(optimizer)
            # xm.mark_step() call everystep for grad accum
            max_idx = float(idx)
            if idx % FLAGS['log_steps'] == 0 and idx !=0:
                xm.master_print('({})\t|\t{}'.format(
                    idx, time.asctime(time.localtime()))
                )
        xm.master_print('-'*40)
        return running_loss/(max_idx+1)

    def val_one_epoch(loader):
        model.eval()
        running_loss = 0
        max_idx = 0
        roc_auc_scores = RocAucMeter()
        with torch.no_grad():
            for idx, (images, targets) in enumerate(loader):
                y_pred = model(images.float())
                loss = criterion(y_pred, targets)
                running_loss += float(loss)
                max_idx = float(idx)
                roc_auc_scores.update(targets, y_pred)
        score = roc_auc_scores.avg
        return running_loss/(max_idx+1), score

    def _reduce_fn(x):
        return np.array(x).mean()

    best_score = 0
    xm.master_print('='*26 + f'Fold #{fold_no} started' + '='*27)
    for epoch in range(0, FLAGS['num_epochs']):
        xm.master_print('-'*26 + f'Epoch #{epoch+1} started' + '-'*26)
        xm.master_print(f'Epoch start {time.asctime(time.localtime())}')
        train_start = time.time()
        para_loader = pl.ParallelLoader(train_loader, [device])
        train_loss = train_one_epoch(para_loader.per_device_loader(device))
        xm.master_print(f"finished training epoch {epoch+1}")
        elapsed_time = int(time.time() - train_start)
        xm.master_print(
            f'elapsed time: {(elapsed_time)//60}mins {(elapsed_time)%60}s'
        )
        reduced_loss = xm.mesh_reduce('train_loss', train_loss, _reduce_fn)
        xm.master_print(f"reduced loss {reduced_loss:.5f}")
        if (epoch+1) % FLAGS['val_freq'] == 0:
            val_start = time.time()
            para_loader = pl.ParallelLoader(val_loader, [device])
            val_loss, auc_score = val_one_epoch(para_loader.per_device_loader(device))    
            xm.master_print(f"finished validating epoch {epoch+1}")
            reduced_val_loss = xm.mesh_reduce('val_loss', val_loss, _reduce_fn)
            reduced_auc_score = xm.mesh_reduce('auc_score', auc_score, _reduce_fn)
            xm.master_print(f"reduced val loss {reduced_val_loss:.5f}")
            xm.master_print(f"reduced auc score {reduced_auc_score:.5f}")
            val_elapsed_time = int(time.time() - val_start)
            xm.master_print(
                f'elapsed time: {(val_elapsed_time)//60}mins {(val_elapsed_time)%60}s'
            )
            if best_score < reduced_auc_score:
                best_score = reduced_auc_score
                file_name = f"./{FLAGS['exp_name']}_fold_{fold_no+1}_epoch_{epoch+1}_auc_{reduced_auc_score:.5f}.pth"
                xm.save(model.state_dict(), file_name)
                xm.master_print(f'saved model...')
                xm.master_print(f'new best score: {best_score:.5f}')

        xm.master_print(f'Epoch end {time.asctime(time.localtime())}')
        xm.master_print('-'*27 + f'Epoch #{epoch+1} ended' + '-'*27)

    xm.master_print('='*27 + f'Fold #{fold_no} ended' + '='*27)


In [18]:
def _mp_fn(rank, flags, data, fold_no):
    global FLAGS
    global WRAPPED_MODEL
    global SERIAL_EXEC
    FLAGS = flags
    torch.set_default_tensor_type('torch.FloatTensor')
    train_model(data, fold_no)


## Train

In [None]:
FLAGS = {}
FLAGS['batch_size'] = 32
FLAGS['num_workers'] = 8
FLAGS['learning_rate'] = 2e-4
FLAGS['num_epochs'] = 10
FLAGS['weight_decay'] = 1e-4
FLAGS['log_steps'] = 45
FLAGS['img_size'] = IMG_SIZE
FLAGS['loss'] = 'BCE'
FLAGS['optimizer'] = 'AdamW'
FLAGS['exp_name'] = 'enet_b0'
FLAGS['fold'] = 1
FLAGS['val_freq'] = 1
FLAGS['num_cores'] = 8

WRAPPED_MODEL = xmp.MpModelWrapper(EfficientNet('tf_efficientnet_b0_ns'))
SERIAL_EXEC = xmp.MpSerialExecutor()

for fold_no in range(0, 6):
    X_train = df_train[df_train['fold'] != fold_no][[col for col in df_train.columns if col != 'target']]
    y_train = df_train[df_train['fold'] != fold_no][[col for col in df_train.columns if col == 'target']]
    X_val = df_train[df_train['fold'] == fold_no][[col for col in df_train.columns if col != 'target']]
    y_val = df_train[df_train['fold'] == fold_no][[col for col in df_train.columns if col == 'target']]
    data = X_train, y_train, X_val, y_val
    xmp.spawn(
        _mp_fn, args=(FLAGS, data, fold_no), 
        nprocs=FLAGS['num_cores'], start_method='fork'
    )


--------------------------Epoch #1 started--------------------------
Epoch start Fri Jun 26 04:12:44 2020
----------------------------------------
Step	|	Time
----------------------------------------
(45)	|	Fri Jun 26 04:15:06 2020
(90)	|	Fri Jun 26 04:16:12 2020
(135)	|	Fri Jun 26 04:17:18 2020
(180)	|	Fri Jun 26 04:18:25 2020
----------------------------------------
finished training epoch 1
elapsed time: 5mins 45s
reduced loss 0.50609
finished validating epoch 1
reduced val loss 0.29496
reduced auc score 0.91307
elapsed time: 1mins 7s
saved model...
new best score: 0.913065164865634
Epoch end Fri Jun 26 04:19:37 2020
---------------------------Epoch #1 ended---------------------------
--------------------------Epoch #2 started--------------------------
Epoch start Fri Jun 26 04:19:37 2020
----------------------------------------
Step	|	Time
----------------------------------------
(45)	|	Fri Jun 26 04:21:00 2020
(90)	|	Fri Jun 26 04:22:04 2020
(135)	|	Fri Jun 26 04:23:10 2020
(180)	