* [SIIM-ISIC Melanoma Classification](https://www.kaggle.com/c/siim-isic-melanoma-classification)
* [Melanoma. Pytorch starter. EfficientNet](https://www.kaggle.com/nroman/melanoma-pytorch-starter-efficientnet)
* [Melanoma. Pytorch starter. EfficientNet (copy)](https://www.kaggle.com/valentinaliferov/melanoma-pytorch-starter-efficientnet)
* [JPEG Melanoma 256x256](https://www.kaggle.com/cdeotte/jpeg-melanoma-256x256)
* [melanoma external malignant 256](https://www.kaggle.com/nroman/melanoma-external-malignant-256)

There is no sigmoid in `forward` because we are going to use  
**BCEWithLogitsLoss**, which applies the sigmoid for us when calculating the loss.  

In [10]:
!pip3 install -q --upgrade pip gdown onnxruntime efficientnet-pytorch

In [None]:
!rm -rf sample_data
!gdown --id 134YXo0-sJyKO6UqPto8nWFuBmfp3fzx0
!unzip -qo melanoma_external_malignant_256.zip

In [2]:
import gc
import os
import time
import random
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image, ImageOps

In [3]:
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import StratifiedKFold, GroupKFold

In [4]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [5]:
import onnxruntime
from efficientnet_pytorch import EfficientNet

In [31]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, the hash seed environment variable, NumPy and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # also covers multi-GPU setups
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different convolution
    # algorithms from run to run, which defeats the determinism requested above.
    torch.backends.cudnn.benchmark = False

seed_everything(47)

#### NN Architecture

In [7]:
class Net(nn.Module):
    """Two-branch classifier: an EfficientNet-B1 image branch plus an MLP over metadata.

    The image branch emits 500 features, the metadata branch 250; both are
    concatenated and mapped to a single raw logit (no sigmoid is applied here).
    """

    def __init__(self, n_meta_features):
        """Build both branches and the final linear head.

        Args:
            n_meta_features: Width of the metadata vector fed to the MLP branch.
        """
        super().__init__()

        # Image branch: pretrained backbone whose classifier is replaced by a
        # 1280 -> 500 projection.
        backbone = EfficientNet.from_pretrained('efficientnet-b1')
        backbone._fc = nn.Linear(in_features=1280, out_features=500, bias=True)
        # NOTE(review): presumably required so the model can be exported to ONNX
        # later in this notebook -- confirm against efficientnet-pytorch docs.
        backbone.set_swish(memory_efficient=False)
        self.arch = backbone

        # Metadata branch: n_meta_features -> 500 -> 250.
        meta_layers = [
            nn.Linear(n_meta_features, 500),
            nn.BatchNorm1d(500),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(500, 250),
            nn.BatchNorm1d(250),
            nn.ReLU(),
            nn.Dropout(p=0.2),
        ]
        self.meta = nn.Sequential(*meta_layers)

        # The attribute name (including its spelling) is part of the state_dict
        # keys, so it is kept as-is to stay compatible with saved checkpoints.
        self.ouput = nn.Linear(500 + 250, 1)

    def forward(self, x, meta):
        """Return one raw logit per sample for image batch `x` and metadata `meta`."""
        fused = torch.cat((self.arch(x), self.meta(meta)), dim=1)
        return self.ouput(fused)

In [37]:
# 12 metadata inputs = sex + age + 10 one-hot site indicators (see get_meta_features).
model = Net(n_meta_features=12)

Loaded pretrained weights for efficientnet-b1


#### Saving, Loading, and Checking the Model (Torch)

In [38]:
# Persist only the weights, then rebuild the network from them on the CPU.
path = 'model.pt'
torch.save(model.state_dict(), path)

dev = torch.device('cpu')
state = torch.load(path, map_location=dev)
model = Net(n_meta_features=12)
model.eval()  # inference mode: disables dropout, uses BatchNorm running stats
model.load_state_dict(state)

# Smoke test on one random sample; the cell displays the predicted probability.
meta = torch.rand(1, 12)
x = torch.rand(1, 3, 256, 256)
torch.sigmoid(model(x, meta)).item()

Loaded pretrained weights for efficientnet-b1


0.48173946142196655

#### Saving, Loading, and Checking the Model (ONNX)

In [45]:
# Export the network to ONNX by tracing it with one dummy (image, metadata) pair.
path = 'model.onnx'
meta = torch.rand(1, 12)
x = torch.rand(1, 3, 256, 256)
torch.onnx.export(model, (x, meta), path, input_names=['x', 'meta'])


def sigmoid(x):
    """Logistic function, used to turn the exported model's logits into probabilities."""
    return 1 / (1 + np.exp(-x))


# Run the exported graph with onnxruntime on random float32 inputs;
# the cell displays the resulting probability.
onnx_model = onnxruntime.InferenceSession(path)
meta = np.random.randn(1, 12).astype(np.float32)
x = np.random.randn(1, 3, 256, 256).astype(np.float32)
inps = {'x': x, 'meta': meta}
outs = onnx_model.run(None, inps)
sigmoid(outs[0])[0][0]

0.46756554

#### Datasets and Dataloaders

In [5]:
def pad(im):
    """Compute the border widths that make `im` square.

    Args:
        im: Object exposing a `size` attribute of the form (width, height).

    Returns:
        A (left, top, right, bottom) tuple of border widths. When the missing
        amount is odd, the extra pixel goes to the left / top side.
    """
    width, height = im.size
    side = np.max([width, height])
    horizontal, horizontal_extra = divmod(side - width, 2)
    vertical, vertical_extra = divmod(side - height, 2)
    return (horizontal + horizontal_extra, vertical + vertical_extra, horizontal, vertical)

def norm(x):
    """Standardise the last axis of `x` with fixed three-channel mean and std values.

    Args:
        x: Array whose trailing axis broadcasts against three channel values.

    Returns:
        `(x - mean) / std`, computed channel-wise.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    centered = x - channel_mean
    return centered / channel_std

def load_image(path, size, transforms):
    """Load an image file as a normalised, channel-first float32 array.

    The image is shrunk to fit inside a `size` x `size` box (aspect ratio
    preserved), padded to a square, optionally augmented, scaled to [0, 1]
    and standardised with `norm`.

    Args:
        path: Path of the image file to open.
        size: Maximum side length passed to `Image.thumbnail`.
        transforms: Optional callable applied to the padded PIL image;
            skipped when falsy.

    Returns:
        A float32 array laid out as (channels, height, width).
    """
    # The context manager closes the underlying file handle deterministically.
    with Image.open(path) as source:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        source.thumbnail((size, size), Image.LANCZOS)
        # Force three channels so grayscale / RGBA / palette files do not break
        # the per-channel normalisation and the transpose below.
        im = source.convert('RGB')
    im = ImageOps.expand(im, pad(im))
    if transforms:
        im = transforms(im)
    x = np.array(im) / 255.
    x = np.float32(norm(x))
    return x.transpose(2, 0, 1)

def load_data(path):
    """Read a metadata CSV and tidy the columns used as model inputs.

    Renames `age_approx` to `age` and `anatom_site_general_challenge` to
    `site`, replaces spaces and slashes in `site` with underscores, and fills
    missing `sex` / `site` values with the string 'NaN' and missing `age`
    values with 0.

    Args:
        path: Anything `pandas.read_csv` accepts.

    Returns:
        The cleaned DataFrame.
    """
    renames = {'age_approx': 'age', 'anatom_site_general_challenge': 'site'}
    df = pd.read_csv(path).rename(columns=renames)

    for separator in (' ', '/'):
        df['site'] = df['site'].str.replace(separator, '_')

    for column in ('sex', 'site'):
        df[column] = df[column].fillna('NaN')
    df['age'] = df['age'].fillna(0)
    return df

def get_meta_features(sex, age, site):
    """Encode one sample's metadata as a 12-element numeric vector.

    Layout: [sex code, age / 90, one-hot over the 10 known site labels].

    Args:
        sex: 'male', 'female', 'unknown', or 'NaN' (the placeholder that
            `load_data` writes for missing values).
        age: Age value; divided by a fixed maximum of 90.
        site: Site label; a label outside the known list yields an all-zero
            one-hot section.

    Returns:
        A NumPy array of length 12.

    Raises:
        KeyError: If `sex` is not one of the accepted labels.
    """
    sites = (
        'anterior_torso', 'head_neck', 'lateral_torso',
        'lower_extremity', 'oral_genital', 'palms_soles',
        'posterior_torso', 'torso', 'upper_extremity', 'NaN')
    max_age = 90
    # 'NaN' is what load_data substitutes for a missing sex; without this entry
    # every such row raised KeyError. It shares the code used for 'unknown'.
    sex_codes = {'male': 1, 'female': 0, 'unknown': -1, 'NaN': -1}

    features = [sex_codes[sex], age / max_age]
    features.extend(int(known == site) for known in sites)
    return np.array(features)

class MelanomaDataset(Dataset):
    """Dataset yielding (image, metadata[, target]) tuples from a DataFrame.

    Each row supplies `image_name`, `sex`, `age` and `site`; the image is read
    from `<path>/<image_name>.jpg`. The target is included only when the
    frame has a `target` column.
    """

    def __init__(self, df, path, size, transforms):
        """Store the frame, the image directory, the image size and the augmentations."""
        super().__init__()
        self.df, self.path = df, path
        self.size, self.transforms = size, transforms

    def __len__(self):
        """Number of rows in the underlying frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return the sample at positional `index`."""
        record = self.df.iloc[index]
        image_file = f'{self.path}/{record.image_name}.jpg'
        pixels = load_image(image_file, self.size, self.transforms)
        meta = get_meta_features(record.sex, record.age, record.site)
        if 'target' in self.df.columns:
            return (pixels, meta, record.target)
        return (pixels, meta)

In [9]:
# max_age = train_df['age'].max()
# sites = sorted(train_df['site'].unique().tolist())
# sites.remove('NaN'); sites.append('NaN');

# Training-time augmentations; load_image applies them to the padded PIL image.
_train_augmentations = [
    transforms.RandomResizedCrop(size=256, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ColorJitter(brightness=32. / 255., saturation=0.5),
]
train_transforms = transforms.Compose(_train_augmentations)

# train_dataset = MelanomaDataset(train_df, 'train', 256, train_transforms)
# test_dataset = MelanomaDataset(test_df, 'test', 256, None)

# train_labels = train_df['target'].to_list()
# train_counts = np.unique(train_labels, return_counts=True)[1]
# train_weights = torch.Tensor([1. / train_counts[c] for c in train_labels]).double()
# train_sampler = torch.utils.data.WeightedRandomSampler(train_weights, len(train_dataset))
# train_loader = DataLoader(train_dataset, batch_size=32, num_workers=2, sampler=train_sampler)
# test_loader = DataLoader(test_dataset, batch_size=32, num_workers=2, shuffle=False)

In [24]:
# Read and clean the metadata tables (column renames and missing-value fills
# are done inside load_data).
test_df = load_data('test.csv')
train_df = load_data('train.csv')

In [26]:
# NOTE: despite the name `skf`, this is a GroupKFold splitter (5 folds),
# not a StratifiedKFold.
skf = GroupKFold(n_splits=5)
# The test dataset is built without augmentations (transforms=None).
test = MelanomaDataset(test_df, 'test', 256, None)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### Training the Model

In [None]:
epochs = 1  # Number of epochs to run
model_path = 'trained_model.pt'  # Path and filename the best model of the current fold is saved to
es_patience = 3  # Early stopping patience: how many epochs without improvement to wait
TTA = 3  # Test Time Augmentation rounds

oof = np.zeros((len(train_df), 1))  # Out-of-fold predictions
preds = torch.zeros((len(test), 1), dtype=torch.float32, device=device)  # Accumulated test-set predictions


def _predict(model, loader, n_samples):
    """Return sigmoid probabilities, shape (n_samples, 1), for every sample in `loader`.

    Accepts loaders that yield (x, meta) or (x, meta, y); labels are ignored.
    The loader must not shuffle and the model must already be in eval mode.
    Rows are written at a running offset, so a final batch that is smaller
    than the others still lands in the right place.
    """
    out = torch.zeros((n_samples, 1), dtype=torch.float32, device=device)
    start = 0
    with torch.no_grad():  # inference only, no gradients needed
        for batch in loader:
            x_b = batch[0].to(device, dtype=torch.float32)
            meta_b = batch[1].to(device, dtype=torch.float32)
            stop = start + x_b.shape[0]
            out[start:stop] = torch.sigmoid(model(x_b, meta_b))
            start = stop
    return out


# Folds are grouped by patient (GroupKFold): all images of one patient fall in
# the same fold. `X` is only a placeholder of the right length; `y` is passed
# for API symmetry and the split itself is driven by `groups`.
for fold, (train_idx, val_idx) in enumerate(skf.split(X=np.zeros(len(train_df)), y=train_df['target'], groups=train_df['patient_id'].tolist()), 1):
    print('=' * 20, 'Fold', fold, '=' * 20)

    best_val = None  # Best validation score within this fold
    patience = es_patience  # Current patience counter
    model = Net(n_meta_features=12)  # New model for each fold
    model = model.to(device)

    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    scheduler = ReduceLROnPlateau(optimizer=optim, mode='max', patience=1, verbose=True, factor=0.2)
    criterion = nn.BCEWithLogitsLoss()

    df = train_df.iloc[train_idx].reset_index(drop=True)
    train = MelanomaDataset(df, 'train', 256, train_transforms)

    df = train_df.iloc[val_idx].reset_index(drop=True)
    val = MelanomaDataset(df, 'train', 256, None)
    val_targets = train_df.iloc[val_idx]['target'].values

    train_loader = DataLoader(dataset=train, batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(dataset=val, batch_size=16, shuffle=False, num_workers=2)
    test_loader = DataLoader(dataset=test, batch_size=16, shuffle=False, num_workers=2)

    for epoch in range(epochs):
        start_time = time.time()
        correct = 0
        epoch_loss = 0
        model.train()

        for x, meta, y in train_loader:
            # The loader already yields tensors; move/cast them instead of
            # re-wrapping them with torch.tensor().
            x = x.to(device, dtype=torch.float32)
            meta = meta.to(device, dtype=torch.float32)
            y = y.to(device, dtype=torch.float32).unsqueeze(1)
            optim.zero_grad()
            z = model(x, meta)
            loss = criterion(z, y)
            loss.backward()
            optim.step()
            pred = torch.round(torch.sigmoid(z))  # round off sigmoid to obtain predictions
            correct += (pred == y).sum().item()  # tracking number of correctly predicted samples
            epoch_loss += loss.item()
        train_acc = correct / len(train_idx)

        model.eval()  # switch model to the evaluation mode
        val_preds = _predict(model, val_loader, len(val_idx)).cpu().numpy().ravel()
        val_acc = accuracy_score(val_targets, np.round(val_preds))
        val_roc = roc_auc_score(val_targets, val_preds)

        print('Epoch {:03}: | Loss: {:.3f} | Train acc: {:.3f} | Val acc: {:.3f} | Val roc_auc: {:.3f} | Training time: {}'.format(
            epoch + 1,
            epoch_loss,
            train_acc,
            val_acc,
            val_roc,
            str(datetime.timedelta(seconds=time.time() - start_time))[:7]))

        scheduler.step(val_roc)
        # `is None` rather than truthiness: a legitimate score of 0.0 must not
        # be mistaken for "no score recorded yet".
        if best_val is None or val_roc >= best_val:
            best_val = val_roc
            patience = es_patience  # Resetting patience since we have a new best validation roc_auc
            torch.save(model.state_dict(), model_path)  # Saving current best model
        else:
            patience -= 1
            if patience == 0:
                print('Early stopping. Best Val roc_auc: {:.3f}'.format(best_val))
                break

    # Reload the best checkpoint of this fold before producing final predictions.
    model.load_state_dict(torch.load(model_path))
    model.eval()  # switch model to the evaluation mode

    # Predicting on validation set once again to obtain data for OOF
    oof[val_idx] = _predict(model, val_loader, len(val_idx)).cpu().numpy()

    # Predicting on test set. The TTA rounds are averaged in a per-fold buffer
    # first, so dividing by TTA never rescales what earlier folds contributed.
    # NOTE(review): `test` is built with transforms=None, so every TTA round
    # produces the same predictions; pass augmentations to the test dataset if
    # real test-time augmentation is intended.
    fold_preds = torch.zeros_like(preds)
    for _ in range(TTA):
        fold_preds += _predict(model, test_loader, len(test))
    preds += fold_preds / TTA

    del train, val, train_loader, val_loader, x, y
    gc.collect()

preds /= skf.n_splits

In [None]:
# ROC AUC computed over the out-of-fold predictions of all folds together.
oof_auc = roc_auc_score(train_df['target'], oof)
print('OOF: {:.3f}'.format(oof_auc))

In [None]:
# Density plot of the test-set predictions.
test_scores = pd.Series(preds.cpu().numpy().reshape(-1,))
sns.kdeplot(test_scores);

#### Making a submission

In [None]:
# Write the flattened test predictions into the sample submission's `target` column.
flat_preds = preds.cpu().numpy().reshape(-1,)
sub = pd.read_csv('sample_submission.csv').assign(target=flat_preds)
sub.to_csv('submission.csv', index=False)