In [1]:
import os
# Pin GPU selection through environment variables: order devices by PCI bus id
# and expose only device "0". These are set here, before torch is imported in
# the next cell.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="0"
# os.environ['KMP_DUPLICATE_LIB_OK']='True'

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used below.
warnings.filterwarnings(action='ignore')

In [2]:
import random
import time
import datetime
import math
from tqdm import tqdm
from collections import defaultdict

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image, ImageOps, ImageEnhance
import cv2 
import numba.cuda as cuda
from sklearn.model_selection import StratifiedKFold

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset
import torchvision
import torchvision.transforms as transforms
from torch.optim.swa_utils import AveragedModel, SWALR

import albumentations as A
import albumentations.pytorch

import timm

from optimizers import SAM, Lookahead, Ralamb, AdamP
from losses import (bi_tempered_logistic_loss, SmoothCrossEntropyLoss, FocalCosineLoss,
                    SymmetricCrossEntropy, TaylorCrossEntropyLoss)
from augmentations import snapmix, SnapMixLoss, RandomAugMix, GridMask

pd.set_option('display.max_rows', 5)

In [3]:
class CFG :
    """Global experiment configuration, read as class attributes (never instantiated).

    Other cells both read and overwrite these values: the debug cell changes
    ``epochs``/``n_fold``/``save_best`` and the training cell overwrites
    ``model`` and ``batch_size`` for every architecture it trains.
    """
    # Timestamp string captured once, when this class body is executed.
    time = datetime.datetime.now().strftime(format='%Y%m%d_%H%M%S')
    # When True the debug cell shrinks the data frame and the number of epochs.
    debug = False
    
    epochs = 40          # number of passes of the training loop per model
    batch_size = 64      # overwritten per model by the training cell
    device = 'cuda:0'    # device the model, losses and batches are moved to
    verbose = 2          # not read by the code visible in this notebook
    seed = 666           # passed to seed_everything() and used in the checkpoint file name
    n_fold = 5           # not read by the code visible in this notebook
    
    # img_size is only used to derive crop_size: int(586 * 0.875) == 512.
    img_size = 586
    crop_ratio = 0.875
    crop_size = int(img_size*crop_ratio)
    classes = [0,1,2,3,4]   # only len(classes) is used (number of model outputs)
    train_cache = True      # forwarded to CassavaDataset(cache_ds=...) and gates set_use_cache()
    valid_cache = True      # only tested in the post-epoch cache switch of the training cell
    num_workers = 7         # DataLoader workers assigned after the first epoch
    
    # SnapMix: a batch uses snapmix when np.random.rand() > 1 - snapmix_pct.
    snapmix_pct = 0.5
    snapmix_alpha = 5       # passed straight to snapmix()
    # GridMask: p of the OneOf wrapper, plus num_grid / mode of each GridMask.
    gridmask_pct = 0.25
    gridmask_num = 3
    gridmask_mode = 3
    # RandomAugMix arguments: p, severity, width, alpha.
    augmix_pct = 0.5
    augmix_severity = 2
    augmix_width = 3
    augmix_alpha = 0.1
    
    # timm architecture name; overwritten per model by the training cell.
    model = 'tf_efficientnet_b1_ns'
    
    # Optimizer selection for set_optimizer():
    # rmsprop, sgd, adam, adamw, ralamb, sam, adamp
    optimizer = 'adamw'
    lr = 1e-04
    momentum = 0.9          # used by rmsprop, sgd and sam
    eps = 1e-08             # used by adamw
    betas = (0.9, 0.99)     # used by adamw and adamp
    weight_decay = 1e-6     # used by adamw, ralamb and adamp
    amsgrad = True          # used by adamw
    lookahead = False       # wrap the optimizer in Lookahead when True
    swa = False             # read by set_optimizer() only
    
    # LOSS
    # cross_entropy, bi_tempered_logistic, smooth_cross_entropy
    # focal_cosine, symmetric_cross_entropy, taylor_cross_entropy
    loss = 'bi_tempered_logistic'
    # t0 / t1 are passed positionally to bi_tempered_logistic_loss.
    t0 = 0.8
    t1 = 1.2
    # Used by the bi_tempered_logistic and smooth_cross_entropy losses.
    label_smoothing=0.2
    
    # Scheduler selection for set_scheduler(): 'cosine' or 'steplr'
    # (any other value yields no scheduler).
    scheduler = 'cosine'
    cos_t0 = 10             # T_0 of CosineAnnealingWarmRestarts
    min_lr = 1e-6           # eta_min of the cosine schedule
    
    # Evaluated once, here: later changes to CFG.epochs (e.g. by the debug cell)
    # do not propagate. Neither value below is read by the code visible in
    # this notebook.
    early_stop_epochs = epochs
    save_best = True
    
CFG.crop_size

512

In [4]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Sets ``PYTHONHASHSEED`` in the environment and seeds Python's ``random``,
    PyTorch (CPU, current CUDA device, all CUDA devices) and NumPy's legacy
    global generator with the same value.

    The cuDNN determinism/benchmark flags are not touched by this function.

    Args:
        seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same seeding calls, in the same order, expressed as a table of seeders.
    seeders = (
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

seed_everything(CFG.seed)

In [5]:
# Alternative data source (original competition data), kept for reference:
# TRAIN_IMG_PATH = '../input/cassava-leaf-disease-classification/train_images'
# TRAIN_DF = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')

# Active data source: the "merged" dataset.
TRAIN_IMG_PATH = '../input/cassava-leaf-disease-merged/train_images'
TRAIN_DF = pd.read_csv('../input/cassava-leaf-disease-merged/train.csv')
# Full path of every image, built from the `image_id` column; the training cell
# reads this column together with `label`.
TRAIN_DF['path'] = [os.path.join(TRAIN_IMG_PATH, fn) for fn in TRAIN_DF.image_id]

In [6]:
if CFG.debug :
    # Smoke-test settings: a single epoch, three folds, no best-checkpoint
    # saving, and only the first 5% of the rows of the training frame.
    CFG.epochs, CFG.n_fold, CFG.save_best = 1, 3, False
    TRAIN_DF = TRAIN_DF.head(int(0.05*len(TRAIN_DF)))
    print('Debug Mode Activated', len(TRAIN_DF))

# DataSet

In [7]:
class CassavaDataset(Dataset):
    """Image dataset that reads files with OpenCV and can keep decoded images in RAM.

    Caching is controlled by two flags:

    * ``cache_ds``  - when True, every image decoded from disk is stored in an
      index-aligned in-memory cache.
    * ``use_cache`` - when True, ``__getitem__`` serves an image from the cache
      if that index has been cached, and falls back to disk otherwise.

    The cache is keyed by sample index, so it stays correct when the first
    pass visits samples in shuffled order. It lives in the process that calls
    ``__getitem__``: fill it with ``num_workers=0`` and only then switch to
    worker processes. The cache holds the raw decoded images (before
    ``transform``), so memory use grows with the number of cached samples.

    Args:
        file_paths: iterable of image paths.
        labels: iterable of labels, aligned with ``file_paths``.
        transform: optional callable used as ``transform(image=img)['image']``.
        cache_ds: store decoded images in memory as they are read.
        use_cache: serve already-cached images instead of reading from disk.
    """

    def __init__(self, file_paths, labels,
                 transform=None, cache_ds=False, use_cache=False):
        self.file_paths = list(file_paths)
        self.labels = list(labels)
        self.transform = transform
        # Index-aligned caches; ``None`` marks "not cached yet".
        self.cached_images = [None] * len(self.file_paths)
        self.cached_labels = [None] * len(self.file_paths)
        self.cache_ds = cache_ds
        self.use_cache = use_cache

    def __len__(self):
        """Return the number of samples."""
        return len(self.file_paths)

    def set_use_cache(self, use_cache):
        """Enable or disable serving samples from the in-memory cache."""
        self.use_cache = use_cache

    def _load_image(self, idx):
        """Read sample ``idx`` from disk and return it as an RGB array.

        Raises:
            OSError: if OpenCV cannot read the file (``cv2.imread`` returned None).
        """
        file_path = self.file_paths[idx]
        image = cv2.imread(file_path)
        if image is None:
            raise OSError(f'Could not read image: {file_path}')
        # By default OpenCV uses BGR color space for color images,
        # so we need to convert the image to RGB color space.
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``, applying ``transform`` if set."""
        if self.use_cache and self.cached_images[idx] is not None:
            image = self.cached_images[idx]
            label = self.cached_labels[idx]
        else:
            image = self._load_image(idx)
            label = self.labels[idx]
            if self.cache_ds:
                # Store under the sample's own index, never by visit order.
                self.cached_images[idx] = image
                self.cached_labels[idx] = label

        if self.transform:
            augmented = self.transform(image=image)
            image = augmented['image']

        return image, label

## Data Augmentations

### Set Transformations

In [8]:
# Training pipeline: random crop to crop_size, geometric augmentations,
# AugMix, colour jitter, normalisation, then occlusion-style augmentations
# and conversion to a tensor.
train_transform = A.Compose([
    A.RandomResizedCrop(CFG.crop_size, CFG.crop_size),
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=1),
    A.ShiftScaleRotate(p=0.5),
    
    RandomAugMix(
        severity=CFG.augmix_severity,
        width=CFG.augmix_width,
        alpha=CFG.augmix_alpha,
        p=CFG.augmix_pct),
    
    # NOTE(review): limits of 0.2 look very small if these are expressed on the
    # library's integer hue/saturation/value scale - confirm the intended range.
    A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    
    # Normalisation runs before CoarseDropout / GridMask, so those transforms
    # operate on the already-normalised image.
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
    A.CoarseDropout(p=0.5),
#     A.Cutout(p=0.5),
    # Both OneOf candidates are configured identically, so the choice between
    # them makes no difference; only p=CFG.gridmask_pct matters.
    A.OneOf([
        GridMask(num_grid=CFG.gridmask_num,
                 mode=CFG.gridmask_mode, 
                 rotate=15),
        GridMask(num_grid=CFG.gridmask_num,
                 mode=CFG.gridmask_mode,
                 rotate=15),
    ], p=CFG.gridmask_pct),
    A.pytorch.ToTensor(),
])

# Validation pipeline.
# NOTE(review): RandomResizedCrop is a random transform, so validation inputs
# differ between runs; the Resize that follows uses the same target size as
# the crop. The commented-out CenterCrop suggests a deterministic crop was
# considered - confirm which behaviour is intended.
valid_transform = A.Compose([
#     A.CenterCrop(CFG.crop_size, CFG.crop_size, p=1.),
    A.RandomResizedCrop(CFG.crop_size, CFG.crop_size),
    A.Resize(CFG.crop_size, CFG.crop_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
    A.pytorch.ToTensor(),
])

# Inference pipeline with random crop / flip / rotation / colour jitter
# (presumably meant for test-time augmentation - TODO confirm; it is not used
# by the cells visible in this notebook).
inference_transform = A.Compose([
    A.RandomResizedCrop(CFG.crop_size, CFG.crop_size),
    A.HorizontalFlip(p=0.5),
    A.RandomRotate90(p=1),
    A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
    A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
    A.pytorch.ToTensor(),
])

In [9]:
# train_dataset = CassavaDataset(
#     file_paths=TRAIN_DF.path,
#     labels=TRAIN_DF.label,
#     transform=train_transform,
# )

# train_loader = torch.utils.data.DataLoader(
#     train_dataset, 
#     batch_size=CFG.batch_size,
#     shuffle=True, 
#     num_workers=0
# )

# num_samples = 8

# def imshow(img):
# #     img = img / 2 + 0.5     # unnormalize
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.axis('off')
#     plt.show()


# # get some random training images
# dataiter = iter(train_loader)
# images, labels = dataiter.next()

# # show images
# plt.figure(figsize=(20,10))
# imshow(torchvision.utils.make_grid(images[:num_samples], nrow=4))
# print(labels[:num_samples])

# del num_samples, images, labels, dataiter
# del train_dataset, train_loader

# Model, Loss, Optimizer
## Model

In [10]:
class Net(nn.Module):
    """timm backbone with its own head removed, plus a fresh linear classifier.

    ``forward`` returns both the logits and the backbone output so that callers
    (e.g. SnapMix) can use the feature maps.

    Based on:
    https://www.kaggle.com/sachinprabhu/pytorch-resnet50-snapmix-train-pipeline

    Args:
        model_arch: architecture name passed to ``timm.create_model``.
        n_class: number of outputs of the new classifier.
        pretrained: forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=False) :
        super().__init__()
        stages = list(timm.create_model(model_arch, pretrained=pretrained).children())
        head = stages[-1]

        # Decide where the feature width is stored on the last child and how
        # many trailing children have to be dropped from the backbone.
        # NOTE(review): for the dpn/dla branch only the last child is removed;
        # whether a pooling layer stays inside the backbone depends on the
        # timm model definition - confirm `feats` is still a spatial map.
        if any(tag in model_arch for tag in ('rexnet', 'regnet')) :
            n_features, n_trailing = head.fc.in_features, 1
        elif any(tag in model_arch for tag in ('dpn', 'dla')) :
            n_features, n_trailing = head.in_channels, 1
        else :
            n_features, n_trailing = head.in_features, 2

        self.backbone = nn.Sequential(*stages[:-n_trailing])
        self.classifier = nn.Linear(n_features, n_class)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

    def forward_features(self, x):
        """Run only the truncated backbone."""
        return self.backbone(x)

    def forward(self, x):
        """Return ``(logits, feats)`` where ``feats`` is the backbone output."""
        feats = self.forward_features(x)
        pooled = self.pool(feats).view(x.size(0), -1)
        logits = self.classifier(pooled)
        return logits, feats
  
# dpn68b
# dla60_res2next
# dla60x
# Net('dla60_res2next', len(CFG.classes))

## Set Loss, Optimizer, Scheduler

In [11]:
""" LOSS """
def set_loss() :
    """Build the training criterion selected by ``CFG.loss``.

    ``'bi_tempered_logistic'`` yields a plain function ``(outputs, labels)``
    that reads ``CFG.t0``, ``CFG.t1`` and ``CFG.label_smoothing`` each time it
    is called. Every other supported name yields a loss module moved to
    ``CFG.device``.

    Returns:
        A callable ``criterion(outputs, labels)``.

    Raises:
        AssertionError: if ``CFG.loss`` is not one of the supported names.
    """
    def bi_tempered_logistic_loss_fn(outputs, labels) :
        return bi_tempered_logistic_loss(outputs, labels,
                                         CFG.t0, CFG.t1,
                                         label_smoothing=CFG.label_smoothing)

    # Module-based losses, built lazily so only the selected one is constructed.
    module_factories = {
        'cross_entropy': lambda: nn.CrossEntropyLoss(),
        'smooth_cross_entropy': lambda: SmoothCrossEntropyLoss(smoothing=CFG.label_smoothing),
        'focal_cosine': lambda: FocalCosineLoss(),
        'symmetric_cross_entropy': lambda: SymmetricCrossEntropy(num_classes=len(CFG.classes)),
        'taylor_cross_entropy': lambda: TaylorCrossEntropyLoss(),
    }

    if CFG.loss == 'bi_tempered_logistic' :
        criterion = bi_tempered_logistic_loss_fn
    elif CFG.loss in module_factories :
        criterion = module_factories[CFG.loss]().to(CFG.device)
    else :
        criterion = None

    assert criterion is not None
    return criterion

""" Optimizers """
def set_optimizer(model) :
    """Build the optimizer selected by ``CFG.optimizer`` for ``model``'s parameters.

    Supported names: ``rmsprop``, ``sgd``, ``adam``, ``adamw``, ``ralamb``,
    ``sam`` (wrapping ``optim.SGD``) and ``adamp``. When ``CFG.lookahead`` is
    set the result is wrapped in ``Lookahead``.

    ``CFG.swa`` is deliberately not acted on here: ``SWALR`` is a learning-rate
    scheduler, not an optimizer, so wrapping the optimizer in it (as this
    function used to do, with an unsupported ``swa_freq`` argument) could never
    work. SWA scheduling belongs with the code that steps the schedulers.

    Args:
        model: module whose ``parameters()`` are optimised.

    Returns:
        The configured optimizer.

    Raises:
        ValueError: if ``CFG.optimizer`` is not a supported name.
    """
    if CFG.optimizer == 'rmsprop' :
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=CFG.lr, momentum=CFG.momentum)
    elif CFG.optimizer == 'sgd' :
        optimizer = optim.SGD(model.parameters(),
                              lr=CFG.lr, momentum=CFG.momentum)
    elif CFG.optimizer == 'adam' :
        optimizer = optim.Adam(model.parameters(), lr=CFG.lr)
    elif CFG.optimizer == 'adamw' :
        optimizer = optim.AdamW(model.parameters(),
                                lr=CFG.lr, betas=CFG.betas,
                                eps=CFG.eps, weight_decay=CFG.weight_decay,
                                amsgrad=CFG.amsgrad)
    elif CFG.optimizer == 'ralamb' :
        optimizer = Ralamb(model.parameters(), lr=CFG.lr,
                           weight_decay=CFG.weight_decay)
    elif CFG.optimizer == 'sam' :
        optimizer = SAM(model.parameters(), optim.SGD,
                        lr=CFG.lr, momentum=CFG.momentum)
    elif CFG.optimizer == 'adamp' :
        optimizer = AdamP(model.parameters(),
                          lr=CFG.lr, betas=CFG.betas,
                          weight_decay=CFG.weight_decay)
    else :
        # Fail before any wrapper gets a chance to receive None.
        raise ValueError(f'Unknown optimizer: {CFG.optimizer!r}')

    if CFG.lookahead :
        optimizer = Lookahead(optimizer)

    return optimizer

""" Scheduler """
def set_scheduler(optimizer) :
    """Build the learning-rate scheduler selected by ``CFG.scheduler``.

    * ``'cosine'`` - ``CosineAnnealingWarmRestarts`` with ``T_0=CFG.cos_t0``,
      ``T_mult=1`` and ``eta_min=CFG.min_lr``.
    * ``'steplr'`` - ``StepLR`` halving the learning rate every 5 steps.

    Args:
        optimizer: optimizer whose learning rate is scheduled.

    Returns:
        The scheduler, or ``None`` when ``CFG.scheduler`` names neither option;
        callers must handle the ``None`` case before calling ``step()``.
    """
    if CFG.scheduler == 'cosine' :
#         scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10, 2, eta_min=1e-6) # 1e-6  ### Cosine Warm 
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
                                                                   T_0=CFG.cos_t0, T_mult=1,
                                                                   eta_min=CFG.min_lr, last_epoch=-1)
    elif CFG.scheduler == 'steplr' :
        # No trailing comma here: it would wrap the scheduler in a 1-tuple and
        # break every later `scheduler.step()` call.
        scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer,
                                              step_size=5,
                                              gamma=0.5)
    else :
        scheduler = None

    return scheduler

In [12]:
# GPU memory monitoring via NVML. The handle created here is only referenced
# by the commented-out memory print inside the training cell.
import nvidia_smi

nvidia_smi.nvmlInit()
handle = nvidia_smi.nvmlDeviceGetHandleByIndex(0)
# card id 0 hardcoded here, there is also a call to get all available card ids, so we could iterate

## Train & Validate

In [None]:
%%time
%%capture cap --no-stderr
################ Generate Dataset & Dataloader ###########################
train_set = TRAIN_DF.reset_index(drop=True)

train_dataset = CassavaDataset(
    file_paths=train_set.path,
    labels=train_set.label,
    transform=train_transform,
    cache_ds=CFG.train_cache
)

# With caching enabled the first epoch runs in the main process (0 workers) so
# that whatever the dataset caches lives in this process; workers are switched
# on after that epoch. Without caching, workers are used from the start.
# (The loader does not exist yet at this point, so the worker count is decided
# here and passed to the DataLoader constructor below.)
initial_num_workers = 0 if CFG.train_cache else CFG.num_workers

# Checkpoints are written to ../output; make sure the directory exists.
os.makedirs('../output', exist_ok=True)

models = []; batches=[]

# models += [f'tf_efficientnet_b{i}_ns' for i in range(5)]
# batches += [128, 96, 80, 64, 48]

# models += ['mixnet_l', 'mixnet_xl']
# batches += [64, 48]

# models += ['rexnet_130', 'rexnet_150', 'rexnet_200']
# batches += [80, 72, 56]

# models += ['regnetx_032', 'regnety_032']
# batches += [112, 96]

models += ['dpn68b','dla60_res2next','dla60x']
batches += [96, 64, 80]

for m, mod in enumerate(models) :
#     if m < 8 : continue
    CFG.model = mod
    CFG.batch_size = batches[m]
    filename = f'{CFG.model}_{CFG.seed}'
    print(filename, CFG.batch_size)
    
    train_loader = torch.utils.data.DataLoader(
        train_dataset, 
        batch_size=CFG.batch_size,
        shuffle=True, 
        num_workers=initial_num_workers
    )
    ################# Set Model, Loss, Optimizer, Scaler, Scheduler ###########################
    model = Net(CFG.model, len(CFG.classes), pretrained=True).to(CFG.device)
    criterion = set_loss()
    snapmix_criterion = SnapMixLoss().to(CFG.device)
    optimizer = set_optimizer(model)
    scaler = GradScaler()
    scheduler = set_scheduler(optimizer)
    
    # Stochastic Weight Averaging: after `swa_start` epochs the averaged copy
    # is updated once per epoch and the SWA scheduler replaces the regular one.
    swa_model = AveragedModel(model)
    swa_start = 5
    swa_scheduler = SWALR(optimizer, swa_lr=CFG.lr)

    cached = False
    
    ## Early Stopping
    best_val_loss = np.inf
    best_epoch = 0
    
    for epoch in range(CFG.epochs):  # loop over the dataset multiple times

        ############## TRAIN ####################
        stime = time.time()
        running_loss = 0.0
        correct = 0
        total = 0

        model.train()
        for data in train_loader :

            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(CFG.device)
            labels = labels.to(CFG.device)

            # forward + backward + optimize
            with autocast() :
                rand = np.random.rand()
                if rand > (1.0-CFG.snapmix_pct):
                    inputs, ya, yb, lam_a, lam_b = snapmix(inputs, labels, CFG.snapmix_alpha, model)
                    outputs, _ = model(inputs)
                    loss = snapmix_criterion(criterion, outputs, ya, yb, lam_a, lam_b)
                else:
                    outputs, _ = model(inputs)
                    loss = criterion(outputs, labels)
                
            scaler.scale(loss).backward()
    
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

            # Accuracy is measured against the original labels, also for
            # snapmix batches.
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # print statistics
            running_loss += loss.item()
        
        ## Scheduler
        if epoch > swa_start:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        elif scheduler is not None:
            # set_scheduler() returns None for unknown scheduler names.
            scheduler.step()
        
        train_loss = running_loss/len(train_loader)
        train_acc = correct/total*100
        
        training_time = time.time()-stime
        print(f'[EPOCH {epoch+1}/{CFG.epochs}] time: {training_time:.2f}sec -',
              f'loss: {train_loss:.4E} - acc: {train_acc:.2f}%')

        if not cached and (CFG.train_cache or CFG.valid_cache) :
            if CFG.train_cache :
                train_loader.dataset.set_use_cache(use_cache=True)
                train_loader.num_workers = CFG.num_workers
            cached = True
    
#     info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
#     print(info.used//1024//1024)
    
    # If training stopped before any SWA update (epochs <= swa_start + 1) the
    # averaged copy still holds the initial weights; the first update call
    # copies the trained parameters into it.
    if int(swa_model.n_averaged) == 0 :
        swa_model.update_parameters(model)

    # AveragedModel averages parameters only, so the BatchNorm running
    # statistics must be recomputed for the averaged weights before saving.
    torch.optim.swa_utils.update_bn(train_loader, swa_model, device=CFG.device)

    torch.save(swa_model.state_dict(), f'../output/{filename}.pt')        
    del model, swa_model, optimizer, scaler, scheduler, swa_scheduler
    torch.cuda.empty_cache()
        
print('Finished Training')


In [15]:
# Write the stdout captured by `%%capture cap` in the training cell to a file.
# This cell fails with a NameError if the training cell has not been run.
with open('output.txt', 'w') as f:
    f.write(cap.stdout)