Reference:

https://www.kaggle.com/artgor/segmentation-in-pytorch-using-convenient-tools  
https://www.kaggle.com/ryches/turbo-charging-andrew-s-pytorch

In [0]:
!pip install git+https://github.com/qubvel/segmentation_models.pytorch

Collecting git+https://github.com/qubvel/segmentation_models.pytorch
  Cloning https://github.com/qubvel/segmentation_models.pytorch to /tmp/pip-req-build-f6p5vvp4
  Running command git clone -q https://github.com/qubvel/segmentation_models.pytorch /tmp/pip-req-build-f6p5vvp4
Collecting pretrainedmodels==0.7.4 (from segmentation-models-pytorch==0.0.3)
[?25l  Downloading https://files.pythonhosted.org/packages/84/0e/be6a0e58447ac16c938799d49bfb5fb7a80ac35e137547fc6cee2c08c4cf/pretrainedmodels-0.7.4.tar.gz (58kB)
[K     |████████████████████████████████| 61kB 4.0MB/s 
[?25hCollecting efficientnet-pytorch==0.4.0 (from segmentation-models-pytorch==0.0.3)
  Downloading https://files.pythonhosted.org/packages/12/f8/35453605c6c471fc406a137a894fb381b05ae9f174b2ca4956592512374e/efficientnet_pytorch-0.4.0.tar.gz
Building wheels for collected packages: segmentation-models-pytorch, pretrainedmodels, efficientnet-pytorch
  Building wheel for segmentation-models-pytorch (setup.py) ... [

## Importing and parameters

In [0]:
import os, gc, time, json, copy
import pickle, random, itertools, collections
from tqdm import tqdm, tqdm_notebook
from functools import partial

import numpy as np
import pandas as pd
from PIL import Image
import cv2

from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import TensorDataset, DataLoader, Dataset
from torch.utils.data.sampler import SubsetRandomSampler
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

import albumentations as albu
from albumentations import pytorch as AT
import segmentation_models_pytorch as smp

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [0]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed shared by seed_torch() and the stratified K-fold split.
SEED = 2019

# Class order used everywhere: channel i of a mask tensor corresponds to label_classes[i].
label_classes = ["Fish", "Flower", "Gravel", "Sugar"]

# `path` holds the competition CSV files; `img_paths` holds the pre-resized images and masks.
path = '../input/understanding_cloud_organization'
img_paths = '../input/understanding-clouds-resized'
# Earlier ensemble configurations, kept for reference only (not used below).
# model0_path = '../input/cloud-im-small/'
# model1_path = '../input/cld-rn50-unet-1/'
# model2_path = '../input/cld-rn50-unet-2/'
# model3_path = '../input/cld-rn50-unet-3/'
# model4_path = '../input/cld-rn50-unet-4/'
# model_paths = [model0_path, model1_path, model2_path, model3_path, model4_path]
# model0_path = '../input/cloud-im-small/'
# model1_path = '../input/cloud-ckpt-rn50-0/'
# model2_path = '../input/cld-rn50-unet-0-ladam/'
# model3_path = '../input/cld-rn50-unet-0-lalamb/'
# model_paths = [model0_path, model1_path, model2_path, model3_path]
# Checkpoint directories for the ensemble members. Each directory is expected to contain
# 'best_model_optim.pt' (that file name is appended in TTA_inference).
model_un_0 = '../input/cloud-im-small/'
model_un_1 = '../input/cld-rn50-unet-1/'
model_un_4 = '../input/cld-rn50-unet-4/'
model_fpn_0 = '../input/cld-rn50-fpn-0/'
model_fpn_2 = '../input/cld-rn50-fpn-2/'
model_fpn_4 = '../input/cld-rn50-fpn-4/'
un_paths = [model_un_0, model_un_1, model_un_4]     # loaded into the U-Net
fpn_paths = [model_fpn_0, model_fpn_2, model_fpn_4]  # loaded into the FPN

# Cross-validation layout used to derive train/validation ids.
n_splits = 5
which_fold = 0  # should be int in [0, n_splits-1]

# Batch size of the inference DataLoader.
batch_size = 32

## Helper functions and classes

In [0]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``; works element-wise on NumPy arrays."""
    return 1 / (1 + np.exp(-x))

def seed_torch(seed=SEED):
    """
    Seed every random number generator this notebook can touch.

    Besides the torch CPU/CUDA generators, the Python ``random`` module and
    NumPy's global generator are seeded as well, because augmentation libraries
    and data shuffling draw from them. cuDNN is put in deterministic mode and
    its autotuner is disabled so that repeated runs select the same kernels.

    :param seed: integer seed applied to all generators
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Covers every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The autotuner may pick different algorithms from run to run.
    torch.backends.cudnn.benchmark = False

def get_img(x, folder: str='train_images_525/train_images_525'):
    """
    Return image based on image name and folder.

    :param x: image file name, e.g. '0011165.jpg'
    :param folder: sub-folder of ``img_paths`` that contains the image
    :return: RGB image as an ndarray of shape (H, W, 3)
    :raises FileNotFoundError: if the file is missing or cannot be decoded
    """
    data_folder = f"{img_paths}/{folder}"
    image_path = os.path.join(data_folder, x)
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; without
    # this check the error would surface as an opaque cvtColor assertion.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # OpenCV loads BGR; the rest of the notebook works in RGB.
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def rle_decode(mask_rle: str = '', shape: tuple = (1400, 2100)):
    """
    Turn a run-length encoded string into a binary mask.

    :param mask_rle: space-separated "start length" pairs; starts are 1-based
        positions in the column-major flattened mask
    :param shape: (height, width) of the mask to produce
    :return: uint8 ndarray of ``shape`` with 1 inside the runs and 0 elsewhere
    """
    tokens = mask_rle.split()
    # Even positions are starts (converted to 0-based), odd positions are run lengths.
    run_starts = np.array(tokens[0::2], dtype=int) - 1
    run_lengths = np.array(tokens[1::2], dtype=int)

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, length in zip(run_starts, run_lengths):
        flat[begin:begin + length] = 1
    # Runs are counted down the columns, hence Fortran order.
    return flat.reshape(shape, order='F')

def make_mask(image_name: str='img.jpg', shape: tuple=(350, 525)):
    """
    Create mask (ndarray, values between 0-1) based on image name and shape.

    One mask image per class is read from the resized-masks folder under
    ``img_paths``; a class whose mask file cannot be read is left all-zero.

    :param image_name: image file name; mask files are named ``<class><image_name>``
    :param shape: (height, width) of the returned mask
    :return: float32 ndarray of shape (height, width, len(label_classes))
    """
    height, width = shape[0], shape[1]
    mask_dir = f"{img_paths}/train_masks_525/train_masks_525/"
    masks = np.zeros((height, width, len(label_classes)), dtype=np.float32)
    for classidx, classid in enumerate(label_classes):
        mask = cv2.imread(mask_dir + classid + image_name)
        if mask is None:
            # No readable mask file for this class: keep the channel empty.
            continue
        # Honour the requested shape instead of a hard-coded (350, 525).
        if mask.shape[:2] != (height, width):
            mask = cv2.resize(mask, (width, height))  # cv2 expects (width, height)
        masks[:, :, classidx] = mask[:, :, 0]
    # Mask images are stored as 0/255; scale to the 0-1 range.
    return masks / 255

def mask2rle(img):
    """
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated "start length start length ..."
    string with 1-based starts counted in column-major order.
    """
    column_major = img.T.flatten()
    # Zero sentinels at both ends so runs touching the borders are detected.
    padded = np.concatenate([[0], column_major, [0]])
    # 1-based positions where the value changes: run starts and run ends alternate.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Convert every end position into a run length.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))


def visualize(image, mask, original_image=None, original_mask=None):
    """
    Show an image next to its four class masks.

    Input
    - image: ndarray image loaded from cv2
    - mask: ndarray output from make_mask, one channel per class
    - original_image / original_mask: when either is given, a first row with
      the originals is drawn above the (transformed) image and mask
    """
    title_size = 14
    class_names = {0: 'Fish', 1: 'Flower', 2: 'Gravel', 3: 'Sugar'}

    if original_image is not None or original_mask is not None:
        _, axes = plt.subplots(2, 5, figsize=(24, 12))
        top_row, bottom_row = axes[0], axes[1]

        top_row[0].imshow(original_image)
        top_row[0].set_title('Original image', fontsize=title_size)
        for channel, name in class_names.items():
            panel = top_row[channel + 1]
            panel.imshow(original_mask[:, :, channel])
            panel.set_title(f'Original mask {name}', fontsize=title_size)

        bottom_row[0].imshow(image)
        bottom_row[0].set_title('Transformed image', fontsize=title_size)
        for channel, name in class_names.items():
            panel = bottom_row[channel + 1]
            panel.imshow(mask[:, :, channel])
            panel.set_title(f'Transformed mask {name}', fontsize=title_size)
    else:
        # Single row: image followed by one panel per class.
        _, axes = plt.subplots(1, 5, figsize=(24, 24))
        axes[0].imshow(image)
        for channel, name in class_names.items():
            panel = axes[channel + 1]
            panel.imshow(mask[:, :, channel])
            panel.set_title(f'Mask {name}', fontsize=title_size)
            
            
def visualize_with_raw(image, mask, original_image=None, original_mask=None, raw_image=None, raw_mak=None):
    """
    Draw a 3x5 grid comparing ground truth, raw predictions and post-processed predictions.

    Input
    - image: ndarray image loaded from cv2 (third row)
    - mask: ndarray of post-processed predicted masks, one channel per class
    - original_image / original_mask: image and ground-truth masks (first row)
    - raw_image / raw_mak: image and raw predicted masks (second row)

    Although the original/raw arguments default to None, all of them are
    indexed unconditionally, so they have to be supplied.
    """
    title_size = 14
    class_names = {0: 'Fish', 1: 'Flower', 2: 'Gravel', 3: 'Sugar'}

    _, axes = plt.subplots(3, 5, figsize=(24, 12))

    # (picture, masks, title of the picture panel, prefix of the mask panel titles)
    # NOTE(review): the second row reuses the title 'Original image' — confirm that is intended.
    rows = (
        (original_image, original_mask, 'Original image', 'Original mask'),
        (raw_image, raw_mak, 'Original image', 'Raw predicted mask'),
        (image, mask, 'Transformed image', 'Predicted mask with processing'),
    )
    for row_axes, (picture, masks, picture_title, mask_prefix) in zip(axes, rows):
        row_axes[0].imshow(picture)
        row_axes[0].set_title(picture_title, fontsize=title_size)
        for channel, name in class_names.items():
            panel = row_axes[channel + 1]
            panel.imshow(masks[:, :, channel])
            panel.set_title(f'{mask_prefix} {name}', fontsize=title_size)
            
            
def plot_with_augmentation(image, mask, augment):
    """
    Apply an augmentation and plot the result below the untouched inputs.

    Input
    - image: ndarray image loaded from cv2
    - mask: ndarray output from make_mask
    - augment: albumentations transform, called as augment(image=..., mask=...)
    """
    result = augment(image=image, mask=mask)
    visualize(result['image'], result['mask'], original_image=image, original_mask=mask)


def post_process(probability, threshold, min_size):
    """
    Post processing of each predicted mask: binarize it and drop small blobs.

    Input
    - probability: predicted probability mask, 2-D ndarray (e.g. (350, 525))
    - threshold: value to binarize the probability mask; pixels strictly above
      it become foreground
    - min_size: pixel-count cut-off; a connected component is kept only when it
      has strictly more than ``min_size`` pixels

    Returns
    - predictions: float32 ndarray with the same shape as ``probability``,
      1 inside kept components and 0 elsewhere
    - num: number of kept components
    """
    mask = cv2.threshold(probability, threshold, 1, cv2.THRESH_BINARY)[1]
    num_component, component = cv2.connectedComponents(mask.astype(np.uint8))
    # Size the output from the input rather than assuming 350x525.
    predictions = np.zeros(mask.shape, np.float32)
    num = 0
    # Label 0 is the background, so real components start at 1.
    for c in range(1, num_component):
        p = (component == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num


def get_validation_augmentation():
    """Build the validation/test pipeline: resize every sample to 256x384 (both divisible by 32)."""
    # Alternative resolution kept from earlier experiments: albu.Resize(320, 640)
    return albu.Compose([albu.Resize(256, 384)])

def to_tensor(x, **kwargs):
    """
    Reorder an (H, W, C) image or mask to (C, H, W) and cast it to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = x.transpose(2, 0, 1)
    return channels_first.astype('float32')

def get_preprocessing(preprocessing_fn):
    """Build the final preprocessing pipeline applied after augmentation.

    Args:
        preprocessing_fn (callable): data normalization function applied to the
            image only (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose that normalizes the image and then
            converts image and mask to channel-first float32 arrays
    """
    normalize = albu.Lambda(image=preprocessing_fn)
    channels_first = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalize, channels_first])


def compute_dice(img1, img2):
    """
    Dice coefficient between two binary masks.

    :param img1: array-like mask; any non-zero value counts as foreground
    :param img2: array-like mask of the same shape as ``img1``
    :return: 2 * |A ∩ B| / (|A| + |B|); 1.0 when both masks are empty
    """
    # The builtin ``bool`` replaces the ``np.bool`` alias removed in NumPy 1.24.
    img1 = np.asarray(img1).astype(bool)
    img2 = np.asarray(img2).astype(bool)

    intersection = np.logical_and(img1, img2)
    total = img1.sum() + img2.sum()
    # Two empty masks agree perfectly; without this guard 0/0 would yield NaN.
    if total == 0:
        return 1.0

    return 2. * intersection.sum() / total

## Data overview

Let's have a look at the data first.
We have folders with train and test images, a file with train image ids and masks, and a sample submission.

In [0]:
# train.csv provides the 'Image_Label' and 'EncodedPixels' columns used below;
# sample_submission.csv provides the 'Image_Label' rows the submission has to fill.
train = pd.read_csv(f'{path}/train.csv')
sub = pd.read_csv(f'{path}/sample_submission.csv')

In [0]:
# Count the files in the resized train/test image folders.
n_train = len(os.listdir(f'{img_paths}/train_images_525/train_images_525'))
n_test = len(os.listdir(f'{img_paths}/test_images_525/test_images_525'))
print(f'There are {n_train} images in train dataset')
print(f'There are {n_test} images in test dataset')

There are 5546 images in train dataset
There are 3698 images in test dataset


## Preparing data for modelling

First, let's create a list of unique image ids together with the number of masks per image. This will allow us to make a stratified split based on this count.

In [0]:
def train_val_split(id_mask_count, n_splits=None, which_fold=None):
    """
    Stratified K-fold split of the image ids, stratified on the per-image mask count.

    :param id_mask_count: DataFrame with 'img_id' and 'count' columns
    :param n_splits: number of folds
    :param which_fold: fold index to return; None returns every fold
    :return: a (train_indices, val_indices) pair for ``which_fold``, or the
        list of such pairs for all folds
    """
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    folds = list(splitter.split(id_mask_count['img_id'], id_mask_count['count']))
    return folds if which_fold is None else folds[which_fold]

In [0]:
# Number of non-empty masks per image. 'Image_Label' looks like '<img_id>_<class>',
# so the part before the first underscore is the image file name.
# rename_axis + reset_index(name=...) names the columns explicitly; relying on the
# default column names breaks on pandas >= 2.0, where value_counts() names its
# result 'count' and keeps the original series name on the index.
id_mask_count = (train.loc[train['EncodedPixels'].notnull(), 'Image_Label']
                    .apply(lambda x: x.split('_')[0])
                    .value_counts()
                    .rename_axis('img_id')
                    .reset_index(name='count'))

# Stratify the split on the mask count so every fold sees a similar mix of images.
trn_idx, val_idx = train_val_split(id_mask_count, n_splits, which_fold)
train_ids, valid_ids = id_mask_count['img_id'].values[trn_idx], id_mask_count['img_id'].values[val_idx]
# The submission has one row per (image, class); keep each test image once, in file order.
test_ids = sub['Image_Label'].apply(lambda x: x.split('_')[0]).drop_duplicates().values

In [0]:
# Sizes of the training and validation folds.
len(train_ids), len(valid_ids)

(4435, 1111)

In [0]:
# One row per image: file name and number of non-empty masks.
id_mask_count

Unnamed: 0,img_id,count
0,36bc4ed.jpg,4
1,e19282c.jpg,4
2,3c890ca.jpg,4
3,3594a7e.jpg,4
4,40add1c.jpg,4
...,...,...
5541,87fd65c.jpg,1
5542,7b282d6.jpg,1
5543,a4e5ec5.jpg,1
5544,be4c586.jpg,1


In [0]:
# How many images have 1, 2, 3 or 4 non-empty masks.
id_mask_count['count'].value_counts()

2    2372
3    1560
1    1348
4     266
Name: count, dtype: int64

There are a lot of empty masks. In fact only 266 images have all four masks. It is important to remember this.

## Setting up data loader, model, solver

In [0]:
class CloudDataset(Dataset):
    """
    Dataset yielding (image, mask) pairs for the cloud segmentation task.

    :param dataset_type: 'test' reads from the test image folder, any other
        value reads from the train image folder
    :param img_ids: array of image file names
    :param transforms: albumentations transform called with image= and mask=
    :param preprocessing: optional transform applied after ``transforms``
    :param label_smooth: when > 0, mask pixels equal to 1 become
        ``1 - label_smooth`` and all others become ``label_smooth``
    """
    def __init__(self, dataset_type: str='train', img_ids: np.array=None,
                 transforms=albu.Compose([albu.HorizontalFlip(), AT.ToTensor()]),
                 preprocessing=None, label_smooth=0):
        self.dataset_type = dataset_type
        if dataset_type != 'test':
            self.data_folder = f"{img_paths}/train_images_525/train_images_525"
        else:
            self.data_folder = f"{img_paths}/test_images_525/test_images_525"
        self.img_ids = img_ids
        self.transforms = transforms
        self.preprocessing = preprocessing
        self.label_smooth = label_smooth

    def __getitem__(self, idx):
        """Load, augment and preprocess the sample at position ``idx``."""
        image_name = self.img_ids[idx]
        # make_mask leaves a class channel all-zero when its mask file cannot be read.
        mask = make_mask(image_name)
        image_path = os.path.join(self.data_folder, image_name)
        img = cv2.imread(image_path)
        # cv2.imread returns None on failure; fail with a clear message instead
        # of an opaque cvtColor error.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        augmented = self.transforms(image=img, mask=mask)
        img = augmented['image']
        mask = augmented['mask']
        if self.label_smooth > 0:
            # Use the instance attribute: the bare name ``label_smooth`` is not
            # defined in this scope and raised NameError.
            mask = np.where(mask == 1, 1 - self.label_smooth, self.label_smooth).astype('float32')
        if self.preprocessing:
            preprocessed = self.preprocessing(image=img, mask=mask)
            img = preprocessed['image']
            mask = preprocessed['mask']
        return img, mask

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.img_ids)

In [0]:
# Encoder settings for the U-Net. No pretrained encoder weights are requested here
# because trained checkpoints are loaded later, and no activation is attached
# because the inference functions apply the sigmoid themselves.
ENCODER = 'resnet50'
ENCODER_WEIGHTS = None
ACTIVATION = None

# Input normalization matching the resnet50 encoder.
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER)

# U-Net with one output channel per cloud class.
unet = smp.Unet(
    encoder_name=ENCODER, 
    encoder_weights=ENCODER_WEIGHTS, 
    classes=4, 
    activation=ACTIVATION
)
unet = unet.to(device)

# NOTE(review): the FPN uses a resnet34 encoder while its checkpoint folders are
# named 'cld-rn50-fpn-*' and the shared preprocessing_fn is built for resnet50 —
# confirm that this matches the trained checkpoints.
fpn = smp.FPN(
    encoder_name='resnet34', 
    encoder_weights=ENCODER_WEIGHTS, 
    classes=4, 
    activation=ACTIVATION
)
fpn = fpn.to(device)

In [0]:
# Report GPU memory (in units of 1e6 bytes) held by tensors and by the caching
# allocator, then release the cached blocks.
# NOTE(review): torch.cuda.memory_cached is deprecated in newer PyTorch releases
# in favour of torch.cuda.memory_reserved — confirm against the installed version.
print(str(torch.cuda.memory_allocated(device)/1e6 ) + 'M')
print(str(torch.cuda.memory_cached(device)/1e6 ) + 'M')
torch.cuda.empty_cache()

228.8384M
249.561088M


In [0]:
# Test-time pipeline: resize only (no random augmentation), then normalize and
# convert to channel-first float32.
test_dataset = CloudDataset(dataset_type='test', img_ids=test_ids,
                             transforms=get_validation_augmentation(),
                             preprocessing=get_preprocessing(preprocessing_fn))

# shuffle=False keeps predictions in the same order as test_ids.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [0]:
# tta_transforms = tta.Compose(
#     [
#         tta.HorizontalFlip(),
#         tta.VerticalFlip(),
#         tta.Rotate90(angles=[0, 90, 180, 270]),
#     ]
# )

# # model = tta.SegmentationTTAWrapper(model, tta_transforms)
# model = tta.SegmentationTTAWrapper(model, tta.aliases.d4_transform(), merge_mode='mean')

## Model inference

In [0]:
def get_preds(loader, model, dataset_type='test'):
    """
    Plain (no TTA) inference over a data loader.

    :param loader: DataLoader yielding (images, masks) batches
    :param model: segmentation model returning logits of shape (N, C, H, W)
    :param dataset_type: 'val' additionally returns the resized label masks,
        'test' returns predictions only

    :return: list of per-class probability masks, each resized to 350x525
        (N*C entries in loader order); for 'val' a tuple
        (predictions, label_masks)
    """
    assert dataset_type in ('val', 'test')
    model.eval()  # eval mode disables dropout

    if dataset_type=='val':
        label_masks = []
    predictions = []
    with torch.no_grad():
        # Batches
        for images, masks in tqdm_notebook(loader):
            if dataset_type=='val':
                # Flatten (N, C, H, W) to (N*C, H, W); the spatial size is read from
                # the tensor instead of being hard-coded to one resolution.
                for m in masks.view(-1, *masks.shape[-2:]).numpy():
                    m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                    label_masks.append(m)

            # Move to default device
            images = images.to(device)  # (N, 3, H, W)

            # Forward prop.
            pred_scores = model(images)  # (N, C, H, W) logits
            pred_scores = pred_scores.view(-1, *pred_scores.shape[-2:])  # (N*C, H, W)

            # Resize to required size; cv2 takes dsize as (width, height)
            for m in torch.sigmoid(pred_scores).cpu().numpy():
                m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                predictions.append(m)
    torch.cuda.empty_cache()

    if dataset_type=='val':
        return predictions, label_masks
    return predictions

In [0]:
class Rot90(object):
    """Rotate a batch of images by ``k`` quarter turns in the plane of dims 2 and 3."""

    def __init__(self, k=1):
        self.k = k

    def transform(self, x):
        """Rotate ``x`` by ``k`` * 90 degrees."""
        return torch.rot90(x, k=self.k, dims=(2, 3))

    def inverse(self, x):
        """Undo ``transform`` by rotating ``k`` quarter turns the other way."""
        return torch.rot90(x, k=-self.k, dims=(2, 3))

class Hflip(object):
    """Mirror a batch of images along dim 3 (left-right); the flip is its own inverse."""

    def transform(self, x):
        """Reverse ``x`` along dim 3."""
        return torch.flip(x, dims=(3,))

    def inverse(self, x):
        """Flipping again restores the original orientation."""
        return torch.flip(x, dims=(3,))

class Vflip(object):
    """Mirror a batch of images along dim 2 (top-bottom); the flip is its own inverse."""

    def transform(self, x):
        """Reverse ``x`` along dim 2."""
        return torch.flip(x, dims=(2,))

    def inverse(self, x):
        """Flipping again restores the original orientation."""
        return torch.flip(x, dims=(2,))

def TTA(loader, model, dataset_type='test', beta=0.4):
    """
    Inference with test-time augmentation (TTA) for a single model.

    The logits of the untouched batch get weight ``beta``; the remaining
    ``1 - beta`` is shared equally between the flipped/rotated views, whose
    outputs are mapped back to the original orientation before averaging.

    :param loader: DataLoader yielding (images, masks) batches
    :param model: segmentation model returning logits of shape (N, C, H, W)
    :param dataset_type: 'val' additionally returns the resized label masks,
        'test' returns predictions only
    :param beta: ratio of prediction from original dataset in final results

    :return: list of per-class probability masks, each resized to 350x525
        (N*C entries in loader order); for 'val' a tuple
        (predictions, label_masks)
    """
    assert dataset_type in ('val', 'test')
    model.eval()  # eval mode disables dropout
    tsfms = [Vflip(), Hflip(), Rot90(k=1), Rot90(k=2), Rot90(k=3)]

    if dataset_type=='val':
        label_masks = []
    predictions = []
    with torch.no_grad():
        # Batches
        for images, masks in tqdm_notebook(loader):
            if dataset_type=='val':
                # Flatten (N, C, H, W) to (N*C, H, W); the spatial size is read from
                # the tensor instead of being hard-coded to one resolution.
                for m in masks.view(-1, *masks.shape[-2:]).numpy():
                    m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                    label_masks.append(m)

            # Move to default device
            images = images.to(device)  # (N, 3, H, W)

            # Forward prop. on the untouched batch
            pred_scores = beta * model(images)  # (N, C, H, W) logits

            # TTA: predict on each augmented view and undo the augmentation
            for tsfm in tsfms:
                pred_scores += (1-beta) * tsfm.inverse(model(tsfm.transform(images))) / len(tsfms)

            # Resize to required size; cv2 takes dsize as (width, height)
            pred_scores = pred_scores.view(-1, *pred_scores.shape[-2:])  # (N*C, H, W)
            for m in torch.sigmoid(pred_scores).cpu().numpy():
                m = cv2.resize(m, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                predictions.append(m)
    torch.cuda.empty_cache()

    if dataset_type=='val':
        return predictions, label_masks
    return predictions

In [0]:
def _load_model_states(paths, filename='best_model_optim.pt'):
    """Read the 'model' state dict of every checkpoint directory in ``paths`` into host memory."""
    return [torch.load(mp + filename, map_location='cpu')['model'] for mp in paths]


def TTA_inference(loader, class_params, beta=0.4, unet_weight=0.25):
    """
    Ensemble inference with test-time augmentation (TTA), returning RLE strings.

    For every batch the logits of all U-Net checkpoints (``un_paths``) and all
    FPN checkpoints (``fpn_paths``) are blended: each architecture's
    checkpoints are averaged, the U-Net family gets ``unet_weight`` and the FPN
    family ``1 - unet_weight``. Within a model the untouched batch gets weight
    ``beta`` and the augmented views share ``1 - beta``. The blended logits are
    passed through a sigmoid, resized to 350x525, post-processed per class and
    run-length encoded.

    :param loader: DataLoader yielding (images, masks) batches; masks are ignored
    :param class_params: one (threshold, min_size) pair per output class, in
        channel order
    :param beta: ratio of prediction from original dataset in final results
    :param unet_weight: share of the U-Net family in the ensemble

    :return: list of RLE strings, one per (image, class) in loader order;
        '' when no component survives post-processing
    """
    tsfms = [Vflip(), Hflip(), Rot90(k=1), Rot90(k=2), Rot90(k=3)]

    # Read every checkpoint from disk once, instead of once per batch. The state
    # dicts stay in host memory and are copied into the shared unet/fpn modules
    # whenever a checkpoint's turn comes up.
    ensemble = (
        (unet, _load_model_states(un_paths), unet_weight),
        (fpn, _load_model_states(fpn_paths), 1 - unet_weight),
    )
    n_classes = len(class_params)

    encoded_pixels = []
    mask_idx = 0

    with torch.no_grad():
        # Batches
        for images, _ in tqdm_notebook(loader):

            # Move to default device
            images = images.to(device)  # (N, 3, H, W)

            # Model ensembles
            pred_scores = 0
            for net, states, weight in ensemble:
                for state in states:
                    net.load_state_dict(state)
                    net.eval()

                    # Forward prop. on the untouched batch
                    pred_scores += weight * beta * net(images) / len(states)  # (N, C, H, W)

                    # TTA: predict on each augmented view and undo the augmentation
                    for tsfm in tsfms:
                        pred_scores += weight * (1-beta) * tsfm.inverse(net(tsfm.transform(images))) / len(tsfms) / len(states)

            # Resize back and encode to rle
            pred_scores = pred_scores.view(-1, *pred_scores.shape[-2:])  # (N*C, H, W)
            for prob_mask in torch.sigmoid(pred_scores).cpu().numpy():
                prob_mask = cv2.resize(prob_mask, dsize=(525, 350), interpolation=cv2.INTER_LINEAR)
                # Masks arrive image by image with the classes in channel order, so
                # the running index modulo the class count selects the class parameters.
                pred, num_pred = post_process(prob_mask, *class_params[mask_idx % n_classes])
                if num_pred == 0:
                    encoded_pixels.append('')
                else:
                    encoded_pixels.append(mask2rle(pred))
                mask_idx += 1

    torch.cuda.empty_cache()

    return encoded_pixels

In [0]:
# Per-class post-processing parameters as (threshold, min_size) pairs, one per
# class in channel order; each pair is unpacked into post_process().
# Earlier candidates are kept below for reference.
# class_params = [(0.45, 22500), (0.3, 22500), (0.5, 15000), (0.4, 10000)]
# class_params = [(0.45, 22500), (0.5, 22500), (0.45, 22500), (0.4, 10000)]
# class_params = [(0.45, 22500), (0.45, 22500), (0.5, 15000), (0.4, 12500)]
# class_params = [(0.45, 22500), (0.45, 22500), (0.45, 22500), (0.45, 20000)]
# class_params = [(0.4, 25000), (0.2, 22500), (0.2, 22500), (0.5, 10000)]
# class_params = [(0.35, 22500), (0.35, 20000), (0.4, 25000), (0.4, 10000)]
# class_params = [(0.3, 15000), (0.5, 10000), (0.45, 20000), (0.35, 15000)]

# class_params = [(0.35, 25000), (0.35, 15000), (0.2, 22500), (0.4, 15000)]
# class_params = [(0.3, 20000), (0.55, 15000), (0.35, 15000), (0.25, 15000)]
class_params = [(0.35, 22500), (0.5, 15000), (0.45, 15000), (0.35, 15000)]

### Prediction from testset

In [0]:
# Single-model alternatives, kept for reference:
# predictions = get_preds(test_loader, model) # list
# predictions = TTA(test_loader, model) # list
# Ensemble + TTA inference; returns one RLE string per (image, class).
encoded_pixels = TTA_inference(test_loader, class_params)

# Report GPU memory (in units of 1e6 bytes) after inference, then release cached blocks.
print(str(torch.cuda.memory_allocated(device)/1e6 ) + 'M')
print(str(torch.cuda.memory_cached(device)/1e6 ) + 'M')
torch.cuda.empty_cache()

HBox(children=(IntProgress(value=0, max=116), HTML(value='')))


228.8384M
316.669952M


In [0]:
# encoded_pixels is expected to line up with the rows of the sample submission:
# test_ids were taken from `sub` in order and the test loader is not shuffled.
sub['EncodedPixels'] = encoded_pixels
sub.to_csv('submission.csv', columns=['Image_Label', 'EncodedPixels'], index=False)