In [1]:
from typing import Callable, List, Tuple
import collections
import os
import torch
import catalyst

from catalyst.dl import utils
from torch import nn

from catalyst.contrib.nn import DiceLoss, IoULoss
from catalyst.dl import SupervisedRunner

from pathlib import Path
from models.unets import unet_resnet
from dataflow.dataloaders import get_train_val_loaders
from dataflow.visualisations import tensor_to_rgb
from dataflow.transforms import get_train_augmentation, get_validation_augmentation, prepare_batch_fp32, get_preprocessing
%matplotlib inline
%load_ext autoreload
%autoreload 2

  from pandas import Panel

Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.


Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.



In [2]:
data_dir = Path('/media/wwymak/Storage/spacenet/AOI_3_Paris_Train')
image_dir = data_dir / 'RGB-PanSharpen'
mask_dir = data_dir / 'masks'
summary_data_filepath = data_dir / 'summaryData' / 'AOI_3_Paris_Train_Building_Solutions.csv'


In [17]:
debug=False
batch_size = 8 
val_batch_size = batch_size * 2
num_workers = 12
val_interval = 3
accumulation_steps = 4

train_loader, val_loader, train_eval_loader = get_train_val_loaders(
    image_dir=image_dir,
    mask_dir=mask_dir,
    summary_data_filepath=summary_data_filepath,
    train_transforms=get_train_augmentation(512),
    val_transforms=get_validation_augmentation(512),
    train_ratio=0.8,
    batch_size=batch_size,
    num_workers=num_workers,
    limit_train_num_samples=100 if debug else None,
    limit_val_num_samples=100 if debug else None,
)

loaders = collections.OrderedDict()
loaders["train"] = train_loader
loaders["valid"] = val_loader

(17148, 4)
(16633, 4)


In [18]:
import segmentation_models_pytorch as smp

# We will use Feature Pyramid Network with pre-trained ResNeXt50 backbone
model = smp.Unet(encoder_name="efficientnet-b0", classes=1)

In [19]:

# we have multiple criterions
criterion = {
    "dice": DiceLoss(),
    "iou": IoULoss(),
    "bce": nn.BCEWithLogitsLoss()
}

from torch import optim

from catalyst.contrib.nn import RAdam, Lookahead

learning_rate = 0.001
encoder_learning_rate = 0.0005

# model, preprocessing_function = unet_resnet('efficientnet-b0')
# Since we use a pre-trained encoder, we will reduce the learning rate on it.
layerwise_params = {"encoder*": dict(lr=encoder_learning_rate, weight_decay=0.00003)}

# This function removes weight_decay for biases and applies our layerwise_params
model_params = utils.process_model_params(model, layerwise_params=layerwise_params)

# Catalyst has new SOTA optimizers out of box
base_optimizer = RAdam(model_params, lr=learning_rate, weight_decay=0.0003)
optimizer = Lookahead(base_optimizer)

# scheduler = optim.lr_scheduler.CyclicLR(optimizer, factor=0.25, patience=2)
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=[x['lr'] * 10 for x in model_params], steps_per_epoch=len(train_loader), epochs=50)

In [20]:


num_epochs = 50
logdir = data_dir /"logs"/"unet_resnet"

device = utils.get_device()
# print(f"device: {device}")

# if is_fp16_used:
#     fp16_params = dict(opt_level="O1") # params for FP16
# else:
#     fp16_params = None

# print(f"FP16 params: {fp16_params}")


# by default SupervisedRunner uses "features" and "targets",
# in our case we get "image" and "mask" keys in dataset __getitem__
runner = SupervisedRunner(device=device, input_key="image", input_target_key="mask")

In [21]:
%load_ext tensorboard.notebook
%tensorboard --logdir {logdir}

In [None]:
from catalyst.dl.callbacks import DiceCallback, IouCallback, \
  CriterionCallback, MetricAggregationCallback

callbacks = [
    # Each criterion is calculated separately.
    CriterionCallback(
        input_key="mask",
        prefix="loss_dice",
        criterion_key="dice"
    ),
    CriterionCallback(
        input_key="mask",
        prefix="loss_iou",
        criterion_key="iou"
    ),
    CriterionCallback(
        input_key="mask",
        prefix="loss_bce",
        criterion_key="bce"
    ),

    # And only then we aggregate everything into one loss.
    MetricAggregationCallback(
        prefix="loss",
        mode="weighted_sum", # can be "sum", "weighted_sum" or "mean"
        # because we want weighted sum, we need to add scale for each loss
        metrics={"loss_dice": 1.0, "loss_iou": 1.0, "loss_bce": 0.8},
    ),

    # metrics
    DiceCallback(input_key="mask"),
    IouCallback(input_key="mask"),
]


runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    # our dataloaders
    loaders=loaders,
    # We can specify the callbacks list for the experiment;
    callbacks=callbacks,
    # path to save logs
    logdir=logdir,
    num_epochs=num_epochs,
    # save our best checkpoint by IoU metric
    main_metric="iou",
    # IoU needs to be maximized.
    minimize_metric=False,
    # for FP16. It uses the variable from the very first cell
#     fp16=fp16_params,
    # prints train logs
    verbose=True,
)

1/50 * Epoch (train):   0% 0/63 [00:00<?, ?it/s]


This overload of add is deprecated:
	add(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add(Tensor other, *, Number alpha)



1/50 * Epoch (train): 100% 63/63 [00:19<00:00,  3.18it/s, dice=0.252, iou=0.144, loss=2.215, loss_bce=0.764, loss_dice=0.748, loss_iou=0.856]
1/50 * Epoch (valid): 100% 16/16 [00:05<00:00,  3.02it/s, dice=0.165, iou=0.090, loss=2.188, loss_bce=0.553, loss_dice=0.835, loss_iou=0.910]
[2020-07-25 20:53:04,472] 
1/50 * Epoch 1 (_base): lr=0.0002 | momentum=0.9500
1/50 * Epoch 1 (train): dice=0.2101 | iou=0.1180 | loss=2.3075 | loss_bce=0.7946 | loss_dice=0.7899 | loss_iou=0.8820
1/50 * Epoch 1 (valid): dice=0.2052 | iou=0.1147 | loss=2.1564 | loss_bce=0.5953 | loss_dice=0.7948 | loss_iou=0.8853



To get the last learning rate computed by the scheduler, please use `get_last_lr()`.



2/50 * Epoch (train): 100% 63/63 [00:16<00:00,  3.71it/s, dice=0.305, iou=0.180, loss=2.047, loss_bce=0.664, loss_dice=0.695, loss_iou=0.820]
2/50 * Epoch (valid): 100% 16/16 [00:01<00:00,  9.47it/s, dice=0.178, iou=0.098, loss=2.126, loss_bce=0.501, loss_dice=0.822, loss_iou=0.902]
[2020-07-25 20:53:23,398] 
2/50 * Epoch 2 (_base): lr=0.0002 | momentum=0.9500
2/50 * Epoch 2 (train): dice=0.2399 | iou=0.1375 | loss=2.2029 | loss_bce=0.7255 | loss_dice=0.7601 | loss_iou=0.8625
2/50 * Epoch 2 (valid): dice=0.2255 | iou=0.1275 | loss=2.0806 | loss_bce=0.5420 | loss_dice=0.7745 | loss_iou=0.8725
3/50 * Epoch (train): 100% 63/63 [00:17<00:00,  3.66it/s, dice=0.359, iou=0.219, loss=1.865, loss_bce=0.555, loss_dice=0.641, loss_iou=0.781]
3/50 * Epoch (valid): 100% 16/16 [00:01<00:00,  9.49it/s, dice=0.244, iou=0.139, loss=2.012, loss_bce=0.494, loss_dice=0.756, loss_iou=0.861]
[2020-07-25 20:53:46,129] 
3/50 * Epoch 3 (_base): lr=0.0002 | momentum=0.9500
3/50 * Epoch 3 (train): dice=0.2764 | 

16/50 * Epoch (train): 100% 63/63 [00:16<00:00,  3.87it/s, dice=0.708, iou=0.549, loss=0.890, loss_bce=0.184, loss_dice=0.292, loss_iou=0.451]
16/50 * Epoch (valid): 100% 16/16 [00:01<00:00,  9.80it/s, dice=0.690, iou=0.527, loss=0.884, loss_bce=0.126, loss_dice=0.310, loss_iou=0.473]
[2020-07-25 20:58:02,918] 
16/50 * Epoch 16 (_base): lr=0.0002 | momentum=0.9499
16/50 * Epoch 16 (train): dice=0.6772 | iou=0.5132 | loss=0.9652 | loss_bce=0.1945 | loss_dice=0.3228 | loss_iou=0.4868
16/50 * Epoch 16 (valid): dice=0.6981 | iou=0.5371 | loss=0.9042 | loss_bce=0.1743 | loss_dice=0.3019 | loss_iou=0.4629
17/50 * Epoch (train): 100% 63/63 [00:16<00:00,  3.88it/s, dice=0.571, iou=0.400, loss=1.172, loss_bce=0.178, loss_dice=0.429, loss_iou=0.600]
17/50 * Epoch (valid): 100% 16/16 [00:01<00:00,  9.61it/s, dice=0.702, iou=0.541, loss=0.860, loss_bce=0.128, loss_dice=0.298, loss_iou=0.459]
[2020-07-25 20:58:22,246] 
17/50 * Epoch 17 (_base): lr=0.0002 | momentum=0.9499
17/50 * Epoch 17 (train): 

In [4]:
utils.process_model_params??