In [1]:
from xv import run
from torchvision.ops import misc as misc_nn_ops
from apex import amp
from torch.nn.modules.loss import CrossEntropyLoss
from xv.nn.losses import loss_dict, WeightedLoss
from pytorch_toolbelt import losses
import pandas as pd
from xv import dataset
import random
from xv.nn.layers import FrozenBatchNorm2d
from xv.util import vis_im_mask
from torch import nn
import torch
import numpy as np
from tqdm import tqdm
from glob import glob
from pprint import pprint
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn
import os
import wandb
import yaml
from xv import io
from pprint import pprint
from warmup_scheduler import GradualWarmupScheduler


conf_file = "config/config-damage.yaml"
# conf_file = "config/config-seg-finetune.yaml"
# conf_file = "config/config-seg-joint.yaml"

with open(conf_file) as f:
    conf_init = yaml.load(f)

os.environ['WANDB_MODE'] = 'dryrun'
wandb.init(project=conf_init['project'], config=conf_init, name=conf_init['name'])
conf = wandb.config



wandb: Wandb version 0.8.18 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


In [2]:
pprint(dict(conf))

{'add_suppl': False,
 'add_tertiary': False,
 'amp_opt_level': 'O1',
 'attention': 'scse',
 'aug_prob': 0.25,
 'batch_size': 6,
 'class_weight': [1, 3, 2, 2],
 'data_prefix': 'post',
 'encoder': 'efficientnet-b3',
 'epochs': 100,
 'eval_resolution': 1024,
 'filter_none': True,
 'freeze_decoder_norm': False,
 'freeze_encoder_norm': False,
 'load_weights': False,
 'loss_reduce_mode': 'mean',
 'lr': 0.0002,
 'metric': 'building:f1',
 'mode': 'categorical',
 'n_cpus': 10,
 'name': '25rerun-lowlr',
 'nclasses': 4,
 'optim': 'adam',
 'project': 'sky-eye-full',
 'scheduler_factor': 0.1,
 'scheduler_patience': 5,
 'segmentation_arch': 'Unet',
 'sync_bn': True,
 'train_patch': False,
 'train_repeat': 1,
 'training_resolution': 1024,
 'training_scales': [1.0]}


In [3]:
from efficientnet_pytorch import EfficientNet
from collections import OrderedDict

backbone = EfficientNet.from_pretrained(conf.encoder)
del backbone._fc

preprocess_fn = smp.encoders.get_preprocessing_fn(conf.encoder)

class DamageModel(nn.Module):
    
    def __init__(self, backbone, nclasses):
        super().__init__()
        self.backbone = backbone
        self.head = nn.Conv2d(backbone._conv_head.out_channels, conf.nclasses, kernel_size=1)    
    
    def forward(self, x):
        return self.head(self.backbone.extract_features(x))

model = DamageModel(backbone, conf.nclasses)
model.cuda();

Loaded pretrained weights for efficientnet-b3


In [4]:
train_dataset, train_loader = io.load_training_data(conf, preprocess_fn)
dev_dataset, dev_loader = io.load_dev_data(conf, preprocess_fn)

100%|██████████| 2519/2519 [00:14<00:00, 176.03it/s]
100%|██████████| 280/280 [00:01<00:00, 190.99it/s]


In [5]:
print(f"n_train: {len(train_dataset)}")
print(f"n_dev: {len(dev_dataset)}")

n_train: 2017
n_dev: 224


In [6]:
#loss = WeightedLoss({loss_dict[l](): w for l, w in conf.loss_weights.items()})

weights = torch.Tensor(conf.class_weight).float().cuda()
loss_fn = nn.CrossEntropyLoss(weights, reduction=conf.loss_reduce_mode, ignore_index=-1)

optims = {
    'adam': torch.optim.Adam,
    'sgd': torch.optim.SGD
}

optim = optims[conf.optim](model.parameters(), lr=conf.lr)


model, optim = amp.initialize(model, optim, opt_level=conf.amp_opt_level)

Selected optimization level O1:  Insert automatic casts around Pytorch functions and Tensor methods.

Defaults for this optimization level are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic
Processing user overrides (additional kwargs that are not None)...
After processing overrides, optimization options are:
enabled                : True
opt_level              : O1
cast_model_type        : None
patch_torch_functions  : True
keep_batchnorm_fp32    : None
master_weights         : None
loss_scale             : dynamic


In [7]:
scale = 1/32

In [39]:
@torch.no_grad()
def resize_heatmap(damage, damage_mask=None, scale=1/32):
    if damage_mask is not None:
        dmg_msk_dtype = damage_mask.dtype
        damage_mask = misc_nn_ops.interpolate(damage_mask[None].float(), scale_factor=scale)[0].to(dmg_msk_dtype)
    dmg_dtype = damage.dtype
    damage_one_hot = torch.nn.functional.one_hot(damage).permute(0, 3, 1, 2)
    damage = misc_nn_ops.interpolate(damage_one_hot.float(), scale_factor=scale).argmax(1)
    return damage, damage_mask

In [9]:
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optim, factor=conf.scheduler_factor, patience=conf.scheduler_patience
)

train_resize = run.MultiScaleResize(conf.mode, conf.training_scales)

best_score = 0
epoch = 0

In [10]:
def train(model, optim, data, loss_fn, train_resize):
    model = model.train()
    loss_sum = 0.
    for image, mask in tqdm(iter(data)):
        if train_resize:
            image, mask = train_resize((image, mask))
        optim.zero_grad()
        outputs = model(image.to('cuda'))
        _, nclasses, _, _ = outputs.shape
        mb, d_mask = mask
        mb, d_mask = resize_heatmap(d_mask.cuda(),mb.cuda(), scale)
        d_mask[mb] = -1
        loss = loss_fn(outputs, d_mask)
        with amp.scale_loss(loss, optim) as scaled_loss:
            scaled_loss.backward()
        optim.step()
        loss_sum += loss
    return {'train:loss':loss_sum.detach()/len(data)}

In [11]:
metrics = {'epoch': 0}
train_metrics = train(model, optim, train_loader, loss_fn, train_resize=train_resize)
metrics.update(train_metrics)

100%|██████████| 337/337 [05:04<00:00,  1.11it/s]


In [44]:
%pdb

Automatic pdb calling has been turned ON


In [57]:
from collections import defaultdict
from xv.submission_metrics import RowPairCalculator
from xv.run import get_metrics_for_counts
import scipy

@torch.no_grad()
def evaluate(model, data, loss_fn, threshold=0.5, nclasses=4):
    model = model.eval()
    metrics = {}
    loss=0.
    tps, fps, fns = defaultdict(float), defaultdict(float), defaultdict(float)
    for image, mask in tqdm(iter(data)):
        outputs = model(image.cuda())
        mask_bool, d_mask = mask
        
        d_mask_down, mb_down = resize_heatmap(d_mask.cuda(), mask_bool.cuda(), scale)
        d_mask_down[mb_down] = -1
        loss = loss_fn(outputs, d_mask_down)
        
        output_big, _ = resize_heatmap(outputs.argmax(1), damage_mask=None, scale=1/scale)
        output_big = output_big.float().cpu().numpy()
        
        flat_output, flat_target = output_big[mask_bool], d_mask[mask_bool].cpu().numpy()
        
        for ix in range(nclasses):                
            tp, fn, fp = RowPairCalculator.compute_tp_fn_fp(flat_output, flat_target, ix)
            tps[ix] += tp
            fps[ix] += fp
            fns[ix] += fn

    metrics['loss'] = loss / len(data)
    
    aggregate = defaultdict(list)
    for ix in range(nclasses):
        categorical_ix_metrics =  get_metrics_for_counts(tps[ix], fps[ix], fns[ix])
        for k,v in categorical_ix_metrics.items():
            metrics[f'damage:categorical:{ix}:{k}'] = v
            aggregate[f'damage:categorical:{k}'].append(v)
    hmean = {f'hmean:{k}': scipy.stats.hmean(v) for k,v in aggregate.items()}
    metrics.update(hmean)
    
    mean = {f'mean:{k}':scipy.mean(v) for k,v in aggregate.items()}
    metrics.update(mean)

    return metrics

In [58]:
evaluate(model, dev_loader, loss_fn, threshold=0.5, nclasses=4)

100%|██████████| 38/38 [00:13<00:00,  2.72it/s]


ValueError: Harmonic mean only defined if all elements greater than zero

> [0;32m/opt/anaconda3/lib/python3.7/site-packages/scipy/stats/stats.py[0m(398)[0;36mhmean[0;34m()[0m
[0;32m    396 [0;31m        [0;32mreturn[0m [0msize[0m [0;34m/[0m [0mnp[0m[0;34m.[0m[0msum[0m[0;34m([0m[0;36m1.0[0m [0;34m/[0m [0ma[0m[0;34m,[0m [0maxis[0m[0;34m=[0m[0maxis[0m[0;34m,[0m [0mdtype[0m[0;34m=[0m[0mdtype[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    397 [0;31m    [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 398 [0;31m        raise ValueError("Harmonic mean only defined if all elements greater "
[0m[0;32m    399 [0;31m                         "than zero")
[0m[0;32m    400 [0;31m[0;34m[0m[0m
[0m


ipdb>  q


In [12]:
for epoch in range(epoch, conf.epochs):
    print(f"epoch {epoch}/{conf.epochs}.")
    torch.save(optim.state_dict(), os.path.join(wandb.run.dir, "optim.pth"))
    torch.save(scheduler.state_dict(), os.path.join(wandb.run.dir, "scheduler.pth"))
    metrics = {'epoch': epoch}
    train_metrics = train_fn(model, optim, train_loader, loss, train_resize=train_resize, mode=conf.mode)
    metrics.update(train_metrics)

    dev_metrics = eval_fn(model, dev_loader, loss, mode=conf.mode)
    metrics.update(dev_metrics)
    
    """
    if conf.mode != "dual":
        examples = run.sample_masks(model, dev_dataset.instances, preprocess_fn, n=1)
        metrics['examples'] = [wandb.Image(im, caption=f'mask:{ix}') for e in examples for ix, im in enumerate(e)]
    """
    
    wandb.log(metrics)
    #scheduler.step(metrics['loss'])
    scheduler.step()
    score = metrics[conf.metric]
    pprint(metrics)
    if score > best_score:
        torch.save(model.state_dict(), os.path.join(wandb.run.dir, "state_dict.pth"))
        best_score = score

epoch 0/100.


NameError: name 'train_fn' is not defined