# Model's training analysis

In [2]:
from pathlib import Path

import sys
sys.path.insert(0, (Path(".").resolve().parent / "common").as_posix())

In [4]:
INPUT_PATH = Path(".").resolve().parent / "input/"

Train/overfit config

In [6]:
import torch
import torch.nn as nn
from torch.optim import SGD
from torch.optim.lr_scheduler import MultiStepLR

import matplotlib
matplotlib.use('Agg')

from albumentations import Compose, RandomCrop, RandomCropNearBBox, ShiftScaleRotate, GaussNoise, ElasticTransform
from albumentations import CenterCrop
from albumentations.pytorch import ToTensor

from dataflow.dataloaders import get_base_train_val_loaders_by_fold, HPADataset
from models.resnet import HPAResNet50

from custom_ignite.metrics.accuracy import Accuracy
from custom_ignite.metrics.precision import Precision
from custom_ignite.metrics.recall import Recall


seed = 12
device = "cuda"
debug = True

val_fold_index = 0


train_transforms = Compose([
    # ShiftScaleRotate(shift_limit=0.2, scale_limit=0.01, rotate_limit=15, interpolation=cv2.INTER_CUBIC, p=0.3),
    # ElasticTransform(p=0.3),
    CenterCrop(256, 256),
    ToTensor()
])
train_transform_fn = lambda dp: train_transforms(**{"image": dp[0], "tags": dp[1].astype('float32')})


val_transforms = Compose([
    CenterCrop(250, 250),
    ToTensor()
])
val_transform_fn = lambda dp: val_transforms(**{"image": dp[0], "tags": dp[1].astype('float32')})


batch_size = 5
train_loader, val_loader = get_base_train_val_loaders_by_fold(INPUT_PATH, train_transform_fn, val_transform_fn,
                                                              batch_size=batch_size, num_workers=8,
                                                              fold_index=val_fold_index, n_splits=3,
                                                              random_state=seed,
                                                              limit_train_num_samples=5,
                                                              limit_val_num_samples=5)

model = HPAResNet50(num_classes=HPADataset.num_tags)


# Training
criterion = nn.BCEWithLogitsLoss(reduction='mean')
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.5)

# Optional config param
# lr_scheduler = CosineAnnealingLR(optimizer, T_max=len(train_loader) * 10, eta_min=1e-5)
lr_scheduler = MultiStepLR(optimizer, milestones=[30, 60, 80], gamma=0.1)

num_epochs = 100


def thresholded_output_transform(output):
    y_pred, y = output
    y_pred = torch.round(torch.sigmoid(y_pred))
    return y_pred, y


# Optional config param
metrics = {
    "precision": Precision(output_transform=thresholded_output_transform,
                           average=True, is_multilabel=True),
    "recall": Recall(output_transform=thresholded_output_transform,
                     average=True, is_multilabel=True),
    "accuracy": Accuracy(output_transform=thresholded_output_transform,
                         is_multilabel=True)
}

log_interval = 10
val_interval_epochs = 1
val_metrics = metrics

trainer_checkpoint_interval = 1000


In [15]:
model.cuda()
_ = model.train()

Training loop

In [None]:
optimizer.zero_grad()

In [7]:
for batch in train_loader:
    break

In [16]:
x = batch['image'].cuda()
y = batch['tags'].cuda()

In [18]:
x.type(), x.shape, x.min(), x.max()

('torch.cuda.FloatTensor',
 torch.Size([5, 4, 256, 256]),
 tensor(0., device='cuda:0'),
 tensor(1., device='cuda:0'))

In [19]:
y.type(), y.shape, y

('torch.cuda.FloatTensor',
 torch.Size([5, 28]),
 tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 1., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 1., 0., 0., 0.]], device='cuda:0'))

In [20]:
y_pred = model(x)

In [22]:
y_pred.type(), y_pred.shape, y_pred

('torch.cuda.FloatTensor',
 torch.Size([5, 28]),
 tensor([[ 5.1568e-01, -6.6601e-01,  5.0193e-01, -3.3513e-01, -8.4777e-01,
           1.5676e+00,  4.1978e-02,  3.5155e-01, -7.3856e-01, -4.2269e-02,
           1.0408e-01,  4.6202e-01, -1.4127e-01,  1.3753e-01,  2.8624e-02,
          -5.8597e-01,  6.2380e-03, -1.1810e+00,  2.8056e-01, -5.4474e-01,
          -3.3011e-01,  4.6405e-02, -8.7605e-01, -1.2937e+00, -4.7177e-01,
           2.3426e-01, -2.0903e-01,  1.3883e+00],
         [-5.6634e-01, -1.8244e-01,  3.3004e-01, -1.8493e-01, -4.8620e-01,
           1.2350e+00, -1.2161e-01,  1.3512e-01, -1.2992e-01,  1.0296e-01,
          -2.5559e-01, -4.2026e-02, -8.0712e-02, -1.9398e-01,  5.0062e-01,
          -1.4312e-01, -2.4352e-01, -8.8529e-01,  1.4527e-01, -2.6481e-01,
          -4.2727e-01,  2.0227e-01, -7.9223e-01, -6.2631e-01, -2.5884e-01,
          -2.2482e-01, -3.9603e-02,  9.5804e-01],
         [-3.9562e-01, -2.2092e-01,  2.4799e-01, -2.4404e-01, -4.6749e-01,
           1.2710e+00, -1.

In [23]:
loss = criterion(y_pred, y)

In [25]:
loss.backward()

In [26]:
optimizer.step()

In [48]:
import torch.nn as nn

In [49]:
criterion = nn.MultiLabelMarginLoss()

In [54]:
loss = criterion(y_pred, y.type(torch.long))

In [55]:
loss

tensor(25.1940, device='cuda:0', grad_fn=<MultilabelMarginLossBackward>)

In [5]:
import torch
import torch.nn as nn

In [10]:
in_planes = 512

net = nn.Sequential(
    nn.Conv2d(in_planes, 256, kernel_size=3, stride=1, padding=1, bias=False),
    nn.BatchNorm2d(256),
    nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=False),
    nn.BatchNorm2d(256),
    nn.ReLU(inplace=True),
    nn.ConvTranspose2d(256, 256, kernel_size=3, stride=2, padding=1, output_padding=1)
)


In [12]:
x = torch.rand((8, 512, 40, 40))

y = net(x)
y.shape

torch.Size([8, 256, 80, 80])