In [1]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_utility import *
from data_utils import *
from loss import *
from train import *
from deeplab_model.deeplab import *
from sync_batchnorm import convert_model
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# ---- Run configuration -------------------------------------------------
USE_GPU = True      # set to False to stay on the CPU even when CUDA is present
NUM_WORKERS = 12    # worker count handed to the DataLoaders
BATCH_SIZE = 2      # samples per mini-batch

# float32 needs half the memory of float64 (double)
dtype = torch.float32

# Use CUDA only when it is both requested and actually available.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

if device.type == 'cuda':
    # Enable cuDNN and let it benchmark convolution algorithms to speed up training.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True
    print('using GPU for training')
else:
    print('using CPU for training')

using GPU for training


In [3]:
# Training set: each sample passes through random_affine followed by random_filp
# (both are project helpers; the arguments are kept exactly as configured).
train_dataset = pyramid_dataset(
    data_type='nii_train',
    transform=transforms.Compose([
        random_affine(90, 15),
        random_filp(0.5),
    ]),
)

# Validation set: no augmentation, samples are used as stored.
validation_dataset = pyramid_dataset(data_type='nii_test', transform=None)

# The DataLoaders split the datasets into mini-batches and load them with
# NUM_WORKERS worker processes.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS)

# shuffle=False: validation needs no random order. A fixed order means every
# validation pass sees the same batches, so the per-batch-averaged loss used for
# checkpoint selection is comparable from epoch to epoch.
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False,
                               num_workers=NUM_WORKERS)  # drop_last left at its default

In [4]:
# Disabled "start from scratch" setup. The code below is wrapped in a bare string
# literal, so none of it executes; the cell merely evaluates to that string.
# It builds a fresh DeepLab_ELU model with a new Adam optimizer and plateau
# scheduler and sets epoch = 0, instead of restoring them from a checkpoint.
'''
deeplab = DeepLab_ELU(output_stride=16)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)
deeplab = deeplab.to(device=device, dtype=dtype)
#shape_test(icnet1, True)
# create the model, by default model type is float, use model.double(), model.float() to convert
# move the model to desirable device

optimizer = optim.Adam(deeplab.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
epoch = 0
'''

# The disabled code above creates the optimizer over deeplab.parameters().
# NOTE(review): the previous comment here said only "model_2" and "model_4" params
# would be optimized; nothing with those names appears in this notebook, so that
# remark looks stale -- confirm before relying on it.

"\ndeeplab = DeepLab_ELU(output_stride=16)\ndeeplab = nn.DataParallel(deeplab)\ndeeplab = convert_model(deeplab)\ndeeplab = deeplab.to(device=device, dtype=dtype)\n#shape_test(icnet1, True)\n# create the model, by default model type is float, use model.double(), model.float() to convert\n# move the model to desirable device\n\noptimizer = optim.Adam(deeplab.parameters(), lr=1e-2)\nscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)\nepoch = 0\n"

In [None]:

# Rebuild the network with the same wrapping as the from-scratch setup
# (DataParallel, then convert_model) before restoring the weights.
# NOTE(review): presumably the saved state-dict keys match this wrapped layout -- confirm.
deeplab = DeepLab_ELU(output_stride=16)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)

# Earlier checkpoints, kept for reference:
#checkpoint = torch.load('../deeplab_save/2019-07-29 04:00:14.630172.pth') # second best
#checkpoint = torch.load('../deeplab_save/2019-07-28 23:47:36.279119.pth') # second best
#checkpoint = torch.load('../deeplab_save/2019-07-29 00:15:49.271222.pth') # best
#checkpoint = torch.load('../deeplab_save/2019-07-29 00:44:11.825872.pth')
checkpoint_path = '../deeplab_output_16_elu_save/2019-08-20 16:38:56.588017 epoch: 336.pth' # latest one

# map_location=device remaps the stored tensors onto the device selected above, so a
# checkpoint written on a GPU machine can still be restored when running on the CPU.
# torch.load unpickles the file: only load checkpoints from a trusted source.
checkpoint = torch.load(checkpoint_path, map_location=device)

deeplab.load_state_dict(checkpoint['state_dict_1'])
deeplab = deeplab.to(device, dtype)

# The lr passed here is only a constructor placeholder; load_state_dict restores the
# param-group settings (including lr) that were saved in the checkpoint.
optimizer = optim.Adam(deeplab.parameters(), lr=1e-2)
optimizer.load_state_dict(checkpoint['optimizer'])

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=25)
scheduler.load_state_dict(checkpoint['scheduler'])

# Last completed epoch; the training loop continues from epoch + 1.
epoch = checkpoint['epoch']
print(epoch)
# Show the learning rate actually in effect after restoring the optimizer state.
for param_group in optimizer.param_groups:
    print(param_group['lr'])



336
0.01


In [None]:
# Upper bound (exclusive) of the epoch counter; training resumes at epoch + 1.
epochs = 5000

# Best validation loss seen so far in THIS session.
# NOTE(review): this starts at 1 on every resume, so the first validation pass after a
# restart is always treated as a new best and saved -- confirm that is intended.
min_val = 1

# Per-epoch loss history that is handed to save_1 with every checkpoint.
# NOTE(review): starts empty on resume; history from earlier sessions is not merged here.
logger = {'train':[], 'validation_1': []}

# The `with` block guarantees the log file is closed even when the run is interrupted
# (e.g. stopping the kernel) or a batch raises, instead of leaking the handle.
with open('train_deeplab_output_16_elu.txt', 'a+') as record:

    for e in tqdm(range(epoch + 1, epochs)):
    # iterate over epochs

        deeplab.train()
        # Training mode (affects dropout / batchnorm). Setting it once per epoch is
        # enough: the model is only switched to eval() after the batch loop below.

        epoch_loss = 0

        for t, batch in enumerate(train_loader):
        # iterate over the training mini-batches

            # move data to the device and convert to the configured dtype
            image_1 = batch['image1_data'].to(device=device, dtype=dtype)
            label_1 = batch['image1_label'].to(device=device, dtype=dtype)

            # forward pass
            out_1 = deeplab(image_1)

            # training loss for this mini-batch
            loss_1 = dice_loss_3(out_1, label_1)

            # accumulate the mini-batch loss into the epoch total
            epoch_loss += loss_1.item()

            # clear old gradients, back-propagate, take one optimizer step
            optimizer.zero_grad()
            loss_1.backward()
            optimizer.step()
            #scheduler.step(loss_1)

        # mean training loss over the t + 1 mini-batches of this epoch
        train_loss = epoch_loss/(t+1)
        outstr = 'Epoch {0} finished ! Training Loss: {1:.4f}'.format(e, train_loss) + '\n'

        logger['train'].append(train_loss)

        print(outstr)
        record.write(outstr)
        record.flush()

        # Validate every 5th epoch up to epoch 150 and every epoch afterwards.
        # (The original `e > 150 and e%1 == 0` reduces to `e > 150`.)
        if e > 150 or e % 5 == 0:
            deeplab.eval()
            # evaluation mode (affects dropout / batchnorm)

            with torch.no_grad():
            # no gradients are needed for validation

                valloss_1 = 0

                for v, vbatch in enumerate(validation_loader):
                # iterate over the validation mini-batches

                    # move data to the device; get_dimensions is a project helper, and
                    # when it reports 4 a leading dimension is added in place
                    image_1_val = vbatch['image1_data'].to(device=device, dtype=dtype)
                    if get_dimensions(image_1_val) == 4:
                        image_1_val.unsqueeze_(0)
                    label_1_val = vbatch['image1_label'].to(device=device, dtype=dtype)
                    if get_dimensions(label_1_val) == 4:
                        label_1_val.unsqueeze_(0)

                    # forward pass
                    out_1_val = deeplab(image_1_val)

                    # validation loss for this mini-batch
                    loss_1 = dice_loss_3(out_1_val, label_1_val)

                    valloss_1 += loss_1.item()

                # mean validation loss over the v + 1 mini-batches
                avg_val_loss = (valloss_1 / (v+1))
                outstr = '------- 1st valloss={0:.4f}'\
                    .format(avg_val_loss) + '\n'

                logger['validation_1'].append(avg_val_loss)
                # NOTE(review): the scheduler is never stepped in this loop (both calls
                # are commented out), so the learning rate stays at the restored value.
                #scheduler.step(avg_val_loss)

                print(outstr)
                record.write(outstr)
                record.flush()

                # Save a checkpoint on a new best validation loss, and otherwise on
                # every validated epoch that is a multiple of 10.
                is_best = avg_val_loss < min_val
                if is_best:
                    print(avg_val_loss, "less than", min_val)
                    min_val = avg_val_loss
                if is_best or e % 10 == 0:
                    save_1('deeplab_output_16_elu_save', deeplab, optimizer, logger, e, scheduler)

  0%|          | 0/4663 [00:00<?, ?it/s]

Epoch 337 finished ! Training Loss: 0.0863

------- 1st valloss=0.0733

0.07334133820689243 less than 1


  0%|          | 1/4663 [14:11<1103:15:19, 851.93s/it]

Checkpoint 337 saved !
Epoch 338 finished ! Training Loss: 0.0912



  0%|          | 2/4663 [26:57<1069:17:40, 825.89s/it]

------- 1st valloss=0.0827

Epoch 339 finished ! Training Loss: 0.0946



  0%|          | 3/4663 [39:47<1047:40:09, 809.36s/it]

------- 1st valloss=0.1474

Epoch 340 finished ! Training Loss: 0.0899

------- 1st valloss=0.1136



  0%|          | 4/4663 [52:43<1034:25:46, 799.30s/it]

Checkpoint 340 saved !
Epoch 341 finished ! Training Loss: 0.0898

------- 1st valloss=0.0704

0.0703579051339108 less than 0.07334133820689243


  0%|          | 5/4663 [1:05:40<1025:35:12, 792.64s/it]

Checkpoint 341 saved !
Epoch 342 finished ! Training Loss: 0.0860



  0%|          | 6/4663 [1:18:38<1019:41:00, 788.25s/it]

------- 1st valloss=0.0801

Epoch 343 finished ! Training Loss: 0.0818



  0%|          | 7/4663 [1:31:40<1016:48:51, 786.20s/it]

------- 1st valloss=0.0937

Epoch 344 finished ! Training Loss: 0.0843



  0%|          | 8/4663 [1:44:32<1011:16:07, 782.08s/it]

------- 1st valloss=0.1164

Epoch 345 finished ! Training Loss: 0.0803



  0%|          | 9/4663 [1:57:38<1012:35:17, 783.27s/it]

------- 1st valloss=0.0719

Epoch 346 finished ! Training Loss: 0.0794



  0%|          | 10/4663 [2:10:27<1006:43:45, 778.90s/it]

------- 1st valloss=0.0950

Epoch 347 finished ! Training Loss: 0.0747

------- 1st valloss=0.0677

0.06771823294136835 less than 0.0703579051339108


  0%|          | 11/4663 [2:23:24<1005:39:30, 778.24s/it]

Checkpoint 347 saved !
Epoch 348 finished ! Training Loss: 0.0769

------- 1st valloss=0.0674

0.06739170865520187 less than 0.06771823294136835


  0%|          | 12/4663 [2:36:19<1004:23:58, 777.43s/it]

Checkpoint 348 saved !
Epoch 349 finished ! Training Loss: 0.0780



  0%|          | 13/4663 [2:49:28<1008:41:16, 780.92s/it]

------- 1st valloss=0.0766

Epoch 350 finished ! Training Loss: 0.0784

------- 1st valloss=0.0976



  0%|          | 14/4663 [3:02:23<1006:13:46, 779.18s/it]

Checkpoint 350 saved !
Epoch 351 finished ! Training Loss: 0.0749

------- 1st valloss=0.0654

0.06536944971784302 less than 0.06739170865520187


  0%|          | 15/4663 [3:15:23<1006:17:05, 779.39s/it]

Checkpoint 351 saved !
Epoch 352 finished ! Training Loss: 0.0754

------- 1st valloss=0.0633

0.0633393673469191 less than 0.06536944971784302


  0%|          | 16/4663 [3:28:31<1009:17:59, 781.90s/it]

Checkpoint 352 saved !
Epoch 353 finished ! Training Loss: 0.0714



  0%|          | 17/4663 [3:41:34<1009:36:44, 782.31s/it]

------- 1st valloss=0.0650

Epoch 354 finished ! Training Loss: 0.0766



  0%|          | 18/4663 [3:54:30<1006:43:13, 780.24s/it]

------- 1st valloss=0.0738

Epoch 355 finished ! Training Loss: 0.0770



  0%|          | 19/4663 [4:07:18<1001:57:32, 776.71s/it]

------- 1st valloss=0.0684

Epoch 356 finished ! Training Loss: 0.0780



  0%|          | 20/4663 [4:20:18<1003:06:04, 777.77s/it]

------- 1st valloss=0.0680

Epoch 357 finished ! Training Loss: 0.0732



  0%|          | 21/4663 [4:33:21<1004:44:49, 779.21s/it]

------- 1st valloss=0.0762

Epoch 358 finished ! Training Loss: 0.0743



  0%|          | 22/4663 [4:46:21<1004:50:47, 779.45s/it]

------- 1st valloss=0.0662

Epoch 359 finished ! Training Loss: 0.0752



  0%|          | 23/4663 [4:59:27<1007:04:29, 781.35s/it]

------- 1st valloss=0.0662

Epoch 360 finished ! Training Loss: 0.0726

------- 1st valloss=0.0700



  1%|          | 24/4663 [5:12:28<1006:49:26, 781.32s/it]

Checkpoint 360 saved !
Epoch 361 finished ! Training Loss: 0.0720



  1%|          | 25/4663 [5:25:30<1006:50:52, 781.51s/it]

------- 1st valloss=0.0922

Epoch 362 finished ! Training Loss: 0.0728



  1%|          | 26/4663 [5:38:27<1004:47:51, 780.09s/it]

------- 1st valloss=0.0664

Epoch 363 finished ! Training Loss: 0.0714



  1%|          | 27/4663 [5:51:23<1002:56:17, 778.81s/it]

------- 1st valloss=0.0637

Epoch 364 finished ! Training Loss: 0.0708

------- 1st valloss=0.0617

0.061747847529857056 less than 0.0633393673469191


  1%|          | 28/4663 [6:04:12<999:14:11, 776.11s/it] 

Checkpoint 364 saved !
Epoch 365 finished ! Training Loss: 0.0731



  1%|          | 29/4663 [6:17:06<998:02:39, 775.35s/it]

------- 1st valloss=0.0667

Epoch 366 finished ! Training Loss: 0.0726



  1%|          | 30/4663 [6:30:05<999:05:42, 776.33s/it]

------- 1st valloss=0.0666

Epoch 367 finished ! Training Loss: 0.0728



  1%|          | 31/4663 [6:43:06<1000:43:41, 777.77s/it]

------- 1st valloss=0.0716

Epoch 368 finished ! Training Loss: 0.0712



  1%|          | 32/4663 [6:56:03<1000:15:06, 777.57s/it]

------- 1st valloss=0.0737

Epoch 369 finished ! Training Loss: 0.0728



  1%|          | 33/4663 [7:09:09<1003:25:44, 780.20s/it]

------- 1st valloss=0.0658

Epoch 370 finished ! Training Loss: 0.0715

------- 1st valloss=0.1368



  1%|          | 34/4663 [7:22:03<1000:46:19, 778.31s/it]

Checkpoint 370 saved !
Epoch 371 finished ! Training Loss: 0.0701



  1%|          | 35/4663 [7:34:54<997:42:33, 776.09s/it] 

------- 1st valloss=0.0864

Epoch 372 finished ! Training Loss: 0.0720



  1%|          | 36/4663 [7:47:46<995:52:41, 774.84s/it]

------- 1st valloss=0.0726

Epoch 373 finished ! Training Loss: 0.0731



  1%|          | 37/4663 [8:00:59<1002:43:15, 780.33s/it]

------- 1st valloss=0.0649

Epoch 374 finished ! Training Loss: 0.0706



  1%|          | 38/4663 [8:13:55<1000:49:00, 779.01s/it]

------- 1st valloss=0.0628

Epoch 375 finished ! Training Loss: 0.0700



  1%|          | 39/4663 [8:26:58<1002:14:15, 780.29s/it]

------- 1st valloss=0.0787

Epoch 376 finished ! Training Loss: 0.0947



  1%|          | 40/4663 [8:40:06<1005:00:36, 782.62s/it]

------- 1st valloss=0.1342

Epoch 377 finished ! Training Loss: 0.0962



  1%|          | 41/4663 [8:53:07<1004:05:25, 782.07s/it]

------- 1st valloss=0.0847

Epoch 378 finished ! Training Loss: 0.0818



  1%|          | 42/4663 [9:06:17<1006:55:41, 784.45s/it]

------- 1st valloss=0.0698

Epoch 379 finished ! Training Loss: 0.0851



  1%|          | 43/4663 [9:19:13<1003:24:33, 781.88s/it]

------- 1st valloss=0.0764

Epoch 380 finished ! Training Loss: 0.0789

------- 1st valloss=0.0658



  1%|          | 44/4663 [9:31:54<995:23:22, 775.80s/it] 

Checkpoint 380 saved !
Epoch 381 finished ! Training Loss: 0.0760



  1%|          | 45/4663 [9:44:48<994:16:36, 775.10s/it]

------- 1st valloss=0.0809

Epoch 382 finished ! Training Loss: 0.0746



  1%|          | 46/4663 [9:57:41<993:11:06, 774.41s/it]

------- 1st valloss=0.1022

Epoch 383 finished ! Training Loss: 0.0718



  1%|          | 47/4663 [10:10:27<989:48:03, 771.94s/it]

------- 1st valloss=0.0674

Epoch 384 finished ! Training Loss: 0.0721



  1%|          | 48/4663 [10:23:31<994:25:32, 775.72s/it]

------- 1st valloss=0.1034

Epoch 385 finished ! Training Loss: 0.0708



  1%|          | 49/4663 [10:36:29<994:56:22, 776.29s/it]

------- 1st valloss=0.1571

Epoch 386 finished ! Training Loss: 0.0792



  1%|          | 50/4663 [10:49:27<995:12:00, 776.66s/it]

------- 1st valloss=0.0664

Epoch 387 finished ! Training Loss: 0.0728



  1%|          | 51/4663 [11:02:17<992:32:45, 774.75s/it]

------- 1st valloss=0.0713

Epoch 388 finished ! Training Loss: 0.0747



  1%|          | 52/4663 [11:15:10<991:34:15, 774.16s/it]

------- 1st valloss=0.0644

Epoch 389 finished ! Training Loss: 0.0722



  1%|          | 53/4663 [11:27:54<987:37:14, 771.24s/it]

------- 1st valloss=0.1113

Epoch 390 finished ! Training Loss: 0.0695

------- 1st valloss=0.0671



  1%|          | 54/4663 [11:40:58<992:05:18, 774.90s/it]

Checkpoint 390 saved !
Epoch 391 finished ! Training Loss: 0.0705



  1%|          | 55/4663 [11:54:03<995:50:03, 778.00s/it]

------- 1st valloss=0.0700

Epoch 392 finished ! Training Loss: 0.0693



  1%|          | 56/4663 [12:07:14<1000:32:36, 781.84s/it]

------- 1st valloss=0.0648

Epoch 393 finished ! Training Loss: 0.0705



  1%|          | 57/4663 [12:20:08<997:23:50, 779.56s/it] 

------- 1st valloss=0.0667

Epoch 394 finished ! Training Loss: 0.0684



  1%|          | 58/4663 [12:33:07<996:56:09, 779.36s/it]

------- 1st valloss=0.0634

Epoch 395 finished ! Training Loss: 0.0690



  1%|▏         | 59/4663 [12:46:24<1003:40:06, 784.80s/it]

------- 1st valloss=0.0717

Epoch 396 finished ! Training Loss: 0.0706



  1%|▏         | 60/4663 [12:59:19<999:26:59, 781.67s/it] 

------- 1st valloss=0.0619

Epoch 397 finished ! Training Loss: 0.0697



  1%|▏         | 61/4663 [13:12:19<998:45:10, 781.29s/it]

------- 1st valloss=0.0642

Epoch 398 finished ! Training Loss: 0.0670

------- 1st valloss=0.0613

0.06129830476382504 less than 0.061747847529857056


  1%|▏         | 62/4663 [13:25:33<1003:28:41, 785.16s/it]

Checkpoint 398 saved !
Epoch 399 finished ! Training Loss: 0.0677



  1%|▏         | 63/4663 [13:38:37<1002:41:39, 784.72s/it]

------- 1st valloss=0.0642

Epoch 400 finished ! Training Loss: 0.0682

------- 1st valloss=0.0659



  1%|▏         | 64/4663 [13:51:25<995:58:10, 779.62s/it] 

Checkpoint 400 saved !
Epoch 401 finished ! Training Loss: 0.0683



  1%|▏         | 65/4663 [14:04:26<996:34:20, 780.27s/it]

------- 1st valloss=0.0630

Epoch 402 finished ! Training Loss: 0.0684



  1%|▏         | 66/4663 [14:17:24<995:26:06, 779.54s/it]

------- 1st valloss=0.0969

Epoch 403 finished ! Training Loss: 0.0684

------- 1st valloss=0.0600

0.06002031590627587 less than 0.06129830476382504


  1%|▏         | 67/4663 [14:30:24<995:10:01, 779.50s/it]

Checkpoint 403 saved !
Epoch 404 finished ! Training Loss: 0.0670



  1%|▏         | 68/4663 [14:43:24<995:26:55, 779.89s/it]

------- 1st valloss=0.0649

Epoch 405 finished ! Training Loss: 0.0678



  1%|▏         | 69/4663 [14:56:27<996:17:55, 780.73s/it]

------- 1st valloss=0.1450

Epoch 406 finished ! Training Loss: 0.0732



  2%|▏         | 70/4663 [15:09:34<998:16:27, 782.45s/it]

------- 1st valloss=0.0868

Epoch 407 finished ! Training Loss: 0.0772



  2%|▏         | 71/4663 [15:22:26<994:15:16, 779.47s/it]

------- 1st valloss=0.0639

Epoch 408 finished ! Training Loss: 0.0720



  2%|▏         | 72/4663 [15:35:30<995:48:23, 780.85s/it]

------- 1st valloss=0.1163

Epoch 409 finished ! Training Loss: 0.0919



  2%|▏         | 73/4663 [15:48:21<991:36:11, 777.73s/it]

------- 1st valloss=0.0704

Epoch 410 finished ! Training Loss: 0.0735

------- 1st valloss=0.0652



  2%|▏         | 74/4663 [16:01:29<995:18:22, 780.80s/it]

Checkpoint 410 saved !
Epoch 411 finished ! Training Loss: 0.0783



  2%|▏         | 75/4663 [16:14:38<998:33:33, 783.53s/it]

------- 1st valloss=0.1064

Epoch 412 finished ! Training Loss: 0.0774



  2%|▏         | 76/4663 [16:27:32<994:20:14, 780.38s/it]

------- 1st valloss=0.0957

Epoch 413 finished ! Training Loss: 0.0740



  2%|▏         | 77/4663 [16:40:29<992:50:25, 779.38s/it]

------- 1st valloss=0.0721

Epoch 414 finished ! Training Loss: 0.0749



  2%|▏         | 78/4663 [16:53:35<995:08:30, 781.35s/it]

------- 1st valloss=0.0769

Epoch 415 finished ! Training Loss: 0.0757



  2%|▏         | 79/4663 [17:06:33<994:00:19, 780.63s/it]

------- 1st valloss=0.0645

Epoch 416 finished ! Training Loss: 0.0703



  2%|▏         | 80/4663 [17:19:30<992:21:24, 779.51s/it]

------- 1st valloss=0.1004

Epoch 417 finished ! Training Loss: 0.0759



  2%|▏         | 81/4663 [17:32:32<993:04:55, 780.25s/it]

------- 1st valloss=0.0759

Epoch 418 finished ! Training Loss: 0.0745



  2%|▏         | 82/4663 [17:45:40<995:37:04, 782.41s/it]

------- 1st valloss=0.0681

Epoch 419 finished ! Training Loss: 0.0741



  2%|▏         | 83/4663 [17:58:33<991:44:47, 779.54s/it]

------- 1st valloss=0.0966

Epoch 420 finished ! Training Loss: 0.0706

------- 1st valloss=0.0775



  2%|▏         | 84/4663 [18:11:28<989:45:39, 778.15s/it]

Checkpoint 420 saved !
Epoch 421 finished ! Training Loss: 0.0712



  2%|▏         | 85/4663 [18:24:28<990:14:55, 778.70s/it]

------- 1st valloss=0.0666

Epoch 422 finished ! Training Loss: 0.0728



  2%|▏         | 86/4663 [18:37:36<993:43:56, 781.61s/it]

------- 1st valloss=0.0679

Epoch 423 finished ! Training Loss: 0.0692



  2%|▏         | 87/4663 [18:50:46<996:44:46, 784.15s/it]

------- 1st valloss=0.0684

Epoch 424 finished ! Training Loss: 0.0685



  2%|▏         | 88/4663 [19:03:54<998:09:57, 785.44s/it]

------- 1st valloss=0.0687

Epoch 425 finished ! Training Loss: 0.0675



  2%|▏         | 89/4663 [19:16:52<994:54:12, 783.05s/it]

------- 1st valloss=0.0664

Epoch 426 finished ! Training Loss: 0.0672



  2%|▏         | 90/4663 [19:29:46<991:15:46, 780.35s/it]

------- 1st valloss=0.0657

Epoch 427 finished ! Training Loss: 0.0662



  2%|▏         | 91/4663 [19:42:51<992:41:09, 781.64s/it]

------- 1st valloss=0.0646

Epoch 428 finished ! Training Loss: 0.0662



  2%|▏         | 92/4663 [19:55:56<993:55:05, 782.78s/it]

------- 1st valloss=0.0652

Epoch 429 finished ! Training Loss: 0.0666



  2%|▏         | 93/4663 [20:09:13<999:07:48, 787.06s/it]

------- 1st valloss=0.0676

Epoch 430 finished ! Training Loss: 0.0655

------- 1st valloss=0.0654



  2%|▏         | 94/4663 [20:22:20<998:53:22, 787.04s/it]

Checkpoint 430 saved !
Epoch 431 finished ! Training Loss: 0.0663



  2%|▏         | 95/4663 [20:35:34<1001:24:37, 789.20s/it]

------- 1st valloss=0.0814

Epoch 432 finished ! Training Loss: 0.0671



  2%|▏         | 96/4663 [20:48:26<994:34:13, 783.98s/it] 

------- 1st valloss=0.0641

Epoch 433 finished ! Training Loss: 0.0667



  2%|▏         | 97/4663 [21:01:29<993:53:05, 783.61s/it]

------- 1st valloss=0.0666

Epoch 434 finished ! Training Loss: 0.0654



  2%|▏         | 98/4663 [21:14:33<993:48:02, 783.72s/it]

------- 1st valloss=0.0649

Epoch 435 finished ! Training Loss: 0.0650

------- 1st valloss=0.0597

0.059688339738742165 less than 0.06002031590627587


  2%|▏         | 99/4663 [21:27:39<994:21:05, 784.33s/it]

Checkpoint 435 saved !
Epoch 436 finished ! Training Loss: 0.0661



  2%|▏         | 100/4663 [21:40:34<990:52:29, 781.76s/it]

------- 1st valloss=0.0621

Epoch 437 finished ! Training Loss: 0.0654



  2%|▏         | 101/4663 [21:53:30<988:25:25, 779.99s/it]

------- 1st valloss=0.0824

Epoch 438 finished ! Training Loss: 0.0655



  2%|▏         | 102/4663 [22:06:17<983:09:56, 776.01s/it]

------- 1st valloss=0.0644

Epoch 439 finished ! Training Loss: 0.0674



  2%|▏         | 103/4663 [22:19:18<984:57:45, 777.60s/it]

------- 1st valloss=0.5241

Epoch 440 finished ! Training Loss: 0.0707

------- 1st valloss=0.0652



  2%|▏         | 104/4663 [22:32:34<991:38:35, 783.05s/it]

Checkpoint 440 saved !
Epoch 441 finished ! Training Loss: 0.0688



  2%|▏         | 105/4663 [22:45:48<995:40:49, 786.41s/it]

------- 1st valloss=0.1106

Epoch 442 finished ! Training Loss: 0.0664



  2%|▏         | 106/4663 [22:58:52<994:15:36, 785.46s/it]

------- 1st valloss=0.0620

Epoch 443 finished ! Training Loss: 0.0941



  2%|▏         | 107/4663 [23:11:49<991:10:47, 783.20s/it]

------- 1st valloss=0.1591

Epoch 444 finished ! Training Loss: 0.1106



  2%|▏         | 108/4663 [23:24:49<989:44:45, 782.24s/it]

------- 1st valloss=0.0794

Epoch 445 finished ! Training Loss: 0.0833



  2%|▏         | 109/4663 [23:37:49<988:31:11, 781.44s/it]

------- 1st valloss=0.0678

Epoch 446 finished ! Training Loss: 0.0734



  2%|▏         | 110/4663 [23:50:46<986:43:02, 780.18s/it]

------- 1st valloss=0.0686

Epoch 447 finished ! Training Loss: 0.0748



  2%|▏         | 111/4663 [24:03:59<991:24:31, 784.07s/it]

------- 1st valloss=0.0668

Epoch 448 finished ! Training Loss: 0.0710



  2%|▏         | 112/4663 [24:16:46<984:35:38, 778.85s/it]

------- 1st valloss=0.0648

Epoch 449 finished ! Training Loss: 0.0701



  2%|▏         | 113/4663 [24:30:05<992:04:26, 784.94s/it]

------- 1st valloss=0.0742

Epoch 450 finished ! Training Loss: 0.0686

------- 1st valloss=0.0932



  2%|▏         | 114/4663 [24:43:04<989:39:06, 783.19s/it]

Checkpoint 450 saved !
Epoch 451 finished ! Training Loss: 0.0696



  2%|▏         | 115/4663 [24:56:07<989:05:12, 782.92s/it]

------- 1st valloss=0.0737

Epoch 452 finished ! Training Loss: 0.0689



  2%|▏         | 116/4663 [25:09:04<986:38:19, 781.15s/it]

------- 1st valloss=0.0607

Epoch 453 finished ! Training Loss: 0.0680



  3%|▎         | 117/4663 [25:22:01<985:07:46, 780.13s/it]

------- 1st valloss=0.0631

Epoch 454 finished ! Training Loss: 0.0711



  3%|▎         | 118/4663 [25:34:51<981:00:49, 777.04s/it]

------- 1st valloss=0.0660

Epoch 455 finished ! Training Loss: 0.0676



  3%|▎         | 119/4663 [25:47:55<983:28:41, 779.16s/it]

------- 1st valloss=0.0632

Epoch 456 finished ! Training Loss: 0.0668



  3%|▎         | 120/4663 [26:00:36<976:03:47, 773.46s/it]

------- 1st valloss=0.0656

Epoch 457 finished ! Training Loss: 0.0653



  3%|▎         | 121/4663 [26:13:47<982:44:25, 778.92s/it]

------- 1st valloss=0.0687

Epoch 458 finished ! Training Loss: 0.0672



  3%|▎         | 122/4663 [26:26:55<986:02:52, 781.72s/it]

------- 1st valloss=0.0665

Epoch 459 finished ! Training Loss: 0.0651



  3%|▎         | 123/4663 [26:39:39<978:59:57, 776.30s/it]

------- 1st valloss=0.0711

Epoch 460 finished ! Training Loss: 0.0655

------- 1st valloss=0.0668



  3%|▎         | 124/4663 [26:52:39<980:04:49, 777.33s/it]

Checkpoint 460 saved !
Epoch 461 finished ! Training Loss: 0.0654



  3%|▎         | 125/4663 [27:05:34<979:09:01, 776.76s/it]

------- 1st valloss=0.0637

Epoch 462 finished ! Training Loss: 0.0640



  3%|▎         | 126/4663 [27:18:38<981:43:13, 778.97s/it]

------- 1st valloss=0.0639

Epoch 463 finished ! Training Loss: 0.0644



  3%|▎         | 127/4663 [27:31:35<980:32:48, 778.21s/it]

------- 1st valloss=0.0678

Epoch 464 finished ! Training Loss: 0.0658



  3%|▎         | 128/4663 [27:44:25<977:16:29, 775.79s/it]

------- 1st valloss=0.0622

Epoch 465 finished ! Training Loss: 0.0646



  3%|▎         | 129/4663 [27:57:28<979:37:45, 777.83s/it]

------- 1st valloss=0.0640

Epoch 466 finished ! Training Loss: 0.0644



  3%|▎         | 130/4663 [28:10:35<983:12:29, 780.84s/it]

------- 1st valloss=0.0639

Epoch 467 finished ! Training Loss: 0.0641



  3%|▎         | 131/4663 [28:23:28<979:46:46, 778.29s/it]

------- 1st valloss=0.0675

Epoch 468 finished ! Training Loss: 0.0645



  3%|▎         | 132/4663 [28:36:36<983:27:51, 781.39s/it]

------- 1st valloss=0.0602

Epoch 469 finished ! Training Loss: 0.0639



  3%|▎         | 133/4663 [28:49:31<980:43:16, 779.38s/it]

------- 1st valloss=0.0905

Epoch 470 finished ! Training Loss: 0.0653

------- 1st valloss=0.0761



  3%|▎         | 134/4663 [29:02:29<980:00:14, 778.98s/it]

Checkpoint 470 saved !
Epoch 471 finished ! Training Loss: 0.0634



  3%|▎         | 135/4663 [29:15:22<977:34:09, 777.22s/it]

------- 1st valloss=0.0650

Epoch 472 finished ! Training Loss: 0.0632



  3%|▎         | 136/4663 [29:28:09<973:30:41, 774.16s/it]

------- 1st valloss=0.0643

Epoch 473 finished ! Training Loss: 0.0652



  3%|▎         | 137/4663 [29:41:16<978:04:31, 777.97s/it]

------- 1st valloss=0.0598

Epoch 474 finished ! Training Loss: 0.0644



  3%|▎         | 138/4663 [29:54:21<980:28:45, 780.05s/it]

------- 1st valloss=0.0904

Epoch 475 finished ! Training Loss: 0.0629



  3%|▎         | 139/4663 [30:07:28<982:57:39, 782.20s/it]

------- 1st valloss=0.0722

Epoch 476 finished ! Training Loss: 0.0640



  3%|▎         | 140/4663 [30:20:41<986:43:48, 785.37s/it]

------- 1st valloss=0.0619

Epoch 477 finished ! Training Loss: 0.0639



  3%|▎         | 141/4663 [30:33:35<982:16:09, 781.99s/it]

------- 1st valloss=0.0604

Epoch 478 finished ! Training Loss: 0.0646



  3%|▎         | 142/4663 [30:46:46<985:17:02, 784.57s/it]

------- 1st valloss=0.0624

Epoch 479 finished ! Training Loss: 0.0636



  3%|▎         | 143/4663 [30:59:50<984:48:16, 784.36s/it]

------- 1st valloss=0.0629

Epoch 480 finished ! Training Loss: 0.0644



In [None]:
# Final evaluation: turn the network output into a hard one-hot prediction and report
# the three loss components returned by dice_loss_3_debug, averaged over all
# validation mini-batches.
deeplab.eval()

with torch.no_grad():

    # running sums of the three components (named background / body / bv in the
    # summary line printed below)
    bgloss = 0
    bdloss = 0
    bvloss = 0

    # tqdm wraps the loader itself (not the enumerate object) so it can read the
    # loader's length and show real progress.
    for v, vbatch in enumerate(tqdm(validation_loader)):

        # move data to the device and convert to the configured dtype
        image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
        label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)

        # forward pass
        output = deeplab(image_1)

        # One-hot along dim 1: an entry becomes 1 where it equals the maximum over
        # dim 1 and 0 elsewhere. Done directly in torch, which avoids the previous
        # device -> numpy -> device round trip and yields the same values.
        #out_1 = torch.round(output)
        out_1 = (output == output.max(dim=1, keepdim=True)[0]).to(device=device, dtype=dtype)
        loss_1 = dice_loss_3(out_1, label_1)

        # per-component losses for this mini-batch
        bg, bd, bv = dice_loss_3_debug(out_1, label_1)
        print(bg.item(), bd.item(), bv.item(), loss_1.item())
        bgloss += bg.item()
        bdloss += bd.item()
        bvloss += bv.item()

    # averages over the v + 1 mini-batches
    outstr = '------- background loss = {0:.4f}, body loss = {1:.4f}, bv loss = {2:.4f}'\
        .format(bgloss/(v+1), bdloss/(v+1), bvloss/(v+1)) + '\n'
    print(outstr)