In [1]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_utility import *
from data_utils import *
from loss import *
from train import *
from deeplab_model.deeplab import *
from sync_batchnorm import convert_model
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# ---- Run configuration -------------------------------------------------
USE_GPU = True      # set to False to force CPU even when CUDA is present
NUM_WORKERS = 12    # passed to the DataLoaders as num_workers
BATCH_SIZE = 2

# float32 takes half the space of float64 (double)
dtype = torch.float32

# Pick CUDA only when it is both requested and actually available.
device = torch.device('cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu')

if device.type == 'cuda':
    # cuDNN flags; the original author enabled these to speed up training
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    print('using GPU for training')
else:
    print('using CPU for training')

using GPU for training


In [3]:
# Training set: random affine + random flip are applied as data augmentation.
train_dataset = pyramid_dataset(data_type = 'nii_train', 
                transform=transforms.Compose([
                random_affine(90, 15),
                random_filp(0.5)
                ]))

# Validation set: no augmentation, samples are used as stored.
validation_dataset = pyramid_dataset(data_type = 'nii_test', 
                transform=None)

# DataLoader handles batching and loads samples in NUM_WORKERS worker processes.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                    num_workers=NUM_WORKERS)

# shuffle=False: the validation loss is a mean of per-batch losses, so a
# fixed batch composition keeps it comparable from one epoch to the next
# (checkpoint selection depends on it) and makes the per-batch numbers
# printed by the evaluation cell line up between runs.
# NOTE(review): drop_last is left at its default, so the final batch may be
# smaller than BATCH_SIZE.
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False,
                    num_workers=NUM_WORKERS)

In [4]:
# Cold-start initialisation (new model, new optimizer/scheduler, epoch = 0),
# disabled by wrapping it in a string literal; the string is only echoed as
# this cell's output. Replace the checkpoint-loading cell with this code to
# train from scratch.
# NOTE(review): the closing remark inside the string about "model_2 params
# and model_4 params" looks stale — only deeplab.parameters() is handed to
# Adam there.
'''
deeplab = DeepLab_ELU(output_stride=2)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)
deeplab = deeplab.to(device=device, dtype=dtype)
#shape_test(icnet1, True)
# create the model, by default model type is float, use model.double(), model.float() to convert
# move the model to desirable device

optimizer = optim.Adam(deeplab.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=25)
epoch = 0

# create an optimizer object
# note that only the model_2 params and model_4 params will be optimized by optimizer
'''

"\ndeeplab = DeepLab_ELU(output_stride=2)\ndeeplab = nn.DataParallel(deeplab)\ndeeplab = convert_model(deeplab)\ndeeplab = deeplab.to(device=device, dtype=dtype)\n#shape_test(icnet1, True)\n# create the model, by default model type is float, use model.double(), model.float() to convert\n# move the model to desirable device\n\noptimizer = optim.Adam(deeplab.parameters(), lr=1e-2)\nscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=25)\nepoch = 0\n\n# create an optimizer object\n# note that only the model_2 params and model_4 params will be optimized by optimizer\n"

In [6]:

# Rebuild the network with the same wrapping used before loading weights
# (presumably the checkpoint was saved from a model wrapped this way, so the
# state_dict keys line up — confirm against save_1).
deeplab = DeepLab_ELU(output_stride=2)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)

# map_location=device remaps the stored tensors onto the device selected in
# the configuration cell, so the checkpoint also loads when the notebook has
# fallen back to CPU.
checkpoint = torch.load('../deeplab_output_2_elu_save/2019-08-21 20:02:58.787116 epoch: 450.pth',
                        map_location=device) # latest one

deeplab.load_state_dict(checkpoint['state_dict_1'])
deeplab = deeplab.to(device, dtype)

# The optimizer state is intentionally NOT restored (see the disabled line
# below): Adam starts over with lr=1e-2 and empty moment estimates.
optimizer = optim.Adam(deeplab.parameters(), lr=1e-2)
#optimizer.load_state_dict(checkpoint['optimizer'])

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
scheduler.load_state_dict(checkpoint['scheduler'])

# Resume epoch numbering from the checkpoint and show the effective
# learning rate of every parameter group as a sanity check.
epoch = checkpoint['epoch']
print(epoch)
for param_group in optimizer.param_groups:
    print(param_group['lr'])


450
0.01


In [None]:
# Train / validate loop. Resumes at `epoch + 1` (epoch comes from the loaded
# checkpoint) and runs up to, but not including, `epochs`.
epochs = 5000

# Best validation loss seen so far; beating it triggers a checkpoint.
min_val = .07

# Per-epoch history, handed to save_1 together with the model.
logger = {'train':[], 'validation_1': []}

# The context manager closes the log file even when the run is interrupted
# (e.g. the kernel is stopped mid-training) or an exception is raised.
with open('train_deeplab_output_2_elu.txt','a+') as record:

    for e in tqdm(range(epoch + 1, epochs)):

        epoch_loss = 0

        deeplab.train()
        # Set the model flag to train once per epoch (eval() is only called
        # in the validation phase below):
        # 1. enable dropout
        # 2. batchnorm behaves differently in train and test

        for t, batch in enumerate(train_loader):
            # iterate over the training mini-batches

            image_1 = batch['image1_data'].to(device=device, dtype=dtype)
            label_1 = batch['image1_label'].to(device=device, dtype=dtype)
            # move data to device, convert to the configured dtype

            out_1 = deeplab(image_1)
            # forward pass

            loss_1 = dice_loss_3(out_1, label_1)
            # calculate loss

            epoch_loss += loss_1.item()
            # accumulate mini-batch loss into the epoch loss

            optimizer.zero_grad()
            # reset the parameter gradients

            loss_1.backward()
            # back-propagate the loss
            optimizer.step()
            # take one optimizer step
            # NOTE(review): the scheduler is never stepped anywhere in this
            # loop (both scheduler.step calls are disabled), so the learning
            # rate stays at the optimizer's initial value.
            #scheduler.step(loss_1)

        outstr = 'Epoch {0} finished ! Training Loss: {1:.4f}'.format(e, epoch_loss/(t+1)) + '\n'

        logger['train'].append(epoch_loss/(t+1))

        print(outstr)
        record.write(outstr)
        record.flush()

        # Validate every 5th epoch up to epoch 150, and every epoch after that.
        if e > 150 or e % 5 == 0:

            deeplab.eval()
            # set model flag to eval
            # 1. disable dropout
            # 2. batchnorm behaves differently

            with torch.no_grad():
                # no gradients are needed for validation

                valloss_1 = 0

                for v, vbatch in enumerate(validation_loader):
                    # iterate over the validation mini-batches

                    image_1_val = vbatch['image1_data'].to(device=device, dtype=dtype)
                    if get_dimensions(image_1_val) == 4:
                        image_1_val.unsqueeze_(0)
                    label_1_val = vbatch['image1_label'].to(device=device, dtype=dtype)
                    if get_dimensions(label_1_val) == 4:
                        label_1_val.unsqueeze_(0)
                    # move data to device, convert to the configured dtype,
                    # and add a leading dimension when get_dimensions reports 4

                    out_1_val = deeplab(image_1_val)
                    # forward pass

                    val_loss_1 = dice_loss_3(out_1_val, label_1_val)
                    # calculate loss (kept separate from the training loss_1)

                    valloss_1 += val_loss_1.item()
                    # accumulate mini-batch loss

                avg_val_loss = (valloss_1 / (v+1))
                outstr = '------- 1st valloss={0:.4f}'\
                    .format(avg_val_loss) + '\n'

                logger['validation_1'].append(avg_val_loss)
                #scheduler.step(avg_val_loss)

                print(outstr)
                record.write(outstr)
                record.flush()

                # Checkpoint on a new best validation loss, and in any case
                # on every validated epoch that is a multiple of 10.
                improved = avg_val_loss < min_val
                if improved:
                    print(avg_val_loss, "less than", min_val)
                    min_val = avg_val_loss
                if improved or e % 10 == 0:
                    save_1('deeplab_output_2_elu_save', deeplab, optimizer, logger, e, scheduler)

  0%|          | 0/4549 [00:00<?, ?it/s]

Epoch 451 finished ! Training Loss: 0.0803



  0%|          | 1/4549 [19:30<1478:36:28, 1170.40s/it]

------- 1st valloss=0.0746

Epoch 452 finished ! Training Loss: 0.0819



  0%|          | 2/4549 [37:40<1447:49:35, 1146.29s/it]

------- 1st valloss=0.0824

Epoch 453 finished ! Training Loss: 0.0830



  0%|          | 3/4549 [55:41<1422:58:01, 1126.85s/it]

------- 1st valloss=0.2967

Epoch 454 finished ! Training Loss: 0.0814



  0%|          | 4/4549 [1:13:40<1404:30:21, 1112.48s/it]

------- 1st valloss=0.0734

Epoch 455 finished ! Training Loss: 0.0768



  0%|          | 5/4549 [1:31:44<1393:26:59, 1103.97s/it]

------- 1st valloss=0.1055

Epoch 456 finished ! Training Loss: 0.0773

------- 1st valloss=0.0650

0.06501817152551982 less than 0.07


  0%|          | 6/4549 [1:49:55<1388:10:25, 1100.03s/it]

Checkpoint 456 saved !
Epoch 457 finished ! Training Loss: 0.0741



  0%|          | 7/4549 [2:07:54<1379:42:07, 1093.56s/it]

------- 1st valloss=0.1805

Epoch 458 finished ! Training Loss: 0.0835



  0%|          | 8/4549 [2:25:42<1369:53:42, 1086.02s/it]

------- 1st valloss=0.1025

Epoch 459 finished ! Training Loss: 0.0923



  0%|          | 9/4549 [2:43:42<1367:25:34, 1084.30s/it]

------- 1st valloss=0.1219

Epoch 460 finished ! Training Loss: 0.0778

------- 1st valloss=0.1208



  0%|          | 10/4549 [3:01:56<1370:40:08, 1087.11s/it]

Checkpoint 460 saved !
Epoch 461 finished ! Training Loss: 0.0900



  0%|          | 11/4549 [3:19:45<1363:20:07, 1081.54s/it]

------- 1st valloss=0.0819

Epoch 462 finished ! Training Loss: 0.0844



  0%|          | 12/4549 [3:38:09<1371:42:44, 1088.42s/it]

------- 1st valloss=0.0782

Epoch 463 finished ! Training Loss: 0.0886



  0%|          | 13/4549 [3:56:28<1375:28:31, 1091.65s/it]

------- 1st valloss=0.0832

Epoch 464 finished ! Training Loss: 0.0821



  0%|          | 14/4549 [4:14:26<1370:00:59, 1087.55s/it]

------- 1st valloss=0.1024

Epoch 465 finished ! Training Loss: 0.0767



  0%|          | 15/4549 [4:32:34<1369:46:24, 1087.60s/it]

------- 1st valloss=0.1481

Epoch 466 finished ! Training Loss: 0.0765



  0%|          | 16/4549 [4:50:48<1371:53:59, 1089.53s/it]

------- 1st valloss=0.1112

Epoch 467 finished ! Training Loss: 0.0844



  0%|          | 17/4549 [5:08:54<1370:10:39, 1088.40s/it]

------- 1st valloss=0.1395

Epoch 468 finished ! Training Loss: 0.0735



  0%|          | 18/4549 [5:27:07<1371:42:09, 1089.85s/it]

------- 1st valloss=0.0702

Epoch 469 finished ! Training Loss: 0.0773



  0%|          | 19/4549 [5:45:20<1372:26:50, 1090.69s/it]

------- 1st valloss=0.0769

Epoch 470 finished ! Training Loss: 0.0813

------- 1st valloss=0.0793



  0%|          | 20/4549 [6:03:24<1369:41:40, 1088.74s/it]

Checkpoint 470 saved !
Epoch 471 finished ! Training Loss: 0.0749



  0%|          | 21/4549 [6:21:30<1368:28:20, 1088.01s/it]

------- 1st valloss=0.0779

Epoch 472 finished ! Training Loss: 0.0849



  0%|          | 22/4549 [6:39:42<1369:34:51, 1089.13s/it]

------- 1st valloss=0.1395

Epoch 473 finished ! Training Loss: 0.0796



  1%|          | 23/4549 [6:57:49<1368:27:15, 1088.47s/it]

------- 1st valloss=0.0929

Epoch 474 finished ! Training Loss: 0.0759



  1%|          | 24/4549 [7:15:44<1362:55:11, 1084.31s/it]

------- 1st valloss=0.0956

Epoch 475 finished ! Training Loss: 0.0757



  1%|          | 25/4549 [7:33:44<1361:10:06, 1083.16s/it]

------- 1st valloss=0.0707

Epoch 476 finished ! Training Loss: 0.0881



  1%|          | 26/4549 [7:51:58<1364:49:38, 1086.31s/it]

------- 1st valloss=0.0895

Epoch 477 finished ! Training Loss: 0.0815



  1%|          | 27/4549 [8:10:12<1367:36:16, 1088.76s/it]

------- 1st valloss=0.0879

Epoch 478 finished ! Training Loss: 0.0798



  1%|          | 28/4549 [8:28:23<1368:04:31, 1089.38s/it]

------- 1st valloss=0.0739

Epoch 479 finished ! Training Loss: 0.0779



  1%|          | 29/4549 [8:46:31<1367:13:21, 1088.94s/it]

------- 1st valloss=0.0825

Epoch 480 finished ! Training Loss: 0.0791

------- 1st valloss=0.1117



  1%|          | 30/4549 [9:04:32<1363:57:00, 1086.57s/it]

Checkpoint 480 saved !
Epoch 481 finished ! Training Loss: 0.0859



  1%|          | 31/4549 [9:22:31<1360:52:19, 1084.36s/it]

------- 1st valloss=0.1029

Epoch 482 finished ! Training Loss: 0.0772



  1%|          | 32/4549 [9:40:45<1364:04:35, 1087.15s/it]

------- 1st valloss=0.0824

Epoch 483 finished ! Training Loss: 0.0741



  1%|          | 33/4549 [9:58:53<1364:01:56, 1087.36s/it]

------- 1st valloss=0.0807

Epoch 484 finished ! Training Loss: 0.0722



  1%|          | 34/4549 [10:17:00<1363:41:11, 1087.32s/it]

------- 1st valloss=0.0770

Epoch 485 finished ! Training Loss: 0.0720



  1%|          | 35/4549 [10:35:04<1362:15:29, 1086.43s/it]

------- 1st valloss=0.0709

Epoch 486 finished ! Training Loss: 0.0723



  1%|          | 36/4549 [10:53:00<1358:07:57, 1083.38s/it]

------- 1st valloss=0.0761

Epoch 487 finished ! Training Loss: 0.0709

------- 1st valloss=0.0623

0.06232943547808606 less than 0.06501817152551982


  1%|          | 37/4549 [11:11:06<1358:30:01, 1083.91s/it]

Checkpoint 487 saved !
Epoch 488 finished ! Training Loss: 0.0706



  1%|          | 38/4549 [11:29:21<1362:28:11, 1087.32s/it]

------- 1st valloss=0.0683

Epoch 489 finished ! Training Loss: 0.0688



  1%|          | 39/4549 [11:47:30<1362:50:27, 1087.86s/it]

------- 1st valloss=0.0665

Epoch 490 finished ! Training Loss: 0.0689

------- 1st valloss=0.0745



  1%|          | 40/4549 [12:05:32<1360:11:12, 1085.98s/it]

Checkpoint 490 saved !
Epoch 491 finished ! Training Loss: 0.0710



  1%|          | 41/4549 [12:23:48<1363:57:11, 1089.23s/it]

------- 1st valloss=0.0696

Epoch 492 finished ! Training Loss: 0.0700



  1%|          | 42/4549 [12:42:01<1365:02:29, 1090.34s/it]

------- 1st valloss=0.0674

Epoch 493 finished ! Training Loss: 0.0702



  1%|          | 43/4549 [13:00:02<1360:58:51, 1087.34s/it]

------- 1st valloss=0.0952

Epoch 494 finished ! Training Loss: 0.0737



  1%|          | 44/4549 [13:18:14<1362:30:32, 1088.80s/it]

------- 1st valloss=0.0659

Epoch 495 finished ! Training Loss: 0.0725



  1%|          | 45/4549 [13:36:43<1369:46:14, 1094.84s/it]

------- 1st valloss=0.0738

Epoch 496 finished ! Training Loss: 0.0705



  1%|          | 46/4549 [13:54:37<1361:33:23, 1088.52s/it]

------- 1st valloss=0.0828

Epoch 497 finished ! Training Loss: 0.0701



  1%|          | 47/4549 [14:12:49<1362:36:51, 1089.61s/it]

------- 1st valloss=0.0702

Epoch 498 finished ! Training Loss: 0.0692



  1%|          | 48/4549 [14:30:57<1361:43:22, 1089.14s/it]

------- 1st valloss=0.0779

Epoch 499 finished ! Training Loss: 0.0703



  1%|          | 49/4549 [14:48:58<1358:20:49, 1086.68s/it]

------- 1st valloss=0.0940

Epoch 500 finished ! Training Loss: 0.0675

------- 1st valloss=0.0730



  1%|          | 50/4549 [15:07:15<1361:52:23, 1089.74s/it]

Checkpoint 500 saved !
Epoch 501 finished ! Training Loss: 0.0671



  1%|          | 51/4549 [15:25:29<1363:28:30, 1091.27s/it]

------- 1st valloss=0.1100

Epoch 502 finished ! Training Loss: 0.0697



  1%|          | 52/4549 [15:43:40<1362:55:54, 1091.07s/it]

------- 1st valloss=0.0766

Epoch 503 finished ! Training Loss: 0.0681



  1%|          | 53/4549 [16:01:43<1359:44:11, 1088.76s/it]

------- 1st valloss=0.1114

Epoch 504 finished ! Training Loss: 0.0691



  1%|          | 54/4549 [16:19:48<1357:54:54, 1087.54s/it]

------- 1st valloss=0.0984

Epoch 505 finished ! Training Loss: 0.0683



  1%|          | 55/4549 [16:37:46<1354:00:17, 1084.65s/it]

------- 1st valloss=0.1121

Epoch 506 finished ! Training Loss: 0.0674



  1%|          | 56/4549 [16:55:44<1351:07:31, 1082.58s/it]

------- 1st valloss=0.0684

Epoch 507 finished ! Training Loss: 0.0675



  1%|▏         | 57/4549 [17:13:39<1348:08:41, 1080.44s/it]

------- 1st valloss=0.1036

Epoch 508 finished ! Training Loss: 0.0678



  1%|▏         | 58/4549 [17:31:37<1346:55:35, 1079.70s/it]

------- 1st valloss=0.0745

Epoch 509 finished ! Training Loss: 0.0698



  1%|▏         | 59/4549 [17:49:53<1352:38:59, 1084.53s/it]

------- 1st valloss=0.0945

Epoch 510 finished ! Training Loss: 0.0697

------- 1st valloss=0.0649



  1%|▏         | 60/4549 [18:07:43<1347:05:51, 1080.32s/it]

Checkpoint 510 saved !
Epoch 511 finished ! Training Loss: 0.0707



  1%|▏         | 61/4549 [18:25:35<1343:26:06, 1077.62s/it]

------- 1st valloss=0.0869

Epoch 512 finished ! Training Loss: 0.0722



  1%|▏         | 62/4549 [18:44:00<1353:31:26, 1085.96s/it]

------- 1st valloss=0.0663

Epoch 513 finished ! Training Loss: 0.0676



  1%|▏         | 63/4549 [19:02:11<1354:57:55, 1087.36s/it]

------- 1st valloss=0.0900

Epoch 514 finished ! Training Loss: 0.0873



  1%|▏         | 64/4549 [19:20:27<1357:54:38, 1089.96s/it]

------- 1st valloss=0.0887

Epoch 515 finished ! Training Loss: 0.0799



  1%|▏         | 65/4549 [19:38:29<1354:31:55, 1087.49s/it]

------- 1st valloss=0.0863

Epoch 516 finished ! Training Loss: 0.0731



  1%|▏         | 66/4549 [19:56:34<1353:24:09, 1086.83s/it]

------- 1st valloss=0.0642

Epoch 517 finished ! Training Loss: 0.0731



  1%|▏         | 67/4549 [20:14:34<1350:40:38, 1084.88s/it]

------- 1st valloss=0.0901

Epoch 518 finished ! Training Loss: 0.0710



  1%|▏         | 68/4549 [20:32:36<1349:17:52, 1084.02s/it]

------- 1st valloss=0.0633

Epoch 519 finished ! Training Loss: 0.0705



  2%|▏         | 69/4549 [20:50:45<1350:44:10, 1085.41s/it]

------- 1st valloss=0.0645

Epoch 520 finished ! Training Loss: 0.0695

------- 1st valloss=0.0632



  2%|▏         | 70/4549 [21:08:40<1346:24:48, 1082.18s/it]

Checkpoint 520 saved !
Epoch 521 finished ! Training Loss: 0.0684



  2%|▏         | 71/4549 [21:26:51<1349:35:46, 1084.98s/it]

------- 1st valloss=0.0642

Epoch 522 finished ! Training Loss: 0.0722



  2%|▏         | 72/4549 [21:45:04<1352:10:26, 1087.30s/it]

------- 1st valloss=0.0743

Epoch 523 finished ! Training Loss: 0.0742



  2%|▏         | 73/4549 [22:03:07<1350:32:26, 1086.23s/it]

------- 1st valloss=0.0729

Epoch 524 finished ! Training Loss: 0.0732



  2%|▏         | 74/4549 [22:21:15<1350:35:12, 1086.51s/it]

------- 1st valloss=0.0701

Epoch 525 finished ! Training Loss: 0.0717



  2%|▏         | 75/4549 [22:39:30<1353:26:19, 1089.04s/it]

------- 1st valloss=0.0790

Epoch 526 finished ! Training Loss: 0.0752



  2%|▏         | 76/4549 [22:57:37<1352:29:53, 1088.53s/it]

------- 1st valloss=0.0845

Epoch 527 finished ! Training Loss: 0.0724



  2%|▏         | 77/4549 [23:15:30<1346:28:48, 1083.93s/it]

------- 1st valloss=0.0747

Epoch 528 finished ! Training Loss: 0.0725



  2%|▏         | 78/4549 [23:33:32<1345:20:16, 1083.25s/it]

------- 1st valloss=0.0675

Epoch 529 finished ! Training Loss: 0.0689



  2%|▏         | 79/4549 [23:51:46<1349:03:06, 1086.48s/it]

------- 1st valloss=0.0820

Epoch 530 finished ! Training Loss: 0.0710

------- 1st valloss=0.0924



  2%|▏         | 80/4549 [24:09:36<1342:45:31, 1081.66s/it]

Checkpoint 530 saved !
Epoch 531 finished ! Training Loss: 0.0684



  2%|▏         | 81/4549 [24:27:48<1346:07:26, 1084.61s/it]

------- 1st valloss=0.0867

Epoch 532 finished ! Training Loss: 0.0669



  2%|▏         | 82/4549 [24:45:49<1344:25:49, 1083.49s/it]

------- 1st valloss=0.0652

Epoch 533 finished ! Training Loss: 0.0667



  2%|▏         | 83/4549 [25:03:52<1343:57:22, 1083.35s/it]

------- 1st valloss=0.0658

Epoch 534 finished ! Training Loss: 0.0663



  2%|▏         | 84/4549 [25:21:43<1339:02:02, 1079.62s/it]

------- 1st valloss=0.0695

Epoch 535 finished ! Training Loss: 0.0656



  2%|▏         | 85/4549 [25:39:54<1342:57:25, 1083.03s/it]

------- 1st valloss=0.0735

Epoch 536 finished ! Training Loss: 0.0659



  2%|▏         | 86/4549 [25:58:06<1346:04:22, 1085.79s/it]

------- 1st valloss=0.0700

Epoch 537 finished ! Training Loss: 0.0661



  2%|▏         | 87/4549 [26:16:09<1344:40:57, 1084.91s/it]

------- 1st valloss=0.0658

Epoch 538 finished ! Training Loss: 0.0661



  2%|▏         | 88/4549 [26:34:07<1341:57:02, 1082.95s/it]

------- 1st valloss=0.1426

Epoch 539 finished ! Training Loss: 0.0758



  2%|▏         | 89/4549 [26:52:18<1344:50:34, 1085.52s/it]

------- 1st valloss=0.1207

Epoch 540 finished ! Training Loss: 0.0718

------- 1st valloss=0.0909



  2%|▏         | 90/4549 [27:10:08<1338:29:19, 1080.64s/it]

Checkpoint 540 saved !
Epoch 541 finished ! Training Loss: 0.0712



  2%|▏         | 91/4549 [27:28:19<1342:06:05, 1083.80s/it]

------- 1st valloss=0.0715

Epoch 542 finished ! Training Loss: 0.0747



  2%|▏         | 92/4549 [27:46:30<1344:31:32, 1086.00s/it]

------- 1st valloss=0.0723

Epoch 543 finished ! Training Loss: 0.0697



  2%|▏         | 93/4549 [28:04:31<1342:29:07, 1084.59s/it]

------- 1st valloss=0.0723

Epoch 544 finished ! Training Loss: 0.0714



  2%|▏         | 94/4549 [28:22:19<1335:51:24, 1079.48s/it]

------- 1st valloss=0.0721

Epoch 545 finished ! Training Loss: 0.0672



  2%|▏         | 95/4549 [28:40:35<1341:42:09, 1084.45s/it]

------- 1st valloss=0.0672

Epoch 546 finished ! Training Loss: 0.0672



  2%|▏         | 96/4549 [28:58:46<1343:59:42, 1086.54s/it]

------- 1st valloss=0.0652

Epoch 547 finished ! Training Loss: 0.0668



  2%|▏         | 97/4549 [29:17:16<1352:10:30, 1093.40s/it]

------- 1st valloss=0.0691

Epoch 548 finished ! Training Loss: 0.0646



  2%|▏         | 98/4549 [29:35:36<1354:13:43, 1095.31s/it]

------- 1st valloss=0.0722

Epoch 549 finished ! Training Loss: 0.0660



  2%|▏         | 99/4549 [29:53:42<1350:37:35, 1092.64s/it]

------- 1st valloss=0.0648

Epoch 550 finished ! Training Loss: 0.0652

------- 1st valloss=0.0650



  2%|▏         | 100/4549 [30:11:55<1350:34:53, 1092.85s/it]

Checkpoint 550 saved !
Epoch 551 finished ! Training Loss: 0.0645



  2%|▏         | 101/4549 [30:29:57<1346:10:24, 1089.53s/it]

------- 1st valloss=0.0711

Epoch 552 finished ! Training Loss: 0.0656



  2%|▏         | 102/4549 [30:48:19<1350:22:08, 1093.17s/it]

------- 1st valloss=0.0637

Epoch 553 finished ! Training Loss: 0.0650



  2%|▏         | 103/4549 [31:06:33<1350:34:33, 1093.58s/it]

------- 1st valloss=0.0755

Epoch 554 finished ! Training Loss: 0.0663



  2%|▏         | 104/4549 [31:24:40<1347:54:26, 1091.67s/it]

------- 1st valloss=0.0636

Epoch 555 finished ! Training Loss: 0.0646



  2%|▏         | 105/4549 [31:42:43<1344:12:49, 1088.92s/it]

------- 1st valloss=0.0651

Epoch 556 finished ! Training Loss: 0.0654



  2%|▏         | 106/4549 [32:00:51<1343:26:17, 1088.54s/it]

------- 1st valloss=0.0659

Epoch 557 finished ! Training Loss: 0.0634



  2%|▏         | 107/4549 [32:18:50<1339:46:48, 1085.82s/it]

------- 1st valloss=0.0650

Epoch 558 finished ! Training Loss: 0.0651



  2%|▏         | 108/4549 [32:36:43<1334:41:01, 1081.93s/it]

------- 1st valloss=0.1034

Epoch 559 finished ! Training Loss: 0.0639



  2%|▏         | 109/4549 [32:54:38<1331:52:26, 1079.90s/it]

------- 1st valloss=0.0657

Epoch 560 finished ! Training Loss: 0.0637

------- 1st valloss=0.0637



  2%|▏         | 110/4549 [33:12:38<1331:44:00, 1080.03s/it]

Checkpoint 560 saved !
Epoch 561 finished ! Training Loss: 0.0645

------- 1st valloss=0.0606

0.060552956455427666 less than 0.06232943547808606


  2%|▏         | 111/4549 [33:30:39<1331:39:57, 1080.22s/it]

Checkpoint 561 saved !
Epoch 562 finished ! Training Loss: 0.0665



  2%|▏         | 112/4549 [33:48:40<1331:26:07, 1080.27s/it]

------- 1st valloss=0.0655

Epoch 563 finished ! Training Loss: 0.0650



  2%|▏         | 113/4549 [34:06:37<1330:07:13, 1079.45s/it]

------- 1st valloss=0.0690

Epoch 564 finished ! Training Loss: 0.0634



  3%|▎         | 114/4549 [34:24:56<1336:52:02, 1085.17s/it]

------- 1st valloss=0.0654

Epoch 565 finished ! Training Loss: 0.0632



  3%|▎         | 115/4549 [34:43:14<1341:32:26, 1089.21s/it]

------- 1st valloss=0.0749

Epoch 566 finished ! Training Loss: 0.0663



  3%|▎         | 116/4549 [35:01:20<1339:56:40, 1088.16s/it]

------- 1st valloss=0.0754

Epoch 567 finished ! Training Loss: 0.0685



  3%|▎         | 117/4549 [35:19:34<1341:39:15, 1089.79s/it]

------- 1st valloss=0.0758

Epoch 568 finished ! Training Loss: 0.0652



  3%|▎         | 118/4549 [35:37:39<1339:53:53, 1088.61s/it]

------- 1st valloss=0.0655

Epoch 569 finished ! Training Loss: 0.0725



  3%|▎         | 119/4549 [35:55:36<1335:12:10, 1085.04s/it]

------- 1st valloss=0.0916

Epoch 570 finished ! Training Loss: 0.0683

------- 1st valloss=0.0668



  3%|▎         | 120/4549 [36:13:36<1333:09:19, 1083.62s/it]

Checkpoint 570 saved !
Epoch 571 finished ! Training Loss: 0.0841



  3%|▎         | 121/4549 [36:31:43<1334:01:05, 1084.57s/it]

------- 1st valloss=0.1210

Epoch 572 finished ! Training Loss: 0.0818



  3%|▎         | 122/4549 [36:49:58<1337:27:47, 1087.61s/it]

------- 1st valloss=0.0804

Epoch 573 finished ! Training Loss: 0.0776



  3%|▎         | 123/4549 [37:07:50<1331:25:31, 1082.95s/it]

------- 1st valloss=0.0832

Epoch 574 finished ! Training Loss: 0.0690



  3%|▎         | 124/4549 [37:25:50<1330:13:01, 1082.21s/it]

------- 1st valloss=0.0702

Epoch 575 finished ! Training Loss: 0.0685



  3%|▎         | 125/4549 [37:44:05<1334:37:50, 1086.05s/it]

------- 1st valloss=0.0837

Epoch 576 finished ! Training Loss: 0.0677



  3%|▎         | 126/4549 [38:02:01<1330:28:20, 1082.91s/it]

------- 1st valloss=0.0876

Epoch 577 finished ! Training Loss: 0.0662



  3%|▎         | 127/4549 [38:20:21<1336:25:43, 1088.00s/it]

------- 1st valloss=0.0755

Epoch 578 finished ! Training Loss: 0.0660



  3%|▎         | 128/4549 [38:38:32<1337:14:03, 1088.90s/it]

------- 1st valloss=0.0740

Epoch 579 finished ! Training Loss: 0.0687



  3%|▎         | 129/4549 [38:56:41<1337:06:11, 1089.04s/it]

------- 1st valloss=0.0856

Epoch 580 finished ! Training Loss: 0.0671

------- 1st valloss=0.0794



  3%|▎         | 130/4549 [39:14:56<1338:54:19, 1090.76s/it]

Checkpoint 580 saved !
Epoch 581 finished ! Training Loss: 0.0662



  3%|▎         | 131/4549 [39:33:00<1336:02:40, 1088.67s/it]

------- 1st valloss=0.0638

Epoch 582 finished ! Training Loss: 0.0675



  3%|▎         | 132/4549 [39:50:57<1331:26:25, 1085.17s/it]

------- 1st valloss=0.0641

Epoch 583 finished ! Training Loss: 0.0669



  3%|▎         | 133/4549 [40:09:08<1333:21:53, 1086.98s/it]

------- 1st valloss=0.0831

Epoch 584 finished ! Training Loss: 0.0649



  3%|▎         | 134/4549 [40:27:04<1329:09:40, 1083.80s/it]

------- 1st valloss=0.0637

Epoch 585 finished ! Training Loss: 0.0643



In [None]:
# Per-class evaluation on the validation set using hard (one-hot) predictions.
deeplab.eval()

with torch.no_grad():

    # running sums of the three components returned by dice_loss_3_debug
    bgloss = 0
    bdloss = 0
    bvloss = 0

    # tqdm wraps the loader itself (not the enumerate generator) so the
    # progress bar knows the number of batches.
    for v, vbatch in enumerate(tqdm(validation_loader)):

        image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
        label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)
        # move data to device, convert to the configured dtype

        output = deeplab(image_1)
        # forward pass

        #out_1 = torch.round(output)
        # Harden the prediction along dim 1: entries equal to the maximum over
        # that dimension become 1, everything else 0 (ties yield several 1s).
        # Done on-device; no round trip through numpy is needed.
        out_1 = (output == output.max(dim=1, keepdim=True)[0]).to(device=device, dtype=dtype)
        loss_1 = dice_loss_3(out_1, label_1)

        bg, bd, bv = dice_loss_3_debug(out_1, label_1)
        # per-component losses for this batch
        print(bg.item(), bd.item(), bv.item(), loss_1.item())
        bgloss += bg.item()
        bdloss += bd.item()
        bvloss += bv.item()

    # average each component over the number of batches
    outstr = '------- background loss = {0:.4f}, body loss = {1:.4f}, bv loss = {2:.4f}'\
        .format(bgloss/(v+1), bdloss/(v+1), bvloss/(v+1)) + '\n'
    print(outstr)