In [1]:
import sys
# Silence every Python warning for this session, unless the interpreter was
# started with explicit -W options (sys.warnoptions is then non-empty and the
# user's own warning configuration is left alone).
# This sits ahead of the remaining imports so the filter is already installed
# when they execute.
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_utility import *
from data_utils import *
from loss import *
from train import *
from deeplab_model.deeplab import *
from sync_batchnorm import convert_model
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# ---- run configuration -------------------------------------------------
USE_GPU = True      # prefer CUDA when it is available
NUM_WORKERS = 12    # worker count handed to every DataLoader
BATCH_SIZE = 2      # samples per mini-batch

# Single precision everywhere: float32 needs half the memory of float64.
dtype = torch.float32

# Pick the compute device; fall back to the CPU when CUDA is disabled or missing.
if not (USE_GPU and torch.cuda.is_available()):
    device = torch.device('cpu')
    print('using CPU for training')
else:
    device = torch.device('cuda')

    # cuDNN switches: keep cuDNN enabled and let it benchmark candidate
    # algorithms to speed up training.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    print('using GPU for training')

using GPU for training


In [3]:
# Training set ('nii_train'): every sample goes through an augmentation pipeline
# made of a random affine transform, a random flip and an elastic deformation.
# The elastic deformation is wrapped in RandomApply, so it is only applied to a
# random subset of samples (RandomApply's default probability).
train_dataset = pyramid_dataset(
    data_type='nii_train',
    transform=transforms.Compose([
        random_affine(90, 15),
        random_filp(0.5),
        transforms.RandomApply([ElasticTransformation(256*2, 256*0.08)]),
    ]))

# Validation set ('nii_test'): no augmentation, samples are used as stored.
validation_dataset = pyramid_dataset(data_type='nii_test',
                                     transform=None)

# The loaders handle batching and load samples in NUM_WORKERS worker processes.
# drop_last is left at its default, so the final batch may hold fewer than
# BATCH_SIZE samples.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=NUM_WORKERS)

# Validation is NOT shuffled: the reported validation loss is a mean of
# per-batch losses, so a fixed batch composition keeps the metric comparable
# from one validation pass to the next.
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False,
                               num_workers=NUM_WORKERS)

In [4]:

# Build the network in one expression: a DeepLab with output_stride=8, wrapped
# in nn.DataParallel, then passed through sync_batchnorm's convert_model
# (presumably swaps in synchronized batch-norm layers -- confirm against the
# sync_batchnorm package).
deeplab = convert_model(nn.DataParallel(DeepLab(output_stride=8)))

# Modules are float32 by default; this moves the model to the selected device
# and makes the dtype explicit.
deeplab = deeplab.to(device=device, dtype=dtype)

# Adam over every parameter of the wrapped model.
optimizer = optim.Adam(deeplab.parameters(), lr=1e-2)

# Plateau scheduler: scales the learning rate by 0.1 when the monitored value
# (mode='min', i.e. a loss) stops improving. It only takes effect if
# scheduler.step(<metric>) is called somewhere.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, mode='min')

# Last completed epoch; the training cell starts counting from epoch + 1.
epoch = 0

In [None]:
# DISABLED checkpoint-resume snippet.
# The code below is wrapped in a bare triple-quoted string, so nothing in it
# executes; the cell merely evaluates to that string. Turned back into code it
# would: rebuild the model and the Adam optimizer, load a checkpoint from
# ../deeplab_save/, restore the model ('state_dict_1'), optimizer and scheduler
# states, and set `epoch` to the stored epoch so training continues from there.
# NOTE(review): the rebuilt model is never moved to `device` inside this
# snippet, and the scheduler is re-created with patience=5 -- check both before
# re-enabling.
"""
deeplab = DeepLab(output_stride=8)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)

optimizer = optim.Adam(deeplab.parameters(), lr=1e-2)

#checkpoint = torch.load('../deeplab_save/2019-07-29 04:00:14.630172.pth') # second best
#checkpoint = torch.load('../deeplab_save/2019-07-28 23:47:36.279119.pth') # second best
#checkpoint = torch.load('../deeplab_save/2019-07-29 00:15:49.271222.pth') # best
#checkpoint = torch.load('../deeplab_save/2019-07-29 00:44:11.825872.pth')
checkpoint = torch.load('../deeplab_save/2019-07-31 20:34:01.096131.pth') # latest one

deeplab.load_state_dict(checkpoint['state_dict_1'])
optimizer.load_state_dict(checkpoint['optimizer'])
#scheduler.load_state_dict(checkpoint['scheduler'])
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
scheduler.load_state_dict(checkpoint['scheduler'])
epoch = checkpoint['epoch']
print(epoch)
"""

"\ndeeplab = DeepLab(output_stride=8)\ndeeplab = nn.DataParallel(deeplab)\ndeeplab = convert_model(deeplab)\n\noptimizer = optim.Adam(deeplab.parameters(), lr=1e-2)\n\n#checkpoint = torch.load('../deeplab_save/2019-07-29 04:00:14.630172.pth') # second best\n#checkpoint = torch.load('../deeplab_save/2019-07-28 23:47:36.279119.pth') # second best\n#checkpoint = torch.load('../deeplab_save/2019-07-29 00:15:49.271222.pth') # best\n#checkpoint = torch.load('../deeplab_save/2019-07-29 00:44:11.825872.pth')\ncheckpoint = torch.load('../deeplab_save/2019-07-31 20:34:01.096131.pth') # latest one\n\ndeeplab.load_state_dict(checkpoint['state_dict_1'])\noptimizer.load_state_dict(checkpoint['optimizer'])\n#scheduler.load_state_dict(checkpoint['scheduler'])\nscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)\nscheduler.load_state_dict(checkpoint['scheduler'])\nepoch = checkpoint['epoch']\nprint(epoch)\n"

In [None]:
# Main training cell: trains `deeplab` from epoch + 1 up to (not including)
# `epochs`, and every VALIDATE_EVERY epochs evaluates on the validation loader
# and saves a checkpoint. Progress is printed and mirrored to a text log.
epochs = 5000
VALIDATE_EVERY = 5
# run validation and save a checkpoint every VALIDATE_EVERY epochs

min_val = 1
# lowest average validation loss seen so far in this run

logger = {'train': [], 'validation_1': []}
# 'train': mean training loss per epoch
# 'validation_1': mean validation loss per validation pass

# The context manager guarantees the log file is closed even when this
# long-running cell is interrupted or raises.
# NOTE: mode 'w+' truncates an existing log, also when resuming from a checkpoint.
with open('train_deeplab_output_8_elastic.txt', 'w+') as record:

    for e in tqdm(range(epoch + 1, epochs)):
    # iterate over epochs

        deeplab.train()
        # Put the model in train mode once per epoch (the validation pass below
        # switches it to eval mode):
        # 1. enables dropout
        # 2. batchnorm behaves differently in train and eval

        epoch_loss = 0
        num_train_batches = 0

        for batch in train_loader:
        # iterate over the training mini-batches

            image_1 = batch['image1_data'].to(device=device, dtype=dtype)
            label_1 = batch['image1_label'].to(device=device, dtype=dtype)
            # move data to the device and convert to the working dtype

            out_1 = deeplab(image_1)
            # forward pass

            loss_1 = dice_loss_3(out_1, label_1)
            # mini-batch loss

            optimizer.zero_grad()
            # clear gradients left over from the previous step
            loss_1.backward()
            # back-propagate
            optimizer.step()
            # update the parameters
            # NOTE: `scheduler` is never stepped in this loop, so the learning
            # rate stays constant; it is only handed to save_1 below.

            epoch_loss += loss_1.item()
            num_train_batches += 1
            # accumulate the mini-batch loss into the epoch total

        # max(..., 1) keeps an empty loader from raising; the mean is then 0.
        train_loss = epoch_loss / max(num_train_batches, 1)
        outstr = 'Epoch {0} finished ! Training Loss: {1:.4f}'.format(e, train_loss) + '\n'

        logger['train'].append(train_loss)

        print(outstr)
        record.write(outstr)
        record.flush()

        if e % VALIDATE_EVERY == 0:
        # periodic validation + checkpoint

            deeplab.eval()
            # eval mode:
            # 1. disables dropout
            # 2. batchnorm behaves differently in train and eval

            with torch.no_grad():
            # no gradients are needed for validation

                valloss_1 = 0
                num_val_batches = 0

                for vbatch in validation_loader:
                # iterate over the validation mini-batches

                    image_1_val = vbatch['image1_data'].to(device=device, dtype=dtype)
                    if get_dimensions(image_1_val) == 4:
                        image_1_val.unsqueeze_(0)
                    label_1_val = vbatch['image1_label'].to(device=device, dtype=dtype)
                    if get_dimensions(label_1_val) == 4:
                        label_1_val.unsqueeze_(0)
                    # move data to the device and convert to the working dtype;
                    # prepend one dimension (in place) when get_dimensions reports 4

                    out_1_val = deeplab(image_1_val)
                    # forward pass

                    loss_1 = dice_loss_3(out_1_val, label_1_val)
                    # mini-batch loss

                    valloss_1 += loss_1.item()
                    num_val_batches += 1
                    # accumulate the mini-batch loss

                avg_val_loss = valloss_1 / max(num_val_batches, 1)
                outstr = '------- 1st valloss={0:.4f}'\
                    .format(avg_val_loss) + '\n'

                logger['validation_1'].append(avg_val_loss)
                # scheduler.step(avg_val_loss) would go here if LR scheduling is wanted

                print(outstr)
                record.write(outstr)
                record.flush()

                if avg_val_loss < min_val:
                    # new best validation loss for this run
                    print(avg_val_loss, "less than", min_val)
                    min_val = avg_val_loss

                # a checkpoint is written after every validation pass,
                # whether or not the loss improved
                save_1('deeplab_output_8_elastic_save', deeplab, optimizer, logger, e, scheduler)

  0%|          | 1/4999 [12:09<1013:24:42, 729.95s/it]

Epoch 1 finished ! Training Loss: 0.5053



  0%|          | 2/4999 [23:04<981:48:02, 707.32s/it] 

Epoch 2 finished ! Training Loss: 0.4763



  0%|          | 3/4999 [34:09<963:55:40, 694.58s/it]

Epoch 3 finished ! Training Loss: 0.4648



  0%|          | 4/4999 [45:17<952:53:53, 686.77s/it]

Epoch 4 finished ! Training Loss: 0.4587

Epoch 5 finished ! Training Loss: 0.4429

------- 1st valloss=0.6204

0.6204477885495061 less than 1


  0%|          | 5/4999 [56:54<956:56:57, 689.83s/it]

Checkpoint 5 saved !


  0%|          | 6/4999 [1:08:03<948:07:06, 683.60s/it]

Epoch 6 finished ! Training Loss: 0.4359



  0%|          | 7/4999 [1:19:09<940:37:11, 678.33s/it]

Epoch 7 finished ! Training Loss: 0.4192



  0%|          | 8/4999 [1:30:11<933:25:57, 673.28s/it]

Epoch 8 finished ! Training Loss: 0.3971



  0%|          | 9/4999 [1:41:03<924:34:43, 667.03s/it]

Epoch 9 finished ! Training Loss: 0.3989

Epoch 10 finished ! Training Loss: 0.3883

------- 1st valloss=0.7038



  0%|          | 10/4999 [1:52:50<940:59:33, 679.01s/it]

Checkpoint 10 saved !


  0%|          | 11/4999 [2:03:52<933:38:51, 673.84s/it]

Epoch 11 finished ! Training Loss: 0.3828



  0%|          | 12/4999 [2:15:01<931:31:09, 672.44s/it]

Epoch 12 finished ! Training Loss: 0.3636



  0%|          | 13/4999 [2:26:15<932:02:56, 672.96s/it]

Epoch 13 finished ! Training Loss: 0.3516



  0%|          | 14/4999 [2:37:36<935:07:00, 675.31s/it]

Epoch 14 finished ! Training Loss: 0.3456

Epoch 15 finished ! Training Loss: 0.3440

------- 1st valloss=0.6122

0.6121868413427601 less than 0.6204477885495061


  0%|          | 15/4999 [2:49:18<946:00:54, 683.32s/it]

Checkpoint 15 saved !


  0%|          | 16/4999 [3:00:29<940:28:40, 679.45s/it]

Epoch 16 finished ! Training Loss: 0.3460



  0%|          | 17/4999 [3:11:36<935:09:23, 675.75s/it]

Epoch 17 finished ! Training Loss: 0.3291



  0%|          | 18/4999 [3:22:44<931:47:07, 673.44s/it]

Epoch 18 finished ! Training Loss: 0.3296



  0%|          | 19/4999 [3:34:10<936:42:51, 677.14s/it]

Epoch 19 finished ! Training Loss: 0.3225

Epoch 20 finished ! Training Loss: 0.3160

------- 1st valloss=0.2753

0.27526141767916473 less than 0.6121868413427601


  0%|          | 20/4999 [3:45:57<949:03:51, 686.21s/it]

Checkpoint 20 saved !


  0%|          | 21/4999 [3:56:48<934:16:32, 675.65s/it]

Epoch 21 finished ! Training Loss: 0.3293



  0%|          | 22/4999 [4:07:42<924:57:24, 669.05s/it]

Epoch 22 finished ! Training Loss: 0.3044



  0%|          | 23/4999 [4:18:45<922:26:32, 667.36s/it]

Epoch 23 finished ! Training Loss: 0.2829



  0%|          | 24/4999 [4:29:55<923:28:29, 668.24s/it]

Epoch 24 finished ! Training Loss: 0.2828

Epoch 25 finished ! Training Loss: 0.2866

------- 1st valloss=0.1934

0.19341003765230594 less than 0.27526141767916473


  1%|          | 25/4999 [4:41:48<941:43:32, 681.59s/it]

Checkpoint 25 saved !


  1%|          | 26/4999 [4:52:51<933:46:47, 675.97s/it]

Epoch 26 finished ! Training Loss: 0.3021



  1%|          | 27/4999 [5:03:49<926:01:37, 670.49s/it]

Epoch 27 finished ! Training Loss: 0.2859



  1%|          | 28/4999 [5:14:59<925:33:01, 670.28s/it]

Epoch 28 finished ! Training Loss: 0.2841



  1%|          | 29/4999 [5:26:00<921:35:28, 667.55s/it]

Epoch 29 finished ! Training Loss: 0.2811

Epoch 30 finished ! Training Loss: 0.2798

------- 1st valloss=0.2024



  1%|          | 30/4999 [5:38:04<945:00:03, 684.65s/it]

Checkpoint 30 saved !


  1%|          | 31/4999 [5:49:19<940:32:28, 681.55s/it]

Epoch 31 finished ! Training Loss: 0.2830



  1%|          | 32/4999 [6:00:26<934:36:16, 677.39s/it]

Epoch 32 finished ! Training Loss: 0.2646



  1%|          | 33/4999 [6:11:44<934:31:23, 677.46s/it]

Epoch 33 finished ! Training Loss: 0.2746



  1%|          | 34/4999 [6:22:42<926:09:58, 671.54s/it]

Epoch 34 finished ! Training Loss: 0.2769

Epoch 35 finished ! Training Loss: 0.2655

------- 1st valloss=0.2126



  1%|          | 35/4999 [6:34:42<946:23:12, 686.34s/it]

Checkpoint 35 saved !


  1%|          | 36/4999 [6:45:55<940:35:54, 682.28s/it]

Epoch 36 finished ! Training Loss: 0.2816



  1%|          | 37/4999 [6:57:09<936:46:04, 679.64s/it]

Epoch 37 finished ! Training Loss: 0.2671



  1%|          | 38/4999 [7:08:06<927:12:16, 672.84s/it]

Epoch 38 finished ! Training Loss: 0.2487



  1%|          | 39/4999 [7:19:18<926:59:53, 672.82s/it]

Epoch 39 finished ! Training Loss: 0.2680

Epoch 40 finished ! Training Loss: 0.2435

------- 1st valloss=0.1708

0.17082468711811563 less than 0.19341003765230594


  1%|          | 40/4999 [7:31:10<942:52:09, 684.48s/it]

Checkpoint 40 saved !


  1%|          | 41/4999 [7:42:26<939:08:20, 681.91s/it]

Epoch 41 finished ! Training Loss: 0.2603



  1%|          | 42/4999 [7:53:26<929:54:40, 675.34s/it]

Epoch 42 finished ! Training Loss: 0.2502



  1%|          | 43/4999 [8:04:22<921:42:34, 669.52s/it]

Epoch 43 finished ! Training Loss: 0.2555



  1%|          | 44/4999 [8:15:21<917:21:01, 666.49s/it]

Epoch 44 finished ! Training Loss: 0.2338

Epoch 45 finished ! Training Loss: 0.2577

------- 1st valloss=0.1484

0.14841418421786765 less than 0.17082468711811563


  1%|          | 45/4999 [8:27:18<937:40:54, 681.40s/it]

Checkpoint 45 saved !


  1%|          | 47/4999 [8:49:41<930:15:52, 676.28s/it]

Epoch 47 finished ! Training Loss: 0.2580



  1%|          | 48/4999 [9:00:40<922:51:52, 671.04s/it]

Epoch 48 finished ! Training Loss: 0.2400



  1%|          | 49/4999 [9:11:49<921:41:54, 670.33s/it]

Epoch 49 finished ! Training Loss: 0.2594

Epoch 50 finished ! Training Loss: 0.2402

------- 1st valloss=0.1260

0.1260421820308851 less than 0.14841418421786765


  1%|          | 50/4999 [9:23:46<940:42:30, 684.29s/it]

Checkpoint 50 saved !


  1%|          | 51/4999 [9:34:41<928:40:32, 675.67s/it]

Epoch 51 finished ! Training Loss: 0.2493



  1%|          | 52/4999 [9:45:53<926:44:59, 674.41s/it]

Epoch 52 finished ! Training Loss: 0.2358



  1%|          | 53/4999 [9:57:05<925:29:00, 673.62s/it]

Epoch 53 finished ! Training Loss: 0.2453



  1%|          | 54/4999 [10:08:18<925:03:26, 673.45s/it]

Epoch 54 finished ! Training Loss: 0.2267

Epoch 55 finished ! Training Loss: 0.2333

------- 1st valloss=0.4554



  1%|          | 55/4999 [10:20:06<939:10:27, 683.86s/it]

Checkpoint 55 saved !


  1%|          | 56/4999 [10:31:14<932:25:09, 679.08s/it]

Epoch 56 finished ! Training Loss: 0.2327



  1%|          | 57/4999 [10:42:24<928:30:48, 676.38s/it]

Epoch 57 finished ! Training Loss: 0.2571



  1%|          | 58/4999 [10:53:35<926:18:42, 674.91s/it]

Epoch 58 finished ! Training Loss: 0.2375



  1%|          | 59/4999 [11:04:36<920:12:42, 670.60s/it]

Epoch 59 finished ! Training Loss: 0.2580

Epoch 60 finished ! Training Loss: 0.2422

------- 1st valloss=0.1706



  1%|          | 60/4999 [11:16:28<937:09:05, 683.08s/it]

Checkpoint 60 saved !


  1%|          | 61/4999 [11:27:41<932:46:07, 680.03s/it]

Epoch 61 finished ! Training Loss: 0.2235



  1%|          | 62/4999 [11:38:54<929:38:16, 677.88s/it]

Epoch 62 finished ! Training Loss: 0.2558



  1%|▏         | 63/4999 [11:50:04<926:06:02, 675.44s/it]

Epoch 63 finished ! Training Loss: 0.2383



  1%|▏         | 64/4999 [12:01:17<925:16:11, 674.97s/it]

Epoch 64 finished ! Training Loss: 0.2371

Epoch 65 finished ! Training Loss: 0.2530

------- 1st valloss=0.3625



  1%|▏         | 65/4999 [12:13:15<942:40:21, 687.80s/it]

Checkpoint 65 saved !


  1%|▏         | 66/4999 [12:24:21<933:21:29, 681.15s/it]

Epoch 66 finished ! Training Loss: 0.2438



  1%|▏         | 67/4999 [12:35:24<925:46:01, 675.74s/it]

Epoch 67 finished ! Training Loss: 0.2418



  1%|▏         | 68/4999 [12:46:18<916:35:25, 669.18s/it]

Epoch 68 finished ! Training Loss: 0.2209



  1%|▏         | 69/4999 [12:57:28<916:57:23, 669.58s/it]

Epoch 69 finished ! Training Loss: 0.2186

Epoch 70 finished ! Training Loss: 0.2324

------- 1st valloss=0.1305



  1%|▏         | 70/4999 [13:09:20<934:12:08, 682.31s/it]

Checkpoint 70 saved !


  1%|▏         | 71/4999 [13:20:19<924:16:10, 675.20s/it]

Epoch 71 finished ! Training Loss: 0.2426



  1%|▏         | 72/4999 [13:31:30<922:27:14, 674.01s/it]

Epoch 72 finished ! Training Loss: 0.2250



  1%|▏         | 73/4999 [13:42:40<920:42:22, 672.87s/it]

Epoch 73 finished ! Training Loss: 0.2285



  1%|▏         | 74/4999 [13:53:54<920:41:01, 672.99s/it]

Epoch 74 finished ! Training Loss: 0.2178

Epoch 75 finished ! Training Loss: 0.2365

------- 1st valloss=0.2007



  2%|▏         | 75/4999 [14:05:41<934:33:18, 683.27s/it]

Checkpoint 75 saved !


  2%|▏         | 76/4999 [14:16:35<922:31:43, 674.61s/it]

Epoch 76 finished ! Training Loss: 0.2353



  2%|▏         | 77/4999 [14:27:28<913:13:57, 667.95s/it]

Epoch 77 finished ! Training Loss: 0.2248



  2%|▏         | 78/4999 [14:38:33<912:02:12, 667.21s/it]

Epoch 78 finished ! Training Loss: 0.2291



  2%|▏         | 79/4999 [14:49:46<914:10:31, 668.91s/it]

Epoch 79 finished ! Training Loss: 0.2258

Epoch 80 finished ! Training Loss: 0.2175

------- 1st valloss=0.4804



  2%|▏         | 80/4999 [15:01:36<930:53:17, 681.28s/it]

Checkpoint 80 saved !


  2%|▏         | 81/4999 [15:12:45<925:31:07, 677.48s/it]

Epoch 81 finished ! Training Loss: 0.2150



  2%|▏         | 82/4999 [15:23:36<914:31:01, 669.57s/it]

Epoch 82 finished ! Training Loss: 0.2004



  2%|▏         | 83/4999 [15:34:44<913:31:50, 668.98s/it]

Epoch 83 finished ! Training Loss: 0.2258



  2%|▏         | 84/4999 [15:45:40<908:18:14, 665.29s/it]

Epoch 84 finished ! Training Loss: 0.2116

Epoch 85 finished ! Training Loss: 0.2237

------- 1st valloss=0.1236

0.12362393056568892 less than 0.1260421820308851


  2%|▏         | 85/4999 [15:57:21<922:48:52, 676.05s/it]

Checkpoint 85 saved !


  2%|▏         | 86/4999 [16:08:29<919:19:50, 673.64s/it]

Epoch 86 finished ! Training Loss: 0.2266



  2%|▏         | 87/4999 [16:19:20<909:40:44, 666.70s/it]

Epoch 87 finished ! Training Loss: 0.2244



  2%|▏         | 88/4999 [16:30:24<908:24:48, 665.91s/it]

Epoch 88 finished ! Training Loss: 0.2240



  2%|▏         | 89/4999 [16:41:28<907:23:25, 665.30s/it]

Epoch 89 finished ! Training Loss: 0.2007

Epoch 90 finished ! Training Loss: 0.2160

------- 1st valloss=0.1255



  2%|▏         | 90/4999 [16:53:13<923:38:09, 677.35s/it]

Checkpoint 90 saved !


  2%|▏         | 91/4999 [17:04:22<919:53:57, 674.74s/it]

Epoch 91 finished ! Training Loss: 0.2100



  2%|▏         | 92/4999 [17:15:23<914:02:16, 670.58s/it]

Epoch 92 finished ! Training Loss: 0.2291



  2%|▏         | 93/4999 [17:34:14<1102:19:43, 808.88s/it]

Epoch 93 finished ! Training Loss: 0.1903



  2%|▏         | 94/4999 [17:45:26<1045:47:37, 767.56s/it]

Epoch 94 finished ! Training Loss: 0.2115

Epoch 95 finished ! Training Loss: 0.2311

------- 1st valloss=0.1536



  2%|▏         | 95/4999 [17:57:23<1025:14:48, 752.63s/it]

Checkpoint 95 saved !


  2%|▏         | 96/4999 [18:08:22<986:51:19, 724.59s/it] 

Epoch 96 finished ! Training Loss: 0.2310



  2%|▏         | 97/4999 [18:19:31<963:53:40, 707.88s/it]

Epoch 97 finished ! Training Loss: 0.2310



  2%|▏         | 98/4999 [18:30:30<943:30:58, 693.05s/it]

Epoch 98 finished ! Training Loss: 0.2085



  2%|▏         | 99/4999 [18:41:36<932:23:14, 685.02s/it]

Epoch 99 finished ! Training Loss: 0.2283

Epoch 100 finished ! Training Loss: 0.2227

------- 1st valloss=0.6760



  2%|▏         | 100/4999 [18:53:31<944:22:51, 693.97s/it]

Checkpoint 100 saved !


  2%|▏         | 101/4999 [19:04:38<933:08:26, 685.85s/it]

Epoch 101 finished ! Training Loss: 0.2108



  2%|▏         | 102/4999 [19:15:39<922:41:56, 678.32s/it]

Epoch 102 finished ! Training Loss: 0.2167



  2%|▏         | 103/4999 [19:26:42<916:28:07, 673.87s/it]

Epoch 103 finished ! Training Loss: 0.2170



  2%|▏         | 104/4999 [19:37:57<916:41:53, 674.18s/it]

Epoch 104 finished ! Training Loss: 0.2151

Epoch 105 finished ! Training Loss: 0.2162

------- 1st valloss=0.1618



  2%|▏         | 105/4999 [19:50:01<936:38:58, 688.99s/it]

Checkpoint 105 saved !


  2%|▏         | 106/4999 [20:01:18<931:42:27, 685.50s/it]

Epoch 106 finished ! Training Loss: 0.2224



  2%|▏         | 107/4999 [20:12:30<926:11:06, 681.58s/it]

Epoch 107 finished ! Training Loss: 0.2064



  2%|▏         | 108/4999 [20:23:41<921:33:20, 678.31s/it]

Epoch 108 finished ! Training Loss: 0.2376



  2%|▏         | 109/4999 [20:34:50<917:23:58, 675.39s/it]

Epoch 109 finished ! Training Loss: 0.2042

Epoch 110 finished ! Training Loss: 0.2136

------- 1st valloss=0.2621



  2%|▏         | 110/4999 [20:46:25<925:17:20, 681.33s/it]

Checkpoint 110 saved !


  2%|▏         | 111/4999 [20:57:46<925:10:09, 681.39s/it]

Epoch 111 finished ! Training Loss: 0.2117



  2%|▏         | 112/4999 [21:08:49<917:30:35, 675.88s/it]

Epoch 112 finished ! Training Loss: 0.2281



  2%|▏         | 113/4999 [21:19:57<914:09:39, 673.55s/it]

Epoch 113 finished ! Training Loss: 0.2022



  2%|▏         | 114/4999 [21:30:55<907:37:30, 668.87s/it]

Epoch 114 finished ! Training Loss: 0.2430

Epoch 115 finished ! Training Loss: 0.2050

------- 1st valloss=0.1207

0.12067875428044278 less than 0.12362393056568892


  2%|▏         | 115/4999 [21:42:47<924:40:56, 681.58s/it]

Checkpoint 115 saved !


  2%|▏         | 116/4999 [21:53:44<914:39:28, 674.33s/it]

Epoch 116 finished ! Training Loss: 0.2239



  2%|▏         | 117/4999 [22:04:44<908:33:07, 669.97s/it]

Epoch 117 finished ! Training Loss: 0.2208



  2%|▏         | 118/4999 [22:15:48<906:03:35, 668.27s/it]

Epoch 118 finished ! Training Loss: 0.1995



  2%|▏         | 119/4999 [22:26:53<904:31:53, 667.28s/it]

Epoch 119 finished ! Training Loss: 0.2101

Epoch 120 finished ! Training Loss: 0.2048

------- 1st valloss=0.4458



  2%|▏         | 120/4999 [22:38:50<924:28:34, 682.13s/it]

Checkpoint 120 saved !


  2%|▏         | 121/4999 [22:49:52<916:17:00, 676.22s/it]

Epoch 121 finished ! Training Loss: 0.2071



  2%|▏         | 122/4999 [23:01:00<912:48:06, 673.79s/it]

Epoch 122 finished ! Training Loss: 0.2090



  2%|▏         | 123/4999 [23:12:09<910:18:11, 672.09s/it]

Epoch 123 finished ! Training Loss: 0.2131



  2%|▏         | 124/4999 [23:23:17<908:48:03, 671.11s/it]

Epoch 124 finished ! Training Loss: 0.2020

Epoch 125 finished ! Training Loss: 0.2193

------- 1st valloss=0.0937

0.09369745526624762 less than 0.12067875428044278


  3%|▎         | 125/4999 [23:35:06<923:57:44, 682.45s/it]

Checkpoint 125 saved !


  3%|▎         | 126/4999 [23:46:17<919:05:50, 679.00s/it]

Epoch 126 finished ! Training Loss: 0.2109



  3%|▎         | 127/4999 [23:57:20<912:19:53, 674.14s/it]

Epoch 127 finished ! Training Loss: 0.2057



  3%|▎         | 128/4999 [24:08:24<908:10:20, 671.20s/it]

Epoch 128 finished ! Training Loss: 0.2274



  3%|▎         | 129/4999 [24:19:37<908:27:16, 671.55s/it]

Epoch 129 finished ! Training Loss: 0.2008

Epoch 130 finished ! Training Loss: 0.1932

------- 1st valloss=0.0932

0.09323392063379288 less than 0.09369745526624762


  3%|▎         | 130/4999 [24:31:28<924:22:21, 683.45s/it]

Checkpoint 130 saved !


  3%|▎         | 131/4999 [24:42:35<917:33:13, 678.55s/it]

Epoch 131 finished ! Training Loss: 0.2109



  3%|▎         | 132/4999 [24:53:42<912:28:34, 674.94s/it]

Epoch 132 finished ! Training Loss: 0.2007



  3%|▎         | 133/4999 [25:04:58<912:49:53, 675.34s/it]

Epoch 133 finished ! Training Loss: 0.1914



  3%|▎         | 134/4999 [25:16:24<916:50:43, 678.45s/it]

Epoch 134 finished ! Training Loss: 0.2200

Epoch 135 finished ! Training Loss: 0.2149

------- 1st valloss=0.3141



  3%|▎         | 135/4999 [25:28:11<928:28:36, 687.19s/it]

Checkpoint 135 saved !


  3%|▎         | 136/4999 [25:39:23<922:12:06, 682.69s/it]

Epoch 136 finished ! Training Loss: 0.2221



  3%|▎         | 137/4999 [25:50:34<917:09:56, 679.10s/it]

Epoch 137 finished ! Training Loss: 0.2329



  3%|▎         | 138/4999 [26:01:29<907:05:27, 671.78s/it]

Epoch 138 finished ! Training Loss: 0.2281



  3%|▎         | 139/4999 [26:12:30<902:41:12, 668.66s/it]

Epoch 139 finished ! Training Loss: 0.2067

Epoch 140 finished ! Training Loss: 0.2161

------- 1st valloss=0.1028



  3%|▎         | 140/4999 [26:24:09<914:33:38, 677.59s/it]

Checkpoint 140 saved !


  3%|▎         | 141/4999 [26:35:23<913:03:31, 676.62s/it]

Epoch 141 finished ! Training Loss: 0.2142



  3%|▎         | 142/4999 [26:46:32<909:55:36, 674.44s/it]

Epoch 142 finished ! Training Loss: 0.1897



  3%|▎         | 143/4999 [26:57:39<906:34:41, 672.09s/it]

Epoch 143 finished ! Training Loss: 0.2222



  3%|▎         | 144/4999 [27:08:52<906:46:42, 672.38s/it]

Epoch 144 finished ! Training Loss: 0.1937

Epoch 145 finished ! Training Loss: 0.1794

------- 1st valloss=0.2391



  3%|▎         | 145/4999 [27:20:34<918:32:01, 681.24s/it]

Checkpoint 145 saved !


  3%|▎         | 146/4999 [27:31:49<915:53:42, 679.42s/it]

Epoch 146 finished ! Training Loss: 0.1954



  3%|▎         | 147/4999 [27:42:52<909:05:54, 674.52s/it]

Epoch 147 finished ! Training Loss: 0.2179



  3%|▎         | 148/4999 [27:53:50<902:09:51, 669.51s/it]

Epoch 148 finished ! Training Loss: 0.2073



  3%|▎         | 149/4999 [28:05:05<904:11:23, 671.15s/it]

Epoch 149 finished ! Training Loss: 0.2114

Epoch 150 finished ! Training Loss: 0.2257

------- 1st valloss=0.6763



  3%|▎         | 150/4999 [28:17:16<928:11:25, 689.11s/it]

Checkpoint 150 saved !


  3%|▎         | 151/4999 [28:28:16<916:10:07, 680.32s/it]

Epoch 151 finished ! Training Loss: 0.2074



  3%|▎         | 152/4999 [28:39:13<906:36:24, 673.36s/it]

Epoch 152 finished ! Training Loss: 0.2121



  3%|▎         | 153/4999 [28:50:10<899:42:57, 668.38s/it]

Epoch 153 finished ! Training Loss: 0.1958



  3%|▎         | 154/4999 [29:01:21<900:40:57, 669.24s/it]

Epoch 154 finished ! Training Loss: 0.2106

Epoch 155 finished ! Training Loss: 0.1971

------- 1st valloss=0.1919



  3%|▎         | 155/4999 [29:13:17<919:29:32, 683.36s/it]

Checkpoint 155 saved !


  3%|▎         | 156/4999 [29:24:09<906:28:05, 673.81s/it]

Epoch 156 finished ! Training Loss: 0.2141



  3%|▎         | 157/4999 [29:35:24<906:44:00, 674.15s/it]

Epoch 157 finished ! Training Loss: 0.2104



  3%|▎         | 158/4999 [29:46:37<906:11:48, 673.89s/it]

Epoch 158 finished ! Training Loss: 0.2109



  3%|▎         | 159/4999 [29:57:42<902:28:06, 671.26s/it]

Epoch 159 finished ! Training Loss: 0.1971

Epoch 160 finished ! Training Loss: 0.1960

------- 1st valloss=0.0885

0.0885373302128004 less than 0.09323392063379288


  3%|▎         | 160/4999 [30:09:40<921:08:30, 685.29s/it]

Checkpoint 160 saved !


  3%|▎         | 161/4999 [30:20:37<909:22:17, 676.67s/it]

Epoch 161 finished ! Training Loss: 0.1845



  3%|▎         | 162/4999 [30:31:32<900:41:54, 670.36s/it]

Epoch 162 finished ! Training Loss: 0.1966



  3%|▎         | 163/4999 [30:42:54<905:00:50, 673.71s/it]

Epoch 163 finished ! Training Loss: 0.2182



  3%|▎         | 164/4999 [30:54:13<907:03:30, 675.37s/it]

Epoch 164 finished ! Training Loss: 0.2037

Epoch 165 finished ! Training Loss: 0.2010

------- 1st valloss=0.6513



  3%|▎         | 165/4999 [31:06:02<920:33:03, 685.56s/it]

Checkpoint 165 saved !


  3%|▎         | 166/4999 [31:17:18<916:29:27, 682.67s/it]

Epoch 166 finished ! Training Loss: 0.2099



  3%|▎         | 167/4999 [31:28:26<910:17:37, 678.20s/it]

Epoch 167 finished ! Training Loss: 0.1934



  3%|▎         | 168/4999 [31:39:35<906:21:27, 675.41s/it]

Epoch 168 finished ! Training Loss: 0.2024



  3%|▎         | 169/4999 [31:50:49<905:29:36, 674.90s/it]

Epoch 169 finished ! Training Loss: 0.2178

Epoch 170 finished ! Training Loss: 0.2094

------- 1st valloss=0.1138



  3%|▎         | 170/4999 [32:02:40<919:51:19, 685.75s/it]

Checkpoint 170 saved !


  3%|▎         | 171/4999 [32:13:28<904:42:00, 674.59s/it]

Epoch 171 finished ! Training Loss: 0.2139



  3%|▎         | 172/4999 [32:24:33<900:19:35, 671.47s/it]

Epoch 172 finished ! Training Loss: 0.2142



  3%|▎         | 173/4999 [32:35:31<895:03:20, 667.68s/it]

Epoch 173 finished ! Training Loss: 0.2079



  3%|▎         | 174/4999 [32:46:36<893:51:15, 666.92s/it]

Epoch 174 finished ! Training Loss: 0.2069

Epoch 175 finished ! Training Loss: 0.1954

------- 1st valloss=0.6793



  4%|▎         | 175/4999 [32:58:23<909:33:25, 678.77s/it]

Checkpoint 175 saved !


  4%|▎         | 176/4999 [33:09:42<909:21:35, 678.77s/it]

Epoch 176 finished ! Training Loss: 0.2202



  4%|▎         | 177/4999 [33:20:48<903:59:59, 674.91s/it]

Epoch 177 finished ! Training Loss: 0.2176



In [None]:
# Evaluation cell: runs the model over the validation loader, turns each
# prediction into a hard mask, and reports the three components returned by
# dice_loss_3_debug averaged over all mini-batches.
deeplab.eval()

with torch.no_grad():

    bgloss = 0
    bdloss = 0
    bvloss = 0
    num_eval_batches = 0

    # tqdm wraps the loader directly so it can read the loader's length and
    # show a real progress bar.
    for vbatch in tqdm(validation_loader):

        image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
        label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)
        # move data to the device and convert to the working dtype

        output = deeplab(image_1)
        # forward pass

        # Hard mask: 1 wherever an entry equals the maximum along dim 1, else 0
        # (dim 1 is presumably the class/channel axis -- confirm). Computed on
        # the device; same result as the earlier NumPy round trip, without the
        # GPU -> CPU -> GPU copies.
        out_1 = (output == output.max(dim=1, keepdim=True)[0]).to(device=device, dtype=dtype)
        loss_1 = dice_loss_3(out_1, label_1)

        bg, bd, bv = dice_loss_3_debug(out_1, label_1)
        # per-component losses for this mini-batch
        print(bg.item(), bd.item(), bv.item(), loss_1.item())
        bgloss += bg.item()
        bdloss += bd.item()
        bvloss += bv.item()
        num_eval_batches += 1

    # max(..., 1) keeps an empty loader from raising; the means are then 0.
    denom = max(num_eval_batches, 1)
    outstr = '------- background loss = {0:.4f}, body loss = {1:.4f}, bv loss = {2:.4f}'\
        .format(bgloss/denom, bdloss/denom, bvloss/denom) + '\n'
    print(outstr)