In [1]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_utility import *
from data_utils import *
from loss import *
from train import *
from deeplab_model.deeplab import *
from dense_vnet.DenseVNet import DenseVNet
from sync_batchnorm import convert_model
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
USE_GPU = True
NUM_WORKERS = 12
BATCH_SIZE = 2 

dtype = torch.float32 
# define dtype, float is space efficient than double

if USE_GPU and torch.cuda.is_available():
    
    device = torch.device("cuda:0")
    
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    # magic flag that accelerate
    
    print('using GPU for training')
else:
    device = torch.device('cpu')
    print('using CPU for training')

using GPU for training


In [3]:
train_dataset = get_full_resolution_dataset(data_type = 'nii_train', 
                transform=transforms.Compose([
                random_affine(90, 15),
                random_filp(0.5)]))
# do data augumentation on train dataset

validation_dataset = get_full_resolution_dataset(data_type = 'nii_test', 
                transform=None)
# no data augumentation on validation dataset

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                    num_workers=NUM_WORKERS)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True,
                    num_workers=NUM_WORKERS) # drop_last
# loaders come with auto batch division and multi-thread acceleration

In [4]:
from bv_refinement_network.RefinementModel import RefinementModel, RefinementModel_NoDown

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    
refine_model = RefinementModel(num_classes=1)
refine_model = nn.DataParallel(refine_model)
refine_model = convert_model(refine_model)
refine_model = refine_model.to(device, dtype)

optimizer = optim.Adam(refine_model.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=25)

deeplab = DeepLab(output_stride=16)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)

checkpoint = torch.load('../deeplab_dilated_save/2019-08-10 09:28:43.844872 epoch: 1160.pth') # best one

deeplab.load_state_dict(checkpoint['state_dict_1'])
deeplab = deeplab.to(device, dtype)

epoch = 0

Let's use 2 GPUs!


In [None]:
'''
test_dictionary = train_dataset[33]

image_1 = test_dictionary['image1_data'].view(1, 1, 256, 256, 256)
label_1 = test_dictionary['image1_label'].view(1, 3, 256, 256, 256)
bv_label = label_1.narrow(1,2,1).to(device, dtype)
if get_dimensions(bv_label) == 4:
    bv_label.unsqueeze_(0)

image_1 = image_1.to(device=device, dtype=dtype) 
label_1 = label_1.to(device=device, dtype=dtype)
'''

"\ntest_dictionary = train_dataset[33]\n\nimage_1 = test_dictionary['image1_data'].view(1, 1, 256, 256, 256)\nlabel_1 = test_dictionary['image1_label'].view(1, 3, 256, 256, 256)\nbv_label = label_1.narrow(1,2,1).to(device, dtype)\nif get_dimensions(bv_label) == 4:\n    bv_label.unsqueeze_(0)\n\nimage_1 = image_1.to(device=device, dtype=dtype) \nlabel_1 = label_1.to(device=device, dtype=dtype)\n"

In [None]:
def get_bboxes(image, label, output, batchsize):
    image_final = torch.zeros((batchsize, 1, 128, 128, 128)).to(device, dtype)
    label_final = torch.zeros((batchsize, 1, 128, 128, 128)).to(device, dtype)
    output_final = torch.zeros((batchsize, 1, 128, 128, 128)).to(device, dtype)
    for b in range(batchsize):
        out = output[b]
        x, y, z = find_bv_centroid(binarize_output(out))
        #x,y,z = loadbvcenter(binarize_output(out))
        x, y, z = np.clip([x, y, z], a_min=64, a_max=192)
        x1 = max(x-64, 0)
        x2 = min(x+64, 256)
        y1 = max(y-64, 0)
        y2 = min(y+64, 256)
        z1 = max(z-64, 0)
        z2 = min(z+64, 256)
        image_final[b] = image[b, :, x1:x2, y1:y2, z1:z2]
        label_final[b] = label[b, :, x1:x2, y1:y2, z1:z2]
        output_final[b] = output[b, :, x1:x2, y1:y2, z1:z2]
    return image_final, label_final, output_final

In [None]:
epochs = 5000

record = open('train_bv_refine_5.txt','a+')

logger = {'train':[], 'validation_1': []}

min_val = 1

for e in tqdm(range(epoch + 1, epochs)):
# iter over epoches
    epoch_loss = 0
        
    for t, batch in enumerate(train_loader):
    # iter over the train mini batches
        refine_model.train()
        deeplab.eval()
        # Set the model flag to train
        # 1. enable dropout
        # 2. batchnorm behave differently in train and test
        #print(batch['image1_data'])
        image_1 = batch['image1_data'].to(device=device, dtype=dtype)
        image_1 = image_1.view(BATCH_SIZE,1,256,256,256)

        label_1 = batch['image1_label'].to(device=device, dtype=dtype)
        label_1 = label_1.view(BATCH_SIZE,3,256,256,256)

        bv_label = label_1[:, 2, :, :, :]
        bv_label = bv_label.view(BATCH_SIZE,1,256,256,256)

        #original_res = [a[minibatch].item() for a in batch['original_resolution']]

        # Get coarse output from deeplab model from 256 resolution input
        out_coarse = deeplab(image_1)
        out_coarse = out_coarse.view(BATCH_SIZE,3,256,256,256)

        bv_coarse = out_coarse[:, 2, :, :, :]
        bv_coarse = bv_coarse.view(BATCH_SIZE,1,256,256,256)

        bbox_image, bbox_label, bbox_bv = get_bboxes(image_1, bv_label, bv_coarse, BATCH_SIZE)
        #bbox_image, bbox_label, bbox_bv = get_bboxes(image_1, bv_label, bv_coarse, BATCH_SIZE)
        
        bbox_concat = torch.cat([bbox_image, bbox_bv], dim=1)
        
        #show_image_slice(label_1)
        #show_image_slice(bv_label)
        #show_image_slice(bbox_image)
        #show_image_slice(bbox_label)
        #show_image_slice(bbox_bv)

        #print("bbox_concat", bbox_concat.shape)

        del out_coarse
        del image_1
        del bv_coarse
        del label_1
        del bv_label
        del bbox_image
        del bbox_bv

        torch.cuda.empty_cache()

        refine_out = refine_model(bbox_concat)
        # do the inference

        #print(refine_out.shape)
        #print(bbox_bv_label.shape)

        loss = dice_loss(refine_out, bbox_label)
        
        print(loss)
        epoch_loss += loss.item()
        # record minibatch loss to epoch loss
        
        optimizer.zero_grad()
        # set the model parameter gradient to zero
        
        loss.backward()
        # calculate the gradient wrt loss
        optimizer.step()
        # take a gradient descent step
        torch.cuda.empty_cache()
        
    outstr = 'Epoch {0} finished ! Training Loss: {1:.4f}'.format(e, epoch_loss/(t+1)) + '\n'
    
    logger['train'].append(epoch_loss/(t+1))
    
    print(outstr)
    record.write(outstr)
    record.flush()

    if e%2 == 0:
    # do validation every 5 epoches
        deeplab.eval()
        refine_model.eval()
        # set model flag to eval
        # 1. disable dropout
        # 2. batchnorm behave differs

        with torch.no_grad():
        # stop taking gradient
        
            #valloss_4 = 0
            #valloss_2 = 0
            valloss_1 = 0
            
            for v, vbatch in enumerate(validation_loader):
                image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
                image_1 = image_1.view(BATCH_SIZE,1,256,256,256)

                label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)
                label_1 = label_1.view(BATCH_SIZE,3,256,256,256)

                bv_label = label_1[:, 2, :, :, :]
                bv_label = bv_label.view(BATCH_SIZE,1,256,256,256)

                #original_res = [a[minibatch].item() for a in batch['original_resolution']]

                # Get coarse output from deeplab model from 256 resolution input
                out_coarse = deeplab(image_1)
                out_coarse = out_coarse.view(BATCH_SIZE,3,256,256,256)

                bv_coarse = out_coarse[:, 2, :, :, :]
                bv_coarse = bv_coarse.view(BATCH_SIZE,1,256,256,256)

                bbox_image, bbox_label, bbox_bv = get_bboxes(image_1, bv_label, bv_coarse, BATCH_SIZE)

                bbox_concat = torch.cat([bbox_image, bbox_bv], dim=1)

                #show_image_slice(label)
                #show_image_slice(bbox_image)
                #show_image_slice(bbox_label)
                #show_image_slice(bbox_bv)

                #print("bbox_concat", bbox_concat.shape)
                #seg_image_concat = torch.cat([bv_coarse, image_1], dim=1)

                del out_coarse
                del image_1
                del bv_coarse
                del label_1
                del bv_label
                del bbox_image
                del bbox_bv

                torch.cuda.empty_cache()

                refine_out = refine_model(bbox_concat)
                        
                loss = dice_loss(refine_out, bbox_label)
                
                print(loss)
            
                # calculate loss
                valloss_1 += loss.item()
                
                torch.cuda.empty_cache()
                
            
            avg_val_loss = (valloss_1 / (v+1))
            outstr = '------- 1st valloss={0:.4f}'\
                .format(avg_val_loss) + '\n'
            
            logger['validation_1'].append(avg_val_loss)
            #scheduler.step(avg_val_loss)
            
            if avg_val_loss < min_val:
                save_1('refine_bv5_save', refine_model, optimizer, logger, e, scheduler)
            elif e % 10 == 0:
                save_1('refine_bv5_save', refine_model, optimizer, logger, e, scheduler)
            
            torch.cuda.empty_cache()
            
            print(outstr)
            record.write(outstr)
            record.flush()
    


record.close()

  0%|          | 0/4999 [00:00<?, ?it/s]

tensor(0.9966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9878, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8701, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6249, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7340, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9872, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9479, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9802, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6811, device='cuda:0',

  0%|          | 1/4999 [12:49<1068:37:03, 769.71s/it]

Epoch 1 finished ! Training Loss: 0.7244

tensor(0.5967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5123, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1781, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5242, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6567, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6049, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3090, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5835, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9911, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7677, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2983, device='cuda:0', grad_fn=<RsubB

  0%|          | 2/4999 [25:29<1064:07:52, 766.63s/it]

Checkpoint 2 saved !
------- 1st valloss=0.3238

tensor(0.2122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6184, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6276, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6201, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5902, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8522, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8416, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6334, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2704, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5953, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4291, device='cuda:0', grad_fn

  0%|          | 3/4999 [37:10<1036:44:36, 747.05s/it]

Epoch 3 finished ! Training Loss: 0.5693

tensor(0.4445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6303, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7854, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6721, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3771, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4120, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3254, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9972, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6682, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2807, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackw

  0%|          | 4/4999 [49:51<1042:18:58, 751.22s/it]

Checkpoint 4 saved !
------- 1st valloss=0.4669

tensor(0.6813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6367, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3139, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1729, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9788, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2349, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4500, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6617, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6417, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2663, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3288, device='cuda:0', grad_fn=<Rs

  0%|          | 5/4999 [1:01:32<1021:08:54, 736.11s/it]

Epoch 5 finished ! Training Loss: 0.5938

tensor(0.3196, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2859, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6498, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2704, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2912, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2589, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4477, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7599, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6239, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5968, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6477, device='cuda:0', grad_fn=<RsubB

  0%|          | 6/4999 [1:14:12<1030:48:24, 743.22s/it]

Checkpoint 6 saved !
------- 1st valloss=0.3145

tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9951, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3359, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5810, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1978, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3497, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2114, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2821, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6256, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6756, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6481, device='cuda:0', grad_fn

  0%|          | 7/4999 [1:25:53<1013:00:18, 730.53s/it]

Epoch 7 finished ! Training Loss: 0.5734

tensor(0.5853, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9835, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2550, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3232, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6370, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5437, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2850, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5284, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7132, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7262, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2520, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6039, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3361, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8380, device='cuda:0', grad_fn=<RsubBackw

  0%|          | 8/4999 [1:38:32<1024:39:19, 739.08s/it]

Checkpoint 8 saved !
------- 1st valloss=0.3299

tensor(0.4288, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4239, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6396, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4157, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6053, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3281, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3167, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6441, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6695, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4505, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2819, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3231, device='cuda:0', grad_fn=<Rs

  0%|          | 9/4999 [1:50:22<1012:39:38, 730.58s/it]

Epoch 9 finished ! Training Loss: 0.5544

tensor(0.2891, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5817, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5922, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6072, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6320, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4346, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7892, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7438, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6381, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9505, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6186, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3199, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3181, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubB

  0%|          | 10/4999 [2:03:04<1025:18:37, 739.85s/it]

Checkpoint 10 saved !
------- 1st valloss=0.3237

tensor(0.3721, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7161, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2261, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3040, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6202, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2662, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6576, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5411, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6183, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2752, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3057, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8348, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6022, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5791, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2724, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6158, device='cuda:0', grad_f

  0%|          | 11/4999 [2:14:47<1009:56:34, 728.91s/it]

Epoch 11 finished ! Training Loss: 0.5192

tensor(0.6685, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3641, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3679, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2472, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2921, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6670, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9846, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2737, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4783, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6033, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1974, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4147, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5614, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1487, device='cuda:0', grad_fn=<RsubBack

  0%|          | 12/4999 [2:27:24<1021:20:34, 737.28s/it]

Checkpoint 12 saved !
------- 1st valloss=0.3147

tensor(0.3056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6529, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3229, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6117, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6809, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2715, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7236, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7417, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9884, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6719, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6418, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2390, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6537, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8367, device='cuda:0', grad_fn=<R

  0%|          | 13/4999 [2:39:05<1006:04:59, 726.41s/it]

Epoch 13 finished ! Training Loss: 0.5578

tensor(0.5108, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3154, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7825, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3828, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2752, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5951, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2180, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4980, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3968, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4362, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9977, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5764, device='cuda:0', grad_fn=<RsubBack

  0%|          | 14/4999 [2:51:44<1019:16:51, 736.09s/it]

Checkpoint 14 saved !
------- 1st valloss=0.2824

tensor(0.6181, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6185, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6900, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6030, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1547, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5694, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2206, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9978, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1777, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6263, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5753, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2706, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2102, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5713, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3148, device='cuda:0', grad_fn=<RsubB

  0%|          | 15/4999 [3:03:30<1006:48:36, 727.23s/it]

Epoch 15 finished ! Training Loss: 0.4950

tensor(0.3074, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2267, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2227, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3027, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5562, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4826, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2611, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5648, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9978, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2424, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7194, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5954, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1573, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2692, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5779, device='cuda:0', grad_fn=<Rsub

  0%|          | 16/4999 [3:16:14<1021:45:07, 738.17s/it]

Checkpoint 16 saved !
------- 1st valloss=0.2805

tensor(0.2619, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4333, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3462, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5544, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1818, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6128, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3032, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7322, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5885, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3265, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9975, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2226, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3142, device='cuda:0', grad_f

  0%|          | 17/4999 [3:27:52<1005:03:19, 726.25s/it]

Epoch 17 finished ! Training Loss: 0.5115

tensor(0.6856, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6858, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2749, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4188, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7990, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2130, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5751, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)


In [None]:
deeplab.eval()

with torch.no_grad():
    
    bgloss = 0
    bdloss = 0
    bvloss = 0
    
    for v, vbatch in tqdm(enumerate(validation_loader)):
        # move data to device, convert dtype to desirable dtype
        image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
        label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)

        output = deeplab(image_1)
        # do the inference
        output_numpy = output.cpu().numpy()
        
        
        #out_1 = torch.round(output)
        out_1 = torch.from_numpy((output_numpy == output_numpy.max(axis=1)[:, None]).astype(int)).to(device=device, dtype=dtype)
        loss_1 = dice_loss_3(out_1, label_1)

        bg, bd, bv = dice_loss_3_debug(out_1, label_1)
        # calculate loss
        print(bg.item(), bd.item(), bv.item(), loss_1.item())
        bgloss += bg.item()
        bdloss += bd.item()
        bvloss += bv.item()
        
        if bv.item() >= 0.2 or bd.item() >= 0.1:
            show_image_slice(image_1)
            show_image_slice(label_1)
            show_image_slice(output)

    outstr = '------- background loss = {0:.4f}, body loss = {1:.4f}, bv loss = {2:.4f}'\
        .format(bgloss/(v+1), bdloss/(v+1), bvloss/(v+1)) + '\n'
    print(outstr)