In [1]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_utility import *
from data_utils import *
from loss import *
from train import *
from deeplab_model.deeplab import *
from dense_vnet.DenseVNet import DenseVNet
from sync_batchnorm import convert_model
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
USE_GPU = True
NUM_WORKERS = 12
BATCH_SIZE = 1

dtype = torch.float32 
# define dtype, float is space efficient than double

if USE_GPU and torch.cuda.is_available():
    
    device = torch.device("cuda:0")
    
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    # magic flag that accelerate
    
    print('using GPU for training')
else:
    device = torch.device('cpu')
    print('using CPU for training')

using GPU for training


In [3]:
train_dataset = get_full_resolution_dataset(data_type = 'nii_train', 
                transform=transforms.Compose([
                random_affine(90, 15),
                random_filp(0.5)]))
# do data augumentation on train dataset

validation_dataset = get_full_resolution_dataset(data_type = 'nii_test', 
                transform=None)
# no data augumentation on validation dataset

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                    num_workers=NUM_WORKERS)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True,
                    num_workers=NUM_WORKERS) # drop_last
# loaders come with auto batch division and multi-thread acceleration

In [4]:
from bv_refinement_network.RefinementModel import RefinementModel, RefinementModel_NoDown

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    
refine_model = RefinementModel_NoDown(num_classes=1)
refine_model = nn.DataParallel(refine_model)
refine_model = convert_model(refine_model)
refine_model = refine_model.to(device, dtype)

optimizer = optim.Adam(refine_model.parameters(), lr=1e-2)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

deeplab = DeepLab(output_stride=16)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)

checkpoint = torch.load('../deeplab_dilated_save/2019-08-10 09:28:43.844872 epoch: 1160.pth') # best one

deeplab.load_state_dict(checkpoint['state_dict_1'])
deeplab = deeplab.to(device, dtype)

epoch = 0

In [None]:
'''
test_dictionary = train_dataset[33]

image_1 = test_dictionary['image1_data'].view(1, 1, 256, 256, 256)
label_1 = test_dictionary['image1_label'].view(1, 3, 256, 256, 256)
bv_label = label_1.narrow(1,2,1).to(device, dtype)
if get_dimensions(bv_label) == 4:
    bv_label.unsqueeze_(0)

image_1 = image_1.to(device=device, dtype=dtype) 
label_1 = label_1.to(device=device, dtype=dtype)

deeplab.eval()

out_coarse = deeplab(image_1)
bv_coarse = out_coarse.narrow(1,2,1)

if get_dimensions(bv_coarse) == 4:
    bv_coarse.unsqueeze_(0)

seg_image_concat = torch.cat([bv_coarse, image_1], dim=1)
print(bv_label.shape)
print(bv_coarse.shape)
print(dice_loss(bv_coarse, bv_label))
print(seg_image_concat.shape)
'''

"\ntest_dictionary = train_dataset[33]\n\nimage_1 = test_dictionary['image1_data'].view(1, 1, 256, 256, 256)\nlabel_1 = test_dictionary['image1_label'].view(1, 3, 256, 256, 256)\nbv_label = label_1.narrow(1,2,1).to(device, dtype)\nif get_dimensions(bv_label) == 4:\n    bv_label.unsqueeze_(0)\n\nimage_1 = image_1.to(device=device, dtype=dtype) \nlabel_1 = label_1.to(device=device, dtype=dtype)\n\ndeeplab.eval()\n\nout_coarse = deeplab(image_1)\nbv_coarse = out_coarse.narrow(1,2,1)\n\nif get_dimensions(bv_coarse) == 4:\n    bv_coarse.unsqueeze_(0)\n\nseg_image_concat = torch.cat([bv_coarse, image_1], dim=1)\nprint(bv_label.shape)\nprint(bv_coarse.shape)\nprint(dice_loss(bv_coarse, bv_label))\nprint(seg_image_concat.shape)\n"

In [None]:
epochs = 5000

record = open('train_bv_refine_nodown_resize.txt','a+')

logger = {'train':[], 'validation_1': []}

min_val = 1

for e in tqdm(range(epoch + 1, epochs)):
# iter over epoches
    epoch_loss = 0
        
    for t, batch in enumerate(train_loader):
    # iter over the train mini batches
        #print(batch.shape)
        training_losses = []
        for minibatch in range(BATCH_SIZE):
            refine_model.train()
            deeplab.eval()
            # Set the model flag to train
            # 1. enable dropout
            # 2. batchnorm behave differently in train and test
            #print(batch['image1_data'])
            image_1 = batch['image1_data'][minibatch].to(device=device, dtype=dtype)
            image_1 = image_1.view(1,1,256,256,256)
            
            label_1 = batch['image1_label'][minibatch].to(device=device, dtype=dtype)
            label_1 = label_1.view(1,3,256,256,256)

            bv_label = label_1[:, 2, :, :, :]
            bv_label = bv_label.view(1,1,256,256,256)

            original_res = [a[minibatch].item() for a in batch['original_resolution']]
    
            # Get coarse output from deeplab model from 256 resolution input
            out_coarse = deeplab(image_1)
            out_coarse = out_coarse.view(1,3,256,256,256)

            bv_coarse = out_coarse[:, 2, :, :, :]
            bv_coarse = bv_coarse.view(1,1,256,256,256)
            bv_coarse_binarized = binarize_output(bv_coarse)
            
            bv_x, bv_y, bv_z = loadbvcenter(bv_label.view(1, 256, 256, 256))
            bbox_bv_label = bv_label.view(256, 256, 256)[bv_x-64:bv_x+64, bv_y-64:bv_y+64, bv_z-64:bv_z+64]
            bbox_bv_label = reshape_image(bbox_bv_label.squeeze(), 128, 128, 128).to(device, dtype)
            bbox_bv_label = bbox_bv_label.view(1,1,128,128,128)
            
            pred_x, pred_y, pred_z = loadbvcenter(bv_coarse_binarized.view(1,256,256,256))
            bbox_bv = bv_coarse.view(256, 256, 256)[pred_x-64:pred_x+64, pred_y-64:pred_y+64, pred_z-64:pred_z+64]
            bbox_bv = reshape_image(bbox_bv.squeeze(), 128, 128, 128).to(device, dtype)
            bbox_bv = bbox_bv.view(1,1,128,128,128)

            #bbox_image = get_bounding_box_image(image_1, (256,256,256)).to(device, dtype)
            bbox_image = image_1[:, :, pred_x-64:pred_x+64, pred_y-64:pred_y+64, pred_z-64:pred_z+64]
            bbox_image = reshape_image(bbox_image.squeeze(), 128, 128, 128).to(device, dtype)
            bbox_image = bbox_image.view(1, 1, 128, 128, 128)
            #print(bbox_image.shape)

            #show_image_slice(image_1)
            #show_image_slice(image_1_resize)
            #show_image_slice(label_1)
            #show_image_slice(bv_label)
            #show_image_slice(bv_label_resize)
            #show_image_slice(out_coarse)
            #show_image_slice(bv_coarse)
            #show_image_slice(bv_coarse_resize)
            #show_image_slice(bbox_bv_label)
            #show_image_slice(bbox_bv)
            #show_image_slice(bbox_image)

            bbox_concat = torch.cat([bbox_bv, bbox_image], dim=1)

            refine_out = refine_model(bbox_concat)
            loss = dice_loss(refine_out, bbox_bv_label)
            training_losses.append(loss)
            
            torch.cuda.empty_cache()
            
        # calculate loss
        avg_loss = sum(training_losses) / BATCH_SIZE
        training_losses = []
        
        print(avg_loss)
        epoch_loss += avg_loss.item()
        # record minibatch loss to epoch loss
        
        optimizer.zero_grad()
        # set the model parameter gradient to zero
        
        avg_loss.backward()
        # calculate the gradient wrt loss
        optimizer.step()
        # take a gradient descent step
        
    outstr = 'Epoch {0} finished ! Training Loss: {1:.4f}'.format(e, epoch_loss/(t+1)) + '\n'
    
    logger['train'].append(epoch_loss/(t+1))
    
    print(outstr)
    record.write(outstr)
    record.flush()

    if (e <= 100 and e%5 == 0) or (e > 100 and e%2 == 0):
    # do validation every 5 epoches
        deeplab.eval()
        refine_model.eval()
        # set model flag to eval
        # 1. disable dropout
        # 2. batchnorm behave differs

        with torch.no_grad():
        # stop taking gradient
        
            #valloss_4 = 0
            #valloss_2 = 0
            valloss_1 = 0
            
            for v, vbatch in enumerate(validation_loader):
            # iter over validation mini batches
                val_losses = []
                for minibatch in range(BATCH_SIZE):
                    image_1 = vbatch['image1_data'][minibatch].to(device=device, dtype=dtype)
                    image_1 = image_1.view(1,1,256,256,256)

                    label_1 = vbatch['image1_label'][minibatch].to(device=device, dtype=dtype)
                    label_1 = label_1.view(1,3,256,256,256)

                    bv_label = label_1[:, 2, :, :, :]
                    bv_label = bv_label.view(1,1,256,256,256)

                    original_res = [a[minibatch].item() for a in vbatch['original_resolution']]

                    # Get coarse output from deeplab model from 256 resolution input
                    out_coarse = deeplab(image_1)
                    out_coarse = out_coarse.view(1,3,256,256,256)

                    bv_coarse = out_coarse[:, 2, :, :, :]
                    bv_coarse = bv_coarse.view(1,1,256,256,256)
                    bv_coarse_binarized = binarize_output(bv_coarse)

                    bv_x, bv_y, bv_z = loadbvcenter(bv_label.view(1, 256, 256, 256))
                    bbox_bv_label = bv_label.view(256, 256, 256)[bv_x-64:bv_x+64, bv_y-64:bv_y+64, bv_z-64:bv_z+64]
                    bbox_bv_label = reshape_image(bbox_bv_label.squeeze(), 128, 128, 128).to(device, dtype)
                    bbox_bv_label = bbox_bv_label.view(1,1,128,128,128)
                    
                    pred_x, pred_y, pred_z = loadbvcenter(bv_coarse_binarized.view(1,256,256,256))
                    bbox_bv = bv_coarse.view(256, 256, 256)[pred_x-64:pred_x+64, pred_y-64:pred_y+64, pred_z-64:pred_z+64]
                    #print(bbox_bv.shape)
                    bbox_bv = reshape_image(bbox_bv.squeeze(), 128, 128, 128).to(device, dtype)
                    bbox_bv = bbox_bv.view(1,1,128,128,128)

                    #bbox_image = get_bounding_box_image(image_1, (256,256,256)).to(device, dtype)
                    bbox_image = image_1[:, :, pred_x-64:pred_x+64, pred_y-64:pred_y+64, pred_z-64:pred_z+64]
                    bbox_image = reshape_image(bbox_image.squeeze(), 128, 128, 128).to(device, dtype)
                    bbox_image = bbox_image.view(1, 1, 128, 128, 128)
                    #print(bbox_image.shape)

                    #show_image_slice(image_1)
                    #show_image_slice(image_1_resize)
                    #show_image_slice(label_1)
                    #show_image_slice(bv_label)
                    #show_image_slice(bv_label_resize)
                    #show_image_slice(out_coarse)
                    #show_image_slice(bv_coarse)
                    #show_image_slice(bv_coarse_resize)
                    #show_image_slice(bbox_bv_label)
                    #show_image_slice(bbox_bv)
                    #show_image_slice(bbox_image)

                    bbox_concat = torch.cat([bbox_bv, bbox_image], dim=1)

                    refine_out = refine_model(bbox_concat)
                        
                    loss = dice_loss(refine_out, bbox_bv_label)
                    val_losses.append(loss)
                
                avg_loss = sum(val_losses) / BATCH_SIZE
                val_losses = []
                print(avg_loss)
            
                # calculate loss
                valloss_1 += avg_loss.item()
                torch.cuda.empty_cache()
            
            avg_val_loss = (valloss_1 / (v+1))
            outstr = '------- 1st valloss={0:.4f}'\
                .format(avg_val_loss) + '\n'
            
            logger['validation_1'].append(avg_val_loss)
            #scheduler.step(avg_val_loss)
            
            if avg_val_loss < min_val:
                save_1('refine_bv_nodown_resize_save', refine_model, optimizer, logger, e, scheduler)
            elif e % 10 == 0:
                save_1('refine_bv2_nodown_resize_save', refine_model, optimizer, logger, e, scheduler)
            
            print(outstr)
            record.write(outstr)
            record.flush()
    


record.close()

  0%|          | 0/4999 [00:00<?, ?it/s]

tensor(0.9715, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9660, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8324, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9037, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8356, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8612, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8152, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9538, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8322, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7555, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7664, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6831, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8186, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7641, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7844, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7061, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)


  0%|          | 1/4999 [05:31<460:03:49, 331.38s/it]

Epoch 1 finished ! Training Loss: 0.8185

tensor(0.6249, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6841, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8223, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7385, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8501, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8137, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7100, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7892, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7846, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6606, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6718, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8127, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7502, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6670, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8400, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7350,

  0%|          | 2/4999 [10:56<457:33:25, 329.64s/it]

Epoch 2 finished ! Training Loss: 0.7746

tensor(0.8420, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7943, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6432, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7335, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8171, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7651, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6882, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7564, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7156, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7653, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6266, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7384, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8334, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6709, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8167, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6644, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7

  0%|          | 3/4999 [16:22<455:34:05, 328.27s/it]

Epoch 3 finished ! Training Loss: 0.7628

tensor(0.7515, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6530, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7679, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7310, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7680, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7029, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7881, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7961, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7698, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6800, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7879, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6580, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7848, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6359, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6449, dev

  0%|          | 4/4999 [21:49<455:03:16, 327.97s/it]

Epoch 4 finished ! Training Loss: 0.7654

tensor(0.7862, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7307, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7741, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7754, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8910, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7773, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8313, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6982, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8355, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7545, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6573, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.5868, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7200, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7506, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8127, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8340, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7632, device='cuda:0', grad_fn=<DivBackward0>)
tensor

tensor(0.7399, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7194, device='cuda:0')
tensor(0.8208, device='cuda:0')
tensor(0.7126, device='cuda:0')
tensor(0.6367, device='cuda:0')
tensor(0.7681, device='cuda:0')
tensor(0.7440, device='cuda:0')
tensor(0.7228, device='cuda:0')
tensor(0.7349, device='cuda:0')
tensor(0.7034, device='cuda:0')
tensor(0.7377, device='cuda:0')
tensor(0.6684, device='cuda:0')
tensor(0.8053, device='cuda:0')
tensor(0.7454, device='cuda:0')
tensor(0.7306, device='cuda:0')
tensor(0.6971, device='cuda:0')
tensor(0.6808, device='cuda:0')
tensor(0.7697, device='cuda:0')
tensor(0.8633, device='cuda:0')
tensor(0.6303, device='cuda:0')
tensor(0.6783, device='cuda:0')
tensor(0.6702, device='cuda:0')
tensor(0.7668, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7692, device='cuda:0')
tensor(0.7195, device='cuda:0')
tensor(0.7475, device='cuda:0')
tensor(0.7383, device='cuda:0')


  0%|          | 5/4999 [28:17<479:58:01, 345.99s/it]

tensor(0.8036, device='cuda:0')
Checkpoint 5 saved !
------- 1st valloss=0.7452

tensor(0.6674, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6924, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.5961, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7490, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8306, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6198, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6660, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7730, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6716, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7400, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7723, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7064, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8837, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7722, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8110, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6533, device='cuda

  0%|          | 6/4999 [33:44<472:06:36, 340.40s/it]

Epoch 6 finished ! Training Loss: 0.7516

tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7781, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7616, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7185, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7176, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7096, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8270, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7108, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8013, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7567, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6828, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8248, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7306, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7572, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7597, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7569, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7469,

  0%|          | 7/4999 [39:13<467:01:43, 336.80s/it]

Epoch 7 finished ! Training Loss: 0.7570

tensor(0.7896, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7044, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8051, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6290, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6950, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7982, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6858, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7693, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7985, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7511, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8082, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6601, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6309, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7458, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6590, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8911, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7533, device='cuda:0', grad_fn=<DivBackward0>)
tensor

  0%|          | 8/4999 [44:40<463:07:38, 334.05s/it]

Epoch 8 finished ! Training Loss: 0.7652

tensor(0.8457, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7078, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6841, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6915, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8014, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6824, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6269, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7333, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7545, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7935, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7658, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6280, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6567, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7368, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6335, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7523,

  0%|          | 9/4999 [50:08<460:34:25, 332.28s/it]

Epoch 9 finished ! Training Loss: 0.7651

tensor(0.6657, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8498, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7985, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6668, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6870, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6571, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6696, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8148, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7933, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6664, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7955, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7441, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7922, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7304, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7513, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6399, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8

tensor(0.6950, device='cuda:0')
tensor(0.7522, device='cuda:0')
tensor(0.8105, device='cuda:0')
tensor(0.6993, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.6712, device='cuda:0')
tensor(0.7205, device='cuda:0')
tensor(0.7361, device='cuda:0')
tensor(0.7790, device='cuda:0')
tensor(0.7555, device='cuda:0')
tensor(0.6893, device='cuda:0')
tensor(0.8187, device='cuda:0')
tensor(0.7041, device='cuda:0')
tensor(0.8353, device='cuda:0')
tensor(0.7007, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7527, device='cuda:0')
tensor(0.7596, device='cuda:0')
tensor(0.6941, device='cuda:0')
tensor(0.6419, device='cuda:0')
tensor(0.6305, device='cuda:0')
tensor(0.8458, device='cuda:0')
tensor(0.6863, device='cuda:0')
tensor(0.8025, device='cuda:0')
tensor(0.6604, device='cuda:0')
tensor(0.7758, device='cuda:0')
tensor(0.7146, device='cuda:0')
tensor(0.7660, device='cuda:0')
tensor(0.6557, device='cuda:0')
tensor(0.8072, device='cuda:0')
tensor(0.6324, device='cuda:0')


  0%|          | 10/4999 [56:38<484:23:13, 349.53s/it]

Checkpoint 10 saved !
------- 1st valloss=0.7547

tensor(0.8604, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7739, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7920, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7689, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8186, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8127, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7125, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7914, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7791, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6655, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6274, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7587, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7242, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7172, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6580, device='cuda:0', grad_fn=<DivBackward0>)
tensor

  0%|          | 11/4999 [1:02:07<475:40:26, 343.31s/it]

Epoch 11 finished ! Training Loss: 0.7580

tensor(0.7025, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7978, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8174, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8534, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7343, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7166, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8060, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6410, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6984, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7672, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7328, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7790, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7830, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6238, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6936, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7135, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

  0%|          | 12/4999 [1:07:33<468:25:13, 338.14s/it]

Epoch 12 finished ! Training Loss: 0.7586

tensor(0.7150, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6655, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7779, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8233, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8466, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7778, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8212, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6793, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6652, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8138, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7319, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7152, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7753, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6391, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7452, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6690, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

  0%|          | 13/4999 [1:13:03<464:56:50, 335.70s/it]

Epoch 13 finished ! Training Loss: 0.7522

tensor(0.7653, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7191, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8335, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6258, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8525, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8531, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6739, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7908, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6125, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7716, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8338, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8163, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7469, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7370, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7742, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7285, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

  0%|          | 14/4999 [1:18:30<461:18:05, 333.14s/it]

Epoch 14 finished ! Training Loss: 0.7605

tensor(0.6709, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7174, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7201, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7224, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8104, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7198, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6734, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6415, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7363, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6826, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7697, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6786, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7272, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7397, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6224, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6392, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7697, device='cuda:0', grad_fn=<DivBackward0>)
tenso

tensor(0.8741, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7605, device='cuda:0')
tensor(0.8506, device='cuda:0')
tensor(0.7730, device='cuda:0')
tensor(0.7123, device='cuda:0')
tensor(0.6442, device='cuda:0')
tensor(0.6710, device='cuda:0')
tensor(0.6931, device='cuda:0')
tensor(0.7602, device='cuda:0')
tensor(0.7404, device='cuda:0')
tensor(0.7265, device='cuda:0')
tensor(0.7933, device='cuda:0')
tensor(0.7878, device='cuda:0')
tensor(0.8055, device='cuda:0')
tensor(0.7611, device='cuda:0')
tensor(0.7351, device='cuda:0')
tensor(0.8175, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7063, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8255, device='cuda:0')
tensor(0.8382, device='cuda:0')
tensor(0.6890, device='cuda:0')
tensor(0.7282, device='cuda:0')
tensor(0.7424, device='cuda:0')
tensor(0.8285, device='cuda:0')
tensor(0.7048, device='cuda:0')
tensor(0.6995, device='cuda:0')
tensor(0.8562, device='cuda:0')
tensor(0.7107, device='cuda:0')


  0%|          | 15/4999 [1:25:02<485:28:24, 350.66s/it]

Checkpoint 15 saved !
------- 1st valloss=0.7692

tensor(0.8101, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7334, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7781, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7878, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7641, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7517, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6729, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7253, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8034, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7332, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6058, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7448, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6484, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7064, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7738, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8033, device='cuda:0', grad_fn=<DivBackward0>)
te

  0%|          | 16/4999 [1:30:29<475:39:58, 343.65s/it]

Epoch 16 finished ! Training Loss: 0.7586

tensor(0.6607, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8188, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6260, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7818, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6621, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7905, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6638, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7224, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6776, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6624, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6976, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7347, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7420, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7462, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7633, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6826, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6429, device='cuda:0', grad_fn=<DivBackward0>)
tenso

  0%|          | 17/4999 [1:35:57<469:05:37, 338.97s/it]

Epoch 17 finished ! Training Loss: 0.7495

tensor(0.8210, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6214, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7753, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6791, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6767, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7667, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7897, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8459, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8047, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8154, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7396, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6439, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8032, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6693, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7269, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7640, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

  0%|          | 18/4999 [1:41:24<464:02:31, 335.38s/it]

Epoch 18 finished ! Training Loss: 0.7553

tensor(0.6427, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7741, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6651, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7122, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7536, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6386, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6927, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7317, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8253, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6843, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7602, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8228, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7153, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6474, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6446, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7888, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

  0%|          | 19/4999 [1:46:52<460:51:35, 333.15s/it]

Epoch 19 finished ! Training Loss: 0.7576

tensor(0.8080, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6945, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7632, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.5968, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6846, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7691, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7980, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6893, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7238, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6604, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6577, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6567, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7664, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6393, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7816, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7069, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

tensor(0.8141, device='cuda:0')
tensor(0.7424, device='cuda:0')
tensor(0.7982, device='cuda:0')
tensor(0.8067, device='cuda:0')
tensor(0.8219, device='cuda:0')
tensor(0.8448, device='cuda:0')
tensor(0.6871, device='cuda:0')
tensor(0.8527, device='cuda:0')
tensor(0.7757, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8339, device='cuda:0')
tensor(0.7843, device='cuda:0')
tensor(0.7716, device='cuda:0')
tensor(0.8435, device='cuda:0')
tensor(0.7682, device='cuda:0')
tensor(0.8284, device='cuda:0')
tensor(0.8145, device='cuda:0')
tensor(0.7448, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8736, device='cuda:0')
tensor(0.8463, device='cuda:0')
tensor(0.7661, device='cuda:0')
tensor(0.8489, device='cuda:0')
tensor(0.7513, device='cuda:0')
tensor(0.7291, device='cuda:0')
tensor(0.8438, device='cuda:0')
tensor(0.7417, device='cuda:0')
tensor(0.8070, device='cuda:0')
tensor(0.8564, device='cuda:0')
tensor(0.7895, device='cuda:0')
tensor(0.7446, device='cuda:0')


  0%|          | 20/4999 [1:53:24<485:08:16, 350.77s/it]

Checkpoint 20 saved !
------- 1st valloss=0.8106

tensor(0.7813, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7696, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7267, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6686, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7004, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6712, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7450, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8263, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8184, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6288, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7625, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6637, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7264, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6094, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7

  0%|          | 21/4999 [1:58:50<474:58:37, 343.49s/it]

Epoch 21 finished ! Training Loss: 0.7639

tensor(0.6899, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7225, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7852, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7822, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7548, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7382, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8069, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7978, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6908, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7989, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7638, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6682, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7197, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7415, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7061, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7020, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8312, device='cuda:0', grad_fn=<DivBackward0>)
tenso

  0%|          | 22/4999 [2:04:17<467:53:08, 338.43s/it]

Epoch 22 finished ! Training Loss: 0.7563

tensor(0.7503, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8111, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8375, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7731, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7686, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6672, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7717, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7966, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7540, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7815, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7976, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7736, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6571, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7331, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6216, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6828, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7888, device='cuda:0', grad_fn=<DivBackward0>)
tenso

  0%|          | 23/4999 [2:09:45<463:36:40, 335.41s/it]

Epoch 23 finished ! Training Loss: 0.7603

tensor(0.7468, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7336, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8145, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7726, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6364, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6438, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6914, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6404, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6816, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7438, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7318, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6693, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7658, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8753, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8192, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6608, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

  0%|          | 24/4999 [2:15:12<459:59:48, 332.86s/it]

Epoch 24 finished ! Training Loss: 0.7535

tensor(0.7392, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7184, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8166, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8140, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6564, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7353, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6987, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7652, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7311, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6910, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6955, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8093, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6590, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6985, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6768, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7744, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

tensor(0.7734, device='cuda:0')
tensor(0.7514, device='cuda:0')
tensor(0.7195, device='cuda:0')
tensor(0.8260, device='cuda:0')
tensor(0.7071, device='cuda:0')
tensor(0.7116, device='cuda:0')
tensor(0.8015, device='cuda:0')
tensor(0.8100, device='cuda:0')
tensor(0.6872, device='cuda:0')
tensor(0.7428, device='cuda:0')
tensor(0.8025, device='cuda:0')
tensor(0.7596, device='cuda:0')
tensor(0.8511, device='cuda:0')
tensor(0.7784, device='cuda:0')
tensor(0.8193, device='cuda:0')
tensor(0.7418, device='cuda:0')
tensor(0.7124, device='cuda:0')
tensor(0.8043, device='cuda:0')
tensor(0.7537, device='cuda:0')
tensor(0.7693, device='cuda:0')
tensor(0.6850, device='cuda:0')
tensor(0.7228, device='cuda:0')
tensor(0.7116, device='cuda:0')
tensor(0.7895, device='cuda:0')
tensor(0.6949, device='cuda:0')
tensor(0.7251, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7712, device='cuda:0')
tensor(0.8095, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8299, devic

  1%|          | 25/4999 [2:21:42<483:29:02, 349.93s/it]

Checkpoint 25 saved !
------- 1st valloss=0.7860

tensor(0.7384, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7428, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8767, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6745, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6845, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7626, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7615, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7575, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6270, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6604, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7342, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7691, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7103, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6680, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7609, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6500, device='cuda:0', grad_fn=<DivBackward0>)
te

  1%|          | 26/4999 [2:27:10<474:03:56, 343.18s/it]

Epoch 26 finished ! Training Loss: 0.7560

tensor(0.6687, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6316, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7404, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6112, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7635, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6465, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7211, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7841, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7629, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7072, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7556, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7086, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8198, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7449, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6489, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7989, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7730, device='cuda:0', grad_fn=<DivBackward0>)
tenso

  1%|          | 27/4999 [2:32:39<468:17:42, 339.07s/it]

Epoch 27 finished ! Training Loss: 0.7570

tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8175, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7863, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7692, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6819, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7581, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7708, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7919, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6715, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7370, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8369, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7879, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7280, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6840, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8011, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6474, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7699, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.

  1%|          | 28/4999 [2:38:08<464:09:53, 336.15s/it]

Epoch 28 finished ! Training Loss: 0.7578

tensor(0.7740, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6714, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8565, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6500, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7873, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8193, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7308, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7316, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7771, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6005, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7085, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7670, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6552, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7236, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7783, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6608

  1%|          | 29/4999 [2:43:38<461:30:37, 334.29s/it]

Epoch 29 finished ! Training Loss: 0.7492

tensor(0.7521, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7286, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6815, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7813, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7636, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6440, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6219, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6403, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6795, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7768, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6704, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6575, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7550, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8909, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8732, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8090

tensor(0.6938, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.6971, device='cuda:0')
tensor(0.7132, device='cuda:0')
tensor(0.7489, device='cuda:0')
tensor(0.7643, device='cuda:0')
tensor(0.6841, device='cuda:0')
tensor(0.6879, device='cuda:0')
tensor(0.7275, device='cuda:0')
tensor(0.8220, device='cuda:0')
tensor(0.7440, device='cuda:0')
tensor(0.7743, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7126, device='cuda:0')
tensor(0.7041, device='cuda:0')
tensor(0.7284, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8196, device='cuda:0')
tensor(0.7504, device='cuda:0')
tensor(0.6615, device='cuda:0')
tensor(0.7698, device='cuda:0')
tensor(0.6963, device='cuda:0')
tensor(0.8214, device='cuda:0')
tensor(0.6646, device='cuda:0')
tensor(0.7614, device='cuda:0')
tensor(0.6611, device='cuda:0')
tensor(0.6731, device='cuda:0')
tensor(0.7341, device='cuda:0')
tensor(0.7442, device='cuda:0')
tensor(0.6560, device='cuda:0')
tensor(0.7493, device='cuda:0')
tensor(0.7558, devic

  1%|          | 30/4999 [2:50:08<484:31:58, 351.04s/it]

Checkpoint 30 saved !
------- 1st valloss=0.7561

tensor(0.6836, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6149, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8416, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7271, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8330, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7099, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7184, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7038, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7283, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6905, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7342, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7791, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7554, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6521, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7740, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6573, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
te

  1%|          | 31/4999 [2:55:34<474:06:04, 343.55s/it]

Epoch 31 finished ! Training Loss: 0.7491

tensor(0.5945, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6392, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6683, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7765, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6649, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7527, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7762, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7307, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8085, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7733, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6167, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6442, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6315, device='cuda:0', grad_fn=<DivBackward0>)


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(0.7454, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6393, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7827, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7524, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6412, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8230, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7873, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6881, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7257, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7395, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7357, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7160, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6255, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8234, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7105, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6377, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7327, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7827, device='cuda:0', grad_fn=<DivBack

  2%|▏         | 100/4999 [9:27:08<476:33:17, 350.19s/it]

Checkpoint 100 saved !
------- 1st valloss=0.8047

tensor(0.7742, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8254, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6748, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6547, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7569, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8235, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7732, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6693, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7634, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6559, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6682, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7054, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6725, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7372, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6733, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6774, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
t

  2%|▏         | 101/4999 [9:32:36<467:18:12, 343.47s/it]

Epoch 101 finished ! Training Loss: 0.7562

tensor(0.8002, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7372, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6105, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7743, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7688, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7229, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7753, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6185, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6362, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6648, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6416, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6879, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8053, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6531, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7857, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8060, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7064, device='cuda:0', grad_fn=<DivBackward0>)
tens

tensor(0.8826, device='cuda:0')
tensor(0.9997, device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(0.7528, device='cuda:0')
tensor(0.7785, device='cuda:0')
tensor(0.9924, device='cuda:0')
tensor(0.9800, device='cuda:0')
tensor(0.9920, device='cuda:0')
tensor(0.7743, device='cuda:0')
tensor(0.9931, device='cuda:0')
tensor(0.9368, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7056, device='cuda:0')
tensor(0.9997, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8824, device='cuda:0')
tensor(0.9941, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9441, device='cuda:0')
tensor(0.8282, device='cuda:0')
tensor(0.9387, device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(0.9941, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9828, device='cuda:0')
tensor(0.8113, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9936, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8138, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7846, device='cuda:0')


  2%|▏         | 102/4999 [9:39:06<486:27:53, 357.62s/it]

Checkpoint 102 saved !
------- 1st valloss=0.9365

tensor(0.6551, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7227, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6904, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6707, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6085, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7767, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7863, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6447, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7212, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7144, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7245, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6592, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7208, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6332, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6308, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6301, device='cuda:0', grad_fn=<DivBackward0>)
t

  2%|▏         | 103/4999 [9:44:33<473:41:23, 348.30s/it]

Epoch 103 finished ! Training Loss: 0.7562

tensor(0.8228, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7380, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7654, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7570, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6677, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7074, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7453, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7544, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6716, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7702, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7778, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7600, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8123, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6195, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6115, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6507, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7952, device='cuda:0', grad_fn=<DivBackward0>)
tens

tensor(1., device='cuda:0')
tensor(0.9211, device='cuda:0')
tensor(0.8852, device='cuda:0')
tensor(0.8483, device='cuda:0')
tensor(0.8218, device='cuda:0')
tensor(0.8144, device='cuda:0')
tensor(0.7229, device='cuda:0')
tensor(0.8188, device='cuda:0')
tensor(0.7388, device='cuda:0')
tensor(0.8085, device='cuda:0')
tensor(0.7541, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8141, device='cuda:0')
tensor(0.8494, device='cuda:0')
tensor(0.6796, device='cuda:0')
tensor(0.7446, device='cuda:0')
tensor(0.8446, device='cuda:0')
tensor(0.6571, device='cuda:0')
tensor(0.7797, device='cuda:0')
tensor(0.8340, device='cuda:0')
tensor(0.6701, device='cuda:0')
tensor(0.7724, device='cuda:0')
tensor(0.7652, device='cuda:0')
tensor(0.8131, device='cuda:0')
tensor(0.7329, device='cuda:0')
tensor(0.7842, device='cuda:0')
tensor(0.8229, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8128, device='cuda:0')
tensor(0.8473, device='cuda:0')
tensor(0.6618, device='c

  2%|▏         | 104/4999 [9:51:06<491:54:05, 361.77s/it]

Checkpoint 104 saved !
------- 1st valloss=0.8028

tensor(0.7442, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8022, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7709, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6617, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7300, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7249, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6134, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6861, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7198, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7622, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6292, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7648, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6832, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6695, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6263, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7585, device='cuda:0', grad_fn=<DivBackward0>)
t

  2%|▏         | 105/4999 [9:56:35<478:30:07, 351.98s/it]

Epoch 105 finished ! Training Loss: 0.7463

tensor(0.6421, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7640, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7672, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6791, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7293, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6358, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7501, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8498, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6928, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7096, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6665, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.5877, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7856, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6552, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7624, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6550, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0

tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8858, device='cuda:0')
tensor(0.8107, device='cuda:0')
tensor(0.6994, device='cuda:0')
tensor(0.8432, device='cuda:0')
tensor(0.7119, device='cuda:0')
tensor(0.8617, device='cuda:0')
tensor(0.7315, device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.6996, device='cuda:0')
tensor(0.8818, device='cuda:0')
tensor(0.9173, device='cuda:0')
tensor(0.9990, device='cuda:0')
tensor(0.8496, device='cuda:0')
tensor(0.7864, device='cuda:0')
tensor(0.8073, device='cuda:0')
tensor(0.7390, device='cuda:0')
tensor(0.6943, device='cuda:0')
tensor(0.8196, device='cuda:0')
tensor(0.9280, device='cuda:0')
tensor(0.7676, device='cuda:0')
tensor(0.7843, device='cuda:0')
tensor(0.8162, device='cuda:0')
tensor(0.9329, device='cuda:0')
tensor(0.9985, device='cuda:0')
tensor(0.8752, device='cuda:0')
tensor(0.8706, device='cuda:0')
tensor(0.8571, device='cuda:0')
tensor(0.8094, device='c

  2%|▏         | 106/4999 [10:03:05<493:42:14, 363.24s/it]

Checkpoint 106 saved !
------- 1st valloss=0.8548

tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7015, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6925, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6401, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7911, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6451, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7655, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7777, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6260, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7714, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7428, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8007, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6779, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7878, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7511, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6799, device='cuda:0', grad_fn=<DivBackward0>)
tenso

  2%|▏         | 107/4999 [10:08:35<480:03:59, 353.28s/it]

Epoch 107 finished ! Training Loss: 0.7443

tensor(0.7420, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7753, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8337, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7576, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6448, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6556, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6574, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6412, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6606, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7107, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6642, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6410, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6232, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6397, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6159, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6241, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6561, device='cuda:0', grad_fn=<DivBackward0>)
tens

tensor(0.8234, device='cuda:0')
tensor(0.7570, device='cuda:0')
tensor(0.8598, device='cuda:0')
tensor(0.7866, device='cuda:0')
tensor(0.9581, device='cuda:0')
tensor(0.6945, device='cuda:0')
tensor(0.7589, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7881, device='cuda:0')
tensor(0.7130, device='cuda:0')
tensor(0.8547, device='cuda:0')
tensor(0.8752, device='cuda:0')
tensor(0.7243, device='cuda:0')
tensor(0.8108, device='cuda:0')
tensor(0.7696, device='cuda:0')
tensor(0.8347, device='cuda:0')
tensor(0.6940, device='cuda:0')
tensor(0.7633, device='cuda:0')
tensor(0.9091, device='cuda:0')
tensor(0.8881, device='cuda:0')
tensor(0.9166, device='cuda:0')
tensor(0.8694, device='cuda:0')
tensor(0.7749, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8475, device='cuda:0')
tensor(0.6862, device='cuda:0')
tensor(0.7186, device='cuda:0')
tensor(0.7072, device='cuda:0')
tensor(0.7189, device='cuda:0')
tensor(0.8759, device='cuda:0')
tensor(0.7276, devic

  2%|▏         | 108/4999 [10:15:09<496:47:01, 365.66s/it]

Checkpoint 108 saved !
------- 1st valloss=0.8204

tensor(0.7010, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6062, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7274, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7621, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6427, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6972, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6540, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7533, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7755, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7562, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7864, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6636, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6656, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8182, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7254, device='cuda:0', grad_fn=<DivBackward0>)
tenso

  2%|▏         | 109/4999 [10:20:35<480:17:54, 353.59s/it]

Epoch 109 finished ! Training Loss: 0.7436

tensor(0.7036, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7493, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6118, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6551, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7754, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6496, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7071, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6811, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8607, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7913, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7159, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6960, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8032, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7064, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7096, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6502, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0

tensor(0.7208, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9597, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9981, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8237, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9993, device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7656, device='cuda:0')
tensor(0.9986, device='cuda:0')
tensor(0.9786, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9890, device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(0.8907, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9941, device='cuda:0')
tensor(1.0000, device='cuda:0')
tensor(0.7093, device='cuda:0')
tensor(0.9995, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9572, device='cuda:0')
tensor(0.9816, device='cuda:0')


  2%|▏         | 110/4999 [10:27:06<495:40:47, 364.99s/it]

Checkpoint 110 saved !
------- 1st valloss=0.9524

tensor(0.7622, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7192, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7150, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6597, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7401, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7994, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7067, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6943, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6453, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7766, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6231, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6903, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8328, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6943, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6811, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7020, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6171, device='cuda:0', grad_fn=<DivBackward0

  2%|▏         | 111/4999 [10:32:35<480:36:37, 353.97s/it]

Epoch 111 finished ! Training Loss: 0.7393

tensor(0.7833, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7400, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6832, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7766, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7028, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6299, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6333, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7275, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8554, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7460, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6752, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6378, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7736, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7308, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7531, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6067, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7946, device='cuda:0', grad_fn=<DivBackward0>)
tens

tensor(0.9801, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7731, device='cuda:0')
tensor(0.9947, device='cuda:0')
tensor(0.9856, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9174, device='cuda:0')
tensor(1., device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7632, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7601, device='cuda:0')
tensor(0.9999, device='cuda:0')
tensor(0.8533, device='cuda:0')
tensor(0.7845, device='cuda:0')
tensor(0.9595, device='cuda:0')
tensor(0.8368, device='cuda:0')
tensor(0.9961, device='cuda:0')
tensor(0.7628, device='cuda:0')
tensor(0.9456, device='cuda:0')
tensor(0.9999, device='cuda:0')
tensor(0.9102, device='cuda:0')
tensor(0.9813, device='cuda:0')
tensor(0.9895, device='cuda:0')
tensor(0.8866, device='cuda:0')
tensor(0.8238, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9801, device='cuda:0')
tensor(0.7384, device='cuda:0')
tensor(0.7952, device='cuda:0')
tensor(0.9923, device='cuda:0')


  2%|▏         | 112/4999 [10:39:06<495:46:37, 365.21s/it]

Checkpoint 112 saved !
------- 1st valloss=0.9272

tensor(0.7613, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6780, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7562, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7923, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8202, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6451, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7573, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7253, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8297, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7906, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6996, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7516, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6887, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6212, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7847, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7801, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
t

  2%|▏         | 113/4999 [10:44:36<481:09:59, 354.52s/it]

Epoch 113 finished ! Training Loss: 0.7367

tensor(0.8044, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7208, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8075, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7021, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6675, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7902, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6059, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7211, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7797, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6741, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6606, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6122, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7579, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8105, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6139, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7822, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6352, device='cuda:0', grad_fn=<DivBackward0>)
tens

tensor(0.7098, device='cuda:0')
tensor(0.8961, device='cuda:0')
tensor(0.8719, device='cuda:0')
tensor(0.9887, device='cuda:0')
tensor(0.7617, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7106, device='cuda:0')
tensor(0.8128, device='cuda:0')
tensor(0.8489, device='cuda:0')
tensor(0.8642, device='cuda:0')
tensor(0.8211, device='cuda:0')
tensor(0.7784, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7734, device='cuda:0')
tensor(0.6695, device='cuda:0')
tensor(0.7727, device='cuda:0')
tensor(0.6857, device='cuda:0')
tensor(0.6998, device='cuda:0')
tensor(0.7539, device='cuda:0')
tensor(0.6993, device='cuda:0')
tensor(0.6909, device='cuda:0')
tensor(0.6980, device='cuda:0')
tensor(0.7694, device='cuda:0')
tensor(0.6514, device='cuda:0')
tensor(0.7628, device='cuda:0')
tensor(0.7649, device='cuda:0')
tensor(0.7841, device='cuda:0')
tensor(0.7813, device='cuda:0')
tensor(0.7626, device='cuda:0')
tensor(0.8245, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8762, devic

  2%|▏         | 114/4999 [10:51:09<496:58:33, 366.25s/it]

Checkpoint 114 saved !
------- 1st valloss=0.8046

tensor(0.7508, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8042, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6828, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7918, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6702, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7314, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7722, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6677, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7449, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7283, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6808, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6016, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7434, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7505, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7433, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6343, device='cuda:0', grad_fn=<DivBackward0>)
t

  2%|▏         | 115/4999 [10:56:38<481:38:21, 355.02s/it]

Epoch 115 finished ! Training Loss: 0.7351

tensor(0.7579, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8796, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7349, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7470, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6816, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6448, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7086, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7879, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6417, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7313, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6165, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8024, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6106, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6610, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6655, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7228, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6514, device='cuda:0', grad_fn=<DivBackward0>)
tens

tensor(1., device='cuda:0')
tensor(0.7757, device='cuda:0')
tensor(0.7613, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9275, device='cuda:0')
tensor(0.8926, device='cuda:0')
tensor(0.9359, device='cuda:0')
tensor(0.7835, device='cuda:0')
tensor(0.7616, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.7621, device='cuda:0')
tensor(0.7678, device='cuda:0')
tensor(0.7932, device='cuda:0')
tensor(0.9388, device='cuda:0')
tensor(0.9995, device='cuda:0')
tensor(0.8804, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.9390, device='cuda:0')
tensor(0.8528, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.6645, device='cuda:0')
tensor(0.8057, device='cuda:0')
tensor(0.7522, device='cuda:0')
tensor(0.8391, device='cuda:0')
tensor(0.7451, device='cuda:0')
tensor(0.9984, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8922, device='cuda:0')
tensor(0.7911, device='cuda:0')
tensor(0.9911, device='cuda:0')
tensor(0.7810, device='cuda:0')


  2%|▏         | 116/4999 [11:03:09<496:17:50, 365.90s/it]

Checkpoint 116 saved !
------- 1st valloss=0.8574

tensor(0.8267, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7455, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7966, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6300, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6438, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6469, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6647, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6084, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7316, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7581, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7528, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6729, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.5723, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.5860, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7752, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6227, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6868, device='cuda:0', grad_fn=<DivBackward0

  2%|▏         | 117/4999 [11:08:40<481:43:34, 355.23s/it]

Epoch 117 finished ! Training Loss: 0.7528

tensor(0.6195, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8028, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7848, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6742, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6031, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7208, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6742, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6144, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7328, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7324, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6469, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6811, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6563, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6297, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6994, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6793, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(0.7955, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6623, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7288, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7758, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6155, device='cuda:0', grad_fn=<DivBackward0>)


  3%|▎         | 153/4999 [14:44:31<478:31:03, 355.48s/it]

Epoch 153 finished ! Training Loss: 0.7310

tensor(0.7861, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7342, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7441, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7504, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6794, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6069, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7533, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6679, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7167, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6939, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.8084, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7021, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6763, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7265, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7649, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6691, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0

tensor(0.8255, device='cuda:0')
tensor(0.6969, device='cuda:0')
tensor(0.8824, device='cuda:0')
tensor(0.7046, device='cuda:0')
tensor(0.8021, device='cuda:0')
tensor(0.5992, device='cuda:0')
tensor(0.7414, device='cuda:0')
tensor(0.8907, device='cuda:0')
tensor(0.7858, device='cuda:0')
tensor(0.8439, device='cuda:0')
tensor(0.6797, device='cuda:0')
tensor(0.7076, device='cuda:0')
tensor(0.8167, device='cuda:0')
tensor(0.7624, device='cuda:0')
tensor(0.8288, device='cuda:0')
tensor(0.8016, device='cuda:0')
tensor(0.7898, device='cuda:0')
tensor(0.6613, device='cuda:0')
tensor(1., device='cuda:0')
tensor(0.8826, device='cuda:0')
tensor(0.6756, device='cuda:0')
tensor(0.7595, device='cuda:0')
tensor(0.8134, device='cuda:0')
tensor(0.7795, device='cuda:0')
tensor(0.7710, device='cuda:0')
tensor(0.8828, device='cuda:0')
tensor(0.6374, device='cuda:0')
tensor(0.8278, device='cuda:0')
tensor(0.9846, device='cuda:0')
tensor(0.8523, device='cuda:0')
tensor(0.8597, device='cuda:0')


  3%|▎         | 154/4999 [14:51:03<493:15:50, 366.51s/it]

Checkpoint 154 saved !
------- 1st valloss=0.8033

tensor(0.6172, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6953, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6119, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6712, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7745, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7028, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7448, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6602, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6667, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7119, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7750, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.7498, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6555, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.5828, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.6030, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<DivBackward0>)
tenso

In [None]:
print(image_1_resize.shape)

In [None]:
show_image_slice(image_1)
show_image_slice(label_1)
show_image_slice(bbox_bv_label)
show_image_slice(out_coarse)
show_image_slice(bbox_bv)
show_image_slice(bbox_image)

In [None]:
b = loadbvcenter(image_1.squeeze(0).cpu().detach().numpy())
x1 = int(b[0])
y1 = int(b[1])
z1 = int(b[2])
show_image_slice(bv_label[x1-64:x1+64, y1-64:y1+64, z1-64:z1+64])
c = find_bv_centroid(bv_label)
x2 = int(c[0])
y2 = int(c[1])
z2 = int(c[2])
show_image_slice(bv_label[x2-64:x2+64, y2-64:y2+64, z2-64:z2+64])

In [None]:
print(bv_coarse.shape)
print("bbox_bv_label", bbox_bv_label.shape)
print("bbox_bv", bbox_bv.shape)
print("bbox_image", bbox_image.shape)

In [None]:
deeplab.eval()

with torch.no_grad():
    
    bgloss = 0
    bdloss = 0
    bvloss = 0
    
    for v, vbatch in tqdm(enumerate(validation_loader)):
        # move data to device, convert dtype to desirable dtype
        image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
        label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)

        output = deeplab(image_1)
        # do the inference
        output_numpy = output.cpu().numpy()
        
        
        #out_1 = torch.round(output)
        out_1 = torch.from_numpy((output_numpy == output_numpy.max(axis=1)[:, None]).astype(int)).to(device=device, dtype=dtype)
        loss_1 = dice_loss_3(out_1, label_1)

        bg, bd, bv = dice_loss_3_debug(out_1, label_1)
        # calculate loss
        print(bg.item(), bd.item(), bv.item(), loss_1.item())
        bgloss += bg.item()
        bdloss += bd.item()
        bvloss += bv.item()
        
        if bv.item() >= 0.2 or bd.item() >= 0.1:
            show_image_slice(image_1)
            show_image_slice(label_1)
            show_image_slice(output)

    outstr = '------- background loss = {0:.4f}, body loss = {1:.4f}, bv loss = {2:.4f}'\
        .format(bgloss/(v+1), bdloss/(v+1), bvloss/(v+1)) + '\n'
    print(outstr)