In [1]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_utility import *
from data_utils import *
from loss import *
from train import *
from deeplab_model.deeplab import *
from dense_vnet.DenseVNet import DenseVNet
from sync_batchnorm import convert_model
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# ---- Run-time configuration -------------------------------------------------
USE_GPU = True      # set to False to force CPU even when CUDA is present
NUM_WORKERS = 12    # passed to the DataLoaders as num_workers
BATCH_SIZE = 2      # passed to the DataLoaders as batch_size

# Single precision: half the memory footprint of float64.
dtype = torch.float32

if not (USE_GPU and torch.cuda.is_available()):
    device = torch.device('cpu')
    print('using CPU for training')
else:
    device = torch.device("cuda:0")

    # Enable cuDNN and let it benchmark candidate algorithms to pick the
    # fastest one for the shapes it sees.
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    print('using GPU for training')

using GPU for training


In [3]:
# Training set: augmentation is applied on the fly through the transform
# pipeline (random affine followed by random flip).
# NOTE(review): the meaning of the arguments (90, 15) and 0.5 is defined by the
# project helpers `random_affine` / `random_filp` (sic) -- confirm there.
train_dataset = get_full_resolution_dataset(
    data_type='nii_train',
    transform=transforms.Compose([
        random_affine(90, 15),
        random_filp(0.5),
    ]),
)

# Validation set: no augmentation, the data is evaluated as stored.
validation_dataset = get_full_resolution_dataset(
    data_type='nii_test',
    transform=None,
)

# The loaders take care of batching and load samples in NUM_WORKERS worker
# processes.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Validation data is NOT shuffled: the epoch-average loss does not depend on
# the order, and a fixed order keeps the per-batch losses printed during
# validation comparable from one epoch to the next.
validation_loader = DataLoader(
    validation_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)
# loaders come with auto batch division and multi-thread acceleration

In [4]:
from vnet import VNet
from bv_refinement_network.RefinementModel import RefinementModel_ELU
from refinenet import refine_net

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs

# ---- Refinement network (the model being trained) ---------------------------
# map_location='cpu' makes the checkpoint loadable on a machine without CUDA
# (the config cell falls back to CPU) and avoids keeping a second copy of the
# weights on the GPU; load_state_dict() copies the values into the model and
# Optimizer.load_state_dict() moves its state onto the parameters' device.
checkpoint_refine = torch.load(
    '../refine_bv_resize_save/2019-08-22 00:10:23.854113 epoch: 41.pth',
    map_location='cpu',
)

refine_model = refine_net(num_classes=1)
# The refinement net is deliberately left un-wrapped; the lines below were
# disabled by the original author.
#refine_model = nn.DataParallel(refine_model)
#refine_model = convert_model(refine_model)

refine_model.load_state_dict(checkpoint_refine['state_dict_1'])

# Move the model to the target device BEFORE building the optimizer so the
# optimizer state is created for the final parameter tensors.
refine_model = refine_model.to(device, dtype)

optimizer = optim.Adam(refine_model.parameters(), lr=1e-3)
optimizer.load_state_dict(checkpoint_refine['optimizer'])

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)
scheduler.load_state_dict(checkpoint_refine['scheduler'])

# ---- Coarse segmentation network (fixed, used only for inference) -----------
# The wrapping order (DataParallel, then convert_model) is kept as in the
# original so the state-dict keys match the saved checkpoint.
deeplab = DeepLab(output_stride=16)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)

checkpoint = torch.load(
    '../deeplab_dilated_save/2019-08-10 09:28:43.844872 epoch: 1160.pth',
    map_location='cpu',
)  # best one

deeplab.load_state_dict(checkpoint['state_dict_1'])
deeplab = deeplab.to(device, dtype)

# This notebook never optimises deeplab (the optimizer only holds
# refine_model's parameters), so freeze it: without this every training step
# records an autograd graph through deeplab and backpropagates into it.
deeplab.eval()
for frozen_param in deeplab.parameters():
    frozen_param.requires_grad_(False)

# Resume the epoch counter from the refinement checkpoint.
epoch = checkpoint_refine['epoch']
#epoch = 0
print(epoch)

41


In [None]:
def get_bboxes(image, label, output, batchsize, box_size):
    """Crop a cubic window around the detected centre from each sample.

    For every sample ``b`` the centre is obtained with
    ``loadbvcenter(binarize_output(output[b]))`` and the same
    ``box_size``-wide window is cut out of ``image``, ``label`` and ``output``.

    The window is shifted so that it lies inside the volume on every axis.
    If the volume is smaller than ``box_size`` along an axis, the whole axis is
    taken and the remainder of the result is left at zero.  (The previous
    version clipped the centre to a hard-coded upper bound of 181 and raised a
    shape-mismatch error whenever the crop was cut short by the volume edge or
    ``box_size`` was odd.)

    Args:
        image, label, output: 5-D tensors indexed as
            ``[batch, channel, x, y, z]``; the last three dimensions of
            ``image`` define the volume size used for all three tensors.
        batchsize: number of leading samples to process.
        box_size: edge length of the cubic crop.

    Returns:
        Tuple ``(image_final, label_final, output_final)`` of tensors with
        shape ``(batchsize, 1, box_size, box_size, box_size)`` on the notebook's
        global ``device`` / ``dtype``.
    """
    def _axis_window(center, size):
        # Window of width min(box_size, size), centred on `center` where
        # possible and shifted to stay within [0, size].
        width = min(box_size, size)
        start = int(center) - box_size // 2
        start = max(0, min(start, size - width))
        return start, start + width

    out_shape = (batchsize, 1, box_size, box_size, box_size)
    image_final = torch.zeros(out_shape, device=device, dtype=dtype)
    label_final = torch.zeros(out_shape, device=device, dtype=dtype)
    output_final = torch.zeros(out_shape, device=device, dtype=dtype)

    axis_sizes = [int(s) for s in image.shape[-3:]]

    for b in range(batchsize):
        center = loadbvcenter(binarize_output(output[b]))
        (x1, x2), (y1, y2), (z1, z2) = [
            _axis_window(c, s) for c, s in zip(center, axis_sizes)
        ]
        dx, dy, dz = x2 - x1, y2 - y1, z2 - z1

        # Write into the leading corner of the zero-initialised buffers; any
        # part not covered by the crop stays zero (implicit padding).
        image_final[b, :, :dx, :dy, :dz] = image[b, :, x1:x2, y1:y2, z1:z2]
        label_final[b, :, :dx, :dy, :dz] = label[b, :, x1:x2, y1:y2, z1:z2]
        output_final[b, :, :dx, :dy, :dz] = output[b, :, x1:x2, y1:y2, z1:z2]

    return image_final, label_final, output_final

In [None]:
epochs = 5000
validate_every = 1   # run validation every `validate_every` epochs
logger = {'train': [], 'validation_1': []}

# Best validation loss seen so far in THIS run (not restored from the checkpoint).
min_val = 1


def _refine_sample_loss(batch, idx, box_size=192):
    """Run the coarse-to-fine pipeline on one sample and return its Dice loss.

    Steps (identical for training and validation):
      1. take sample ``idx`` of ``batch`` as a 256^3 volume and its 3-channel
         label; channel index 2 of the label is used as the target,
      2. resize image and target to the sample's ``original_resolution``,
      3. run ``deeplab`` on the 256^3 input (no autograd, it is not trained
         here) and resize channel 2 of its output the same way,
      4. locate the centre with ``loadbvcenter(binarize_output(...))`` and crop
         a window of up to ``box_size`` voxels per axis around it,
      5. bring every crop to ``box_size``^3 with ``reshape_image`` and feed the
         concatenated (coarse output, image) pair to ``refine_model`` at full,
         1/2 and 1/4 scale.

    Args:
        batch: dict produced by the DataLoader with the keys ``'image1_data'``,
            ``'image1_label'`` and ``'original_resolution'``.
        idx: index of the sample inside the batch.
        box_size: edge length of the cube fed to ``refine_model``.

    Returns:
        The scalar tensor returned by ``dice_loss(refine_out, target_crop)``.
    """
    half_size = int(box_size / 2)

    image_1 = batch['image1_data'][idx].to(device=device, dtype=dtype)
    image_1 = image_1.view(1, 1, 256, 256, 256)

    label_1 = batch['image1_label'][idx].to(device=device, dtype=dtype)
    label_1 = label_1.view(1, 3, 256, 256, 256)
    bv_label = label_1[:, 2, :, :, :].view(1, 1, 256, 256, 256)

    original_res = [a[idx].item() for a in batch['original_resolution']]

    image_1_resize = F.interpolate(image_1, size=original_res, mode='trilinear', align_corners=True)
    image_1_resize = image_1_resize.view(1, 1, original_res[0], original_res[1], original_res[2])
    bv_label_resize = F.interpolate(bv_label, size=original_res, mode='trilinear', align_corners=True)

    # Coarse prediction from the 256-resolution input.  deeplab is only used
    # for inference, so no autograd graph is recorded for it; the gradients of
    # refine_model's parameters are unaffected.
    with torch.no_grad():
        out_coarse = deeplab(image_1).view(1, 3, 256, 256, 256)
    bv_coarse = out_coarse[:, 2, :, :, :].view(1, 1, 256, 256, 256)
    bv_coarse_resize = F.interpolate(bv_coarse, size=original_res, mode='trilinear', align_corners=True)

    image_size_x, image_size_y, image_size_z = (int(s) for s in image_1_resize.shape[-3:])

    x, y, z = loadbvcenter(binarize_output(bv_coarse_resize).view([1] + original_res))
    # NOTE(review): the upper clip bound (box_size + half_size) does not depend
    # on the volume size, so near the far edge the crop below can be narrower
    # than box_size; reshape_image is relied on to bring it back to box_size^3.
    # Kept unchanged to preserve the training distribution -- confirm intent.
    x, y, z = np.clip([x, y, z], a_min=box_size - half_size, a_max=box_size + half_size)
    x1, x2 = max(x - half_size, 0), min(x + half_size, image_size_x)
    y1, y2 = max(y - half_size, 0), min(y + half_size, image_size_y)
    z1, z2 = max(z - half_size, 0), min(z + half_size, image_size_z)

    def _crop_to_box(volume):
        # Cut the window out of a (1, 1, X, Y, Z) volume and bring it to
        # (1, 1, box_size, box_size, box_size).
        patch = volume.view(original_res)[x1:x2, y1:y2, z1:z2]
        patch = reshape_image(patch.squeeze(), box_size, box_size, box_size).to(device, dtype)
        return patch.view(1, 1, box_size, box_size, box_size)

    bbox_bv = _crop_to_box(bv_coarse_resize)
    bbox_bv_label = _crop_to_box(bv_label_resize)
    bbox_image = _crop_to_box(image_1_resize)

    # Two input channels (coarse prediction, image) at three scales.
    bbox_concat = torch.cat([bbox_bv, bbox_image], dim=1)
    bbox_concat_2 = F.interpolate(bbox_concat, scale_factor=1/2, mode='trilinear', align_corners=True)
    bbox_concat_4 = F.interpolate(bbox_concat, scale_factor=1/4, mode='trilinear', align_corners=True)

    refine_out = refine_model(bbox_concat, bbox_concat_2, bbox_concat_4)
    return dice_loss(refine_out, bbox_bv_label)


# The context manager guarantees the log file is closed even when training is
# interrupted or raises.
with open('train_bv_refine_resize2.txt', 'a+') as record:

    for e in tqdm(range(epoch + 1, epochs)):
        # ---- training -------------------------------------------------------
        # Validation at the end of the previous epoch switched refine_model to
        # eval mode, so training mode is re-enabled once per epoch.
        refine_model.train()
        deeplab.eval()

        epoch_loss = 0

        for t, batch in enumerate(train_loader):
            # Use the real number of samples: the last batch of an epoch can be
            # smaller than BATCH_SIZE.
            n_samples = len(batch['image1_data'])

            train_losses = []
            for minibatch in range(n_samples):
                loss = _refine_sample_loss(batch, minibatch)
                print(loss)
                train_losses.append(loss)

            # Mean loss over the samples of this batch.
            loss = sum(train_losses) / n_samples
            epoch_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #scheduler.step(loss_1)

        outstr = 'Epoch {0} finished ! Training Loss: {1:.4f}'.format(e, epoch_loss/(t+1)) + '\n'

        logger['train'].append(epoch_loss/(t+1))

        print(outstr)
        record.write(outstr)
        record.flush()

        # ---- validation -----------------------------------------------------
        if e % validate_every == 0:
            deeplab.eval()
            refine_model.eval()

            with torch.no_grad():
                valloss_1 = 0

                for v, vbatch in enumerate(validation_loader):
                    n_samples = len(vbatch['image1_data'])

                    val_losses = [
                        _refine_sample_loss(vbatch, minibatch)
                        for minibatch in range(n_samples)
                    ]

                    avg_loss = sum(val_losses) / n_samples
                    print(avg_loss)

                    valloss_1 += avg_loss.item()

                avg_val_loss = (valloss_1 / (v+1))
                outstr = '------- 1st valloss={0:.4f}'\
                    .format(avg_val_loss) + '\n'

                logger['validation_1'].append(avg_val_loss)
                #scheduler.step(avg_val_loss)

                # Save on every new best validation loss, and additionally on
                # every 10th epoch regardless of the loss.
                if avg_val_loss < min_val:
                    min_val = avg_val_loss
                    save_1('refine_bv_resize_save', refine_model, optimizer, logger, e, scheduler)
                elif e % 10 == 0:
                    save_1('refine_bv_resize_save', refine_model, optimizer, logger, e, scheduler)

                print(outstr)
                record.write(outstr)
                record.flush()

  0%|          | 0/4958 [00:00<?, ?it/s]

tensor(0.1540, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0635, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0773, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0838, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0637, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0473, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0701, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1140, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0873, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1709, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0525, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0936, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0788, device='cuda:0', grad_fn

tensor(0.0909, device='cuda:0')
tensor(0.0821, device='cuda:0')
tensor(0.0810, device='cuda:0')
tensor(0.1094, device='cuda:0')
tensor(0.0815, device='cuda:0')
tensor(0.0432, device='cuda:0')
tensor(0.0722, device='cuda:0')
tensor(0.0538, device='cuda:0')
tensor(0.0867, device='cuda:0')
tensor(0.0976, device='cuda:0')


  0%|          | 1/4958 [09:39<798:24:30, 579.84s/it]

tensor(0.0821, device='cuda:0')
Checkpoint 42 saved !
------- 1st valloss=0.0812

tensor(0.0738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0753, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1302, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1446, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1303, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2058,

tensor(0.1013, device='cuda:0')
tensor(0.0828, device='cuda:0')
tensor(0.1122, device='cuda:0')
tensor(0.2171, device='cuda:0')
tensor(0.0491, device='cuda:0')
tensor(0.0997, device='cuda:0')
tensor(0.0692, device='cuda:0')
tensor(0.0687, device='cuda:0')
tensor(0.0830, device='cuda:0')
tensor(0.0715, device='cuda:0')
tensor(0.0758, device='cuda:0')
tensor(0.0547, device='cuda:0')
tensor(0.1154, device='cuda:0')
tensor(0.0832, device='cuda:0')


  0%|          | 2/4958 [18:40<781:57:14, 568.01s/it]

------- 1st valloss=0.0921

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0639, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1303, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0697, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0929, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0449, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1212, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0446, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1126, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0744, devi

tensor(0.0522, device='cuda:0')
tensor(0.0778, device='cuda:0')
tensor(0.0624, device='cuda:0')
tensor(0.0528, device='cuda:0')
tensor(0.0686, device='cuda:0')
tensor(0.1938, device='cuda:0')
tensor(0.0984, device='cuda:0')
tensor(0.0697, device='cuda:0')
tensor(0.0778, device='cuda:0')
tensor(0.0652, device='cuda:0')
tensor(0.0593, device='cuda:0')


  0%|          | 3/4958 [27:39<769:57:23, 559.40s/it]

Checkpoint 44 saved !
------- 1st valloss=0.0774

tensor(0.1341, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0418, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1098, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1006, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0667, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1465, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1907, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0508, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0889, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1026, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0358, device='cuda:0', grad_fn=<RsubBackw

tensor(0.0668, device='cuda:0')
tensor(0.0909, device='cuda:0')
tensor(0.0941, device='cuda:0')
tensor(0.0744, device='cuda:0')
tensor(0.0481, device='cuda:0')
tensor(0.1002, device='cuda:0')
tensor(0.0573, device='cuda:0')
tensor(0.1094, device='cuda:0')
tensor(0.0737, device='cuda:0')
tensor(0.0647, device='cuda:0')
tensor(0.0686, device='cuda:0')
tensor(0.0467, device='cuda:0')


  0%|          | 4/4958 [36:35<760:18:47, 552.51s/it]

------- 1st valloss=0.0857

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0541, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0406, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0846, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0665, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1003, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0748, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0973, devi

tensor(0.1092, device='cuda:0')
tensor(0.0823, device='cuda:0')
tensor(0.1795, device='cuda:0')
tensor(0.0588, device='cuda:0')
tensor(0.0746, device='cuda:0')
tensor(0.0778, device='cuda:0')
tensor(0.0595, device='cuda:0')
tensor(0.0778, device='cuda:0')
tensor(0.0830, device='cuda:0')
tensor(0.0705, device='cuda:0')
tensor(0.0726, device='cuda:0')
tensor(0.0587, device='cuda:0')


  0%|          | 5/4958 [45:36<755:21:32, 549.02s/it]

------- 1st valloss=0.0801

tensor(0.0493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0693, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1272, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0964, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0942, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0731, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0782, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0378, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9993, device='cuda

tensor(0.0634, device='cuda:0')
tensor(0.0974, device='cuda:0')
tensor(0.0920, device='cuda:0')
tensor(0.0899, device='cuda:0')
tensor(0.1741, device='cuda:0')
tensor(0.1016, device='cuda:0')
tensor(0.0596, device='cuda:0')
tensor(0.0468, device='cuda:0')
tensor(0.0579, device='cuda:0')
tensor(0.0944, device='cuda:0')
tensor(0.1354, device='cuda:0')
tensor(0.0863, device='cuda:0')


  0%|          | 6/4958 [54:37<751:32:47, 546.36s/it]

------- 1st valloss=0.0830

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0757, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1172, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0558, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1177, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0454, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1006, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1105, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0737, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0711, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0886, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0855, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0558, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.10

tensor(0.1012, device='cuda:0')
tensor(0.0811, device='cuda:0')
tensor(0.1028, device='cuda:0')
tensor(0.0442, device='cuda:0')
tensor(0.0606, device='cuda:0')
tensor(0.0954, device='cuda:0')
tensor(0.0725, device='cuda:0')
tensor(0.0882, device='cuda:0')
tensor(0.0769, device='cuda:0')
tensor(0.0986, device='cuda:0')
tensor(0.0633, device='cuda:0')
tensor(0.0983, device='cuda:0')


  0%|          | 7/4958 [1:03:44<752:03:01, 546.84s/it]

------- 1st valloss=0.0870

tensor(0.0819, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0844, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0684, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0759, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1058, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1470, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1149, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1171, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0724, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1208, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0747, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1160, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0818, device='cuda:0')
tensor(0.1073, device='cuda:0')
tensor(0.0622, device='cuda:0')
tensor(0.0625, device='cuda:0')
tensor(0.1603, device='cuda:0')
tensor(0.0698, device='cuda:0')
tensor(0.0823, device='cuda:0')
tensor(0.0936, device='cuda:0')
tensor(0.0667, device='cuda:0')
tensor(0.0669, device='cuda:0')
tensor(0.0415, device='cuda:0')
tensor(0.0649, device='cuda:0')


  0%|          | 8/4958 [1:12:44<748:43:36, 544.53s/it]

Checkpoint 49 saved !
------- 1st valloss=0.0774

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1164, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0603, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0773, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0953, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0918, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0873, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1329, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0396, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0570, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1148, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0378, device='cuda:0', grad_fn=<RsubB

tensor(0.0833, device='cuda:0')
tensor(0.0520, device='cuda:0')
tensor(0.0828, device='cuda:0')
tensor(0.0940, device='cuda:0')
tensor(0.0576, device='cuda:0')
tensor(0.0671, device='cuda:0')
tensor(0.0649, device='cuda:0')
tensor(0.0526, device='cuda:0')
tensor(0.0702, device='cuda:0')
tensor(0.0814, device='cuda:0')
tensor(0.0842, device='cuda:0')
tensor(0.0774, device='cuda:0')
tensor(0.0882, device='cuda:0')


  0%|          | 9/4958 [1:21:49<748:59:18, 544.83s/it]

Checkpoint 50 saved !
------- 1st valloss=0.0814

tensor(0.0449, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0376, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0412, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0713, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0641, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1086, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0749, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9962, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1051, device='cuda:0', grad_fn=<RsubBackw

tensor(0.3047, device='cuda:0')
tensor(0.4829, device='cuda:0')
tensor(0.0874, device='cuda:0')
tensor(0.0927, device='cuda:0')
tensor(0.1031, device='cuda:0')
tensor(0.4663, device='cuda:0')
tensor(0.0968, device='cuda:0')
tensor(0.2577, device='cuda:0')
tensor(0.1163, device='cuda:0')
tensor(0.1173, device='cuda:0')
tensor(0.2404, device='cuda:0')
tensor(0.4824, device='cuda:0')
tensor(0.5131, device='cuda:0')


  0%|          | 10/4958 [1:30:53<748:28:38, 544.57s/it]

------- 1st valloss=0.2303

tensor(0.1137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1575, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1365, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0871, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1161, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0942, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0777, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2706, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0962, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0979, device='cuda:0')
tensor(0.0514, device='cuda:0')
tensor(0.0524, device='cuda:0')
tensor(0.0876, device='cuda:0')
tensor(0.0858, device='cuda:0')
tensor(0.0649, device='cuda:0')
tensor(0.0702, device='cuda:0')
tensor(0.0853, device='cuda:0')
tensor(0.0755, device='cuda:0')
tensor(0.0761, device='cuda:0')
tensor(0.0756, device='cuda:0')
tensor(0.1142, device='cuda:0')


  0%|          | 11/4958 [1:40:04<750:49:13, 546.38s/it]

------- 1st valloss=0.0835

tensor(0.0615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1194, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1509, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1086, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0719, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0992, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9858, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0758, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0486, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1568, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0856, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., 

tensor(0.1064, device='cuda:0')
tensor(0.0660, device='cuda:0')
tensor(0.0746, device='cuda:0')
tensor(0.1042, device='cuda:0')
tensor(0.0510, device='cuda:0')
tensor(0.1952, device='cuda:0')
tensor(0.0577, device='cuda:0')
tensor(0.0742, device='cuda:0')
tensor(0.0791, device='cuda:0')
tensor(0.0633, device='cuda:0')
tensor(0.0633, device='cuda:0')
tensor(0.0577, device='cuda:0')


  0%|          | 12/4958 [1:49:17<753:35:27, 548.51s/it]

Checkpoint 53 saved !
------- 1st valloss=0.0772

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9953, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0641, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0536, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0817, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0460, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0836, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0622, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0938, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0635, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0616, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0571, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1182, device='cuda:0', grad_fn=<RsubBackward1

tensor(0.0493, device='cuda:0')
tensor(0.0847, device='cuda:0')
tensor(0.0904, device='cuda:0')
tensor(0.0915, device='cuda:0')
tensor(0.0685, device='cuda:0')
tensor(0.0959, device='cuda:0')
tensor(0.0685, device='cuda:0')
tensor(0.1704, device='cuda:0')
tensor(0.0691, device='cuda:0')
tensor(0.0564, device='cuda:0')
tensor(0.0876, device='cuda:0')
tensor(0.0620, device='cuda:0')


  0%|          | 13/4958 [1:58:12<747:43:27, 544.35s/it]

------- 1st valloss=0.0791

tensor(0.1059, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1196, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0535, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0446, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0701, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0745, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1549, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0460, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0574, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1176, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0499, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.23

tensor(0.0856, device='cuda:0')
tensor(0.0680, device='cuda:0')
tensor(0.0558, device='cuda:0')
tensor(0.1838, device='cuda:0')
tensor(0.0635, device='cuda:0')
tensor(0.0507, device='cuda:0')
tensor(0.0943, device='cuda:0')
tensor(0.0698, device='cuda:0')
tensor(0.0743, device='cuda:0')
tensor(0.0762, device='cuda:0')
tensor(0.0843, device='cuda:0')
tensor(0.0906, device='cuda:0')
------- 1st valloss=0.0801



  0%|          | 14/4958 [2:07:19<748:55:04, 545.33s/it]

tensor(0.0826, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0673, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0403, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0540, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0594, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0441, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0704, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0646, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0417, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1194, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0406, device='cuda:0', grad_fn

tensor(0.0499, device='cuda:0')
tensor(0.0887, device='cuda:0')
tensor(0.1179, device='cuda:0')
tensor(0.0770, device='cuda:0')
tensor(0.0552, device='cuda:0')
tensor(0.0971, device='cuda:0')
tensor(0.0551, device='cuda:0')
tensor(0.0624, device='cuda:0')
tensor(0.0870, device='cuda:0')
tensor(0.0781, device='cuda:0')
tensor(0.1146, device='cuda:0')


  0%|          | 15/4958 [2:16:26<749:21:55, 545.76s/it]

------- 1st valloss=0.0798

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1250, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0620, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0548, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1928, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0583, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0592, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0486, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0831, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0554, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1197, devi

tensor(0.0689, device='cuda:0')
tensor(0.0817, device='cuda:0')
tensor(0.0974, device='cuda:0')
tensor(0.0958, device='cuda:0')
tensor(0.0758, device='cuda:0')
tensor(0.0702, device='cuda:0')
tensor(0.0620, device='cuda:0')
tensor(0.0998, device='cuda:0')
tensor(0.0697, device='cuda:0')
tensor(0.0402, device='cuda:0')
tensor(0.0867, device='cuda:0')


  0%|          | 16/4958 [2:25:34<750:07:06, 546.42s/it]

tensor(0.0626, device='cuda:0')
------- 1st valloss=0.0789

tensor(0.0915, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0844, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1172, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0647, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0744, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0984, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0787, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9976, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0558, device='cuda:0', grad_fn=<Rsu

tensor(0.0663, device='cuda:0')
tensor(0.0696, device='cuda:0')
tensor(0.0878, device='cuda:0')
tensor(0.0886, device='cuda:0')
tensor(0.0413, device='cuda:0')
tensor(0.1042, device='cuda:0')
tensor(0.0475, device='cuda:0')
tensor(0.0552, device='cuda:0')
tensor(0.0766, device='cuda:0')
tensor(0.0525, device='cuda:0')
tensor(0.0719, device='cuda:0')
tensor(0.1161, device='cuda:0')
tensor(0.1057, device='cuda:0')


  0%|          | 17/4958 [2:34:31<745:57:37, 543.50s/it]

Checkpoint 58 saved !
------- 1st valloss=0.0770

tensor(0.0421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0751, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0872, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1322, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1349, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0709, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0401, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0473, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0564, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0980, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<R

tensor(0.0584, device='cuda:0')
tensor(0.0984, device='cuda:0')
tensor(0.0724, device='cuda:0')
tensor(0.1388, device='cuda:0')
tensor(0.0749, device='cuda:0')
tensor(0.1941, device='cuda:0')
tensor(0.0474, device='cuda:0')
tensor(0.0677, device='cuda:0')
tensor(0.1208, device='cuda:0')
tensor(0.0656, device='cuda:0')
tensor(0.0821, device='cuda:0')
tensor(0.0648, device='cuda:0')
tensor(0.1083, device='cuda:0')


  0%|          | 18/4958 [2:43:37<747:01:07, 544.39s/it]

------- 1st valloss=0.0877

tensor(0.0813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0586, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0801, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0643, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0832, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0506, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1163, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1579, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0555, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0393, devi

tensor(0.0970, device='cuda:0')
tensor(0.0814, device='cuda:0')
tensor(0.0708, device='cuda:0')
tensor(0.0854, device='cuda:0')
tensor(0.0988, device='cuda:0')
tensor(0.1079, device='cuda:0')
tensor(0.0601, device='cuda:0')
tensor(0.1247, device='cuda:0')
tensor(0.0715, device='cuda:0')
tensor(0.0684, device='cuda:0')
tensor(0.0860, device='cuda:0')
tensor(0.0849, device='cuda:0')


  0%|          | 19/4958 [2:52:42<747:07:32, 544.57s/it]

Checkpoint 60 saved !
------- 1st valloss=0.0898

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1285, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0843, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1405, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0885, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0447, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1313, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0622, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0878, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0696, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1081, device='cuda:0', grad_fn=<RsubB

tensor(0.0940, device='cuda:0')
tensor(0.1306, device='cuda:0')
tensor(0.1865, device='cuda:0')
tensor(0.0773, device='cuda:0')
tensor(0.0739, device='cuda:0')
tensor(0.0687, device='cuda:0')
tensor(0.0761, device='cuda:0')
tensor(0.0627, device='cuda:0')
tensor(0.0620, device='cuda:0')
tensor(0.0844, device='cuda:0')
tensor(0.0715, device='cuda:0')
tensor(0.0565, device='cuda:0')
tensor(0.0551, device='cuda:0')


  0%|          | 20/4958 [3:01:47<747:03:35, 544.64s/it]

------- 1st valloss=0.0851

tensor(0.0958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0737, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0971, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0823, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1240, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0770, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0546, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0552, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0713, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0755, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0835, 

tensor(0.0810, device='cuda:0')
tensor(0.0720, device='cuda:0')
tensor(0.0620, device='cuda:0')
tensor(0.0954, device='cuda:0')
tensor(0.0771, device='cuda:0')
tensor(0.0753, device='cuda:0')
tensor(0.0546, device='cuda:0')
tensor(0.0784, device='cuda:0')
tensor(0.0767, device='cuda:0')
tensor(0.1046, device='cuda:0')
tensor(0.0487, device='cuda:0')
tensor(0.0650, device='cuda:0')


  0%|          | 21/4958 [3:10:45<743:58:12, 542.49s/it]

------- 1st valloss=0.0817

tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0349, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0821, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1263, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0540, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1179, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1495, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0924, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0895, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0575, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0776, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1213, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1144, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.08

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(0.1243, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1019, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1151, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1052, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0860, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0711, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0431, device='cuda:0', grad_fn=<RsubBa

  1%|          | 58/4958 [8:45:43<738:56:16, 542.89s/it]

------- 1st valloss=0.0912

tensor(0.0861, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0559, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0672, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0865, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0657, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0649, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0297, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0432, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2390, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0591, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0907, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0972, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0529, device='cuda:0', grad_fn=<RsubBackward1>)
ten

tensor(0.0707, device='cuda:0')
tensor(0.0732, device='cuda:0')
tensor(0.0644, device='cuda:0')
tensor(0.0878, device='cuda:0')
tensor(0.0688, device='cuda:0')
tensor(0.1084, device='cuda:0')
tensor(0.0817, device='cuda:0')
tensor(0.0687, device='cuda:0')
tensor(0.1902, device='cuda:0')
tensor(0.0556, device='cuda:0')
tensor(0.0742, device='cuda:0')
tensor(0.0636, device='cuda:0')


  1%|          | 59/4958 [8:54:48<739:44:45, 543.60s/it]

Checkpoint 100 saved !
------- 1st valloss=0.0770

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0766, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0491, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9592, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0609, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0583, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0761, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0

tensor(0.0641, device='cuda:0')
tensor(0.0623, device='cuda:0')
tensor(0.0879, device='cuda:0')
tensor(0.1628, device='cuda:0')
tensor(0.0990, device='cuda:0')
tensor(0.0585, device='cuda:0')
tensor(0.0887, device='cuda:0')
tensor(0.0551, device='cuda:0')
tensor(0.0586, device='cuda:0')
tensor(0.0603, device='cuda:0')
tensor(0.0742, device='cuda:0')
tensor(0.0443, device='cuda:0')


  1%|          | 60/4958 [9:04:01<743:32:03, 546.49s/it]

tensor(0.0686, device='cuda:0')
------- 1st valloss=0.0737

tensor(0.0798, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0472, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5904, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0731, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1083, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1386, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0646, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0863, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0582, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0582, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0954, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0328, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0624, device='cuda:0', grad

tensor(0.0905, device='cuda:0')
tensor(0.0449, device='cuda:0')
tensor(0.0965, device='cuda:0')
tensor(0.0802, device='cuda:0')
tensor(0.0620, device='cuda:0')
tensor(0.0836, device='cuda:0')
tensor(0.0782, device='cuda:0')
tensor(0.0766, device='cuda:0')
tensor(0.0419, device='cuda:0')
tensor(0.0444, device='cuda:0')
tensor(0.0663, device='cuda:0')
tensor(0.0486, device='cuda:0')
tensor(0.0815, device='cuda:0')


  1%|          | 61/4958 [9:13:01<740:33:54, 544.42s/it]

------- 1st valloss=0.0707

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0337, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0601, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0527, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0769, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1131, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., devi

tensor(0.0425, device='cuda:0')
tensor(0.0627, device='cuda:0')
tensor(0.0890, device='cuda:0')
tensor(0.0472, device='cuda:0')
tensor(0.0834, device='cuda:0')
tensor(0.0541, device='cuda:0')
tensor(0.0753, device='cuda:0')
tensor(0.0667, device='cuda:0')
tensor(0.0660, device='cuda:0')
tensor(0.0546, device='cuda:0')
tensor(0.0754, device='cuda:0')
tensor(0.0705, device='cuda:0')


  1%|▏         | 62/4958 [9:22:00<738:25:23, 542.96s/it]

------- 1st valloss=0.0691

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0407, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0847, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0516, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0264, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0637, 

tensor(0.0644, device='cuda:0')
tensor(0.0492, device='cuda:0')
tensor(0.0535, device='cuda:0')
tensor(0.0938, device='cuda:0')
tensor(0.0713, device='cuda:0')
tensor(0.0431, device='cuda:0')
tensor(0.1726, device='cuda:0')
tensor(0.0786, device='cuda:0')
tensor(0.0787, device='cuda:0')
tensor(0.0530, device='cuda:0')
tensor(0.0466, device='cuda:0')
tensor(0.0681, device='cuda:0')


  1%|▏         | 63/4958 [9:31:09<740:19:56, 544.47s/it]

Checkpoint 104 saved !
------- 1st valloss=0.0681

tensor(0.0768, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0546, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0608, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0611, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1168, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1186, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0982, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0486, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0564, device='cuda:0', grad_fn=<RsubBack

tensor(0.0830, device='cuda:0')
tensor(0.0970, device='cuda:0')
tensor(0.1765, device='cuda:0')
tensor(0.0721, device='cuda:0')
tensor(0.0949, device='cuda:0')
tensor(0.1013, device='cuda:0')
tensor(0.0708, device='cuda:0')
tensor(0.0523, device='cuda:0')
tensor(0.0435, device='cuda:0')
tensor(0.0987, device='cuda:0')
tensor(0.0713, device='cuda:0')
tensor(0.0844, device='cuda:0')


  1%|▏         | 64/4958 [9:40:05<737:02:05, 542.16s/it]

tensor(0.0475, device='cuda:0')
------- 1st valloss=0.0766

tensor(0.0517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1101, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0434, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0853, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0434, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0827, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0785, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0728, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0339, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0483, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2051, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0710, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad

tensor(0.0556, device='cuda:0')
tensor(0.0518, device='cuda:0')
tensor(0.0714, device='cuda:0')
tensor(0.0529, device='cuda:0')
tensor(0.0512, device='cuda:0')
tensor(0.0939, device='cuda:0')
tensor(0.0889, device='cuda:0')
tensor(0.0604, device='cuda:0')
tensor(0.0765, device='cuda:0')
tensor(0.0448, device='cuda:0')
tensor(0.0694, device='cuda:0')
tensor(0.0370, device='cuda:0')
tensor(0.0635, device='cuda:0')
------- 1st valloss=0.0704



  1%|▏         | 65/4958 [9:48:59<733:28:26, 539.65s/it]

tensor(0.0808, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0916, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0564, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0598, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0628, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0647, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1077, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1205, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0354, device='cuda:0', grad_fn=<RsubBackward1>

tensor(0.0655, device='cuda:0')
tensor(0.0544, device='cuda:0')
tensor(0.0916, device='cuda:0')
tensor(0.1119, device='cuda:0')
tensor(0.0853, device='cuda:0')
tensor(0.0481, device='cuda:0')
tensor(0.0590, device='cuda:0')
tensor(0.0386, device='cuda:0')
tensor(0.0492, device='cuda:0')
tensor(0.1901, device='cuda:0')
tensor(0.1066, device='cuda:0')


  1%|▏         | 66/4958 [9:58:07<736:47:07, 542.20s/it]

------- 1st valloss=0.0740

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0672, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0261, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0536, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0701, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0417, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0632, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0523, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1120, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., devi

tensor(0.0594, device='cuda:0')
tensor(0.2023, device='cuda:0')
tensor(0.0416, device='cuda:0')
tensor(0.0765, device='cuda:0')
tensor(0.0657, device='cuda:0')
tensor(0.0681, device='cuda:0')
tensor(0.0792, device='cuda:0')
tensor(0.0376, device='cuda:0')
tensor(0.0471, device='cuda:0')
tensor(0.0707, device='cuda:0')
tensor(0.0855, device='cuda:0')
tensor(0.0777, device='cuda:0')
------- 1st valloss=0.0746



  1%|▏         | 67/4958 [10:07:09<736:16:24, 541.93s/it]

tensor(0.0831, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1011, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0459, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1931, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0777, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0500, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1101, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1465, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1197, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<Rs

tensor(0.0869, device='cuda:0')
tensor(0.0744, device='cuda:0')
tensor(0.0381, device='cuda:0')
tensor(0.1046, device='cuda:0')
tensor(0.0581, device='cuda:0')
tensor(0.0827, device='cuda:0')
tensor(0.0570, device='cuda:0')
tensor(0.0532, device='cuda:0')
tensor(0.0361, device='cuda:0')
tensor(0.0694, device='cuda:0')
tensor(0.0892, device='cuda:0')


  1%|▏         | 68/4958 [10:16:17<738:58:10, 544.03s/it]

------- 1st valloss=0.0732

tensor(0.0502, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1029, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0676, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0912, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0537, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0533, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0479, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2259, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0339, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0674, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1111, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0937, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0577, device='cuda:0')
tensor(0.0845, device='cuda:0')
tensor(0.0911, device='cuda:0')
tensor(0.0778, device='cuda:0')
tensor(0.0837, device='cuda:0')
tensor(0.0640, device='cuda:0')
tensor(0.0418, device='cuda:0')
tensor(0.0505, device='cuda:0')
tensor(0.0660, device='cuda:0')
tensor(0.0694, device='cuda:0')
tensor(0.0985, device='cuda:0')
tensor(0.0483, device='cuda:0')


  1%|▏         | 69/4958 [10:25:19<737:39:19, 543.17s/it]

Checkpoint 110 saved !
------- 1st valloss=0.0715

tensor(0.0487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1140, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0588, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0748, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0498, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0492, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0891, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1148, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0673, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2619, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward

tensor(0.0722, device='cuda:0')
tensor(0.0747, device='cuda:0')
tensor(0.0452, device='cuda:0')
tensor(0.0508, device='cuda:0')
tensor(0.0761, device='cuda:0')
tensor(0.0766, device='cuda:0')
tensor(0.0803, device='cuda:0')
tensor(0.0614, device='cuda:0')
tensor(0.0422, device='cuda:0')
tensor(0.0913, device='cuda:0')
tensor(0.0472, device='cuda:0')
tensor(0.0442, device='cuda:0')
tensor(0.0496, device='cuda:0')


  1%|▏         | 70/4958 [10:34:12<733:40:04, 540.34s/it]

Checkpoint 111 saved !
------- 1st valloss=0.0680

tensor(0.0513, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1024, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0335, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0581, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0822, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0843, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0702, device='cuda:0', grad_fn=<Rsub

tensor(0.0401, device='cuda:0')
tensor(0.0874, device='cuda:0')
tensor(0.0713, device='cuda:0')
tensor(0.0646, device='cuda:0')
tensor(0.0647, device='cuda:0')
tensor(0.0796, device='cuda:0')
tensor(0.0363, device='cuda:0')
tensor(0.0750, device='cuda:0')
tensor(0.0655, device='cuda:0')
tensor(0.0773, device='cuda:0')
tensor(0.0632, device='cuda:0')
tensor(0.0556, device='cuda:0')
tensor(0.0580, device='cuda:0')


  1%|▏         | 71/4958 [10:43:10<732:26:08, 539.55s/it]

------- 1st valloss=0.0685

tensor(0.0999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0491, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0542, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0374, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0758, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1066, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0297, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0773, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., devi

tensor(0.0790, device='cuda:0')
tensor(0.0640, device='cuda:0')
tensor(0.0578, device='cuda:0')
tensor(0.0556, device='cuda:0')
tensor(0.0594, device='cuda:0')
tensor(0.0593, device='cuda:0')
tensor(0.0892, device='cuda:0')
tensor(0.0577, device='cuda:0')
tensor(0.0713, device='cuda:0')
tensor(0.0522, device='cuda:0')
tensor(0.0460, device='cuda:0')
tensor(0.0654, device='cuda:0')


  1%|▏         | 72/4958 [10:52:11<733:01:59, 540.10s/it]

Checkpoint 113 saved !
------- 1st valloss=0.0672

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0667, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0779, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0743, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0947, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0408, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0566, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0514, device='cuda:0', grad_fn=<RsubBackward1>)
tens

tensor(0.0890, device='cuda:0')
tensor(0.0563, device='cuda:0')
tensor(0.0588, device='cuda:0')
tensor(0.0713, device='cuda:0')
tensor(0.0499, device='cuda:0')
tensor(0.0584, device='cuda:0')
tensor(0.0761, device='cuda:0')
tensor(0.0799, device='cuda:0')
tensor(0.0606, device='cuda:0')
tensor(0.0897, device='cuda:0')
tensor(0.0477, device='cuda:0')
tensor(0.0536, device='cuda:0')
tensor(0.1884, device='cuda:0')


  1%|▏         | 73/4958 [11:01:31<740:44:03, 545.88s/it]

------- 1st valloss=0.0709

tensor(0.0667, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0608, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0657, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0315, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0392, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0507, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0624, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0573, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0692, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9976, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.06

tensor(0.0485, device='cuda:0')
tensor(0.0651, device='cuda:0')
tensor(0.0819, device='cuda:0')
tensor(0.0675, device='cuda:0')
tensor(0.0707, device='cuda:0')
tensor(0.0430, device='cuda:0')
tensor(0.0718, device='cuda:0')
tensor(0.0689, device='cuda:0')
tensor(0.0338, device='cuda:0')
tensor(0.0621, device='cuda:0')
tensor(0.0895, device='cuda:0')
tensor(0.0999, device='cuda:0')


  1%|▏         | 74/4958 [11:10:43<743:00:11, 547.67s/it]

------- 1st valloss=0.0686

tensor(0.0720, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0448, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0740, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0281, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0467, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0782, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0822, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0329, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0931, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0772, device='cuda:0')
tensor(0.0818, device='cuda:0')
tensor(0.0597, device='cuda:0')
tensor(0.0644, device='cuda:0')
tensor(0.1955, device='cuda:0')
tensor(0.0939, device='cuda:0')
tensor(0.0753, device='cuda:0')
tensor(0.0604, device='cuda:0')
tensor(0.1030, device='cuda:0')
tensor(0.0816, device='cuda:0')
tensor(0.0503, device='cuda:0')
tensor(0.0813, device='cuda:0')


  2%|▏         | 75/4958 [11:19:48<741:45:11, 546.86s/it]

------- 1st valloss=0.0789

tensor(0.9979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0710, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0781, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1278, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0495, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0327, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9990, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0648, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0534, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0803, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0502, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9509, device='cuda:0', grad_fn=<RsubBackward1>)
ten

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(0.0484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0992, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0482, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0509, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0761, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0512, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9917, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0727, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0318, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0729, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0326, device='cuda:0',

  2%|▏         | 121/4958 [18:16:29<733:27:50, 545.89s/it]

------- 1st valloss=0.0690

tensor(0.0669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0379, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0520, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0963, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0871, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0822, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1041, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0675, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1087, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8409, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0800, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0570, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0685, device='cuda:0')
tensor(0.0431, device='cuda:0')
tensor(0.0720, device='cuda:0')
tensor(0.0575, device='cuda:0')
tensor(0.0702, device='cuda:0')
tensor(0.0483, device='cuda:0')
tensor(0.1750, device='cuda:0')
tensor(0.0777, device='cuda:0')
tensor(0.1058, device='cuda:0')
tensor(0.0869, device='cuda:0')
tensor(0.0511, device='cuda:0')
tensor(0.0625, device='cuda:0')
tensor(0.0584, device='cuda:0')


  2%|▏         | 122/4958 [18:25:26<729:53:05, 543.34s/it]

tensor(0.0604, device='cuda:0')
------- 1st valloss=0.0678

tensor(0.0623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0554, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1106, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0411, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9632, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0883, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0679, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0547, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0424, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0475, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0351, device='cuda:

tensor(0.0898, device='cuda:0')
tensor(0.0457, device='cuda:0')
tensor(0.1875, device='cuda:0')
tensor(0.0494, device='cuda:0')
tensor(0.0500, device='cuda:0')
tensor(0.0754, device='cuda:0')
tensor(0.0730, device='cuda:0')
tensor(0.0692, device='cuda:0')
tensor(0.0632, device='cuda:0')
tensor(0.0837, device='cuda:0')
tensor(0.0811, device='cuda:0')
tensor(0.0427, device='cuda:0')
tensor(0.0632, device='cuda:0')
tensor(0.0619, device='cuda:0')
tensor(0.0508, device='cuda:0')
tensor(0.0563, device='cuda:0')


  2%|▏         | 123/4958 [18:34:31<730:32:03, 543.93s/it]

------- 1st valloss=0.0682

tensor(0.1160, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1112, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0611, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0478, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0653, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9401, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9516, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2296, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.04

tensor(0.0407, device='cuda:0')
tensor(0.0640, device='cuda:0')
tensor(0.1879, device='cuda:0')
tensor(0.0680, device='cuda:0')
tensor(0.0395, device='cuda:0')
tensor(0.1108, device='cuda:0')
tensor(0.0623, device='cuda:0')
tensor(0.0976, device='cuda:0')
tensor(0.0747, device='cuda:0')
tensor(0.0963, device='cuda:0')
tensor(0.1002, device='cuda:0')
tensor(0.0542, device='cuda:0')
tensor(0.0410, device='cuda:0')


  3%|▎         | 124/4958 [18:43:38<731:22:47, 544.68s/it]

------- 1st valloss=0.0759

tensor(0.0688, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0544, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0731, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0708, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0482, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0925, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0626, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0839, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0802, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0544, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0842, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0828, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0700, device='cuda:0', grad_fn=<RsubBackward1>)
ten

tensor(0.0689, device='cuda:0')
tensor(0.0387, device='cuda:0')
tensor(0.0533, device='cuda:0')
tensor(0.0704, device='cuda:0')
tensor(0.1618, device='cuda:0')
tensor(0.0585, device='cuda:0')
tensor(0.0773, device='cuda:0')
tensor(0.0466, device='cuda:0')
tensor(0.0569, device='cuda:0')
tensor(0.0874, device='cuda:0')
tensor(0.0505, device='cuda:0')
tensor(0.0606, device='cuda:0')
tensor(0.0527, device='cuda:0')
tensor(0.0805, device='cuda:0')


  3%|▎         | 125/4958 [18:52:39<729:55:37, 543.71s/it]

------- 1st valloss=0.0669

tensor(0.9824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0425, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0381, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0482, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0839, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0743, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0470, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0622, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0769, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0588, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0439, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0600, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0594, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0724, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0281, device='cuda:0', grad_fn=<RsubBackward1>)
ten

tensor(0.0447, device='cuda:0')
tensor(0.0417, device='cuda:0')
tensor(0.0695, device='cuda:0')
tensor(0.0581, device='cuda:0')
tensor(0.0739, device='cuda:0')
tensor(0.1156, device='cuda:0')
tensor(0.0477, device='cuda:0')
tensor(0.0720, device='cuda:0')
tensor(0.0349, device='cuda:0')
tensor(0.0480, device='cuda:0')
tensor(0.0583, device='cuda:0')
tensor(0.1044, device='cuda:0')
tensor(0.0783, device='cuda:0')
tensor(0.0551, device='cuda:0')


  3%|▎         | 126/4958 [19:01:44<730:01:13, 543.89s/it]

------- 1st valloss=0.0676

tensor(0.0460, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0919, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0255, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0629, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1009, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0791, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0658, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0845, device='cuda:0', grad_fn=<RsubBackward1>)
ten

tensor(0.0333, device='cuda:0')
tensor(0.0638, device='cuda:0')
tensor(0.0550, device='cuda:0')
tensor(0.0649, device='cuda:0')
tensor(0.0511, device='cuda:0')
tensor(0.0671, device='cuda:0')
tensor(0.0567, device='cuda:0')
tensor(0.0398, device='cuda:0')
tensor(0.0697, device='cuda:0')
tensor(0.1683, device='cuda:0')
tensor(0.0550, device='cuda:0')
tensor(0.0655, device='cuda:0')
tensor(0.0723, device='cuda:0')


  3%|▎         | 127/4958 [19:10:55<732:53:30, 546.14s/it]

Checkpoint 168 saved !
------- 1st valloss=0.0625

tensor(0.1099, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0513, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0670, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0616, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0736, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0951, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0626, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0341, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0369, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0810, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1080, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0886, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0529, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0342, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9638, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0452, device='cuda:0', grad_

tensor(0.0657, device='cuda:0')
tensor(0.0478, device='cuda:0')
tensor(0.0634, device='cuda:0')
tensor(0.0883, device='cuda:0')
tensor(0.0632, device='cuda:0')
tensor(0.0664, device='cuda:0')
tensor(0.0755, device='cuda:0')
tensor(0.0542, device='cuda:0')
tensor(0.0621, device='cuda:0')
tensor(0.0316, device='cuda:0')
tensor(0.0615, device='cuda:0')
tensor(0.0463, device='cuda:0')
tensor(0.0615, device='cuda:0')


  3%|▎         | 128/4958 [19:20:01<732:48:40, 546.19s/it]

------- 1st valloss=0.0638

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0801, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9975, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0343, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5180, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0673, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0580, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0859, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0478, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9264, devi

tensor(0.0746, device='cuda:0')
tensor(0.0617, device='cuda:0')
tensor(0.0804, device='cuda:0')
tensor(0.1792, device='cuda:0')
tensor(0.0597, device='cuda:0')
tensor(0.0616, device='cuda:0')
tensor(0.0354, device='cuda:0')
tensor(0.0807, device='cuda:0')
tensor(0.0689, device='cuda:0')
tensor(0.0475, device='cuda:0')
tensor(0.0822, device='cuda:0')
tensor(0.0945, device='cuda:0')
tensor(0.1149, device='cuda:0')


  3%|▎         | 129/4958 [19:29:00<729:37:18, 543.93s/it]

Checkpoint 170 saved !
------- 1st valloss=0.0718

tensor(0.1065, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0514, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0959, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0454, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0385, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0762, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0653, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0629, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0694, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0509, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0411, device='cuda:0', grad_fn=<RsubBackward1>)


tensor(0.0602, device='cuda:0')
tensor(0.0815, device='cuda:0')
tensor(0.0484, device='cuda:0')
tensor(0.0479, device='cuda:0')
tensor(0.0632, device='cuda:0')
tensor(0.0743, device='cuda:0')
tensor(0.0903, device='cuda:0')
tensor(0.0438, device='cuda:0')
tensor(0.0554, device='cuda:0')
tensor(0.0579, device='cuda:0')
tensor(0.0565, device='cuda:0')
tensor(0.0666, device='cuda:0')
tensor(0.0486, device='cuda:0')


  3%|▎         | 130/4958 [19:37:56<726:20:05, 541.59s/it]

------- 1st valloss=0.0645

tensor(0.0741, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0463, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9431, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0657, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0458, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0937, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0356, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0726, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1273, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0414, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0510, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0343, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0528, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0505, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0999, device='cuda:0', grad_fn=<RsubBackward1>)
ten

tensor(0.0445, device='cuda:0')
tensor(0.0600, device='cuda:0')
tensor(0.0679, device='cuda:0')
tensor(0.0620, device='cuda:0')
tensor(0.1633, device='cuda:0')
tensor(0.0726, device='cuda:0')
tensor(0.0380, device='cuda:0')
tensor(0.0574, device='cuda:0')
tensor(0.0606, device='cuda:0')
tensor(0.0684, device='cuda:0')
tensor(0.0491, device='cuda:0')
tensor(0.0377, device='cuda:0')
tensor(0.0779, device='cuda:0')


  3%|▎         | 131/4958 [19:46:59<726:31:43, 541.85s/it]

------- 1st valloss=0.0641

tensor(0.0608, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0725, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0727, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0413, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0310, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0909, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0591, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0447, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0831, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0620, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0541, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0666, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.02

tensor(0.0780, device='cuda:0')
tensor(0.0418, device='cuda:0')
tensor(0.0566, device='cuda:0')
tensor(0.0457, device='cuda:0')
tensor(0.0905, device='cuda:0')
tensor(0.0830, device='cuda:0')
tensor(0.0523, device='cuda:0')
tensor(0.0696, device='cuda:0')
tensor(0.0586, device='cuda:0')
tensor(0.0568, device='cuda:0')
tensor(0.0614, device='cuda:0')
tensor(0.0699, device='cuda:0')


  3%|▎         | 132/4958 [19:56:07<729:06:49, 543.89s/it]

------- 1st valloss=0.0651

tensor(0.1070, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1093, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9991, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0805, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0785, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9975, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0402, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1152, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0359, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0741, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0550, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0548, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1080, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0751, device='cuda:0')
tensor(0.0572, device='cuda:0')
tensor(0.0638, device='cuda:0')
tensor(0.0379, device='cuda:0')
tensor(0.0514, device='cuda:0')
tensor(0.0588, device='cuda:0')
tensor(0.0309, device='cuda:0')
tensor(0.0926, device='cuda:0')
tensor(0.0716, device='cuda:0')
tensor(0.0736, device='cuda:0')
tensor(0.0708, device='cuda:0')
tensor(0.1879, device='cuda:0')
tensor(0.0690, device='cuda:0')


  3%|▎         | 133/4958 [20:05:06<727:05:02, 542.49s/it]

------- 1st valloss=0.0681

tensor(0.0630, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0644, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0333, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0449, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0520, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0537, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0501, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0424, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0459, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0550, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0406, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0392, device='cuda:0')
tensor(0.0678, device='cuda:0')
tensor(0.0719, device='cuda:0')
tensor(0.0568, device='cuda:0')
tensor(0.0465, device='cuda:0')
tensor(0.0522, device='cuda:0')
tensor(0.0442, device='cuda:0')
tensor(0.0397, device='cuda:0')
tensor(0.0849, device='cuda:0')
tensor(0.0758, device='cuda:0')
tensor(0.0623, device='cuda:0')
tensor(0.0491, device='cuda:0')
tensor(0.0667, device='cuda:0')
tensor(0.0465, device='cuda:0')


  3%|▎         | 134/4958 [20:14:02<724:14:12, 540.48s/it]

------- 1st valloss=0.0642

tensor(0.0603, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9991, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1258, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0601, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0594, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0653, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0755, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0603, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0478, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9989, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0466, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0387, device='cuda:0')
tensor(0.0907, device='cuda:0')
tensor(0.0387, device='cuda:0')
tensor(0.1053, device='cuda:0')
tensor(0.0624, device='cuda:0')
tensor(0.0761, device='cuda:0')
tensor(0.0636, device='cuda:0')
tensor(0.0572, device='cuda:0')
tensor(0.0638, device='cuda:0')
tensor(0.0813, device='cuda:0')
tensor(0.0452, device='cuda:0')
tensor(0.1579, device='cuda:0')
tensor(0.0708, device='cuda:0')
tensor(0.0520, device='cuda:0')


  3%|▎         | 135/4958 [20:22:58<722:19:25, 539.16s/it]

------- 1st valloss=0.0664

tensor(0.0703, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0740, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0793, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0688, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9961, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0546, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0314, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0385, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

tensor(0.0639, device='cuda:0')
tensor(0.0360, device='cuda:0')
tensor(0.0525, device='cuda:0')
tensor(0.0937, device='cuda:0')
tensor(0.0486, device='cuda:0')
tensor(0.0578, device='cuda:0')
tensor(0.1586, device='cuda:0')
tensor(0.0726, device='cuda:0')
tensor(0.0768, device='cuda:0')
tensor(0.0449, device='cuda:0')
tensor(0.0649, device='cuda:0')
tensor(0.0395, device='cuda:0')
tensor(0.0403, device='cuda:0')
tensor(0.0469, device='cuda:0')
------- 1st valloss=0.0629



  3%|▎         | 136/4958 [20:32:07<725:49:35, 541.89s/it]

tensor(0.0514, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0312, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1503, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1395, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0684, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4556, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0432, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0714, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9991, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0667, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9990, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0611, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0547, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0532, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0585, device='cuda:0',

tensor(0.0280, device='cuda:0')
tensor(0.0432, device='cuda:0')
tensor(0.0621, device='cuda:0')
tensor(0.0792, device='cuda:0')
tensor(0.0682, device='cuda:0')
tensor(0.1789, device='cuda:0')
tensor(0.0395, device='cuda:0')
tensor(0.0869, device='cuda:0')
tensor(0.0687, device='cuda:0')
tensor(0.0419, device='cuda:0')
tensor(0.0601, device='cuda:0')
tensor(0.0336, device='cuda:0')
tensor(0.0648, device='cuda:0')


  3%|▎         | 137/4958 [20:41:11<726:33:55, 542.55s/it]

------- 1st valloss=0.0636

tensor(0.9401, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0868, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0557, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0745, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0684, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0378, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1519, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0577, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0655, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0335, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0426, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0366, device='cuda:0', grad_fn=<RsubBackward1>)
ten

tensor(0.0502, device='cuda:0')
tensor(0.0473, device='cuda:0')
tensor(0.0474, device='cuda:0')
tensor(0.0647, device='cuda:0')
tensor(0.0739, device='cuda:0')
tensor(0.0402, device='cuda:0')
tensor(0.0618, device='cuda:0')
tensor(0.0778, device='cuda:0')
tensor(0.1029, device='cuda:0')
tensor(0.0611, device='cuda:0')
tensor(0.0430, device='cuda:0')
tensor(0.0650, device='cuda:0')
tensor(0.0353, device='cuda:0')
tensor(0.0468, device='cuda:0')


  3%|▎         | 138/4958 [20:50:13<726:08:51, 542.35s/it]

------- 1st valloss=0.0643

tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1433, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9621, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0388, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0412, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0380, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.20

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1034, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9807, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1022, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1214, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0617, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0827, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0597, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0736, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0963, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<Rs

  3%|▎         | 154/4958 [23:15:44<727:56:20, 545.50s/it]

------- 1st valloss=0.0668

tensor(0.0306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0593, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0687, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1030, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0555, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0810, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0743, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0592, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0472, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0662, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0289, devi

tensor(0.0953, device='cuda:0')
tensor(0.0705, device='cuda:0')
tensor(0.0413, device='cuda:0')
tensor(0.0354, device='cuda:0')
tensor(0.0661, device='cuda:0')
tensor(0.0828, device='cuda:0')
tensor(0.0337, device='cuda:0')
tensor(0.0608, device='cuda:0')
tensor(0.0778, device='cuda:0')
tensor(0.1915, device='cuda:0')
tensor(0.0617, device='cuda:0')
tensor(0.0706, device='cuda:0')


  3%|▎         | 155/4958 [23:24:47<726:45:49, 544.73s/it]

------- 1st valloss=0.0671

tensor(0.0404, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1797, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0629, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0337, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0929, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0554, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9472, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0570, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0458, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0403, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0288, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0369, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0446, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0384, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9767, device='cuda:0', grad_fn=<RsubBackward1>)
ten

tensor(0.1023, device='cuda:0')
tensor(0.0381, device='cuda:0')
tensor(0.1519, device='cuda:0')
tensor(0.0756, device='cuda:0')
tensor(0.0608, device='cuda:0')
tensor(0.0478, device='cuda:0')
tensor(0.2062, device='cuda:0')
tensor(0.0419, device='cuda:0')
tensor(0.0607, device='cuda:0')
tensor(0.0676, device='cuda:0')
tensor(0.0576, device='cuda:0')
tensor(0.0566, device='cuda:0')


  3%|▎         | 156/4958 [23:33:59<729:18:28, 546.75s/it]

------- 1st valloss=0.0757

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0571, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0796, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0462, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0534, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0462, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0446, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0797, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0886, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0844, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0584, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0734, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0503, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(

In [None]:
# Evaluate the two-stage pipeline on the validation set:
#   1. `deeplab` predicts a coarse 3-channel output on the 256^3 input.
#   2. Channel 2 (the `bv` channel) is resampled to the original resolution and
#      used to locate a cubic crop around the structure.
#   3. `refine_model` refines the crop (at full, 1/2 and 1/4 scale) and the Dice
#      loss against the cropped label is accumulated.
deeplab.eval()
refine_model.eval()

# Edge length of the cubic crop fed to the refinement network, and half of it.
box_size = 192
half_size = box_size // 2

# Per-sample loss above which the intermediate volumes are displayed for inspection.
vis_loss_threshold = .04


def _crop_to_box(volume, resolution, bounds):
    """Crop `volume` to `bounds` and return it as a (1, 1, box, box, box) tensor.

    Args:
        volume: tensor holding exactly one volume of spatial size `resolution`.
        resolution: list of the three spatial sizes used to view `volume`.
        bounds: ((x1, x2), (y1, y2), (z1, z2)) half-open crop limits.

    Returns:
        The crop passed through `reshape_image` to `box_size` per axis, moved to
        `device`/`dtype` and viewed as (1, 1, box_size, box_size, box_size).
    """
    (x1, x2), (y1, y2), (z1, z2) = bounds
    crop = volume.view(resolution)[x1:x2, y1:y2, z1:z2]
    crop = reshape_image(crop.squeeze(), box_size, box_size, box_size).to(device, dtype)
    return crop.view(1, 1, box_size, box_size, box_size)


with torch.no_grad():

    val_loss = 0
    num_batches = 0

    # Wrap the loader itself (not `enumerate(...)`) so tqdm can read its length.
    for vbatch in tqdm(validation_loader):
        # The loader is not created with drop_last, so the final batch may hold
        # fewer than BATCH_SIZE samples; iterate over what is actually there.
        samples_in_batch = len(vbatch['image1_data'])
        val_losses = []

        for minibatch in range(samples_in_batch):
            # move data to device, convert dtype to desirable dtype
            image_1 = vbatch['image1_data'][minibatch].to(device=device, dtype=dtype)
            image_1 = image_1.view(1, 1, 256, 256, 256)

            label_1 = vbatch['image1_label'][minibatch].to(device=device, dtype=dtype)
            label_1 = label_1.view(1, 3, 256, 256, 256)

            bv_label = label_1[:, 2, :, :, :].view(1, 1, 256, 256, 256)

            # Spatial size of this sample before it was resampled to 256^3.
            original_res = [a[minibatch].item() for a in vbatch['original_resolution']]

            image_1_resize = F.interpolate(image_1, size=original_res, mode='trilinear', align_corners=True)
            bv_label_resize = F.interpolate(bv_label, size=original_res, mode='trilinear', align_corners=True)

            # Get coarse output from deeplab model from 256 resolution input
            out_coarse = deeplab(image_1).view(1, 3, 256, 256, 256)
            bv_coarse = out_coarse[:, 2, :, :, :].view(1, 1, 256, 256, 256)
            bv_coarse_resize = F.interpolate(bv_coarse, size=original_res, mode='trilinear', align_corners=True)

            image_size_x, image_size_y, image_size_z = (int(s) for s in image_1_resize.shape[-3:])

            # presumably `loadbvcenter` returns the centre voxel of the binarized
            # coarse mask -- TODO confirm against its definition.
            x, y, z = loadbvcenter(binarize_output(bv_coarse_resize).view([1] + original_res))
            # NOTE(review): the centre is clamped to [half_size, box_size + half_size]
            # regardless of the image dimensions; confirm this matches training.
            x, y, z = np.clip([x, y, z], a_min=box_size - half_size, a_max=box_size + half_size)
            bounds = (
                (max(x - half_size, 0), min(x + half_size, image_size_x)),
                (max(y - half_size, 0), min(y + half_size, image_size_y)),
                (max(z - half_size, 0), min(z + half_size, image_size_z)),
            )

            bbox_bv = _crop_to_box(bv_coarse_resize, original_res, bounds)
            bbox_bv_label = _crop_to_box(bv_label_resize, original_res, bounds)
            bbox_image = _crop_to_box(image_1_resize, original_res, bounds)

            # Refinement input: coarse prediction + image, at three scales.
            bbox_concat = torch.cat([bbox_bv, bbox_image], dim=1)
            bbox_concat_2 = F.interpolate(bbox_concat, scale_factor=1/2, mode='trilinear', align_corners=True)
            bbox_concat_4 = F.interpolate(bbox_concat, scale_factor=1/4, mode='trilinear', align_corners=True)

            refine_out = refine_model(bbox_concat, bbox_concat_2, bbox_concat_4)

            loss = dice_loss(refine_out, bbox_bv_label)
            val_losses.append(loss)

            # Show every stage of the pipeline for samples that score poorly.
            if loss.item() > vis_loss_threshold:
                show_image_slice(image_1)
                show_image_slice(bv_label_resize)
                show_image_slice(bv_coarse)
                show_image_slice(bbox_image)
                show_image_slice(bbox_bv_label)
                show_image_slice(bbox_bv)
                show_image_slice(refine_out)

        # Mean over the samples actually processed in this batch.
        loss = sum(val_losses) / len(val_losses)
        print(loss.item())
        val_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError('validation_loader yielded no batches')

    # Average of the per-batch mean losses.
    outstr = 'bv loss = {0:.4f}'\
        .format(val_loss/num_batches) + '\n'
    print(outstr)