In [1]:
import sys
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_utility import *
from data_utils import *
from loss import *
from train import *
from deeplab_model.deeplab import *
from dense_vnet.DenseVNet import DenseVNet
from sync_batchnorm import convert_model
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
USE_GPU = True
NUM_WORKERS = 12
BATCH_SIZE = 2

dtype = torch.float32 
# define dtype, float is space efficient than double

if USE_GPU and torch.cuda.is_available():
    
    device = torch.device("cuda:0")
    
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    # magic flag that accelerate
    
    print('using GPU for training')
else:
    device = torch.device('cpu')
    print('using CPU for training')

using GPU for training


In [3]:
train_dataset = get_full_resolution_dataset(data_type = 'nii_train', 
                transform=transforms.Compose([
                random_affine(90, 15),
                random_filp(0.5)]))
# do data augumentation on train dataset

validation_dataset = get_full_resolution_dataset(data_type = 'nii_test', 
                transform=None)
# no data augumentation on validation dataset

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                    num_workers=NUM_WORKERS)
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True,
                    num_workers=NUM_WORKERS) # drop_last
# loaders come with auto batch division and multi-thread acceleration

In [4]:
from bv_refinement_network.RefinementModel import RefinementModel, RefinementModel_NoDown

if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    
refine_model = RefinementModel_NoDown(num_classes=1)
refine_model = nn.DataParallel(refine_model)
refine_model = convert_model(refine_model)
refine_model = refine_model.to(device, dtype)

optimizer = optim.Adam(refine_model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer)

deeplab = DeepLab(output_stride=16)
deeplab = nn.DataParallel(deeplab)
deeplab = convert_model(deeplab)

checkpoint = torch.load('../deeplab_dilated_save/2019-08-10 09:28:43.844872 epoch: 1160.pth') # best one

deeplab.load_state_dict(checkpoint['state_dict_1'])
deeplab = deeplab.to(device, dtype)

epoch = 0

Let's use 2 GPUs!


In [5]:
test_dictionary = train_dataset[33]

image_1 = test_dictionary['image1_data'].view(1, 1, 256, 256, 256)
label_1 = test_dictionary['image1_label'].view(1, 3, 256, 256, 256)
bv_label = label_1.narrow(1,2,1).to(device, dtype)
if get_dimensions(bv_label) == 4:
    bv_label.unsqueeze_(0)

image_1 = image_1.to(device=device, dtype=dtype) 
label_1 = label_1.to(device=device, dtype=dtype)

In [8]:
def get_bboxes(image, label, output, batchsize, box_size):
    image_final = torch.zeros((batchsize, 1, box_size, box_size, box_size)).to(device, dtype)
    label_final = torch.zeros((batchsize, 1, box_size, box_size, box_size)).to(device, dtype)
    output_final = torch.zeros((batchsize, 1, box_size, box_size, box_size)).to(device, dtype)
    half_size = int(box_size/2)
    image_size_x = int(image.shape[-3])
    image_size_y = int(image.shape[-2])
    image_size_z = int(image.shape[-1])
    for b in range(batchsize):
        out = output[b]
        x,y,z = loadbvcenter(binarize_output(out))
        x, y, z = np.clip([x, y, z], a_min=box_size-half_size, a_max=box_size+half_size)
        x1 = max(x-half_size, 0)
        x2 = min(x+half_size, image_size_x)
        y1 = max(y-half_size, 0)
        y2 = min(y+half_size, image_size_y)
        z1 = max(z-half_size, 0)
        z2 = min(z+half_size, image_size_z)
        image_final[b] = image[b, :, x1:x2, y1:y2, z1:z2]
        label_final[b] = label[b, :, x1:x2, y1:y2, z1:z2]
        output_final[b] = output[b, :, x1:x2, y1:y2, z1:z2]
    return image_final, label_final, output_final

In [None]:
epochs = 5000

record = open('train_bv_refine_4.txt','a+')

logger = {'train':[], 'validation_1': []}

min_val = 1

for e in tqdm(range(epoch + 1, epochs)):
# iter over epoches
    epoch_loss = 0
        
    for t, batch in enumerate(train_loader):
    # iter over the train mini batches
        refine_model.train()
        deeplab.eval()
        # Set the model flag to train
        # 1. enable dropout
        # 2. batchnorm behave differently in train and test
        #print(batch['image1_data'])
        image_1 = batch['image1_data'].to(device=device, dtype=dtype)
        image_1 = image_1.view(BATCH_SIZE,1,256,256,256)

        label_1 = batch['image1_label'].to(device=device, dtype=dtype)
        label_1 = label_1.view(BATCH_SIZE,3,256,256,256)

        bv_label = label_1[:, 2, :, :, :]
        bv_label = bv_label.view(BATCH_SIZE,1,256,256,256)

        #original_res = [a[minibatch].item() for a in batch['original_resolution']]

        # Get coarse output from deeplab model from 256 resolution input
        out_coarse = deeplab(image_1)
        out_coarse = out_coarse.view(BATCH_SIZE,3,256,256,256)

        bv_coarse = out_coarse[:, 2, :, :, :]
        bv_coarse = bv_coarse.view(BATCH_SIZE,1,256,256,256)

        #bbox_image, bbox_label, bbox_bv = get_localization_and_label(image_1, bv_label, bv_coarse, BATCH_SIZE)
        bbox_image, bbox_label, bbox_bv = get_bboxes(image_1, bv_label, bv_coarse, BATCH_SIZE, 192)
        
        bbox_concat = torch.cat([bbox_image, bbox_bv], dim=1)
        
        #show_image_slice(bv_label)
        #show_image_slice(bbox_image)
        #show_image_slice(bbox_label)
        #show_image_slice(bbox_bv)

        #print("bbox_concat", bbox_concat.shape)

        del out_coarse
        del image_1
        del bv_coarse
        del label_1
        del bv_label
        del bbox_image
        del bbox_bv

        torch.cuda.empty_cache()

        refine_out = refine_model(bbox_concat)
        # do the inference

        #print(refine_out.shape)
        #print(bbox_bv_label.shape)

        loss = dice_loss(refine_out, bbox_label)
        
        print(loss)
        epoch_loss += loss.item()
        # record minibatch loss to epoch loss
        
        optimizer.zero_grad()
        # set the model parameter gradient to zero
        
        loss.backward()
        # calculate the gradient wrt loss
        optimizer.step()
        # take a gradient descent step
        torch.cuda.empty_cache()
        
    outstr = 'Epoch {0} finished ! Training Loss: {1:.4f}'.format(e, epoch_loss/(t+1)) + '\n'
    
    logger['train'].append(epoch_loss/(t+1))
    
    print(outstr)
    record.write(outstr)
    record.flush()

    if e%2 == 0:
    # do validation every 5 epoches
        deeplab.eval()
        refine_model.eval()
        # set model flag to eval
        # 1. disable dropout
        # 2. batchnorm behave differs

        with torch.no_grad():
        # stop taking gradient
        
            #valloss_4 = 0
            #valloss_2 = 0
            valloss_1 = 0
            
            for v, vbatch in enumerate(validation_loader):
                image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
                image_1 = image_1.view(BATCH_SIZE,1,256,256,256)

                label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)
                label_1 = label_1.view(BATCH_SIZE,3,256,256,256)

                bv_label = label_1[:, 2, :, :, :]
                bv_label = bv_label.view(BATCH_SIZE,1,256,256,256)

                #original_res = [a[minibatch].item() for a in batch['original_resolution']]

                # Get coarse output from deeplab model from 256 resolution input
                out_coarse = deeplab(image_1)
                out_coarse = out_coarse.view(BATCH_SIZE,3,256,256,256)

                bv_coarse = out_coarse[:, 2, :, :, :]
                bv_coarse = bv_coarse.view(BATCH_SIZE,1,256,256,256)

                bbox_image, bbox_label, bbox_bv = get_bboxes(image_1, bv_label, bv_coarse, BATCH_SIZE, 192)

                bbox_concat = torch.cat([bbox_image, bbox_bv], dim=1)

                #show_image_slice(label)
                #show_image_slice(bbox_image)
                #show_image_slice(bbox_label)
                #show_image_slice(bbox_bv)

                #print("bbox_concat", bbox_concat.shape)
                #seg_image_concat = torch.cat([bv_coarse, image_1], dim=1)

                del out_coarse
                del image_1
                del bv_coarse
                del label_1
                del bv_label
                del bbox_image
                del bbox_bv

                torch.cuda.empty_cache()

                refine_out = refine_model(bbox_concat)
                        
                loss = dice_loss(refine_out, bbox_label)
                
                print(loss)
            
                # calculate loss
                valloss_1 += loss.item()
                
                torch.cuda.empty_cache()
                
            
            avg_val_loss = (valloss_1 / (v+1))
            outstr = '------- 1st valloss={0:.4f}'\
                .format(avg_val_loss) + '\n'
            
            logger['validation_1'].append(avg_val_loss)
            #scheduler.step(avg_val_loss)
            
            if avg_val_loss < min_val:
                save_1('refine_bv4_save', refine_model, optimizer, logger, e, scheduler)
            elif e % 10 == 0:
                save_1('refine_bv4_save', refine_model, optimizer, logger, e, scheduler)
            
            torch.cuda.empty_cache()
            
            print(outstr)
            record.write(outstr)
            record.flush()
    


record.close()

  0%|          | 0/4999 [00:00<?, ?it/s]

tensor(0.9783, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9802, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9390, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9361, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8938, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9564, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8442, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7983, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4777, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4832, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4700, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8812, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5073, device='cuda:0',

  0%|          | 1/4999 [15:21<1279:01:49, 921.27s/it]

Epoch 1 finished ! Training Loss: 0.5764

tensor(0.3663, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2571, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9931, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6506, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4499, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5550, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2404, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2919, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5746, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1933, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2475, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2767, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5897, device='cuda:0', grad_fn=<RsubB

  0%|          | 2/4999 [30:13<1266:45:29, 912.61s/it]

Checkpoint 2 saved !
------- 1st valloss=0.2405

tensor(0.3615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2290, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6077, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1851, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6274, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2699, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5904, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8666, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3872, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4038, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5936, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5771, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2403, device='cuda:0', grad_fn

  0%|          | 3/4999 [43:52<1227:33:56, 884.56s/it]

Epoch 3 finished ! Training Loss: 0.4310

tensor(0.6012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2138, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6734, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4084, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5501, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1849, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2277, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5356, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9974, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8388, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6021, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1197, device='cuda:0', grad_fn=<RsubB

  0%|          | 4/4999 [58:32<1225:17:22, 883.09s/it]

Checkpoint 4 saved !
------- 1st valloss=0.2118

tensor(0.5940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1640, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2286, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6463, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6424, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2422, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9922, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5858, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1817, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3159, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2023, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1463, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2527, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5309, device='cuda:0', grad_fn

  0%|          | 5/4999 [1:12:23<1203:32:41, 867.59s/it]

Epoch 5 finished ! Training Loss: 0.3988

tensor(0.3415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3237, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5976, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5918, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1721, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6084, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1197, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1513, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5845, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1583, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1295, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1335, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2542, device='cuda:0', grad_fn=<RsubB

  0%|          | 6/4999 [1:27:19<1214:54:52, 875.96s/it]

Checkpoint 6 saved !
------- 1st valloss=0.1783

tensor(0.1698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5911, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6069, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5590, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1538, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5851, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2573, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3785, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1658, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5628, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1722, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9964, device='cuda:0', grad_fn

  0%|          | 7/4999 [1:41:16<1198:36:21, 864.38s/it]

Epoch 7 finished ! Training Loss: 0.3897

tensor(0.5399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1573, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2442, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2766, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5416, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7321, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2293, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5659, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1393, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9866, device='cuda:0', grad_fn=<RsubB

  0%|          | 8/4999 [1:55:59<1205:50:53, 869.78s/it]

Checkpoint 8 saved !
------- 1st valloss=0.1744

tensor(0.2780, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5906, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5502, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6568, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1216, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4826, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2322, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6008, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1503, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9910, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1519, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6027, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2153, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9983, device='cuda:0', grad_fn

  0%|          | 9/4999 [2:09:40<1185:37:56, 855.37s/it]

Epoch 9 finished ! Training Loss: 0.4070

tensor(0.1971, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6822, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6092, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9844, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2050, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2060, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6033, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1557, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5723, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1514, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1617, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2051, device='cuda:0', grad_fn=<RsubB

  0%|          | 10/4999 [2:24:25<1197:23:29, 864.02s/it]

Checkpoint 10 saved !
------- 1st valloss=0.1706

tensor(0.6687, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1272, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1028, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1951, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0915, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6270, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1176, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1819, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1295, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5949, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1823, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2255, device='cuda:0', grad_f

  0%|          | 11/4999 [2:38:08<1180:06:09, 851.72s/it]

Epoch 11 finished ! Training Loss: 0.3880

tensor(0.1300, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1501, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2749, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9411, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1176, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1148, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5918, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5761, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1552, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2116, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2135, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3496, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1654, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5171, device='cuda:0', grad_fn=<Rsub

  0%|          | 12/4999 [2:52:48<1191:39:42, 860.23s/it]

Checkpoint 12 saved !
------- 1st valloss=0.1838

tensor(0.1371, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1518, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5670, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9807, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5432, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1402, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5123, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6794, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1247, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1409, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1160, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1467, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1845, device='cuda:0', grad_f

  0%|          | 13/4999 [3:06:42<1180:45:00, 852.53s/it]

Epoch 13 finished ! Training Loss: 0.3753

tensor(0.5707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1119, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1267, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5911, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1668, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1290, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2575, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1837, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6109, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2135, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2665, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1058, device='cuda:0', grad_fn=<Rsub

  0%|          | 14/4999 [3:21:33<1196:19:16, 863.94s/it]

Checkpoint 14 saved !
------- 1st valloss=0.1747

tensor(0.9911, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5537, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2298, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6503, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5644, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2944, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2479, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1172, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1424, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5596, device='cuda:0', grad_f

  0%|          | 15/4999 [3:35:30<1184:46:33, 855.78s/it]

Epoch 15 finished ! Training Loss: 0.3847

tensor(0.5768, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1594, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5663, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9814, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2605, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1879, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1408, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2072, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1964, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5386, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2041, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1439, device='cuda:0', grad_fn=<Rsub

  0%|          | 16/4999 [3:50:23<1200:06:48, 867.03s/it]

Checkpoint 16 saved !
------- 1st valloss=0.1624

tensor(0.5598, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2705, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5872, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1563, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1566, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1464, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6412, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3312, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2055, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5426, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5581, device='cuda:0', grad_f

  0%|          | 17/4999 [4:04:11<1183:44:21, 855.37s/it]

Epoch 17 finished ! Training Loss: 0.3798

tensor(0.6586, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1200, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1051, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1962, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1161, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5683, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1653, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1875, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1628, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1413, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7045, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1852, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1405, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5491, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6124, device='cuda:0', grad_fn=<Rsub

  0%|          | 18/4999 [4:18:55<1195:13:00, 863.84s/it]

Checkpoint 18 saved !
------- 1st valloss=0.1608

tensor(0.6383, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5932, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1433, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5580, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9976, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9837, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1974, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1220, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2031, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6184, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1646, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2196, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1420, device='cuda:0', grad_f

  0%|          | 19/4999 [4:32:39<1178:43:54, 852.10s/it]

Epoch 19 finished ! Training Loss: 0.3909

tensor(0.5695, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2074, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5744, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5541, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9303, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1230, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5974, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5746, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6053, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1660, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6873, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6464, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0916, device='cuda:0', grad_fn=<Rsub

  0%|          | 20/4999 [4:47:25<1192:26:43, 862.18s/it]

Checkpoint 20 saved !
------- 1st valloss=0.1624

tensor(0.6095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1107, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1674, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1240, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1798, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1585, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5530, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1441, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5780, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0933, device='cuda:0', grad_f

  0%|          | 21/4999 [5:01:10<1176:35:22, 850.89s/it]

Epoch 21 finished ! Training Loss: 0.3837

tensor(0.2134, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1639, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5837, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1249, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1026, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2473, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1226, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7612, device='cuda:0', grad_fn=<Rsub

  0%|          | 22/4999 [5:16:02<1193:30:46, 863.30s/it]

Checkpoint 22 saved !
------- 1st valloss=0.1572

tensor(0.1554, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6104, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1877, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5327, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0914, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6202, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6624, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5754, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5544, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5943, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5529, device='cuda:0', grad_f

  0%|          | 23/4999 [5:29:48<1177:54:41, 852.19s/it]

Epoch 23 finished ! Training Loss: 0.3705

tensor(0.1221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1273, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1816, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5621, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1910, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5527, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5714, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5651, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5877, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7663, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5599, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2168, device='cuda:0', grad_fn=<Rsub

  0%|          | 24/4999 [5:44:42<1194:57:25, 864.69s/it]

Checkpoint 24 saved !
------- 1st valloss=0.1595

tensor(0.5696, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1169, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2898, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5752, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3059, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5384, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0989, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2563, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1266, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1263, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2032, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2657, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6376, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1926, device='cuda:0', grad_f

  1%|          | 25/4999 [5:58:27<1178:22:45, 852.87s/it]

Epoch 25 finished ! Training Loss: 0.3786

tensor(0.1061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5852, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2437, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1597, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9719, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1844, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5954, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5559, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5872, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1027, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2144, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5642, device='cuda:0', grad_fn=<Rsub

  1%|          | 26/4999 [6:13:13<1191:40:46, 862.67s/it]

Checkpoint 26 saved !
------- 1st valloss=0.1536

tensor(0.5321, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1616, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4035, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6706, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1603, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2483, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1298, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6575, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2134, device='cuda:0', grad_f

  1%|          | 27/4999 [6:27:14<1182:24:29, 856.13s/it]

Epoch 27 finished ! Training Loss: 0.3757

tensor(0.1343, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1775, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1112, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6075, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6215, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1039, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5351, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5640, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1427, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1700, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1706, device='cuda:0', grad_fn=<Rsub

  1%|          | 28/4999 [6:41:54<1192:08:28, 863.35s/it]

Checkpoint 28 saved !
------- 1st valloss=0.1600

tensor(0.6305, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0848, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5766, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1400, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1271, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1544, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5845, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1468, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4957, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2977, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5695, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5662, device='cuda:0', grad_f

  1%|          | 29/4999 [6:55:50<1180:32:46, 855.12s/it]

Epoch 29 finished ! Training Loss: 0.3636

tensor(0.5597, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5714, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1774, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2461, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5941, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1647, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5927, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1286, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5408, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0883, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3083, device='cuda:0', grad_fn=<Rsub

  1%|          | 30/4999 [7:10:35<1192:57:38, 864.29s/it]

Checkpoint 30 saved !
------- 1st valloss=0.1583

tensor(0.5377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6406, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5838, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1124, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3352, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5779, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6141, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1067, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5467, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2540, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5671, device='cuda:0', grad_f

  1%|          | 31/4999 [7:24:32<1181:07:48, 855.89s/it]

Epoch 31 finished ! Training Loss: 0.3670

tensor(0.1176, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9790, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5783, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5338, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1367, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1793, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6807, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1908, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1072, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1672, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5547, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5785, device='cuda:0', grad_fn=<Rsub

  1%|          | 32/4999 [7:39:30<1198:37:22, 868.74s/it]

Checkpoint 32 saved !
------- 1st valloss=0.1810

tensor(0.1605, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1010, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5749, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1427, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2902, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1332, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1125, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0902, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1499, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8991, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1447, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5580, device='cuda:0', grad_f

  1%|          | 33/4999 [7:53:39<1190:06:46, 862.75s/it]

Epoch 33 finished ! Training Loss: 0.3599

tensor(0.1625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5381, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9609, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2245, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5905, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1478, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1984, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1850, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5732, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2320, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5651, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1297, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1518, device='cuda:0', grad_fn=<Rsub

  1%|          | 34/4999 [8:08:31<1201:54:31, 871.47s/it]

Checkpoint 34 saved !
------- 1st valloss=0.1572

tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2119, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5965, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1594, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1584, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1644, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1118, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2340, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0973, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1627, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1329, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0932, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1217, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2165, device='cuda:0', grad_f

  1%|          | 35/4999 [8:22:18<1183:22:51, 858.21s/it]

Epoch 35 finished ! Training Loss: 0.3620

tensor(0.0904, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5081, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7121, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1555, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5724, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1677, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1530, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2509, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1131, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5684, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5618, device='cuda:0', grad_fn=<Rsub

  1%|          | 36/4999 [8:37:12<1197:48:27, 868.85s/it]

Checkpoint 36 saved !
------- 1st valloss=0.1493

tensor(0.1643, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5624, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1822, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1558, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5889, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4463, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5486, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2797, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1097, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1588, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5240, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5869, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6066, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6048, device='cuda:0', grad_f

  1%|          | 37/4999 [8:50:59<1180:06:08, 856.18s/it]

Epoch 37 finished ! Training Loss: 0.3612

tensor(0.2017, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5771, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1090, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2274, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1282, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5850, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9895, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8959, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5528, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1239, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1508, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1394, device='cuda:0', grad_fn=<Rsub

  1%|          | 38/4999 [9:05:54<1195:56:31, 867.85s/it]

Checkpoint 38 saved !
------- 1st valloss=0.1929

tensor(0.5765, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2248, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2134, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1925, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5890, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1766, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1888, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1344, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6581, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1886, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5616, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1176, device='cuda:0', grad_f

  1%|          | 39/4999 [9:19:36<1176:51:26, 854.17s/it]

Epoch 39 finished ! Training Loss: 0.3673

tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5530, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1224, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1497, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6373, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1506, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1774, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1443, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5416, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1571, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5608, device='cuda:0', grad_fn=<Rsub

  1%|          | 40/4999 [9:34:21<1189:21:41, 863.42s/it]

Checkpoint 40 saved !
------- 1st valloss=0.1485

tensor(0.1244, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6837, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1508, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1705, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6126, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5236, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5860, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2172, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2168, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0956, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1317, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6400, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1915, device='cuda:0', grad_f

  1%|          | 41/4999 [9:48:20<1179:00:47, 856.08s/it]

Epoch 41 finished ! Training Loss: 0.3736

tensor(0.1766, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5525, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0894, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1990, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1220, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5381, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1184, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1647, device='cuda:0', grad_fn=<Rsub

  1%|          | 42/4999 [10:03:11<1193:09:55, 866.53s/it]

Checkpoint 42 saved !
------- 1st valloss=0.1442

tensor(0.5471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2332, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1156, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0812, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9903, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1375, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0945, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1382, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5605, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5459, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1785, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1610, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1256, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6283, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6326, device='cuda:0', grad_f

  1%|          | 43/4999 [10:17:02<1178:26:30, 856.01s/it]

Epoch 43 finished ! Training Loss: 0.3588

tensor(0.3133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1191, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2797, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1908, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1534, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0908, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3643, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5313, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5918, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1322, device='cuda:0', grad_fn=<Rsub

  1%|          | 44/4999 [10:31:41<1187:46:05, 862.96s/it]

Checkpoint 44 saved !
------- 1st valloss=0.1482

tensor(0.0994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6215, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1173, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1275, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1475, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1712, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2007, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1438, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1004, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2315, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1879, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5849, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0815, device='cuda:0', grad_f

  1%|          | 45/4999 [10:45:39<1176:55:37, 855.26s/it]

Epoch 45 finished ! Training Loss: 0.3727

tensor(0.0972, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0943, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3340, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5704, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1541, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5639, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5462, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1555, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2146, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1375, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2560, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1346, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1727, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1324, device='cuda:0', grad_fn=<Rsub

  1%|          | 46/4999 [11:00:21<1187:54:03, 863.40s/it]

Checkpoint 46 saved !
------- 1st valloss=0.1431

tensor(0.1625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1505, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1290, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1462, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5691, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1270, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2251, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3555, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1268, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1229, device='cuda:0', grad_f

  1%|          | 47/4999 [11:14:05<1171:10:40, 851.42s/it]

Epoch 47 finished ! Training Loss: 0.3618

tensor(0.1458, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2116, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1132, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0988, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2257, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0918, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1218, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5666, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0990, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1135, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1581, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2653, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5448, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5699, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1796, device='cuda:0', grad_fn=<Rsub

  1%|          | 48/4999 [11:28:58<1188:17:31, 864.04s/it]

Checkpoint 48 saved !
------- 1st valloss=0.1438

tensor(0.0953, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5490, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6916, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5523, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1338, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1204, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1194, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6062, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5538, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3516, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2354, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0852, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1177, device='cuda:0', grad_f

  1%|          | 49/4999 [11:42:53<1176:11:19, 855.41s/it]

Epoch 49 finished ! Training Loss: 0.3501

tensor(0.0978, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5574, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1083, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6010, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5797, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5787, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1383, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1153, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5629, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2105, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1352, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1463, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5861, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5425, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1571, device='cuda:0', grad_fn=<Rsub

  1%|          | 50/4999 [11:57:40<1188:56:17, 864.86s/it]

Checkpoint 50 saved !
------- 1st valloss=0.1452

tensor(0.5499, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5750, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1051, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1760, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0969, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5730, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1775, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6035, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1854, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2011, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1287, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2010, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1008, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1295, device='cuda:0', grad_f

  1%|          | 51/4999 [12:11:19<1169:41:13, 851.03s/it]

Epoch 51 finished ! Training Loss: 0.3868

tensor(0.0926, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5626, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1296, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6073, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5135, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6144, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5546, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1301, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6507, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1467, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1378, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2496, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1585, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5565, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1480, device='cuda:0', grad_fn=<Rsub

  1%|          | 52/4999 [12:26:16<1188:20:32, 864.77s/it]

Checkpoint 52 saved !
------- 1st valloss=0.1494

tensor(0.1427, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1070, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2336, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1907, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1280, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1584, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9876, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6052, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1255, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9720, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1003, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5466, device='cuda:0', grad_f

  1%|          | 53/4999 [12:40:15<1177:28:12, 857.03s/it]

Epoch 53 finished ! Training Loss: 0.3690

tensor(0.1538, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3369, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0870, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2039, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5470, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1330, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5535, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1388, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4860, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2229, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0969, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1352, device='cuda:0', grad_fn=<Rsub

  1%|          | 54/4999 [12:55:02<1189:34:06, 866.02s/it]

Checkpoint 54 saved !
------- 1st valloss=0.1446

tensor(0.1188, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5557, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5612, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2213, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2194, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5982, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5884, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1359, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6070, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1278, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5554, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5545, device='cuda:0', grad_f

  1%|          | 55/4999 [13:08:46<1172:03:34, 853.44s/it]

Epoch 55 finished ! Training Loss: 0.3657

tensor(0.5952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5609, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1009, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1909, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1922, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1191, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1437, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1674, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1168, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1116, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2506, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5408, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5436, device='cuda:0', grad_fn=<Rsub

  1%|          | 56/4999 [13:23:26<1182:54:34, 861.52s/it]

Checkpoint 56 saved !
------- 1st valloss=0.1429

tensor(0.1516, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9916, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1260, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5402, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2719, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1555, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5971, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0932, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8296, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0812, device='cuda:0', grad_f

  1%|          | 57/4999 [13:37:14<1168:53:37, 851.48s/it]

Epoch 57 finished ! Training Loss: 0.3553

tensor(0.1328, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0875, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5855, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1088, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1523, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5502, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5488, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5730, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5647, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1706, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1590, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1140, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1905, device='cuda:0', grad_fn=<Rsub

  1%|          | 58/4999 [13:51:57<1181:26:14, 860.79s/it]

Checkpoint 58 saved !
------- 1st valloss=0.1490

tensor(0.5768, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9730, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1248, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1373, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1240, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5472, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1171, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0980, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6210, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5402, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5298, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1236, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5754, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6295, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0852, device='cuda:0', grad_f

  1%|          | 59/4999 [14:05:39<1165:10:19, 849.11s/it]

Epoch 59 finished ! Training Loss: 0.3635

tensor(0.5389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6009, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1602, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5750, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2601, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1090, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5413, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5406, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1260, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3320, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1344, device='cuda:0', grad_fn=<Rsub

  1%|          | 60/4999 [14:20:27<1180:56:09, 860.78s/it]

Checkpoint 60 saved !
------- 1st valloss=0.1437

tensor(0.5470, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6030, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1263, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1556, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6036, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1092, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1585, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7526, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1001, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2213, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1466, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6254, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2417, device='cuda:0', grad_f

  1%|          | 61/4999 [14:34:10<1165:09:15, 849.44s/it]

Epoch 61 finished ! Training Loss: 0.3636

tensor(0.5565, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1062, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5460, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2233, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1695, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1268, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1070, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0971, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1773, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5757, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1317, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1682, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2862, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0970, device='cuda:0', grad_fn=<Rsub

  1%|          | 62/4999 [14:48:48<1176:51:48, 858.15s/it]

Checkpoint 62 saved !
------- 1st valloss=0.1493

tensor(0.1038, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0816, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1349, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2285, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1798, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5491, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1375, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1694, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1232, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5584, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1122, device='cuda:0', grad_f

  1%|▏         | 63/4999 [15:02:35<1163:53:43, 848.87s/it]

Epoch 63 finished ! Training Loss: 0.3645

tensor(0.6502, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1675, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2321, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1913, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1163, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5753, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1246, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0883, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1272, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1878, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1266, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2679, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5554, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3350, device='cuda:0', grad_fn=<Rsub

  1%|▏         | 64/4999 [15:17:22<1179:19:37, 860.30s/it]

Checkpoint 64 saved !
------- 1st valloss=0.1455

tensor(0.1716, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1243, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1243, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5881, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5328, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5315, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1687, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5333, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5206, device='cuda:0', grad_f

  1%|▏         | 65/4999 [15:31:29<1173:31:51, 856.24s/it]

Epoch 65 finished ! Training Loss: 0.3558

tensor(0.5681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1733, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5361, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9751, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0921, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2305, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1767, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5563, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5870, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1037, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1300, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1828, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7644, device='cuda:0', grad_fn=<Rsub

  1%|▏         | 66/4999 [15:46:17<1186:18:40, 865.75s/it]

Checkpoint 66 saved !
------- 1st valloss=0.1392

tensor(0.1267, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2649, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1390, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5760, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1924, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1173, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9988, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1108, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1942, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1124, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0943, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6252, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1170, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1991, device='cuda:0', grad_f

  1%|▏         | 67/4999 [16:00:02<1169:20:50, 853.54s/it]

Epoch 67 finished ! Training Loss: 0.3705

tensor(0.2698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5764, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5565, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1587, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1125, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0871, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5925, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1744, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5712, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5785, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5483, device='cuda:0', grad_fn=<Rsub

  1%|▏         | 68/4999 [16:15:02<1188:22:26, 867.60s/it]

Checkpoint 68 saved !
------- 1st valloss=0.1376

tensor(0.5349, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1077, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1052, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1520, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0851, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6821, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5409, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0977, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1020, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5492, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5593, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1720, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5753, device='cuda:0', grad_f

  1%|▏         | 69/4999 [16:28:54<1173:21:26, 856.81s/it]

Epoch 69 finished ! Training Loss: 0.3485

tensor(0.1590, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1413, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5366, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2706, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5388, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1240, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1038, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5838, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1033, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5477, device='cuda:0', grad_fn=<Rsub

  1%|▏         | 70/4999 [16:43:43<1186:26:06, 866.54s/it]

Checkpoint 70 saved !
------- 1st valloss=0.1421

tensor(0.5583, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2113, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1322, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2598, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1499, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1174, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5344, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1848, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1146, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_f

  1%|▏         | 71/4999 [16:57:36<1172:24:19, 856.47s/it]

Epoch 71 finished ! Training Loss: 0.3730

tensor(0.2738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1283, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5743, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5514, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5112, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1130, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5547, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2299, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1506, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1401, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1349, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1042, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1847, device='cuda:0', grad_fn=<Rsub

  1%|▏         | 72/4999 [17:12:19<1183:01:29, 864.40s/it]

Checkpoint 72 saved !
------- 1st valloss=0.1528

tensor(0.5717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1861, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1800, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5692, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0845, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1096, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5767, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5446, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1518, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2722, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5516, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1108, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2029, device='cuda:0', grad_f

  1%|▏         | 73/4999 [17:26:12<1169:54:02, 854.98s/it]

Epoch 73 finished ! Training Loss: 0.3499

tensor(0.0710, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5766, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1583, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1473, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5520, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0754, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5851, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2081, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1083, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2637, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5285, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6065, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2178, device='cuda:0', grad_fn=<Rsub

  1%|▏         | 74/4999 [17:41:09<1186:49:54, 867.53s/it]

Checkpoint 74 saved !
------- 1st valloss=0.1444

tensor(0.1566, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1936, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5683, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1214, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1736, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1847, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0955, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1353, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2266, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1362, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2007, device='cuda:0', grad_f

  2%|▏         | 75/4999 [17:55:14<1177:17:16, 860.73s/it]

Epoch 75 finished ! Training Loss: 0.3580

tensor(0.6069, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1315, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6031, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1120, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5461, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5759, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5737, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6149, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1346, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2333, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5540, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6156, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1003, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0978, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 76/4999 [18:10:09<1191:06:23, 871.01s/it]

Checkpoint 76 saved !
------- 1st valloss=0.1457

tensor(0.5972, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5543, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5537, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6558, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5834, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5480, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5790, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0895, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0936, device='cuda:0', grad_f

  2%|▏         | 77/4999 [18:23:55<1172:14:17, 857.39s/it]

Epoch 77 finished ! Training Loss: 0.3519

tensor(0.5411, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1480, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1344, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5716, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2500, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1817, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0925, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1216, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1313, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1147, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1303, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 78/4999 [18:38:40<1183:38:45, 865.91s/it]

Checkpoint 78 saved !
------- 1st valloss=0.1484

tensor(0.2324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5769, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5360, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5802, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5541, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6235, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1351, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9819, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5974, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5425, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0731, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2034, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0872, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1406, device='cuda:0', grad_f

  2%|▏         | 79/4999 [18:52:26<1166:46:45, 853.74s/it]

Epoch 79 finished ! Training Loss: 0.3614

tensor(0.5536, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1649, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1051, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1373, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1915, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1508, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0928, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1280, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5522, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1884, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1963, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1215, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1430, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 80/4999 [19:07:27<1186:13:16, 868.14s/it]

Checkpoint 80 saved !
------- 1st valloss=0.1363

tensor(0.6304, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6510, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1624, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3310, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1977, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5112, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1179, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0769, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6520, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1104, device='cuda:0', grad_f

  2%|▏         | 81/4999 [19:21:23<1172:48:49, 858.51s/it]

Epoch 81 finished ! Training Loss: 0.3594

tensor(0.5476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1282, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5158, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1314, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5560, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1402, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5744, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1675, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2062, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5465, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2018, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 82/4999 [19:36:23<1189:14:27, 870.71s/it]

Checkpoint 82 saved !
------- 1st valloss=0.1390

tensor(0.5869, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1080, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1308, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2613, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5842, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1269, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1164, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6239, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1113, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1765, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1409, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5376, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5734, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1251, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5957, device='cuda:0', grad_f

  2%|▏         | 83/4999 [19:50:15<1173:29:14, 859.35s/it]

Epoch 83 finished ! Training Loss: 0.3499

tensor(0.4672, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0947, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1365, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2338, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2185, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1622, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5431, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1336, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1550, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6617, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 84/4999 [20:04:58<1182:35:06, 866.19s/it]

Checkpoint 84 saved !
------- 1st valloss=0.1541

tensor(0.2096, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1049, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5897, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1051, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5754, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1458, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1356, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5473, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2346, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5734, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1085, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_f

  2%|▏         | 85/4999 [20:18:40<1164:36:57, 853.20s/it]

Epoch 85 finished ! Training Loss: 0.3581

tensor(0.1966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0861, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2431, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9888, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9887, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6077, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1152, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2550, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1742, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1186, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2088, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1572, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1593, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 86/4999 [20:33:41<1183:47:04, 867.42s/it]

Checkpoint 86 saved !
------- 1st valloss=0.1372

tensor(0.5579, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1193, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1468, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0992, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1894, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5588, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1677, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5121, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5715, device='cuda:0', grad_f

  2%|▏         | 87/4999 [20:47:24<1165:24:31, 854.13s/it]

Epoch 87 finished ! Training Loss: 0.3619

tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5762, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5138, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1228, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5725, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0908, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6082, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0964, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0867, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 88/4999 [21:02:06<1176:22:39, 862.34s/it]

Checkpoint 88 saved !
------- 1st valloss=0.1373

tensor(0.1400, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1176, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5053, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2373, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2916, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5673, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5516, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4982, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5842, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0854, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5738, device='cuda:0', grad_f

  2%|▏         | 89/4999 [21:15:55<1162:31:39, 852.36s/it]

Epoch 89 finished ! Training Loss: 0.3577

tensor(0.6215, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5432, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5668, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6151, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1553, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5787, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9693, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5743, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5584, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1101, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0799, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 90/4999 [21:30:51<1180:08:56, 865.46s/it]

Checkpoint 90 saved !
------- 1st valloss=0.1499

tensor(0.4855, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5728, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1184, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5355, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1225, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1352, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5826, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1373, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5393, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0710, device='cuda:0', grad_f

  2%|▏         | 91/4999 [21:44:50<1169:03:11, 857.50s/it]

Epoch 91 finished ! Training Loss: 0.3543

tensor(0.5326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4480, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0831, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5700, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0887, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5963, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2277, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5557, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5989, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6117, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5969, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5651, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2028, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5575, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1354, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6643, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 92/4999 [21:59:38<1181:23:28, 866.72s/it]

Checkpoint 92 saved !
------- 1st valloss=0.1405

tensor(0.0869, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5470, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0832, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8977, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1608, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1160, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1877, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1458, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5659, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0982, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2798, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1001, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4751, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1121, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1643, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5858, device='cuda:0', grad_f

  2%|▏         | 93/4999 [22:13:26<1165:20:38, 855.12s/it]

Epoch 93 finished ! Training Loss: 0.3406

tensor(0.1170, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6112, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1007, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1823, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1265, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6308, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0938, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1266, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5965, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6198, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1100, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1527, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 94/4999 [22:28:20<1180:48:03, 866.64s/it]

Checkpoint 94 saved !
------- 1st valloss=0.1514

tensor(0.1308, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1744, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1145, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5411, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1236, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6067, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1478, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1933, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1050, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3340, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6555, device='cuda:0', grad_f

  2%|▏         | 95/4999 [22:42:06<1164:04:43, 854.54s/it]

Epoch 95 finished ! Training Loss: 0.3546

tensor(0.0955, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6091, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3592, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6294, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0969, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6526, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0855, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1013, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2205, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5614, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1489, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0832, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1136, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<Rsub

  2%|▏         | 96/4999 [22:57:02<1180:56:52, 867.10s/it]

Checkpoint 96 saved !
------- 1st valloss=0.1532

tensor(0.0977, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2519, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2657, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0914, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1957, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5734, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1145, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5505, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8928, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1788, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2763, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1320, device='cuda:0', grad_f

  2%|▏         | 97/4999 [23:10:55<1166:42:28, 856.82s/it]

Epoch 97 finished ! Training Loss: 0.3470

tensor(0.6051, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5321, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1460, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1546, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5837, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5895, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1245, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5498, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1904, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2069, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0818, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5264, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0887, device='cuda:0', grad_fn=<Rsub

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(0.4295, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5624, device='cuda:0', grad_fn=<RsubBackward1>)
Epoch 100 finished ! Training Loss: 0.3520

tensor(0.1399, device='cuda:0')
tensor(0.1468, device='cuda:0')
tensor(0.1828, device='cuda:0')
tensor(0.1059, device='cuda:0')
tensor(0.1240, device='cuda:0')
tensor(0.0771, device='cuda:0')
tensor(0.1357, device='cuda:0')
tensor(0.1202, device='cuda:0')
tensor(0.0810, device='cuda:0')
tensor(0.2497, device='cuda:0')
tensor(0.1972, device='cuda:0')
tensor(0.1259, device='cuda:0')
tensor(0.1326, device='cuda:0')
tensor(0.1393, device='cuda:0')
tensor(0.1666, device='cuda:0')
tensor(0.2292, device='cuda:0')
tensor(0.1069, device='cuda:0')
tensor(0.1360, device='cuda:0')
tensor(0.1248, device='cuda:0')
tensor(0.1059, device='cuda:0')
tensor(0.2439, device='cuda:0')
tensor(0.1213, device='cuda:0')
tensor(0.1051, device='c

  2%|▏         | 100/4999 [23:54:48<1185:35:33, 871.23s/it]

Checkpoint 100 saved !
------- 1st valloss=0.1434

tensor(0.5546, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5367, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5850, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5490, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1924, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1440, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1091, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4871, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5510, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2027, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5808, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1627, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5525, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5538, device='cuda:0', grad_

  2%|▏         | 101/4999 [24:08:35<1167:06:21, 857.82s/it]

Epoch 101 finished ! Training Loss: 0.3564

tensor(0.1085, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1624, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1751, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5558, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0839, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6237, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5310, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1496, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2099, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5850, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5605, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2609, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1152, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5857, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 102/4999 [24:23:18<1177:17:23, 865.48s/it]

Checkpoint 102 saved !
------- 1st valloss=0.1374

tensor(0.1775, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5782, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2065, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2374, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1376, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5770, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1032, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5742, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2762, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1078, device='cuda:0', grad_

  2%|▏         | 103/4999 [24:37:02<1160:04:34, 853.00s/it]

Epoch 103 finished ! Training Loss: 0.3501

tensor(0.5455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0892, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5620, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1159, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1276, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1199, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0933, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0913, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0761, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 104/4999 [24:51:52<1174:49:50, 864.02s/it]

Checkpoint 104 saved !
------- 1st valloss=0.1647

tensor(0.1068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2525, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6177, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1015, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1224, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3229, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1161, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5529, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5475, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2270, device='cuda:0', grad_

  2%|▏         | 105/4999 [25:05:46<1162:34:46, 855.19s/it]

Epoch 105 finished ! Training Loss: 0.3572

tensor(0.1094, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0865, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1385, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5770, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0924, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2178, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1083, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6405, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5887, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5339, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2992, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3052, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5823, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1022, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5374, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 106/4999 [25:20:25<1171:58:31, 862.28s/it]

Checkpoint 106 saved !
------- 1st valloss=0.1397

tensor(0.2414, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1090, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6184, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0872, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2093, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0922, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1862, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5603, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5528, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1050, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6059, device='cuda:0', grad_

  2%|▏         | 107/4999 [25:34:10<1156:29:19, 851.05s/it]

Epoch 107 finished ! Training Loss: 0.3498

tensor(0.6085, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1991, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5863, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5705, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0674, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1694, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1431, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5383, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1260, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5630, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5433, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5572, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5432, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1821, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 108/4999 [25:49:12<1177:04:59, 866.39s/it]

Checkpoint 108 saved !
------- 1st valloss=0.1589

tensor(0.9738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5448, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4769, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1400, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6762, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1164, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1062, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9064, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5746, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1185, device='cuda:0', grad_

  2%|▏         | 109/4999 [26:03:13<1166:36:55, 858.86s/it]

Epoch 109 finished ! Training Loss: 0.3470

tensor(0.0802, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1802, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5638, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0861, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1440, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0802, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1574, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1080, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5534, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1374, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1580, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 110/4999 [26:18:11<1182:01:25, 870.38s/it]

Checkpoint 110 saved !
------- 1st valloss=0.1418

tensor(0.6404, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5782, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5398, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0732, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0887, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0764, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1245, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2004, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5559, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1078, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0719, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5498, device='cuda:0', grad_

  2%|▏         | 111/4999 [26:31:58<1164:26:20, 857.61s/it]

Epoch 111 finished ! Training Loss: 0.3400

tensor(0.1054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5574, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3077, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2147, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1676, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1105, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6243, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5962, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1278, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5035, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5577, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5770, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1087, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1660, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 112/4999 [26:46:49<1177:46:16, 867.60s/it]

Checkpoint 112 saved !
------- 1st valloss=0.1427

tensor(0.1203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1191, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1286, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1086, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5628, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5369, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1374, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1787, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5011, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1448, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1394, device='cuda:0', grad_

  2%|▏         | 113/4999 [27:00:42<1163:24:44, 857.20s/it]

Epoch 113 finished ! Training Loss: 0.3474

tensor(0.9890, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5588, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1200, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1270, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5528, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5382, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0843, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6737, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1677, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1530, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1126, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5535, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5547, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 114/4999 [27:15:26<1174:01:24, 865.20s/it]

Checkpoint 114 saved !
------- 1st valloss=0.1306

tensor(0.1332, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1172, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0954, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1833, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5327, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5461, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5131, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6395, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0916, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1050, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0879, device='cuda:0', grad_

  2%|▏         | 115/4999 [27:29:29<1164:42:40, 858.51s/it]

Epoch 115 finished ! Training Loss: 0.3395

tensor(0.5495, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1679, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6767, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6164, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1530, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5626, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1241, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1854, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2115, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5345, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5827, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5070, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0847, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1954, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 116/4999 [27:44:10<1173:29:11, 865.15s/it]

Checkpoint 116 saved !
------- 1st valloss=0.1505

tensor(0.1357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5468, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5715, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1298, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4948, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5559, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1543, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1192, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2365, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0887, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1176, device='cuda:0', grad_

  2%|▏         | 117/4999 [27:57:55<1157:12:36, 853.33s/it]

Epoch 117 finished ! Training Loss: 0.3527

tensor(0.1201, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5635, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5483, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1745, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4657, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2208, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1033, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1107, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1154, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1912, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1179, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 118/4999 [28:12:54<1175:13:58, 866.80s/it]

Checkpoint 118 saved !
------- 1st valloss=0.1797

tensor(0.5806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0973, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1843, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5509, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0924, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5409, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1371, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1173, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1085, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1898, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1668, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1038, device='cuda:0', grad_

  2%|▏         | 119/4999 [28:26:42<1159:21:38, 855.27s/it]

Epoch 119 finished ! Training Loss: 0.3458

tensor(0.6389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0881, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5406, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1632, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2204, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5344, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1643, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5699, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0881, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1201, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5911, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 120/4999 [28:41:29<1171:59:36, 864.76s/it]

Checkpoint 120 saved !
------- 1st valloss=0.1897

tensor(0.5708, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1135, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2931, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5868, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1117, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1611, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9468, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1396, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6185, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6388, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1065, device='cuda:0', grad_

  2%|▏         | 121/4999 [28:55:28<1161:29:13, 857.19s/it]

Epoch 121 finished ! Training Loss: 0.3562

tensor(0.5856, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5334, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1988, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5461, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1248, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5009, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2564, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1086, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6081, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2183, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6195, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5765, device='cuda:0', grad_fn=<RsubBac

  2%|▏         | 122/4999 [29:10:10<1171:03:00, 864.42s/it]

Checkpoint 122 saved !
------- 1st valloss=0.1409

tensor(0.5640, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1333, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5323, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1666, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5566, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9774, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0781, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1038, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0974, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6609, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1532, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5535, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5883, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1717, device='cuda:0', grad_fn=<

  2%|▏         | 123/4999 [29:24:04<1158:36:28, 855.41s/it]

Epoch 123 finished ! Training Loss: 0.3532

tensor(0.9669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1842, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0709, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4614, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1546, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5798, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1313, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1468, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1105, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1240, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2810, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1091, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6449, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5451, device='cuda:0', grad_fn=<Rsu

  2%|▏         | 124/4999 [29:38:42<1167:21:08, 862.04s/it]

Checkpoint 124 saved !
------- 1st valloss=0.1399

tensor(0.1452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5710, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1626, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1296, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6078, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1258, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5828, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5900, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4464, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5869, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7668, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2370, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1343, device='cuda:0', grad_fn=<

  3%|▎         | 125/4999 [29:52:29<1153:04:51, 851.68s/it]

Epoch 125 finished ! Training Loss: 0.3622

tensor(0.5421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1479, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6116, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5535, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0991, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9835, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0947, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1395, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5343, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4370, device='cuda:0', grad_fn=<RsubBackwar

  3%|▎         | 126/4999 [30:07:28<1172:02:53, 865.87s/it]

Checkpoint 126 saved !
------- 1st valloss=0.1411

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0801, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1001, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0973, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1869, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8887, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1295, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4725, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0927, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1483, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0859, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1966, device='cuda:0', grad_fn=<Rsub

  3%|▎         | 127/4999 [30:21:15<1156:09:43, 854.31s/it]

Epoch 127 finished ! Training Loss: 0.3470

tensor(0.1145, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1007, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1069, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6703, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6205, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2168, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1501, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1308, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5885, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1322, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1230, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5447, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 128/4999 [30:35:56<1166:39:08, 862.24s/it]

Checkpoint 128 saved !
------- 1st valloss=0.1798

tensor(0.1363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0640, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5741, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1092, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5789, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1635, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1060, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1348, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5662, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0763, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0809, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5467, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5586, device='cuda:0', grad_

  3%|▎         | 129/4999 [30:49:49<1154:20:20, 853.31s/it]

Epoch 129 finished ! Training Loss: 0.3654

tensor(0.0861, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0941, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1190, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5227, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1342, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5400, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5216, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5640, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1005, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5554, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1343, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 130/4999 [31:04:32<1166:12:40, 862.26s/it]

Checkpoint 130 saved !
------- 1st valloss=0.1367

tensor(0.1070, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5575, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1905, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6348, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5740, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1465, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0964, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3062, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5776, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2032, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1593, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5878, device='cuda:0', grad_fn=<

  3%|▎         | 131/4999 [31:18:24<1153:37:55, 853.14s/it]

Epoch 131 finished ! Training Loss: 0.3573

tensor(0.1345, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1726, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5559, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0973, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1112, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9897, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0925, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6355, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1914, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5694, device='cuda:0', grad_fn=<RsubBac

  3%|▎         | 132/4999 [31:33:21<1171:27:57, 866.50s/it]

Checkpoint 132 saved !
------- 1st valloss=0.1682

tensor(0.5397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1267, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1379, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5374, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0904, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1045, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5585, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5812, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1184, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9906, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1139, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1557, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1177, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5614, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7916, device='cuda:0', grad_

  3%|▎         | 133/4999 [31:47:14<1157:25:39, 856.30s/it]

Epoch 133 finished ! Training Loss: 0.3449

tensor(0.1271, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9822, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1568, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5944, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5383, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6050, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5340, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3350, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6869, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1352, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5727, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1154, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1092, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 134/4999 [32:01:56<1167:54:02, 864.22s/it]

Checkpoint 134 saved !
------- 1st valloss=0.1552

tensor(0.5504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0968, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5941, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6336, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5413, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6243, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1130, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5906, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5409, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5646, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1144, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1175, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5406, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5445, device='cuda:0', grad_

  3%|▎         | 135/4999 [32:15:40<1151:20:41, 852.15s/it]

Epoch 135 finished ! Training Loss: 0.3491

tensor(0.1456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5638, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5780, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1266, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1945, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0935, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1448, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0944, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1118, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5520, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5274, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1163, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5593, device='cuda:0', grad_fn=<RsubBac

  3%|▎         | 136/4999 [32:30:27<1165:04:20, 862.48s/it]

Checkpoint 136 saved !
------- 1st valloss=0.1412

tensor(0.0924, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1264, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1087, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2034, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0810, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5529, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1235, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1245, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1072, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5693, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6332, device='cuda:0', grad_

  3%|▎         | 137/4999 [32:44:14<1150:26:50, 851.83s/it]

Epoch 137 finished ! Training Loss: 0.3511

tensor(0.1690, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5817, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1174, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0706, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1480, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7075, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1501, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0913, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6328, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5380, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5728, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0727, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5519, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5783, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1920, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 138/4999 [32:59:13<1169:25:40, 866.06s/it]

Checkpoint 138 saved !
------- 1st valloss=0.1536

tensor(0.1799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5919, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6079, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0961, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1295, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1549, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1930, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5785, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5331, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0976, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5356, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5350, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1963, device='cuda:0', grad_fn=<

  3%|▎         | 139/4999 [33:13:08<1156:32:56, 856.70s/it]

Epoch 139 finished ! Training Loss: 0.3525

tensor(0.5796, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1402, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1175, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1412, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5320, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1620, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1726, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4647, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0894, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1447, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1597, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1253, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 140/4999 [33:27:57<1169:14:02, 866.28s/it]

Checkpoint 140 saved !
------- 1st valloss=0.1566

tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5425, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1851, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5568, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5565, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0954, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1644, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2149, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5521, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5908, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1439, device='cuda:0', grad_fn=<

  3%|▎         | 141/4999 [33:41:46<1154:08:03, 855.27s/it]

Epoch 141 finished ! Training Loss: 0.3547

tensor(0.1469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2248, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5648, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5356, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5703, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1284, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3750, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0854, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5367, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1091, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1152, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5673, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5884, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1704, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 142/4999 [33:56:43<1170:42:39, 867.73s/it]

Checkpoint 142 saved !
------- 1st valloss=0.1396

tensor(0.5680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2094, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2036, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5678, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5485, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9879, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1576, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0953, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0945, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1262, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5946, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1776, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5689, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1430, device='cuda:0', grad_

  3%|▎         | 143/4999 [34:10:39<1157:45:25, 858.30s/it]

Epoch 143 finished ! Training Loss: 0.3349

tensor(0.1865, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0713, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9889, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5339, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4737, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1303, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6080, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1660, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1771, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6159, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0980, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0897, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5552, device='cuda:0', grad_fn=<RsubBac

  3%|▎         | 144/4999 [34:25:39<1173:59:42, 870.52s/it]

Checkpoint 144 saved !
------- 1st valloss=0.1385

tensor(0.1740, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1314, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1254, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2008, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9975, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5814, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2753, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1834, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5642, device='cuda:0', grad_

  3%|▎         | 145/4999 [34:39:30<1158:06:07, 858.91s/it]

Epoch 145 finished ! Training Loss: 0.3418

tensor(0.5613, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5684, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1253, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0794, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2412, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1329, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5714, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5533, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6846, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1768, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1075, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 146/4999 [34:54:17<1169:17:03, 867.39s/it]

Checkpoint 146 saved !
------- 1st valloss=0.1326

tensor(0.1190, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2075, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5616, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1920, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1768, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0913, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9780, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1250, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5777, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1723, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6140, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1288, device='cuda:0', grad_fn=<Rsub

  3%|▎         | 147/4999 [35:08:10<1154:51:40, 856.86s/it]

Epoch 147 finished ! Training Loss: 0.3498

tensor(0.1155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1716, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5719, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1096, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1393, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1280, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0897, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1079, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6074, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1593, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3708, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1479, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5554, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 148/4999 [35:23:06<1170:37:05, 868.73s/it]

Checkpoint 148 saved !
------- 1st valloss=0.1450

tensor(0.0709, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1303, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0911, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5747, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5183, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1347, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1181, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6522, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5535, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6004, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5035, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1909, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2217, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5688, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5840, device='cuda:0', grad_fn=<

  3%|▎         | 149/4999 [35:36:59<1155:54:32, 857.99s/it]

Epoch 149 finished ! Training Loss: 0.3441

tensor(0.5561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5396, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6085, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1467, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0920, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1059, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1175, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1285, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5641, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0957, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 150/4999 [35:51:46<1167:17:58, 866.63s/it]

Checkpoint 150 saved !
------- 1st valloss=0.1413

tensor(0.5408, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3081, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1366, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0783, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1052, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5851, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1459, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1007, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1025, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0828, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5742, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1646, device='cuda:0', grad_

  3%|▎         | 151/4999 [36:05:43<1155:06:06, 857.75s/it]

Epoch 151 finished ! Training Loss: 0.3439

tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5972, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9665, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0862, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4789, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1654, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1547, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6130, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1072, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1188, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1590, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0857, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1542, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5583, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 152/4999 [36:20:29<1166:17:37, 866.24s/it]

Checkpoint 152 saved !
------- 1st valloss=0.1566

tensor(0.5604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1338, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1690, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5617, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5491, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2454, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1637, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5078, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5646, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5742, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5574, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1146, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3296, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1700, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0926, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0943, device='cuda:0', grad_

  3%|▎         | 153/4999 [36:34:27<1154:32:07, 857.68s/it]

Epoch 153 finished ! Training Loss: 0.3470

tensor(0.1548, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5674, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2059, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5616, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5623, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6190, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6014, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5253, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5936, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1206, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2334, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5737, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 154/4999 [36:49:23<1169:48:04, 869.20s/it]

Checkpoint 154 saved !
------- 1st valloss=0.1801

tensor(0.5235, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7877, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0759, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9764, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5679, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1158, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1671, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1097, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5288, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1123, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1700, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5041, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9536, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7114, device='cuda:0', grad_

  3%|▎         | 155/4999 [37:03:17<1155:15:13, 858.57s/it]

Epoch 155 finished ! Training Loss: 0.3439

tensor(0.1163, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1018, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1245, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6465, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1383, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1563, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5559, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5630, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4715, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5277, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5403, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6787, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 156/4999 [37:18:00<1165:02:31, 866.02s/it]

Checkpoint 156 saved !
------- 1st valloss=0.1489

tensor(0.1958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2272, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6227, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9758, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0961, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5330, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1111, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1163, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5833, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1919, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1195, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0870, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1212, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1944, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5410, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0581, device='cuda:0', grad_fn=<

  3%|▎         | 157/4999 [37:32:07<1157:12:03, 860.37s/it]

Epoch 157 finished ! Training Loss: 0.3423

tensor(0.0956, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2469, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5647, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5856, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1072, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1256, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1838, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6013, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1371, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1783, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0903, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5670, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 158/4999 [37:46:56<1168:18:31, 868.81s/it]

Checkpoint 158 saved !
------- 1st valloss=0.1413

tensor(0.1515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1289, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1156, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5705, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0873, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1892, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1462, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5386, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1227, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0933, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1482, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1082, device='cuda:0', grad_

  3%|▎         | 159/4999 [38:00:35<1148:10:22, 854.01s/it]

Epoch 159 finished ! Training Loss: 0.3504

tensor(0.5515, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9921, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5547, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5428, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9366, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1487, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0979, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1281, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1103, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0983, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1174, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5624, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 160/4999 [38:15:20<1160:11:31, 863.13s/it]

Checkpoint 160 saved !
------- 1st valloss=0.1505

tensor(0.1754, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6258, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5135, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1321, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5808, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5852, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5379, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4417, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1118, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4662, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5548, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0830, device='cuda:0', grad_

  3%|▎         | 161/4999 [38:29:03<1143:50:46, 851.15s/it]

Epoch 161 finished ! Training Loss: 0.3440

tensor(0.4522, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0917, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1010, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1338, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5805, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1171, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5447, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0812, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1139, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5737, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1491, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 162/4999 [38:43:45<1156:14:39, 860.55s/it]

Checkpoint 162 saved !
------- 1st valloss=0.1394

tensor(0.5569, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5501, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1227, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1332, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2220, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1116, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5356, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2164, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6669, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5697, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1188, device='cuda:0', grad_fn=<

  3%|▎         | 163/4999 [38:57:27<1140:13:26, 848.80s/it]

Epoch 163 finished ! Training Loss: 0.3435

tensor(0.0820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1298, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0968, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5428, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1605, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0914, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5543, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1780, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5416, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5723, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1196, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5629, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1151, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5951, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5359, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 164/4999 [39:12:22<1158:36:02, 862.66s/it]

Checkpoint 164 saved !
------- 1st valloss=0.1338

tensor(0.1745, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0937, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9689, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1335, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1112, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6501, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1037, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1400, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5875, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1722, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1069, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1285, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1178, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0774, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1163, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<

  3%|▎         | 165/4999 [39:26:12<1145:20:43, 852.97s/it]

Epoch 165 finished ! Training Loss: 0.3543

tensor(0.1042, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6072, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0926, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5823, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5398, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5796, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1040, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1334, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1105, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0988, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1749, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6123, device='cuda:0', grad_fn=<RsubBac

  3%|▎         | 166/4999 [39:41:04<1160:58:56, 864.79s/it]

Checkpoint 166 saved !
------- 1st valloss=0.1565

tensor(0.2759, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0988, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0797, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1353, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5360, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6641, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1361, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0833, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6863, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5721, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0750, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0927, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5637, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1412, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<

  3%|▎         | 167/4999 [39:55:06<1151:18:21, 857.76s/it]

Epoch 167 finished ! Training Loss: 0.3426

tensor(0.1067, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4863, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0751, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1109, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5831, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1852, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1310, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5678, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5510, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1769, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1197, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 168/4999 [40:10:00<1165:40:11, 868.64s/it]

Checkpoint 168 saved !
------- 1st valloss=0.1320

tensor(0.0760, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1665, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1337, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1254, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0961, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8835, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0903, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1430, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1165, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1315, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1001, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2722, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1384, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0861, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2343, device='cuda:0', grad_

  3%|▎         | 169/4999 [40:23:50<1149:55:37, 857.09s/it]

Epoch 169 finished ! Training Loss: 0.3476

tensor(0.1634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1289, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5667, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5405, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1128, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0793, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2151, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1428, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1589, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2042, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1512, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1568, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4888, device='cuda:0', grad_fn=<RsubBac

  3%|▎         | 170/4999 [40:38:44<1164:40:47, 868.26s/it]

Checkpoint 170 saved !
------- 1st valloss=0.1643

tensor(0.0829, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2945, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5925, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1788, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5728, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1027, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1600, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0881, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1088, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1054, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5818, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1343, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5370, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3204, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0826, device='cuda:0', grad_

  3%|▎         | 171/4999 [40:52:32<1148:18:12, 856.23s/it]

Epoch 171 finished ! Training Loss: 0.3386

tensor(0.1077, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0921, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1590, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1057, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1532, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5464, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1043, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1334, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1194, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1287, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5475, device='cuda:0', grad_fn=<Rsu

  3%|▎         | 172/4999 [41:07:29<1164:07:43, 868.21s/it]

Checkpoint 172 saved !
------- 1st valloss=0.1387

tensor(0.1143, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5567, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2299, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2049, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1100, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1442, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5530, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5708, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9540, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2040, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5542, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3107, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0670, device='cuda:0', grad_

  3%|▎         | 173/4999 [41:21:16<1147:21:34, 855.88s/it]

Epoch 173 finished ! Training Loss: 0.3455

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5393, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2178, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6741, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8898, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1029, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1986, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9975, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1667, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1118, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1692, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5734, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1104, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2505, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1147, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0969, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1420, device='cuda:0', grad_fn=<RsubBac

  3%|▎         | 174/4999 [41:36:10<1162:29:26, 867.35s/it]

Checkpoint 174 saved !
------- 1st valloss=0.1377

tensor(0.1544, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5556, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5637, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5483, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1217, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5910, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5432, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0909, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1172, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1543, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5362, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5516, device='cuda:0', grad_

  4%|▎         | 175/4999 [41:50:08<1150:34:29, 858.64s/it]

Epoch 175 finished ! Training Loss: 0.3533

tensor(0.1812, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1252, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0888, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1195, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6113, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0914, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0706, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5533, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5891, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1768, device='cuda:0', grad_fn=<Rsu

  4%|▎         | 176/4999 [42:05:10<1167:42:02, 871.60s/it]

Checkpoint 176 saved !
------- 1st valloss=0.1369

tensor(0.1577, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2245, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5558, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0917, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1084, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1108, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1244, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6156, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2514, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9946, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0942, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5689, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4705, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0700, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5525, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1875, device='cuda:0', grad_

  4%|▎         | 177/4999 [42:19:06<1153:20:01, 861.05s/it]

Epoch 177 finished ! Training Loss: 0.3485

tensor(0.1558, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1552, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4350, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1058, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2385, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0836, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1224, device='cuda:0', grad_fn=<Rsu

  4%|▎         | 178/4999 [42:33:48<1161:20:22, 867.21s/it]

Checkpoint 178 saved !
------- 1st valloss=0.1472

tensor(0.6188, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1407, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1017, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1053, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5555, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1564, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1242, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1378, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5230, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1130, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5576, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1505, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1220, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1170, device='cuda:0', grad_

  4%|▎         | 179/4999 [42:47:42<1147:41:23, 857.20s/it]

Epoch 179 finished ! Training Loss: 0.3545

tensor(0.9824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6626, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0638, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6032, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1495, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2113, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1347, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5411, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1607, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5343, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1853, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1345, device='cuda:0', grad_fn=<Rsu

  4%|▎         | 180/4999 [43:02:27<1158:48:07, 865.67s/it]

Checkpoint 180 saved !
------- 1st valloss=0.1322

tensor(0.1493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1028, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5794, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1180, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5665, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5165, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5361, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5204, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5352, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5743, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3141, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1130, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5212, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0935, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5450, device='cuda:0', grad_

  4%|▎         | 181/4999 [43:16:31<1149:51:59, 859.18s/it]

Epoch 181 finished ! Training Loss: 0.3366

tensor(0.1422, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5434, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5413, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1308, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1723, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6010, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5413, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5478, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1343, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1226, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1137, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1687, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5878, device='cuda:0', grad_fn=<Rsu

  4%|▎         | 182/4999 [43:31:36<1167:55:47, 872.86s/it]

Checkpoint 182 saved !
------- 1st valloss=0.1370

tensor(0.2420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7178, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1347, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0708, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3925, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1060, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5354, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6493, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5366, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1611, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9366, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5685, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1475, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0731, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1060, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1318, device='cuda:0', grad_

  4%|▎         | 183/4999 [43:45:33<1153:21:04, 862.14s/it]

Epoch 183 finished ! Training Loss: 0.3619

tensor(0.1197, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5638, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1191, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5359, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1109, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9934, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6472, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5367, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2208, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6762, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5431, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1710, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5526, device='cuda:0', grad_fn=<Rsu

  4%|▎         | 184/4999 [44:00:29<1166:37:56, 872.25s/it]

Checkpoint 184 saved !
------- 1st valloss=0.1352

tensor(0.1209, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1171, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5328, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5370, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1299, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5509, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1927, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5294, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4035, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5358, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1225, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1046, device='cuda:0', grad_

  4%|▎         | 185/4999 [44:14:27<1152:38:54, 861.97s/it]

Epoch 185 finished ! Training Loss: 0.3474

tensor(0.9744, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2126, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5522, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5011, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1329, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1857, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1362, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5603, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2110, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4924, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1320, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5465, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1065, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1390, device='cuda:0', grad_fn=<Rsu

  4%|▎         | 186/4999 [44:29:12<1161:50:57, 869.03s/it]

Checkpoint 186 saved !
------- 1st valloss=0.1346

tensor(0.5423, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5379, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5361, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5581, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1268, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5992, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5417, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0890, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5396, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5516, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5655, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5390, device='cuda:0', grad_

  4%|▎         | 187/4999 [44:42:57<1143:52:00, 855.76s/it]

Epoch 187 finished ! Training Loss: 0.3335

tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5601, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1051, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1683, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2274, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2215, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0839, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5568, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5693, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2650, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1275, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0959, device='cuda:0', grad_fn=<RsubBac

  4%|▍         | 188/4999 [44:57:55<1160:30:29, 868.39s/it]

Checkpoint 188 saved !
------- 1st valloss=0.1400

tensor(0.5368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1778, device='cuda:0', grad_fn=<RsubBackward1>)


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(0.0885, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0889, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1952, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5572, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5317, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1111, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5712, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0845, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1023, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1517, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1342, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5468, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6123, device='cuda:0', gra

  4%|▍         | 210/4999 [50:13:41<1156:19:54, 869.24s/it]

Checkpoint 210 saved !
------- 1st valloss=0.1598

tensor(0.4857, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1118, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1876, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1121, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5316, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5464, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6075, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0871, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1911, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0905, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1379, device='cuda:0', grad_

  4%|▍         | 211/4999 [50:27:34<1141:21:17, 858.16s/it]

Epoch 211 finished ! Training Loss: 0.3309

tensor(0.1377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5658, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6361, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5389, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1597, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4581, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2063, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0998, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0816, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1318, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5463, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0894, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5593, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1009, device='cuda:0', grad_fn=<Rsu

  4%|▍         | 212/4999 [50:42:38<1159:27:43, 871.96s/it]

Checkpoint 212 saved !
------- 1st valloss=0.1333

tensor(0.1673, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5758, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1257, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1004, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9920, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5438, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0935, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5309, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1177, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1627, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5370, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1073, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0833, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3040, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6363, device='cuda:0', grad_

  4%|▍         | 213/4999 [50:56:19<1139:02:51, 856.78s/it]

Epoch 213 finished ! Training Loss: 0.3348

tensor(0.1082, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0698, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1404, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5355, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2727, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5448, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5526, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5405, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9965, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1087, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0765, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5746, device='cuda:0', grad_fn=<Rsu

  4%|▍         | 214/4999 [51:11:01<1148:50:10, 864.33s/it]

Checkpoint 214 saved !
------- 1st valloss=0.1589

tensor(0.0844, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5401, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1246, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9948, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1466, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5836, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1167, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5846, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0936, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5392, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1235, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1515, device='cuda:0', grad_

  4%|▍         | 215/4999 [51:25:02<1139:05:03, 857.17s/it]

Epoch 215 finished ! Training Loss: 0.3595

tensor(0.1748, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1084, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1082, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1088, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0877, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5876, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1770, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1785, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5857, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5274, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9609, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5524, device='cuda:0', grad_fn=<Rsu

  4%|▍         | 216/4999 [51:39:50<1151:21:36, 866.59s/it]

Checkpoint 216 saved !
------- 1st valloss=0.1709

tensor(0.5682, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1047, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1525, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5556, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5216, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1888, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1533, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5390, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0665, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5300, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5486, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5253, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5661, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1349, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1285, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1057, device='cuda:0', grad_

  4%|▍         | 217/4999 [51:53:40<1136:15:44, 855.40s/it]

Epoch 217 finished ! Training Loss: 0.3264

tensor(0.1283, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1367, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6438, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1392, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6011, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1023, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1382, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1576, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5475, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1201, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0919, device='cuda:0', grad_fn=<Rsu

  4%|▍         | 218/4999 [52:08:23<1147:18:55, 863.91s/it]

Checkpoint 218 saved !
------- 1st valloss=0.1336

tensor(0.9333, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1614, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5936, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0926, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5362, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0811, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2146, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0959, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1057, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4663, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1237, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5989, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1636, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1120, device='cuda:0', grad_

  4%|▍         | 219/4999 [52:22:13<1133:20:33, 853.56s/it]

Epoch 219 finished ! Training Loss: 0.3314

tensor(0.5610, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1202, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5212, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5400, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5161, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9988, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2018, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1226, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0801, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5445, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1015, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5399, device='cuda:0', grad_fn=<Rsu

  4%|▍         | 220/4999 [52:36:55<1144:32:04, 862.17s/it]

Checkpoint 220 saved !
------- 1st valloss=0.1445

tensor(0.5789, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2470, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5265, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1204, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1523, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9125, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5424, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5644, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5955, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5754, device='cuda:0', grad_

  4%|▍         | 221/4999 [52:50:53<1134:34:48, 854.85s/it]

Epoch 221 finished ! Training Loss: 0.3659

tensor(0.1388, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0983, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9805, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1292, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1426, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9900, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2885, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5424, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5746, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1614, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1677, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0900, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6125, device='cuda:0', grad_fn=<Rsu

  4%|▍         | 222/4999 [53:05:42<1147:57:41, 865.12s/it]

Checkpoint 222 saved !
------- 1st valloss=0.1416

tensor(0.1404, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5291, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5250, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0955, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1621, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6121, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1631, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5467, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1663, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1071, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5351, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5423, device='cuda:0', grad_

  4%|▍         | 223/4999 [53:19:34<1134:32:02, 855.18s/it]

Epoch 223 finished ! Training Loss: 0.3414

tensor(0.1262, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2109, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1087, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9978, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5333, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5304, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1057, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2556, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1379, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5563, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5477, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1359, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1963, device='cuda:0', grad_fn=<RsubBac

  4%|▍         | 224/4999 [53:34:23<1147:38:24, 865.24s/it]

Checkpoint 224 saved !
------- 1st valloss=0.1353

tensor(0.1085, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6036, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0871, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1508, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1205, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5365, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5514, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1247, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1626, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0869, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5649, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2027, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1140, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5551, device='cuda:0', grad_

  5%|▍         | 225/4999 [53:48:11<1132:49:25, 854.25s/it]

Epoch 225 finished ! Training Loss: 0.3407

tensor(0.5825, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1106, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1052, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5311, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5704, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5738, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5471, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1141, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1006, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9963, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1408, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1018, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0989, device='cuda:0', grad_fn=<Rsu

  5%|▍         | 226/4999 [54:02:56<1144:54:02, 863.53s/it]

Checkpoint 226 saved !
------- 1st valloss=0.1334

tensor(0.5194, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1145, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1668, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1942, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5222, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5180, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0703, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0926, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2841, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4975, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2231, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1606, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4706, device='cuda:0', grad_

  5%|▍         | 227/4999 [54:16:54<1134:12:57, 855.65s/it]

Epoch 227 finished ! Training Loss: 0.3350

tensor(0.0958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1382, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6192, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0844, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0932, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1551, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1523, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5061, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5447, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6064, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1532, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5490, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9982, device='cuda:0', grad_fn=<Rsu

  5%|▍         | 228/4999 [54:31:51<1150:44:34, 868.30s/it]

Checkpoint 228 saved !
------- 1st valloss=0.1602

tensor(0.0792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0989, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1016, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1028, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5454, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0662, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0771, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1302, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4972, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6276, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1005, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5818, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4870, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1437, device='cuda:0', grad_

  5%|▍         | 229/4999 [54:45:33<1132:01:49, 854.36s/it]

Epoch 229 finished ! Training Loss: 0.3423

tensor(0.1220, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0838, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1213, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0725, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1150, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1462, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1240, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0907, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4847, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1374, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5613, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5394, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1159, device='cuda:0', grad_fn=<RsubBac

  5%|▍         | 230/4999 [55:00:22<1145:22:57, 864.62s/it]

Checkpoint 230 saved !
------- 1st valloss=0.1711

tensor(0.0957, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5004, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1142, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4434, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4816, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1165, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1047, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1602, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1565, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0940, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5736, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1603, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1206, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1241, device='cuda:0', grad_

  5%|▍         | 231/4999 [55:14:09<1130:22:27, 853.47s/it]

Epoch 231 finished ! Training Loss: 0.3505

tensor(0.5732, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1601, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5989, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5512, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1108, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5876, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0962, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5421, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1280, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5276, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6693, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5956, device='cuda:0', grad_fn=<Rsu

  5%|▍         | 232/4999 [55:29:04<1146:39:06, 865.94s/it]

Checkpoint 232 saved !
------- 1st valloss=0.1483

tensor(0.0733, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2484, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1093, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1114, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4648, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5396, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5563, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5511, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5730, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1060, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1825, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5607, device='cuda:0', grad_

  5%|▍         | 233/4999 [55:42:47<1129:12:57, 852.95s/it]

Epoch 233 finished ! Training Loss: 0.3476

tensor(0.6067, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1088, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1035, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1438, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1892, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2041, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1786, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1921, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2092, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9377, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1655, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1443, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5622, device='cuda:0', grad_fn=<Rsu

  5%|▍         | 234/4999 [55:57:50<1148:57:43, 868.05s/it]

Checkpoint 234 saved !
------- 1st valloss=0.1698

tensor(0.0895, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5230, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1331, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0897, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1020, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5543, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1214, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0929, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2279, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5082, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4963, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1185, device='cuda:0', grad_

  5%|▍         | 235/4999 [56:11:42<1134:30:26, 857.31s/it]

Epoch 235 finished ! Training Loss: 0.3280

tensor(0.1002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1005, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1075, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0853, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5438, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1699, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1085, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5529, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1660, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4929, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0914, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5498, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7215, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1356, device='cuda:0', grad_fn=<RsubBac

  5%|▍         | 236/4999 [56:26:26<1144:40:48, 865.18s/it]

Checkpoint 236 saved !
------- 1st valloss=0.1320

tensor(0.5823, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0710, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2663, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5322, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2201, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1030, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5046, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5530, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1839, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1142, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0964, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4945, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6218, device='cuda:0', grad_

  5%|▍         | 237/4999 [56:40:25<1133:55:08, 857.23s/it]

Epoch 237 finished ! Training Loss: 0.3473

tensor(0.5569, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5387, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2066, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9960, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1291, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5297, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5100, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1495, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1249, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5868, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5853, device='cuda:0', grad_fn=<RsubBac

  5%|▍         | 238/4999 [56:55:21<1149:00:55, 868.82s/it]

Checkpoint 238 saved !
------- 1st valloss=0.1333

tensor(0.1319, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5570, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1073, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4959, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2042, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5386, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1272, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0948, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1886, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0987, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1763, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1019, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5418, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5179, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2343, device='cuda:0', grad_

  5%|▍         | 239/4999 [57:09:11<1133:33:42, 857.32s/it]

Epoch 239 finished ! Training Loss: 0.3236

tensor(0.1883, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5545, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5092, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5340, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5331, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5365, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1241, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1214, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5765, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5522, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5436, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5188, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2911, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5186, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1231, device='cuda:0', grad_fn=<Rsu

  5%|▍         | 240/4999 [57:24:00<1145:43:42, 866.70s/it]

Checkpoint 240 saved !
------- 1st valloss=0.1418

tensor(0.0993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1740, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5604, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1140, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5435, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4982, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1047, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5358, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5254, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1117, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1459, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1099, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2734, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1592, device='cuda:0', grad_

  5%|▍         | 241/4999 [57:37:55<1132:59:18, 857.24s/it]

Epoch 241 finished ! Training Loss: 0.3289

tensor(0.1437, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5042, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5332, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1003, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1455, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9252, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0903, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2042, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5878, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4988, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0912, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1534, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0965, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1004, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1163, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1071, device='cuda:0', grad_fn=<Rsu

  5%|▍         | 242/4999 [57:52:50<1147:51:15, 868.67s/it]

Checkpoint 242 saved !
------- 1st valloss=0.1533

tensor(0.2151, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5286, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4744, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1049, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5682, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5480, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1332, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1157, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1257, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0779, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5459, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4735, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6065, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5357, device='cuda:0', grad_

  5%|▍         | 243/4999 [58:06:38<1131:36:05, 856.55s/it]

Epoch 243 finished ! Training Loss: 0.3313

tensor(0.1165, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1821, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6265, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0841, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0717, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1417, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4907, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1239, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5504, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3578, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0739, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0831, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4652, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1560, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6119, device='cuda:0', grad_fn=<Rsu

  5%|▍         | 244/4999 [58:21:30<1145:19:53, 867.13s/it]

Checkpoint 244 saved !
------- 1st valloss=0.1405

tensor(0.1087, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6057, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0686, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5451, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5245, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1532, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1151, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5495, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5383, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0922, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5396, device='cuda:0', grad_

  5%|▍         | 245/4999 [58:35:19<1129:53:44, 855.62s/it]

Epoch 245 finished ! Training Loss: 0.3262

tensor(0.4649, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0790, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0781, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5181, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1185, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1041, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5011, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5287, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5031, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1571, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5278, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0694, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1580, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1208, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5446, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1617, device='cuda:0', grad_fn=<RsubBac

  5%|▍         | 246/4999 [58:50:02<1140:33:18, 863.88s/it]

Checkpoint 246 saved !
------- 1st valloss=0.1414

tensor(0.1573, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0935, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1053, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1494, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0951, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5126, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5362, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5026, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2291, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1747, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5425, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1524, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5007, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1110, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1196, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6409, device='cuda:0', grad_

  5%|▍         | 247/4999 [59:03:39<1121:49:14, 849.86s/it]

Epoch 247 finished ! Training Loss: 0.3293

tensor(0.5665, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5278, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5158, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1205, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5181, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5189, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1079, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5729, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4716, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0984, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1175, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5645, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5672, device='cuda:0', grad_fn=<RsubBac

  5%|▍         | 248/4999 [59:18:29<1137:26:11, 861.88s/it]

Checkpoint 248 saved !
------- 1st valloss=0.1350

tensor(0.1664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0910, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5587, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1259, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5454, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4874, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5224, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5402, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0950, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0740, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0968, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1061, device='cuda:0', grad_

  5%|▍         | 249/4999 [59:32:19<1124:40:35, 852.39s/it]

Epoch 249 finished ! Training Loss: 0.3447

tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9532, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0970, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1331, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1009, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1586, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1788, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4044, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0767, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1456, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0707, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1243, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5892, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9997, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0778, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0738, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 250/4999 [59:47:11<1139:44:56, 863.99s/it]

Checkpoint 250 saved !
------- 1st valloss=0.1411

tensor(0.5509, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2803, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1301, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1185, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4920, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1395, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6372, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6153, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4993, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1769, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1298, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1143, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0944, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5221, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2042, device='cuda:0', grad_

  5%|▌         | 251/4999 [60:01:01<1126:26:22, 854.08s/it]

Epoch 251 finished ! Training Loss: 0.3203

tensor(0.9326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1114, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1384, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1973, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1086, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5106, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1028, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1143, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5699, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2199, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5192, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1274, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 252/4999 [60:15:42<1136:49:23, 862.14s/it]

Checkpoint 252 saved !
------- 1st valloss=0.1499

tensor(0.1523, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5236, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1237, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1269, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5353, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1263, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5285, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1216, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1600, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9846, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1882, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5429, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5527, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1818, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6081, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0720, device='cuda:0', grad_

  5%|▌         | 253/4999 [60:29:34<1124:40:27, 853.10s/it]

Epoch 253 finished ! Training Loss: 0.3396

tensor(0.1121, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0674, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1292, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1373, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5045, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1332, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2152, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1257, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5385, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9921, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5113, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1202, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4774, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5055, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 254/4999 [60:44:19<1136:50:25, 862.51s/it]

Checkpoint 254 saved !
------- 1st valloss=0.1432

tensor(0.4937, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1251, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5465, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0748, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4878, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3321, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5134, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1172, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1439, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0914, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5302, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5007, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0865, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1221, device='cuda:0', grad_fn=<

  5%|▌         | 255/4999 [60:58:00<1120:19:06, 850.16s/it]

Epoch 255 finished ! Training Loss: 0.3297

tensor(0.1326, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2700, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1185, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4890, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1195, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1536, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5488, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1693, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1087, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1419, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5358, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1426, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1073, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0975, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5670, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 256/4999 [61:12:55<1137:52:44, 863.67s/it]

Checkpoint 256 saved !
------- 1st valloss=0.1440

tensor(0.5764, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4876, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1121, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5307, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5247, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5660, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1340, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5486, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1507, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6076, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4932, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1117, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1474, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1203, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1206, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5672, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4881, device='cuda:0', grad_

  5%|▌         | 257/4999 [61:26:46<1124:25:41, 853.64s/it]

Epoch 257 finished ! Training Loss: 0.3454

tensor(0.5398, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1461, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1280, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6015, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1123, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1068, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0830, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1427, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5267, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4619, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5259, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5381, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5740, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1222, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5372, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 258/4999 [61:41:44<1141:44:37, 866.96s/it]

Checkpoint 258 saved !
------- 1st valloss=0.1810

tensor(0.1610, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0919, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1090, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5691, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1473, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1307, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0788, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5817, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4873, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4957, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1115, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6850, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1414, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9714, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6170, device='cuda:0', grad_

  5%|▌         | 259/4999 [61:55:33<1126:37:15, 855.66s/it]

Epoch 259 finished ! Training Loss: 0.3395

tensor(0.6039, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5552, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1331, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1673, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1129, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5284, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1100, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5278, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4977, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5099, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5138, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1227, device='cuda:0', grad_fn=<RsubBac

  5%|▌         | 260/4999 [62:10:29<1142:11:08, 867.67s/it]

Checkpoint 260 saved !
------- 1st valloss=0.1786

tensor(0.5407, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1060, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1630, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0725, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5018, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1181, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1839, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5151, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4803, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5145, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5031, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1752, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1656, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0969, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1008, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5703, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1302, device='cuda:0', grad_

  5%|▌         | 261/4999 [62:24:18<1126:57:09, 856.27s/it]

Epoch 261 finished ! Training Loss: 0.3304

tensor(0.1293, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5305, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1165, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1919, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1854, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1681, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1148, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9629, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1199, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1736, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1166, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5762, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4909, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0741, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 262/4999 [62:39:18<1143:40:27, 869.16s/it]

Checkpoint 262 saved !
------- 1st valloss=0.1526

tensor(0.5634, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0885, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5081, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1271, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1124, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5331, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1271, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1534, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1933, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1799, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3345, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1514, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1712, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5685, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4662, device='cuda:0', grad_

  5%|▌         | 263/4999 [62:53:13<1130:11:40, 859.10s/it]

Epoch 263 finished ! Training Loss: 0.3176

tensor(0.5207, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0906, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4748, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5463, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6185, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9564, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3199, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1642, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0758, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5329, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5617, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1483, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5044, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1040, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0607, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 264/4999 [63:08:09<1144:33:52, 870.21s/it]

Checkpoint 264 saved !
------- 1st valloss=0.1788

tensor(0.2813, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1307, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5575, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1159, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5589, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1028, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5045, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2208, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1171, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5283, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5008, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0907, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1539, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5916, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1431, device='cuda:0', grad_

  5%|▌         | 265/4999 [63:21:59<1128:21:15, 858.06s/it]

Epoch 265 finished ! Training Loss: 0.3396

tensor(0.1886, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4923, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1550, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4973, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4788, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5396, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1379, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5572, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5241, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4838, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1253, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5225, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1557, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2324, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5081, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1141, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5030, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 266/4999 [63:36:37<1136:07:26, 864.16s/it]

Checkpoint 266 saved !
------- 1st valloss=0.1316

tensor(0.0978, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0842, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1263, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5135, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1789, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1303, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0860, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3182, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5147, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9858, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1308, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.8523, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5495, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1148, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5060, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1592, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5032, device='cuda:0', grad_

  5%|▌         | 267/4999 [63:50:24<1120:58:29, 852.81s/it]

Epoch 267 finished ! Training Loss: 0.3335

tensor(0.2229, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5399, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5431, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1407, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0969, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1118, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5208, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1294, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1196, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1238, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7111, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0833, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1356, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0967, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1582, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5257, device='cuda:0', grad_fn=<RsubBac

  5%|▌         | 268/4999 [64:05:29<1141:24:11, 868.54s/it]

Checkpoint 268 saved !
------- 1st valloss=0.1884

tensor(0.1470, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1177, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4815, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4903, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9640, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5577, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0859, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1024, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5270, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5893, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5618, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5264, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1518, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4762, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1532, device='cuda:0', grad_

  5%|▌         | 269/4999 [64:19:16<1124:56:49, 856.20s/it]

Epoch 269 finished ! Training Loss: 0.3388

tensor(0.2094, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5139, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5083, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4625, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1214, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1664, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5452, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5009, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5994, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1177, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0942, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5304, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2449, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1615, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1792, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6457, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1937, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 270/4999 [64:34:04<1137:13:43, 865.73s/it]

Checkpoint 270 saved !
------- 1st valloss=0.1421

tensor(0.5267, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5353, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5036, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5269, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5212, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1527, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0995, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1363, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1062, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5353, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1354, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1527, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1074, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1254, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5032, device='cuda:0', grad_

  5%|▌         | 271/4999 [64:47:50<1121:00:19, 853.56s/it]

Epoch 271 finished ! Training Loss: 0.3233

tensor(0.1674, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4753, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1236, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1633, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5152, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3536, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5078, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0789, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1028, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0939, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1770, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5843, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9846, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 272/4999 [65:02:41<1135:40:19, 864.91s/it]

Checkpoint 272 saved !
------- 1st valloss=0.1487

tensor(0.4754, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1857, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5397, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1771, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6111, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0749, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9879, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2755, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0901, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0884, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1630, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1017, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1817, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0903, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1236, device='cuda:0', grad_fn=<Rsub

  5%|▌         | 273/4999 [65:16:26<1119:39:02, 852.89s/it]

Epoch 273 finished ! Training Loss: 0.3282

tensor(0.5795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4962, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1701, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2241, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2021, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1175, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9613, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4953, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1980, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1345, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1476, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1180, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1101, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1037, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5312, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0870, device='cuda:0', grad_fn=<Rsu

  5%|▌         | 274/4999 [65:31:13<1133:07:56, 863.34s/it]

Checkpoint 274 saved !
------- 1st valloss=0.1408

tensor(0.5637, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5447, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1383, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1259, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1867, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1154, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0880, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5561, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5236, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1916, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1183, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4705, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1412, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5095, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1820, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0680, device='cuda:0', grad_

  6%|▌         | 275/4999 [65:45:09<1121:52:01, 854.94s/it]

Epoch 275 finished ! Training Loss: 0.3354

tensor(0.9992, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1020, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0795, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1464, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9871, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5146, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1188, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1576, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9927, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1170, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1362, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1056, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1853, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5201, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5268, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1192, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9999, device='cuda:0', grad_fn=<Rsu

  6%|▌         | 276/4999 [66:00:10<1139:43:24, 868.73s/it]

Checkpoint 276 saved !
------- 1st valloss=0.1435

tensor(0.3760, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0958, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5376, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1438, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1748, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0759, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1127, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5442, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1034, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0894, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1649, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1158, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1742, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1248, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1165, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0756, device='cuda:0', grad_

  6%|▌         | 277/4999 [66:14:11<1128:40:09, 860.48s/it]

Epoch 277 finished ! Training Loss: 0.3289

tensor(0.1023, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0963, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4938, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0726, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5481, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5058, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2287, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1154, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4770, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5357, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1718, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0860, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1946, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5938, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1018, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6247, device='cuda:0', grad_fn=<Rsu

  6%|▌         | 278/4999 [66:28:56<1138:02:17, 867.81s/it]

Checkpoint 278 saved !
------- 1st valloss=0.1380

tensor(0.1125, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2134, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5325, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5491, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5010, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1479, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1084, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1186, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1017, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1176, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0894, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5269, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6586, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2364, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1058, device='cuda:0', grad_

  6%|▌         | 279/4999 [66:42:47<1123:21:12, 856.80s/it]

Epoch 279 finished ! Training Loss: 0.3278

tensor(0.4702, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1251, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5354, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9709, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4268, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9391, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1174, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1655, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5176, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0842, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5754, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5165, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5449, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0694, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3842, device='cuda:0', grad_fn=<Rsu

  6%|▌         | 280/4999 [66:57:44<1138:50:36, 868.79s/it]

Checkpoint 280 saved !
------- 1st valloss=0.1695

tensor(0.1117, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5253, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1450, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1694, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5443, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1483, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1573, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0731, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1062, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5685, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1708, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1124, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9996, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2323, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0909, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1247, device='cuda:0', grad_

  6%|▌         | 281/4999 [67:11:33<1122:53:20, 856.80s/it]

Epoch 281 finished ! Training Loss: 0.3278

tensor(0.2049, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.7111, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0806, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1219, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6263, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5395, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5037, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5580, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5504, device='cuda:0', grad_fn=<RsubBackward1>)


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



tensor(0.1337, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9730, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1544, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1191, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5595, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5654, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1173, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1178, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5262, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1781, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5899, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5496, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1414, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1585, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5011, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4966, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5444, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.3564, device='cuda:0',

  6%|▋         | 321/4999 [76:46:06<1113:33:40, 856.95s/it]

Epoch 321 finished ! Training Loss: 0.3372

tensor(0.1635, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1234, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1378, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0920, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4840, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5596, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1531, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2898, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6198, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1657, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1859, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1015, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4622, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1420, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1896, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0898, device='cuda:0', grad_fn=<Rsu

  6%|▋         | 322/4999 [77:00:54<1125:35:00, 866.39s/it]

Checkpoint 322 saved !
------- 1st valloss=0.1342

tensor(0.5562, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1223, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1106, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0721, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4680, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2092, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1655, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1002, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5306, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0859, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1152, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5079, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1423, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1608, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1631, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4915, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1363, device='cuda:0', grad_

  6%|▋         | 323/4999 [77:14:41<1110:06:12, 854.66s/it]

Epoch 323 finished ! Training Loss: 0.3318

tensor(0.0926, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1053, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4870, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1453, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1338, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1981, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0912, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4985, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5824, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5162, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5122, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0791, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0784, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0920, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9930, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0765, device='cuda:0', grad_fn=<RsubBac

  6%|▋         | 324/4999 [77:29:21<1119:25:08, 862.01s/it]

Checkpoint 324 saved !
------- 1st valloss=0.1731

tensor(0.1425, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5048, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1187, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1862, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1791, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9819, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1568, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5499, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4640, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1909, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0936, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4875, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0826, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4559, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5211, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6179, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1676, device='cuda:0', grad_

  7%|▋         | 325/4999 [77:43:24<1112:04:08, 856.54s/it]

Epoch 325 finished ! Training Loss: 0.3220

tensor(0.0676, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1006, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0708, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0870, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5102, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4854, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5158, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5955, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5030, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.6297, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1235, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4851, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4810, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2384, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1.0000, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1147, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1179, device='cuda:0', grad_fn=<Rsu

  7%|▋         | 326/4999 [77:58:09<1122:39:30, 864.88s/it]

Checkpoint 326 saved !
------- 1st valloss=0.1423

tensor(0.1133, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5573, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.0866, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1083, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5012, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1155, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.9472, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1415, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.4583, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1292, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.2191, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1752, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(1., device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.5368, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1772, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1255, device='cuda:0', grad_fn=<RsubBackward1>)
tensor(0.1122, device='cuda:0', grad_fn=<

In [None]:
print(image_1_resize.shape)

In [None]:
show_image_slice(image_1)
show_image_slice(label_1)
show_image_slice(bbox_bv_label)
show_image_slice(out_coarse)
show_image_slice(bbox_bv)
show_image_slice(bbox_image)

In [None]:
b = loadbvcenter(image_1.squeeze(0).cpu().detach().numpy())
x1 = int(b[0])
y1 = int(b[1])
z1 = int(b[2])
show_image_slice(bv_label[x1-64:x1+64, y1-64:y1+64, z1-64:z1+64])
c = find_bv_centroid(bv_label)
x2 = int(c[0])
y2 = int(c[1])
z2 = int(c[2])
show_image_slice(bv_label[x2-64:x2+64, y2-64:y2+64, z2-64:z2+64])

In [None]:
print(bv_coarse.shape)
print("bbox_bv_label", bbox_bv_label.shape)
print("bbox_bv", bbox_bv.shape)
print("bbox_image", bbox_image.shape)

In [None]:
deeplab.eval()

with torch.no_grad():
    
    bgloss = 0
    bdloss = 0
    bvloss = 0
    
    for v, vbatch in tqdm(enumerate(validation_loader)):
        # move data to device, convert dtype to desirable dtype
        image_1 = vbatch['image1_data'].to(device=device, dtype=dtype)
        label_1 = vbatch['image1_label'].to(device=device, dtype=dtype)

        output = deeplab(image_1)
        # do the inference
        output_numpy = output.cpu().numpy()
        
        
        #out_1 = torch.round(output)
        out_1 = torch.from_numpy((output_numpy == output_numpy.max(axis=1)[:, None]).astype(int)).to(device=device, dtype=dtype)
        loss_1 = dice_loss_3(out_1, label_1)

        bg, bd, bv = dice_loss_3_debug(out_1, label_1)
        # calculate loss
        print(bg.item(), bd.item(), bv.item(), loss_1.item())
        bgloss += bg.item()
        bdloss += bd.item()
        bvloss += bv.item()
        
        if bv.item() >= 0.2 or bd.item() >= 0.1:
            show_image_slice(image_1)
            show_image_slice(label_1)
            show_image_slice(output)

    outstr = '------- background loss = {0:.4f}, body loss = {1:.4f}, bv loss = {2:.4f}'\
        .format(bgloss/(v+1), bdloss/(v+1), bvloss/(v+1)) + '\n'
    print(outstr)