In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.nn import init
from torch.utils.data import *
import torchvision
from torchvision import transforms
import albumentations as A
from albumentations import GaussNoise, IAAAdditiveGaussianNoise, Compose, OneOf
from albumentations.pytorch import ToTensor
from efficientnet_pytorch import EfficientNet
import gc
import cv2
from tqdm import tqdm
import sklearn.metrics
import json
import functools
import itertools
import random
import sys
sys.path.append('/home/Data/FoodDetection/AI_OCR')
sys.path.append('/home/Data/FoodDetection/AI_OCR/Scatter')
sys.path.append('/home/Data/FoodDetection/AI_OCR/Whatiswrong')
import augs
import utils
import ko_dataset
from PIL import Image

In [2]:
import importlib
importlib.reload(utils)

<module 'utils' from '/home/Data/FoodDetection/AI_OCR/Whatiswrong/utils.py'>

In [3]:
MEAN = [0.5, 0.5, 0.5]
STD = [0.5, 0.5, 0.5]
IMG_HEIGHT = 56
IMG_WIDTH = 224
BATCH_SIZE = 128
EPOCH = 3
TQDM_DISABLE = False

device = torch.device('cuda')

In [4]:
ko_hand = ko_dataset.hand_dataset(dataset_mode = 'word', label_mode = 'syllable')

In [5]:
ko_gan = ko_dataset.korean_synthetic_gan()

  0%|          | 0/359997 [00:00<?, ?it/s]

359997 files will be loaded


100%|██████████| 359997/359997 [00:03<00:00, 116740.61it/s]


In [6]:
transformers = Compose([
                        OneOf([
                            augs.GridMask(num_grid=(10,20)),
                            augs.RandomAugMix(severity=3, width=3)], p =0.4),
                            ToTensor()
                       ])

hand_custom = utils.Dataset_streamer(ko_hand.dataset, resize_shape = (IMG_HEIGHT, IMG_WIDTH), transformer=transformers)
font_custom = utils.Dataset_streamer(ko_gan.dataset, resize_shape = (IMG_HEIGHT, IMG_WIDTH), transformer=ToTensor())

hand_loader = DataLoader(hand_custom, batch_size = BATCH_SIZE,  num_workers =15, shuffle=True, drop_last=True, pin_memory=True)
font_loader = DataLoader(font_custom, batch_size = BATCH_SIZE,  num_workers=15, shuffle=True,  drop_last=True, pin_memory=True )

In [7]:
# np.save('./train_images', train_images)

In [8]:
# train_images = np.load('train_images.npy', allow_pickle=True)

In [9]:
# font_images = np.load('font_images.npy')

In [10]:
# class GraphemeDataset(torch.utils.data.Dataset):
    
#     def __init__(self, data, images, transform=None, num_grapheme_root=168, num_vowel_diacritic=11, num_consonant_diacritic=8):
#         self.data = data
#         self.grapheme_root_list = np.array(data['grapheme_root'].tolist(), dtype=np.int64)
#         self.vowel_diacritic_list = np.array(data['vowel_diacritic'].tolist(), dtype=np.int64)
#         self.consonant_diacritic_list = np.array(data['consonant_diacritic'].tolist(), dtype=np.int64)
#         self.num_grapheme_root = num_grapheme_root
#         self.num_vowel_diacritic = num_vowel_diacritic
#         self.num_consonant_diacritic = num_consonant_diacritic
#         self.images = images
#         self.transform = transform
            
#     def __len__(self):
#         return len(self.data)
    
#     def __getitem__(self, idx):
#         grapheme_root = self.grapheme_root_list[idx]
#         vowel_diacritic = self.vowel_diacritic_list[idx]
#         consonant_diacritic = self.consonant_diacritic_list[idx]
#         label = (grapheme_root*self.num_vowel_diacritic+vowel_diacritic)*self.num_consonant_diacritic+consonant_diacritic
#         np_image = self.images[idx].copy()
#         out_image = self.transform(np_image)
#         return out_image, label
    
# class Albumentations:
#     def __init__(self, augmentations):
#         self.augmentations = A.Compose(augmentations)
    
#     def __call__(self, image):
#         image = self.augmentations(image=image)['image']
#         return image

In [11]:
# preprocess = [
#     A.CenterCrop(height=137, width=IMG_WIDTH),
#     A.Resize(height=IMG_HEIGHT, width=IMG_WIDTH, always_apply=True),
# ]

# augmentations = [
#     A.PadIfNeeded(min_height=256, min_width=256, border_mode=cv2.BORDER_CONSTANT, value=[255, 255, 255], always_apply=True),
#     A.imgaug.transforms.IAAAffine(shear=5, mode='constant', cval=255, always_apply=True),
#     A.ShiftScaleRotate(rotate_limit=5, border_mode=cv2.BORDER_CONSTANT, value=[255, 255, 255], mask_value=[255, 255, 255], always_apply=True),
#     A.RandomCrop(height=IMG_HEIGHT, width=IMG_WIDTH, always_apply=True),
# ]


# train_transform = transforms.Compose([
#     np.uint8,
#     transforms.Lambda(lambda x: np.array([x, x, x]).transpose((1, 2, 0)) ),
#     np.uint8,
#     Albumentations(preprocess + augmentations),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=MEAN, std=STD),
# #     transforms.ToPILImage(),
# ])
# valid_transform = transforms.Compose([
#     np.uint8,
#     transforms.Lambda(lambda x: np.array([x, x, x]).transpose((1, 2, 0)) ),
#     np.uint8,
#     Albumentations(preprocess),
#     transforms.ToTensor(),
#     transforms.Normalize(mean=MEAN, std=STD),
# #     transforms.ToPILImage(),
# ])

In [12]:
# hand_dataset = GraphemeDataset(train_data, train_images, valid_transform)
# font_dataset = GraphemeDataset(font_data, font_images, train_transform)

In [13]:
class ResnetGenerator(nn.Module):
    """Resnet-based generator that consists of Resnet blocks between a few downsampling/upsampling operations.
    We adapt Torch code and idea from Justin Johnson's neural style transfer project(https://github.com/jcjohnson/fast-neural-style)
    """

    def __init__(self, input_nc, output_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_blocks=6, padding_type='reflect'):
        """Construct a Resnet-based generator
        Parameters:
            input_nc (int)      -- the number of channels in input images
            output_nc (int)     -- the number of channels in output images
            ngf (int)           -- the number of filters in the last conv layer
            norm_layer          -- normalization layer
            use_dropout (bool)  -- if use dropout layers
            n_blocks (int)      -- the number of ResNet blocks
            padding_type (str)  -- the name of padding layer in conv layers: reflect | replicate | zero
        """
        assert(n_blocks >= 0)
        super(ResnetGenerator, self).__init__()
        if type(norm_layer) == functools.partial:
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d

        model = [nn.ReflectionPad2d(3),
                 nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
                 norm_layer(ngf),
                 nn.ReLU(True)]

        n_downsampling = 2
        for i in range(n_downsampling):  # add downsampling layers
            mult = 2 ** i
            model += [nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3, stride=2, padding=1, bias=use_bias),
                      norm_layer(ngf * mult * 2),
                      nn.ReLU(True)]

        mult = 2 ** n_downsampling
        for i in range(n_blocks):       # add ResNet blocks

            model += [ResnetBlock(ngf * mult, padding_type=padding_type, norm_layer=norm_layer, use_dropout=use_dropout, use_bias=use_bias)]

        for i in range(n_downsampling):  # add upsampling layers
            mult = 2 ** (n_downsampling - i)
            model += [nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
                                         kernel_size=3, stride=2,
                                         padding=1, output_padding=1,
                                         bias=use_bias),
                      norm_layer(int(ngf * mult / 2)),
                      nn.ReLU(True)]
        model += [nn.ReflectionPad2d(3)]
        model += [nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0)]
        model += [nn.Tanh()]

        self.model = nn.Sequential(*model)

    def forward(self, input):
        """Standard forward"""
        return self.model(input)


class ResnetBlock(nn.Module):
    """Define a Resnet block"""

    def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """Initialize the Resnet block
        A resnet block is a conv block with skip connections
        We construct a conv block with build_conv_block function,
        and implement skip connections in <forward> function.
        Original Resnet paper: https://arxiv.org/pdf/1512.03385.pdf
        """
        super(ResnetBlock, self).__init__()
        self.conv_block = self.build_conv_block(dim, padding_type, norm_layer, use_dropout, use_bias)

    def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, use_bias):
        """Construct a convolutional block.
        Parameters:
            dim (int)           -- the number of channels in the conv layer.
            padding_type (str)  -- the name of padding layer: reflect | replicate | zero
            norm_layer          -- normalization layer
            use_dropout (bool)  -- if use dropout layers.
            use_bias (bool)     -- if the conv layer uses bias or not
        Returns a conv block (with a conv layer, a normalization layer, and a non-linearity layer (ReLU))
        """
        conv_block = []
        p = 0
        if padding_type == 'reflect':
            conv_block += [nn.ReflectionPad2d(1)]
        elif padding_type == 'replicate':
            conv_block += [nn.ReplicationPad2d(1)]
        elif padding_type == 'zero':
            p = 1
        else:
            raise NotImplementedError('padding [%s] is not implemented' % padding_type)

        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim), nn.ReLU(True)]
        if use_dropout:
            conv_block += [nn.Dropout(0.5)]

        p = 0
        if padding_type == 'reflect':
            conv_block += [nn.ReflectionPad2d(1)]
        elif padding_type == 'replicate':
            conv_block += [nn.ReplicationPad2d(1)]
        elif padding_type == 'zero':
            p = 1
        else:
            raise NotImplementedError('padding [%s] is not implemented' % padding_type)
        conv_block += [nn.Conv2d(dim, dim, kernel_size=3, padding=p, bias=use_bias), norm_layer(dim)]

        return nn.Sequential(*conv_block)

    def forward(self, x):
        """Forward function (with skip connections)"""
        out = x + self.conv_block(x)  # add skip connections
        return out

In [14]:
class NLayerDiscriminator(nn.Module):
    """Defines a PatchGAN discriminator"""

    def __init__(self, input_nc, ndf=64, n_layers=3, norm_layer=nn.BatchNorm2d):
        """Construct a PatchGAN discriminator
        Parameters:
            input_nc (int)  -- the number of channels in input images
            ndf (int)       -- the number of filters in the last conv layer
            n_layers (int)  -- the number of conv layers in the discriminator
            norm_layer      -- normalization layer
        """
        super(NLayerDiscriminator, self).__init__()
        if type(norm_layer) == functools.partial:  # no need to use bias as BatchNorm2d has affine parameters
            use_bias = norm_layer.func == nn.InstanceNorm2d
        else:
            use_bias = norm_layer == nn.InstanceNorm2d

        kw = 4
        padw = 1
        sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]
        nf_mult = 1
        nf_mult_prev = 1
        for n in range(1, n_layers):  # gradually increase the number of filters
            nf_mult_prev = nf_mult
            nf_mult = min(2 ** n, 8)
            sequence += [
                nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias),
                norm_layer(ndf * nf_mult),
                nn.LeakyReLU(0.2, True)
            ]

        nf_mult_prev = nf_mult
        nf_mult = min(2 ** n_layers, 8)
        sequence += [
            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias),
            norm_layer(ndf * nf_mult),
            nn.LeakyReLU(0.2, True)
        ]

        sequence += [nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]  # output 1 channel prediction map
        self.model = nn.Sequential(*sequence)

    def forward(self, input):
        """Standard forward."""
        return self.model(input)

In [15]:
def init_weight(net, init_gain):
    def init_func(m):
        classname = m.__class__.__name__
        if hasattr(m, 'weight') and (classname.find('Conv') != -1 or classname.find('Linear') != -1):
            init.normal_(m.weight.data, 0.0, init_gain)
            if hasattr(m, 'bias'):
                init.constant_(m.bias.data, 0.0)
        elif classname.find('BatchNorm2d') != -1:
            init.normal_(m.weight.data, 1.0, init_gain)
            init.constant_(m.bias.data, 0.0)
    net.apply(init_func)

In [16]:
class ImagePool():
    """This class implements an image buffer that stores previously generated images.
    This buffer enables us to update discriminators using a history of generated images
    rather than the ones produced by the latest generators.
    """

    def __init__(self, pool_size):
        """Initialize the ImagePool class
        Parameters:
            pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created
        """
        self.pool_size = pool_size
        if self.pool_size > 0:  # create an empty pool
            self.num_imgs = 0
            self.images = []

    def query(self, images):
        """Return an image from the pool.
        Parameters:
            images: the latest generated images from the generator
        Returns images from the buffer.
        By 50/100, the buffer will return input images.
        By 50/100, the buffer will return images previously stored in the buffer,
        and insert the current images to the buffer.
        """
        if self.pool_size == 0:  # if the buffer size is 0, do nothing
            return images
        return_images = []
        for image in images:
            image = torch.unsqueeze(image.data, 0)
            if self.num_imgs < self.pool_size:   # if the buffer is not full; keep inserting current images to the buffer
                self.num_imgs = self.num_imgs + 1
                self.images.append(image)
                return_images.append(image)
            else:
                p = random.uniform(0, 1)
                if p > 0.5:  # by 50% chance, the buffer will return a previously stored image, and insert the current image into the buffer
                    random_id = random.randint(0, self.pool_size - 1)  # randint is inclusive
                    tmp = self.images[random_id].clone()
                    self.images[random_id] = image
                    return_images.append(tmp)
                else:       # by another 50% chance, the buffer will return the current image
                    return_images.append(image)
        return_images = torch.cat(return_images, 0)   # collect all the images and return
        return return_images 

In [17]:
class GANLoss(nn.Module):
    """Define different GAN objectives.
    The GANLoss class abstracts away the need to create the target label tensor
    that has the same size as the input.
    """

    def __init__(self, gan_mode, target_real_label=1.0, target_fake_label=0.0):
        """ Initialize the GANLoss class.
        Parameters:
            gan_mode (str) - - the type of GAN objective. It currently supports vanilla, lsgan, and wgangp.
            target_real_label (bool) - - label for a real image
            target_fake_label (bool) - - label of a fake image
        Note: Do not use sigmoid as the last layer of Discriminator.
        LSGAN needs no sigmoid. vanilla GANs will handle it with BCEWithLogitsLoss.
        """
        super(GANLoss, self).__init__()
        self.register_buffer('real_label', torch.tensor(target_real_label))
        self.register_buffer('fake_label', torch.tensor(target_fake_label))
        self.gan_mode = gan_mode
        if gan_mode == 'lsgan':
            self.loss = nn.MSELoss()
        elif gan_mode == 'vanilla':
            self.loss = nn.BCEWithLogitsLoss()
        elif gan_mode in ['wgangp']:
            self.loss = None
        else:
            raise NotImplementedError('gan mode %s not implemented' % gan_mode)

    def get_target_tensor(self, prediction, target_is_real):
        """Create label tensors with the same size as the input.
        Parameters:
            prediction (tensor) - - tpyically the prediction from a discriminator
            target_is_real (bool) - - if the ground truth label is for real images or fake images
        Returns:
            A label tensor filled with ground truth label, and with the size of the input
        """

        if target_is_real:
            target_tensor = self.real_label
        else:
            target_tensor = self.fake_label
        return target_tensor.expand_as(prediction)

    def __call__(self, prediction, target_is_real):
        """Calculate loss given Discriminator's output and grount truth labels.
        Parameters:
            prediction (tensor) - - tpyically the prediction output from a discriminator
            target_is_real (bool) - - if the ground truth label is for real images or fake images
        Returns:
            the calculated loss.
        """
        if self.gan_mode in ['lsgan', 'vanilla']:
            target_tensor = self.get_target_tensor(prediction, target_is_real)
            loss = self.loss(prediction, target_tensor)
        elif self.gan_mode == 'wgangp':
            if target_is_real:
                loss = -prediction.mean()
            else:
                loss = prediction.mean()
        return loss

In [18]:
# class BengalModel(nn.Module):
#     def __init__(self, backbone, hidden_size=2560, class_num=168*11*7):
#         super(BengalModel, self).__init__()
#         self.backbone = backbone
#         self._avg_pooling = nn.AdaptiveAvgPool2d(1)
#         self.fc = nn.Linear(hidden_size, class_num)
#         self.ln = nn.LayerNorm(hidden_size)

        
#     def forward(self, inputs):
#         bs = inputs.shape[0]
#         feature = self.backbone.extract_features(inputs)
#         feature_vector = self._avg_pooling(feature)
#         feature_vector = feature_vector.view(bs, -1)
#         feature_vector = self.ln(feature_vector)

#         out = self.fc(feature_vector)
#         return out   

In [19]:
norm_layer = functools.partial(nn.InstanceNorm2d, affine=False, track_running_stats=False)
generator_a = ResnetGenerator(input_nc=3, output_nc=3, ngf=64, norm_layer=norm_layer, use_dropout=False, n_blocks=9)
generator_b = ResnetGenerator(input_nc=3, output_nc=3, ngf=64, norm_layer=norm_layer, use_dropout=False, n_blocks=9)

discriminator_a = NLayerDiscriminator(input_nc=3, ndf=64, n_layers=3, norm_layer=norm_layer)
discriminator_b = NLayerDiscriminator(input_nc=3, ndf=64, n_layers=3, norm_layer=norm_layer)
# backbone = EfficientNet.from_name('efficientnet-b0')
# classifier = BengalModel(backbone, hidden_size=1280, class_num=168*11*8)
init_weight(generator_a, 0.02)
init_weight(generator_b, 0.02)
init_weight(discriminator_a, 0.02)
init_weight(discriminator_b, 0.02)

discriminator_loss = GANLoss('lsgan', target_real_label=1.0, target_fake_label=0.0)
# classifier_loss = nn.CrossEntropyLoss()

In [20]:
# classifier.load_state_dict(torch.load('./best.pth'))

In [21]:
class CycleGan(nn.Module):
    
    def __init__(self, 
                 generator_a, generator_b, discriminator_a, discriminator_b, discriminator_loss, lambda_a, lambda_b, device):
        super(CycleGan, self).__init__()
        self.generator_a = generator_a
        self.generator_b = generator_b
        self.discriminator_a = discriminator_a
        self.discriminator_b = discriminator_b
#         self.classifier = classifier.eval()
#         CycleGan.set_requires_grad(self.classifier, requires_grad=False)
        self.discriminator_loss = discriminator_loss
#         self.classifier_loss = classifier_loss
        self.reconstruct_loss = nn.L1Loss()
        self.device = device
        
        self.image_pool_a = ImagePool(50)
        self.image_pool_b = ImagePool(50)
        
        self.lambda_a = lambda_a
        self.lambda_b = lambda_b
#         self.lambda_cls = lambda_cls
        
        self.real_images_a = None
        self.real_images_b = None
        self.labels_a = None
        self.labels_b = None
        self.fake_images_a = None
        self.fake_images_b = None
        self.rec_images_a = None
        self.rec_images_b = None
        self.generator_a = torch.nn.DataParallel(self.generator_a)
        self.generator_b = torch.nn.DataParallel(self.generator_b)
        self.discriminator_a = torch.nn.DataParallel(self.discriminator_a)
        self.discriminator_b = torch.nn.DataParallel(self.discriminator_b)
        self.to(device)
        
    def forward(self):
        self.fake_images_a = self.generator_a(self.real_images_b)

        self.fake_images_b = self.generator_b(self.real_images_a)
        
        self.rec_images_a = self.generator_a(self.fake_images_b)
        
        self.rec_images_b = self.generator_b(self.fake_images_a)
        
#         print(f'fake image_a : {self.fake_images_a.shape}')
#         print(f'fake image_b : {self.fake_images_b.shape}')
#         print(f'fake rec_images_a : {self.rec_images_a.shape}')
#         print(f'fake rec_images_b : {self.rec_images_b.shape}')

    @staticmethod
    def set_requires_grad(nets, requires_grad=False):
        """Set requies_grad=Fasle for all the networks to avoid unnecessary computations
        Parameters:
            nets (network list)   -- a list of networks
            requires_grad (bool)  -- whether the networks require gradients or not
        """
        if not isinstance(nets, list):
            nets = [nets]
        for net in nets:
            if net is not None:
                for param in net.parameters():
                    param.requires_grad = requires_grad
                    
                    
    def generator_step(self):
        CycleGan.set_requires_grad([self.discriminator_a, self.discriminator_b], False)
        
        loss_a = self.discriminator_loss(self.discriminator_a(self.fake_images_a), True)
        loss_b = self.discriminator_loss(self.discriminator_b(self.fake_images_b), True)
        cycle_a = self.reconstruct_loss(self.rec_images_a, self.real_images_a)*self.lambda_a
        cycle_b = self.reconstruct_loss(self.rec_images_b, self.real_images_b)*self.lambda_b
#         cls_loss = self.classifier_loss(self.classifier(self.fake_images_b), self.labels_a)*self.lambda_cls
        
#         loss = loss_a + loss_b + cycle_a + cycle_b + cls_loss
        loss = loss_a + loss_b + cycle_a + cycle_b 
        print(loss)
        loss.backward()
        CycleGan.set_requires_grad([self.discriminator_a, self.discriminator_b], True)
        return loss, loss_a, loss_b, cycle_a, cycle_b
        
    def discriminator_step(self):
        pred_real_a = self.discriminator_a(self.real_images_a)
        loss_real_a = self.discriminator_loss(pred_real_a, True)
        fake_images_a = self.image_pool_a.query(self.fake_images_a).detach()
        pred_fake_a = self.discriminator_a(fake_images_a)
        loss_fake_a = self.discriminator_loss(pred_fake_a, False)
        
        pred_real_b = self.discriminator_b(self.real_images_b)
        loss_real_b = self.discriminator_loss(pred_real_b, True)
        fake_images_b = self.image_pool_b.query(self.fake_images_b).detach()
        pred_fake_b = self.discriminator_b(fake_images_b)
        loss_fake_b = self.discriminator_loss(pred_fake_b, False)
        
        loss = (loss_real_a + loss_fake_a)/2 + (loss_real_b + loss_fake_b)/2
        loss.backward()
        return loss, loss_real_a, loss_fake_a, (loss_real_a + loss_fake_a)/2, loss_real_b, loss_fake_b, (loss_real_b+loss_fake_b)/2
    
    def set_input(self, images_a, images_b, labels_a, labels_b):
        self.real_images_a = images_a.to(self.device)
        self.real_images_b = images_b.to(self.device)
        self.labels_a = labels_a
        self.labels_b = labels_b

In [22]:
model = CycleGan(generator_a=generator_a,
                generator_b=generator_b,
                discriminator_a=discriminator_a,
                discriminator_b=discriminator_b,
                discriminator_loss=discriminator_loss,
                lambda_a=10.0,
                lambda_b=10.0,
                device=device
                )

In [23]:
# hand_sampler = torch.utils.data.RandomSampler(hand_dataset, True, int(max(len(hand_dataset), len(font_dataset)))*(EPOCH))
# font_sampler = torch.utils.data.RandomSampler(font_dataset, True, int(max(len(hand_dataset), len(font_dataset)))*(EPOCH))

In [24]:
# hand_loader = torch.utils.data.DataLoader(
#     hand_dataset, 
#     batch_size=BATCH_SIZE, 
#     shuffle=False, 
#     num_workers=1, 
#     pin_memory=True, 
#     drop_last=True, 
#     sampler=hand_sampler)
# font_loader = torch.utils.data.DataLoader(
#     font_dataset, 
#     batch_size=BATCH_SIZE, 
#     shuffle=False, 
#     num_workers=1, 
#     pin_memory=True, 
#     drop_last=True, 
#     sampler=font_sampler)

hand_loader_iter = iter(hand_loader)
font_loader_iter = iter(font_loader)

In [25]:
def train_step(model, a_iter, b_iter, generator_optimizer, discriminator_optimizer, generator_scheduler, discriminator_scheduler, device):
    a_image, a_label = next(a_iter)
    b_image, b_label = next(b_iter)
    a_image = a_image.to(device)
    b_image = b_image.to(device)
#     a_label = a_label.to(device)
#     b_label = b_label.to(device)
    model.set_input(a_image, b_image, a_label, b_label)
    model.forward()
    generator_optimizer.zero_grad()
    generator_loss, generator_loss_a, generator_loss_b, cycle_a, cycle_b = model.generator_step()
    generator_optimizer.step()
    discriminator_optimizer.zero_grad()
    discriminator_loss, loss_real_a, loss_fake_a, discriminator_loss_a, loss_real_b, loss_fake_b, discriminator_loss_b = model.discriminator_step()
    discriminator_optimizer.step()
    generator_scheduler.step()
    discriminator_scheduler.step()
    return generator_loss, generator_loss_a, generator_loss_b, cycle_a, cycle_b, discriminator_loss, loss_real_a, loss_fake_a, discriminator_loss_a, loss_real_b, loss_fake_b, discriminator_loss_b

In [26]:
generator_optimizer = torch.optim.Adam(itertools.chain(generator_a.parameters(), generator_b.parameters()), lr=0.0002, betas=(0.5, 0.999))
discriminator_optimizer = torch.optim.Adam(itertools.chain(discriminator_a.parameters(), discriminator_b.parameters()), lr=0.0002, betas=(0.5, 0.999))

In [27]:
num_step_per_epoch = len(hand_loader)//EPOCH
train_steps = num_step_per_epoch*EPOCH
WARM_UP_STEP = train_steps*0.5

def warmup_linear_decay(step):
    if step < WARM_UP_STEP:
        return 1.0
    else:
        return (train_steps-step)/(train_steps-WARM_UP_STEP)
generator_scheduler = torch.optim.lr_scheduler.LambdaLR(generator_optimizer, warmup_linear_decay)
discriminator_scheduler = torch.optim.lr_scheduler.LambdaLR(discriminator_optimizer, warmup_linear_decay)

In [28]:
class LossAverager:
    def __init__(self, prefix):
        self.prefix = prefix
        self.generator_loss = []
        self.generator_loss_a = []
        self.generator_loss_b = []
        self.cycle_a = []
        self.cycle_b = []
        self.discriminator_loss = []
        self.loss_real_a = []
        self.loss_fake_a = []
        self.discriminator_loss_a = []
        self.loss_real_b = []
        self.loss_fake_b = []
        self.discriminator_loss_b = []
    
    def append(self, generator_loss, generator_loss_a, generator_loss_b, cycle_a, cycle_b, discriminator_loss, loss_real_a, loss_fake_a, discriminator_loss_a, loss_real_b, loss_fake_b, discriminator_loss_b):
        self.generator_loss.append(generator_loss.item())
        self.generator_loss_a.append(generator_loss_a.item())
        self.generator_loss_b.append(generator_loss_b.item())
        self.cycle_a.append(cycle_a.item())
        self.cycle_b.append(cycle_b.item())
        self.discriminator_loss.append(discriminator_loss.item())
        self.loss_real_a.append(loss_real_a.item())
        self.loss_fake_a.append(loss_fake_a.item())
        self.discriminator_loss_a.append(discriminator_loss_a.item())
        self.loss_real_b.append(loss_real_b.item())
        self.loss_fake_b.append(loss_fake_b.item())
        self.discriminator_loss_b.append(discriminator_loss_b.item())

    def average(self):
        metric = {}
        for key, value in self.__dict__.items():
            if isinstance(value, list):
                metric[self.prefix+'/'+key] = sum(value)/len(value)
        return metric

In [None]:
log = []

for epoch in range(EPOCH):
    model.train()
    loss_averager = LossAverager('train')
    for i in tqdm(range(num_step_per_epoch)):
        losses = train_step(model, hand_loader_iter, font_loader_iter, generator_optimizer, discriminator_optimizer, generator_scheduler, discriminator_scheduler, device)
        loss_averager.append(*losses)
    metric = loss_averager.average()
    metric['epoch'] = epoch
    model.eval()
    log.append(metric)
    torch.save(generator_b.state_dict(), 'text_generator.pth')
    with open('log.json', 'w') as fout:
        json.dump(log , fout, indent=4)

 38%|███▊      | 716/1874 [07:13<11:26,  1.69it/s]

tensor(2.7742, device='cuda:0', grad_fn=<AddBackward0>)


 38%|███▊      | 717/1874 [07:14<11:26,  1.69it/s]

tensor(2.0837, device='cuda:0', grad_fn=<AddBackward0>)


 38%|███▊      | 718/1874 [07:14<11:36,  1.66it/s]

tensor(2.9592, device='cuda:0', grad_fn=<AddBackward0>)


 38%|███▊      | 719/1874 [07:15<11:42,  1.64it/s]

tensor(1.8886, device='cuda:0', grad_fn=<AddBackward0>)


 38%|███▊      | 720/1874 [07:16<11:48,  1.63it/s]

tensor(2.5835, device='cuda:0', grad_fn=<AddBackward0>)


 38%|███▊      | 721/1874 [07:16<11:51,  1.62it/s]

tensor(1.8390, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▊      | 722/1874 [07:17<11:46,  1.63it/s]

tensor(2.8878, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▊      | 723/1874 [07:18<11:48,  1.62it/s]

tensor(2.2007, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▊      | 724/1874 [07:18<11:54,  1.61it/s]

tensor(2.5072, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▊      | 725/1874 [07:19<11:55,  1.61it/s]

tensor(2.9314, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▊      | 726/1874 [07:19<11:54,  1.61it/s]

tensor(3.2784, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 727/1874 [07:20<11:54,  1.61it/s]

tensor(2.3349, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 728/1874 [07:21<11:50,  1.61it/s]

tensor(2.0723, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 729/1874 [07:21<11:39,  1.64it/s]

tensor(2.4365, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 730/1874 [07:22<11:41,  1.63it/s]

tensor(1.9351, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 731/1874 [07:22<11:45,  1.62it/s]

tensor(2.3382, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 732/1874 [07:23<11:46,  1.62it/s]

tensor(2.1428, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 733/1874 [07:24<11:45,  1.62it/s]

tensor(2.6581, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 734/1874 [07:24<11:44,  1.62it/s]

tensor(1.8535, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 735/1874 [07:25<11:49,  1.60it/s]

tensor(2.8442, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 736/1874 [07:26<11:52,  1.60it/s]

tensor(1.8606, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 737/1874 [07:26<11:52,  1.60it/s]

tensor(2.8176, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 738/1874 [07:27<11:37,  1.63it/s]

tensor(2.4867, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 739/1874 [07:27<11:38,  1.62it/s]

tensor(1.8977, device='cuda:0', grad_fn=<AddBackward0>)


 39%|███▉      | 740/1874 [07:28<11:44,  1.61it/s]

tensor(3.0186, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 741/1874 [07:29<11:33,  1.63it/s]

tensor(2.4925, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 742/1874 [07:29<11:24,  1.65it/s]

tensor(2.6440, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 743/1874 [07:30<11:27,  1.64it/s]

tensor(2.2148, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 744/1874 [07:30<11:30,  1.64it/s]

tensor(1.7859, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 745/1874 [07:31<11:33,  1.63it/s]

tensor(3.1317, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 746/1874 [07:32<11:36,  1.62it/s]

tensor(2.3238, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 747/1874 [07:32<11:42,  1.61it/s]

tensor(2.0347, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 748/1874 [07:33<11:41,  1.61it/s]

tensor(2.1231, device='cuda:0', grad_fn=<AddBackward0>)


 40%|███▉      | 749/1874 [07:34<11:29,  1.63it/s]

tensor(1.8799, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 750/1874 [07:34<11:27,  1.63it/s]

tensor(2.5154, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 751/1874 [07:35<11:35,  1.62it/s]

tensor(2.0213, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 752/1874 [07:35<11:36,  1.61it/s]

tensor(2.3114, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 753/1874 [07:36<11:37,  1.61it/s]

tensor(2.6535, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 754/1874 [07:37<11:37,  1.61it/s]

tensor(2.1341, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 755/1874 [07:37<11:37,  1.60it/s]

tensor(2.4196, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 756/1874 [07:38<11:36,  1.60it/s]

tensor(1.8130, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 757/1874 [07:39<11:36,  1.60it/s]

tensor(2.3621, device='cuda:0', grad_fn=<AddBackward0>)


 40%|████      | 758/1874 [07:39<11:37,  1.60it/s]

tensor(1.9273, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 759/1874 [07:40<11:33,  1.61it/s]

tensor(2.7183, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 760/1874 [07:40<11:23,  1.63it/s]

tensor(2.0417, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 761/1874 [07:41<11:24,  1.63it/s]

tensor(2.5811, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 762/1874 [07:42<11:28,  1.62it/s]

tensor(1.5845, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 763/1874 [07:42<11:16,  1.64it/s]

tensor(3.1544, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 764/1874 [07:43<11:17,  1.64it/s]

tensor(1.9037, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 765/1874 [07:43<11:21,  1.63it/s]

tensor(2.1354, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 766/1874 [07:44<11:20,  1.63it/s]

tensor(1.6647, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 767/1874 [07:45<11:22,  1.62it/s]

tensor(2.1276, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 768/1874 [07:45<11:19,  1.63it/s]

tensor(1.7031, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 769/1874 [07:46<11:20,  1.62it/s]

tensor(2.0386, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 770/1874 [07:47<11:20,  1.62it/s]

tensor(2.6588, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 771/1874 [07:47<11:15,  1.63it/s]

tensor(1.8861, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 772/1874 [07:48<11:06,  1.65it/s]

tensor(2.4674, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████      | 773/1874 [07:48<11:01,  1.66it/s]

tensor(2.5350, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████▏     | 774/1874 [07:49<10:55,  1.68it/s]

tensor(2.4503, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████▏     | 775/1874 [07:50<10:54,  1.68it/s]

tensor(2.3480, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████▏     | 776/1874 [07:50<10:49,  1.69it/s]

tensor(2.1697, device='cuda:0', grad_fn=<AddBackward0>)


 41%|████▏     | 777/1874 [07:51<10:52,  1.68it/s]

tensor(1.8781, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 778/1874 [07:51<10:49,  1.69it/s]

tensor(2.9959, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 779/1874 [07:52<10:52,  1.68it/s]

tensor(1.6801, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 780/1874 [07:53<10:52,  1.68it/s]

tensor(2.5653, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 781/1874 [07:53<10:51,  1.68it/s]

tensor(2.2526, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 782/1874 [07:54<10:51,  1.68it/s]

tensor(2.8996, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 783/1874 [07:54<10:50,  1.68it/s]

tensor(1.3784, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 784/1874 [07:55<10:49,  1.68it/s]

tensor(3.4702, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 785/1874 [07:55<10:47,  1.68it/s]

tensor(2.2881, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 786/1874 [07:56<10:43,  1.69it/s]

tensor(2.8284, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 787/1874 [07:57<10:40,  1.70it/s]

tensor(2.3592, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 788/1874 [07:57<10:41,  1.69it/s]

tensor(2.3495, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 789/1874 [07:58<10:38,  1.70it/s]

tensor(2.3310, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 790/1874 [07:58<10:39,  1.69it/s]

tensor(2.1431, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 791/1874 [07:59<10:40,  1.69it/s]

tensor(2.0089, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 792/1874 [08:00<10:41,  1.69it/s]

tensor(2.0685, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 793/1874 [08:00<10:37,  1.69it/s]

tensor(2.2991, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 794/1874 [08:01<10:38,  1.69it/s]

tensor(2.1801, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 795/1874 [08:01<10:48,  1.66it/s]

tensor(2.4176, device='cuda:0', grad_fn=<AddBackward0>)


 42%|████▏     | 796/1874 [08:02<10:57,  1.64it/s]

tensor(1.6474, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 797/1874 [08:03<11:05,  1.62it/s]

tensor(3.1723, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 798/1874 [08:03<11:04,  1.62it/s]

tensor(1.8415, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 799/1874 [08:04<11:03,  1.62it/s]

tensor(1.7817, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 800/1874 [08:05<10:54,  1.64it/s]

tensor(2.4738, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 801/1874 [08:05<11:00,  1.63it/s]

tensor(1.5309, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 802/1874 [08:06<10:57,  1.63it/s]

tensor(1.6354, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 803/1874 [08:06<10:59,  1.62it/s]

tensor(1.9837, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 804/1874 [08:07<10:49,  1.65it/s]

tensor(1.6691, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 805/1874 [08:08<10:54,  1.63it/s]

tensor(1.7416, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 806/1874 [08:08<10:44,  1.66it/s]

tensor(1.5761, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 807/1874 [08:09<10:40,  1.67it/s]

tensor(1.8081, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 808/1874 [08:09<10:44,  1.65it/s]

tensor(1.8693, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 809/1874 [08:10<10:47,  1.65it/s]

tensor(1.6080, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 810/1874 [08:11<10:55,  1.62it/s]

tensor(1.6909, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 811/1874 [08:11<10:47,  1.64it/s]

tensor(1.5109, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 812/1874 [08:12<10:45,  1.65it/s]

tensor(2.3111, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 813/1874 [08:12<10:56,  1.62it/s]

tensor(1.2720, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 814/1874 [08:13<10:48,  1.64it/s]

tensor(2.9200, device='cuda:0', grad_fn=<AddBackward0>)


 43%|████▎     | 815/1874 [08:14<10:45,  1.64it/s]

tensor(1.4166, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▎     | 816/1874 [08:14<10:39,  1.65it/s]

tensor(2.2128, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▎     | 817/1874 [08:15<10:48,  1.63it/s]

tensor(1.5969, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▎     | 818/1874 [08:15<10:48,  1.63it/s]

tensor(2.2238, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▎     | 819/1874 [08:16<10:41,  1.65it/s]

tensor(1.4564, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 820/1874 [08:17<10:36,  1.66it/s]

tensor(1.6305, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 821/1874 [08:17<10:32,  1.67it/s]

tensor(1.9041, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 822/1874 [08:18<10:29,  1.67it/s]

tensor(1.4391, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 823/1874 [08:18<10:33,  1.66it/s]

tensor(2.0464, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 824/1874 [08:19<10:37,  1.65it/s]

tensor(1.8484, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 825/1874 [08:20<10:32,  1.66it/s]

tensor(1.4343, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 826/1874 [08:20<10:33,  1.65it/s]

tensor(2.4749, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 827/1874 [08:21<10:40,  1.63it/s]

tensor(1.7740, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 828/1874 [08:22<10:44,  1.62it/s]

tensor(1.9182, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 829/1874 [08:22<10:33,  1.65it/s]

tensor(2.0604, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 830/1874 [08:23<10:28,  1.66it/s]

tensor(2.0306, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 831/1874 [08:23<10:32,  1.65it/s]

tensor(1.7701, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 832/1874 [08:24<10:38,  1.63it/s]

tensor(2.7497, device='cuda:0', grad_fn=<AddBackward0>)


 44%|████▍     | 833/1874 [08:25<10:36,  1.64it/s]

tensor(1.8398, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 834/1874 [08:25<10:27,  1.66it/s]

tensor(2.9187, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 835/1874 [08:26<10:24,  1.66it/s]

tensor(1.7467, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 836/1874 [08:26<10:20,  1.67it/s]

tensor(3.9172, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 837/1874 [08:27<10:25,  1.66it/s]

tensor(1.9696, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 838/1874 [08:28<10:25,  1.66it/s]

tensor(2.1153, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 839/1874 [08:28<10:27,  1.65it/s]

tensor(1.8985, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 840/1874 [08:29<10:31,  1.64it/s]

tensor(2.2634, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 841/1874 [08:29<10:32,  1.63it/s]

tensor(2.2909, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 842/1874 [08:30<10:24,  1.65it/s]

tensor(2.2668, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▍     | 843/1874 [08:31<10:19,  1.66it/s]

tensor(2.1418, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 844/1874 [08:31<10:25,  1.65it/s]

tensor(1.9813, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 845/1874 [08:32<10:33,  1.63it/s]

tensor(3.0719, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 846/1874 [08:32<10:32,  1.62it/s]

tensor(2.5032, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 847/1874 [08:33<10:34,  1.62it/s]

tensor(2.4665, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 848/1874 [08:34<10:33,  1.62it/s]

tensor(2.5940, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 849/1874 [08:34<10:28,  1.63it/s]

tensor(2.0196, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 850/1874 [08:35<10:32,  1.62it/s]

tensor(2.3178, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 851/1874 [08:36<10:24,  1.64it/s]

tensor(1.8043, device='cuda:0', grad_fn=<AddBackward0>)


 45%|████▌     | 852/1874 [08:36<10:26,  1.63it/s]

tensor(2.9139, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 853/1874 [08:37<10:31,  1.62it/s]

tensor(1.7599, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 854/1874 [08:37<10:32,  1.61it/s]

tensor(2.3142, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 855/1874 [08:38<10:35,  1.60it/s]

tensor(2.5255, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 856/1874 [08:39<10:35,  1.60it/s]

tensor(1.4264, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 857/1874 [08:39<10:34,  1.60it/s]

tensor(2.9598, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 858/1874 [08:40<10:34,  1.60it/s]

tensor(1.8898, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 859/1874 [08:41<10:33,  1.60it/s]

tensor(1.7696, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 860/1874 [08:41<10:24,  1.62it/s]

tensor(2.6442, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 861/1874 [08:42<10:16,  1.64it/s]

tensor(1.9171, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 862/1874 [08:42<10:18,  1.64it/s]

tensor(1.8076, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 863/1874 [08:43<10:11,  1.65it/s]

tensor(2.6601, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 864/1874 [08:44<10:15,  1.64it/s]

tensor(1.5791, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 865/1874 [08:44<10:21,  1.62it/s]

tensor(2.3123, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▌     | 866/1874 [08:45<10:23,  1.62it/s]

tensor(2.5668, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▋     | 867/1874 [08:45<10:25,  1.61it/s]

tensor(1.8206, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▋     | 868/1874 [08:46<10:15,  1.64it/s]

tensor(2.5275, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▋     | 869/1874 [08:47<10:18,  1.62it/s]

tensor(1.7259, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▋     | 870/1874 [08:47<10:11,  1.64it/s]

tensor(2.0637, device='cuda:0', grad_fn=<AddBackward0>)


 46%|████▋     | 871/1874 [08:48<10:06,  1.65it/s]

tensor(2.4191, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 872/1874 [08:48<10:11,  1.64it/s]

tensor(2.2141, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 873/1874 [08:49<10:09,  1.64it/s]

tensor(1.7683, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 874/1874 [08:50<10:04,  1.65it/s]

tensor(1.8463, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 875/1874 [08:50<10:00,  1.66it/s]

tensor(2.3766, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 876/1874 [08:51<09:57,  1.67it/s]

tensor(2.4301, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 877/1874 [08:51<09:52,  1.68it/s]

tensor(1.8822, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 878/1874 [08:52<09:51,  1.68it/s]

tensor(2.4343, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 879/1874 [08:53<09:48,  1.69it/s]

tensor(2.7555, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 880/1874 [08:53<09:46,  1.70it/s]

tensor(2.5003, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 881/1874 [08:54<09:47,  1.69it/s]

tensor(2.3547, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 882/1874 [08:54<09:47,  1.69it/s]

tensor(1.7801, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 883/1874 [08:55<09:46,  1.69it/s]

tensor(2.7373, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 884/1874 [08:56<09:41,  1.70it/s]

tensor(1.8868, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 885/1874 [08:56<09:41,  1.70it/s]

tensor(2.3747, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 886/1874 [08:57<09:42,  1.70it/s]

tensor(1.6512, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 887/1874 [08:57<09:43,  1.69it/s]

tensor(3.8063, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 888/1874 [08:58<09:41,  1.70it/s]

tensor(1.8641, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 889/1874 [08:59<09:38,  1.70it/s]

tensor(3.1542, device='cuda:0', grad_fn=<AddBackward0>)


 47%|████▋     | 890/1874 [08:59<09:36,  1.71it/s]

tensor(1.7973, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 891/1874 [09:00<09:48,  1.67it/s]

tensor(2.5571, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 892/1874 [09:00<09:45,  1.68it/s]

tensor(2.2920, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 893/1874 [09:01<09:44,  1.68it/s]

tensor(2.0344, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 894/1874 [09:01<09:40,  1.69it/s]

tensor(2.5547, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 895/1874 [09:02<09:40,  1.69it/s]

tensor(2.1237, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 896/1874 [09:03<09:40,  1.68it/s]

tensor(2.1154, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 897/1874 [09:03<09:51,  1.65it/s]

tensor(1.9257, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 898/1874 [09:04<09:47,  1.66it/s]

tensor(2.0390, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 899/1874 [09:05<09:44,  1.67it/s]

tensor(1.8692, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 900/1874 [09:05<09:42,  1.67it/s]

tensor(2.7002, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 901/1874 [09:06<09:40,  1.68it/s]

tensor(2.2967, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 902/1874 [09:06<09:38,  1.68it/s]

tensor(2.0006, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 903/1874 [09:07<09:35,  1.69it/s]

tensor(2.1027, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 904/1874 [09:07<09:34,  1.69it/s]

tensor(2.3077, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 905/1874 [09:08<09:34,  1.69it/s]

tensor(2.2953, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 906/1874 [09:09<09:34,  1.68it/s]

tensor(2.2596, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 907/1874 [09:09<09:34,  1.68it/s]

tensor(2.5015, device='cuda:0', grad_fn=<AddBackward0>)


 48%|████▊     | 908/1874 [09:10<09:33,  1.68it/s]

tensor(2.6196, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▊     | 909/1874 [09:10<09:39,  1.67it/s]

tensor(2.1549, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▊     | 910/1874 [09:11<09:39,  1.66it/s]

tensor(2.5309, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▊     | 911/1874 [09:12<09:36,  1.67it/s]

tensor(2.3749, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▊     | 912/1874 [09:12<09:42,  1.65it/s]

tensor(2.4050, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▊     | 913/1874 [09:13<09:43,  1.65it/s]

tensor(2.3648, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 914/1874 [09:14<09:46,  1.64it/s]

tensor(2.3358, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 915/1874 [09:14<09:41,  1.65it/s]

tensor(2.3529, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 916/1874 [09:15<09:48,  1.63it/s]

tensor(2.2983, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 917/1874 [09:15<09:45,  1.63it/s]

tensor(2.5455, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 918/1874 [09:16<09:50,  1.62it/s]

tensor(2.0666, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 919/1874 [09:17<09:50,  1.62it/s]

tensor(2.4739, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 920/1874 [09:17<09:52,  1.61it/s]

tensor(2.0714, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 921/1874 [09:18<09:50,  1.61it/s]

tensor(2.6567, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 922/1874 [09:18<09:48,  1.62it/s]

tensor(1.9806, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 923/1874 [09:19<09:41,  1.64it/s]

tensor(2.7439, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 924/1874 [09:20<09:33,  1.66it/s]

tensor(1.8871, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 925/1874 [09:20<09:30,  1.66it/s]

tensor(2.8094, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 926/1874 [09:21<09:36,  1.65it/s]

tensor(1.8093, device='cuda:0', grad_fn=<AddBackward0>)


 49%|████▉     | 927/1874 [09:21<09:40,  1.63it/s]

tensor(2.7070, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 928/1874 [09:22<09:35,  1.64it/s]

tensor(1.9713, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 929/1874 [09:23<09:39,  1.63it/s]

tensor(2.2613, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 930/1874 [09:23<09:41,  1.62it/s]

tensor(2.3031, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 931/1874 [09:24<09:34,  1.64it/s]

tensor(2.0270, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 932/1874 [09:25<09:36,  1.63it/s]

tensor(2.6277, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 933/1874 [09:25<09:39,  1.62it/s]

tensor(2.1677, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 934/1874 [09:26<09:41,  1.62it/s]

tensor(2.5628, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 935/1874 [09:26<09:46,  1.60it/s]

tensor(1.8964, device='cuda:0', grad_fn=<AddBackward0>)


 50%|████▉     | 936/1874 [09:27<09:47,  1.60it/s]

tensor(2.4989, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 937/1874 [09:28<09:38,  1.62it/s]

tensor(2.0586, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 938/1874 [09:28<09:31,  1.64it/s]

tensor(2.3600, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 939/1874 [09:29<09:25,  1.65it/s]

tensor(2.0367, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 940/1874 [09:29<09:22,  1.66it/s]

tensor(2.2451, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 941/1874 [09:30<09:24,  1.65it/s]

tensor(1.9412, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 942/1874 [09:31<09:30,  1.63it/s]

tensor(2.5174, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 943/1874 [09:31<09:32,  1.63it/s]

tensor(2.0931, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 944/1874 [09:32<09:32,  1.62it/s]

tensor(2.5436, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 945/1874 [09:33<09:30,  1.63it/s]

tensor(2.1587, device='cuda:0', grad_fn=<AddBackward0>)


 50%|█████     | 946/1874 [09:33<09:31,  1.62it/s]

tensor(2.6165, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 947/1874 [09:34<09:23,  1.65it/s]

tensor(2.3100, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 948/1874 [09:34<09:27,  1.63it/s]

tensor(2.4669, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 949/1874 [09:35<09:31,  1.62it/s]

tensor(2.4216, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 950/1874 [09:36<09:30,  1.62it/s]

tensor(2.6462, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 951/1874 [09:36<09:33,  1.61it/s]

tensor(2.2914, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 952/1874 [09:37<09:36,  1.60it/s]

tensor(2.4507, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 953/1874 [09:37<09:32,  1.61it/s]

tensor(2.2806, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 954/1874 [09:38<09:23,  1.63it/s]

tensor(2.2327, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 955/1874 [09:39<09:26,  1.62it/s]

tensor(2.2073, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 956/1874 [09:39<09:20,  1.64it/s]

tensor(2.2678, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 957/1874 [09:40<09:30,  1.61it/s]

tensor(2.4044, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 958/1874 [09:41<09:32,  1.60it/s]

tensor(2.2088, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 959/1874 [09:41<09:29,  1.61it/s]

tensor(2.2497, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████     | 960/1874 [09:42<09:17,  1.64it/s]

tensor(2.2096, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████▏    | 961/1874 [09:42<09:11,  1.66it/s]

tensor(2.3370, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████▏    | 962/1874 [09:43<09:13,  1.65it/s]

tensor(2.3055, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████▏    | 963/1874 [09:44<09:15,  1.64it/s]

tensor(2.3619, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████▏    | 964/1874 [09:44<09:09,  1.66it/s]

tensor(2.3903, device='cuda:0', grad_fn=<AddBackward0>)


 51%|█████▏    | 965/1874 [09:45<11:07,  1.36it/s]

tensor(2.3682, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 966/1874 [09:46<10:28,  1.44it/s]

tensor(1.8929, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 967/1874 [09:46<10:00,  1.51it/s]

tensor(2.6500, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 968/1874 [09:47<09:47,  1.54it/s]

tensor(1.8951, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 969/1874 [09:48<09:40,  1.56it/s]

tensor(3.0071, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 970/1874 [09:48<09:24,  1.60it/s]

tensor(1.8547, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 971/1874 [09:49<09:17,  1.62it/s]

tensor(2.7914, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 972/1874 [09:49<09:08,  1.64it/s]

tensor(2.1594, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 973/1874 [09:50<09:10,  1.64it/s]

tensor(2.9885, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 974/1874 [09:51<09:13,  1.63it/s]

tensor(1.9208, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 975/1874 [09:51<09:10,  1.63it/s]

tensor(1.5419, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 976/1874 [09:52<08:59,  1.66it/s]

tensor(2.3455, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 977/1874 [09:52<08:54,  1.68it/s]

tensor(1.6673, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 978/1874 [09:53<09:00,  1.66it/s]

tensor(1.8695, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 979/1874 [09:54<09:06,  1.64it/s]

tensor(2.0676, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 980/1874 [09:54<08:58,  1.66it/s]

tensor(1.9076, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 981/1874 [09:55<09:06,  1.63it/s]

tensor(2.0793, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 982/1874 [09:56<09:08,  1.63it/s]

tensor(2.5945, device='cuda:0', grad_fn=<AddBackward0>)


 52%|█████▏    | 983/1874 [09:56<09:05,  1.63it/s]

tensor(2.0663, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 984/1874 [09:57<09:05,  1.63it/s]

tensor(2.5287, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 985/1874 [09:57<08:58,  1.65it/s]

tensor(2.4967, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 986/1874 [09:58<08:49,  1.68it/s]

tensor(2.6980, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 987/1874 [09:58<08:40,  1.70it/s]

tensor(2.7589, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 988/1874 [09:59<08:36,  1.72it/s]

tensor(3.2687, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 989/1874 [10:00<08:31,  1.73it/s]

tensor(2.8942, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 990/1874 [10:00<08:29,  1.74it/s]

tensor(2.4662, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 991/1874 [10:01<08:27,  1.74it/s]

tensor(2.3983, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 992/1874 [10:01<08:27,  1.74it/s]

tensor(2.5728, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 993/1874 [10:02<08:26,  1.74it/s]

tensor(2.4324, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 994/1874 [10:02<08:26,  1.74it/s]

tensor(2.7240, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 995/1874 [10:03<08:27,  1.73it/s]

tensor(2.8413, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 996/1874 [10:04<08:26,  1.73it/s]

tensor(3.5372, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 997/1874 [10:04<08:24,  1.74it/s]

tensor(4.5224, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 998/1874 [10:05<08:20,  1.75it/s]

tensor(3.5209, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 999/1874 [10:05<08:21,  1.75it/s]

tensor(3.3050, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 1000/1874 [10:06<08:21,  1.74it/s]

tensor(3.2873, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 1001/1874 [10:06<08:20,  1.74it/s]

tensor(2.6024, device='cuda:0', grad_fn=<AddBackward0>)


 53%|█████▎    | 1002/1874 [10:07<08:19,  1.74it/s]

tensor(2.6932, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▎    | 1003/1874 [10:08<08:19,  1.74it/s]

tensor(3.1685, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▎    | 1004/1874 [10:08<08:18,  1.74it/s]

tensor(3.2594, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▎    | 1005/1874 [10:09<08:18,  1.74it/s]

tensor(3.3181, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▎    | 1006/1874 [10:09<08:32,  1.69it/s]

tensor(2.2338, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▎    | 1007/1874 [10:10<08:41,  1.66it/s]

tensor(3.2081, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1008/1874 [10:11<08:48,  1.64it/s]

tensor(2.4555, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1009/1874 [10:11<08:42,  1.66it/s]

tensor(2.5816, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1010/1874 [10:12<08:46,  1.64it/s]

tensor(2.6911, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1011/1874 [10:12<08:41,  1.65it/s]

tensor(1.9557, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1012/1874 [10:13<08:42,  1.65it/s]

tensor(3.5933, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1013/1874 [10:14<08:37,  1.66it/s]

tensor(2.3803, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1014/1874 [10:14<08:42,  1.65it/s]

tensor(2.8581, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1015/1874 [10:15<08:46,  1.63it/s]

tensor(3.7411, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1016/1874 [10:16<08:48,  1.62it/s]

tensor(3.8875, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1017/1874 [10:16<08:40,  1.65it/s]

tensor(3.3020, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1018/1874 [10:17<08:33,  1.67it/s]

tensor(3.1931, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1019/1874 [10:17<08:28,  1.68it/s]

tensor(2.2957, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1020/1874 [10:18<08:35,  1.66it/s]

tensor(2.8591, device='cuda:0', grad_fn=<AddBackward0>)


 54%|█████▍    | 1021/1874 [10:19<08:40,  1.64it/s]

tensor(2.2140, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1022/1874 [10:19<08:43,  1.63it/s]

tensor(2.7530, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1023/1874 [10:20<08:36,  1.65it/s]

tensor(2.3185, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1024/1874 [10:20<08:31,  1.66it/s]

tensor(3.0435, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1025/1874 [10:21<08:35,  1.65it/s]

tensor(2.4627, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1026/1874 [10:22<08:40,  1.63it/s]

tensor(2.1045, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1027/1874 [10:22<08:32,  1.65it/s]

tensor(2.9631, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1028/1874 [10:23<08:36,  1.64it/s]

tensor(3.1121, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1029/1874 [10:23<08:39,  1.63it/s]

tensor(2.5212, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▍    | 1030/1874 [10:24<08:38,  1.63it/s]

tensor(4.2020, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1031/1874 [10:25<08:40,  1.62it/s]

tensor(2.2400, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1032/1874 [10:25<08:39,  1.62it/s]

tensor(4.3185, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1033/1874 [10:26<08:42,  1.61it/s]

tensor(2.1852, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1034/1874 [10:27<08:43,  1.61it/s]

tensor(3.0070, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1035/1874 [10:27<08:42,  1.61it/s]

tensor(2.5111, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1036/1874 [10:28<08:44,  1.60it/s]

tensor(2.6047, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1037/1874 [10:28<08:43,  1.60it/s]

tensor(2.8343, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1038/1874 [10:29<08:45,  1.59it/s]

tensor(2.4002, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1039/1874 [10:30<08:41,  1.60it/s]

tensor(2.3847, device='cuda:0', grad_fn=<AddBackward0>)


 55%|█████▌    | 1040/1874 [10:30<08:30,  1.63it/s]

tensor(2.1071, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1041/1874 [10:31<08:32,  1.62it/s]

tensor(2.7992, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1042/1874 [10:31<08:34,  1.62it/s]

tensor(2.3466, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1043/1874 [10:32<08:37,  1.61it/s]

tensor(1.9809, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1044/1874 [10:33<08:27,  1.63it/s]

tensor(2.7513, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1045/1874 [10:33<08:20,  1.66it/s]

tensor(2.4321, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1046/1874 [10:34<08:23,  1.64it/s]

tensor(2.5122, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1047/1874 [10:35<08:26,  1.63it/s]

tensor(2.0117, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1048/1874 [10:35<08:27,  1.63it/s]

tensor(2.7490, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1049/1874 [10:36<08:31,  1.61it/s]

tensor(2.3609, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1050/1874 [10:36<08:22,  1.64it/s]

tensor(2.1026, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1051/1874 [10:37<08:23,  1.64it/s]

tensor(2.8963, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1052/1874 [10:38<08:15,  1.66it/s]

tensor(1.9433, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1053/1874 [10:38<08:17,  1.65it/s]

tensor(1.8732, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▌    | 1054/1874 [10:39<08:21,  1.63it/s]

tensor(1.9382, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▋    | 1055/1874 [10:39<08:25,  1.62it/s]

tensor(2.0688, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▋    | 1056/1874 [10:40<08:25,  1.62it/s]

tensor(2.4193, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▋    | 1057/1874 [10:41<08:27,  1.61it/s]

tensor(1.6945, device='cuda:0', grad_fn=<AddBackward0>)


 56%|█████▋    | 1058/1874 [10:41<08:28,  1.60it/s]

tensor(2.1426, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1059/1874 [10:42<08:30,  1.60it/s]

tensor(1.6770, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1060/1874 [10:43<08:30,  1.59it/s]

tensor(1.8863, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1061/1874 [10:43<08:19,  1.63it/s]

tensor(1.7214, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1062/1874 [10:44<08:15,  1.64it/s]

tensor(1.8820, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1063/1874 [10:44<08:14,  1.64it/s]

tensor(1.7835, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1064/1874 [10:45<08:19,  1.62it/s]

tensor(1.7688, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1065/1874 [10:46<08:19,  1.62it/s]

tensor(1.9713, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1066/1874 [10:46<08:19,  1.62it/s]

tensor(1.8133, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1067/1874 [10:47<08:12,  1.64it/s]

tensor(1.6177, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1068/1874 [10:47<08:08,  1.65it/s]

tensor(2.8622, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1069/1874 [10:48<08:04,  1.66it/s]

tensor(2.0587, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1070/1874 [10:49<07:59,  1.68it/s]

tensor(1.7771, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1071/1874 [10:49<08:09,  1.64it/s]

tensor(2.4762, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1072/1874 [10:50<07:49,  1.71it/s]

tensor(2.4433, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1073/1874 [10:50<08:02,  1.66it/s]

tensor(3.0457, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1074/1874 [10:51<07:59,  1.67it/s]

tensor(2.8063, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1075/1874 [10:52<08:02,  1.66it/s]

tensor(2.6221, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1076/1874 [10:52<08:07,  1.64it/s]

tensor(3.1257, device='cuda:0', grad_fn=<AddBackward0>)


 57%|█████▋    | 1077/1874 [10:53<08:00,  1.66it/s]

tensor(2.2057, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1078/1874 [10:53<08:02,  1.65it/s]

tensor(2.6519, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1079/1874 [10:54<08:04,  1.64it/s]

tensor(2.2739, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1080/1874 [10:55<08:06,  1.63it/s]

tensor(2.3779, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1081/1874 [10:55<08:08,  1.62it/s]

tensor(2.4546, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1082/1874 [10:56<08:00,  1.65it/s]

tensor(2.5123, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1083/1874 [10:57<08:01,  1.64it/s]

tensor(2.4088, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1084/1874 [10:57<07:57,  1.66it/s]

tensor(2.4100, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1085/1874 [10:58<07:58,  1.65it/s]

tensor(2.4802, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1086/1874 [10:58<08:02,  1.63it/s]

tensor(2.3500, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1087/1874 [10:59<08:04,  1.62it/s]

tensor(2.2917, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1088/1874 [11:00<08:06,  1.62it/s]

tensor(2.2817, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1089/1874 [11:00<08:06,  1.61it/s]

tensor(2.6023, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1090/1874 [11:01<08:06,  1.61it/s]

tensor(2.1124, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1091/1874 [11:01<08:07,  1.61it/s]

tensor(2.7641, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1092/1874 [11:02<08:07,  1.60it/s]

tensor(2.1688, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1093/1874 [11:03<08:01,  1.62it/s]

tensor(2.4308, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1094/1874 [11:03<07:54,  1.64it/s]

tensor(2.2733, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1095/1874 [11:04<07:48,  1.66it/s]

tensor(2.4480, device='cuda:0', grad_fn=<AddBackward0>)


 58%|█████▊    | 1096/1874 [11:04<07:43,  1.68it/s]

tensor(2.4102, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▊    | 1097/1874 [11:05<07:37,  1.70it/s]

tensor(2.4241, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▊    | 1098/1874 [11:06<07:36,  1.70it/s]

tensor(2.4433, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▊    | 1099/1874 [11:06<07:37,  1.70it/s]

tensor(2.3243, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▊    | 1100/1874 [11:07<07:37,  1.69it/s]

tensor(2.9913, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1101/1874 [11:07<07:39,  1.68it/s]

tensor(2.2587, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1102/1874 [11:08<07:39,  1.68it/s]

tensor(3.0079, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1103/1874 [11:09<07:38,  1.68it/s]

tensor(2.1957, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1104/1874 [11:09<07:37,  1.68it/s]

tensor(2.8631, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1105/1874 [11:10<07:35,  1.69it/s]

tensor(2.0408, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1106/1874 [11:10<07:37,  1.68it/s]

tensor(2.7116, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1107/1874 [11:11<07:37,  1.68it/s]

tensor(2.6780, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1108/1874 [11:12<07:36,  1.68it/s]

tensor(2.2513, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1109/1874 [11:12<07:35,  1.68it/s]

tensor(2.8851, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1110/1874 [11:13<07:32,  1.69it/s]

tensor(2.5283, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1111/1874 [11:13<07:32,  1.69it/s]

tensor(2.5229, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1112/1874 [11:14<07:32,  1.68it/s]

tensor(2.2488, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1113/1874 [11:15<07:31,  1.68it/s]

tensor(2.5526, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1114/1874 [11:15<07:31,  1.68it/s]

tensor(2.5900, device='cuda:0', grad_fn=<AddBackward0>)


 59%|█████▉    | 1115/1874 [11:16<07:30,  1.68it/s]

tensor(2.2634, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1116/1874 [11:16<07:30,  1.68it/s]

tensor(2.6687, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1117/1874 [11:17<07:29,  1.68it/s]

tensor(2.6759, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1118/1874 [11:17<07:26,  1.69it/s]

tensor(2.6604, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1119/1874 [11:18<07:26,  1.69it/s]

tensor(2.7897, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1120/1874 [11:19<07:26,  1.69it/s]

tensor(2.2342, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1121/1874 [11:19<07:24,  1.69it/s]

tensor(3.4345, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1122/1874 [11:20<07:24,  1.69it/s]

tensor(3.9225, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1123/1874 [11:20<07:28,  1.67it/s]

tensor(2.5669, device='cuda:0', grad_fn=<AddBackward0>)


 60%|█████▉    | 1124/1874 [11:21<07:24,  1.69it/s]

tensor(3.4736, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1125/1874 [11:22<07:22,  1.69it/s]

tensor(2.4490, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1126/1874 [11:22<07:20,  1.70it/s]

tensor(2.4380, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1127/1874 [11:23<07:21,  1.69it/s]

tensor(2.7763, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1128/1874 [11:23<07:20,  1.69it/s]

tensor(2.4909, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1129/1874 [11:24<07:21,  1.69it/s]

tensor(2.5975, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1130/1874 [11:25<07:24,  1.67it/s]

tensor(2.7982, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1131/1874 [11:25<07:21,  1.68it/s]

tensor(2.6211, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1132/1874 [11:26<07:22,  1.68it/s]

tensor(2.5156, device='cuda:0', grad_fn=<AddBackward0>)


 60%|██████    | 1133/1874 [11:26<07:20,  1.68it/s]

tensor(2.7800, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1134/1874 [11:27<07:21,  1.68it/s]

tensor(2.5087, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1135/1874 [11:28<07:18,  1.68it/s]

tensor(2.3098, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1136/1874 [11:28<07:17,  1.69it/s]

tensor(2.7652, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1137/1874 [11:29<07:15,  1.69it/s]

tensor(1.7221, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1138/1874 [11:29<07:19,  1.67it/s]

tensor(2.7808, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1139/1874 [11:30<07:18,  1.68it/s]

tensor(2.4618, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1140/1874 [11:31<07:25,  1.65it/s]

tensor(2.5443, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1141/1874 [11:31<07:21,  1.66it/s]

tensor(2.1632, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1142/1874 [11:32<07:20,  1.66it/s]

tensor(2.6315, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1143/1874 [11:32<07:16,  1.67it/s]

tensor(2.7137, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1144/1874 [11:33<07:14,  1.68it/s]

tensor(1.9553, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1145/1874 [11:34<07:11,  1.69it/s]

tensor(3.6054, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1146/1874 [11:34<07:11,  1.69it/s]

tensor(2.4801, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████    | 1147/1874 [11:35<07:10,  1.69it/s]

tensor(2.3223, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████▏   | 1148/1874 [11:35<07:10,  1.69it/s]

tensor(2.4531, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████▏   | 1149/1874 [11:36<07:08,  1.69it/s]

tensor(2.7608, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████▏   | 1150/1874 [11:37<07:08,  1.69it/s]

tensor(2.5141, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████▏   | 1151/1874 [11:37<07:09,  1.68it/s]

tensor(2.4339, device='cuda:0', grad_fn=<AddBackward0>)


 61%|██████▏   | 1152/1874 [11:38<07:07,  1.69it/s]

tensor(2.5626, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1153/1874 [11:38<07:05,  1.69it/s]

tensor(2.4467, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1154/1874 [11:39<07:05,  1.69it/s]

tensor(1.9133, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1155/1874 [11:39<07:05,  1.69it/s]

tensor(2.9169, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1156/1874 [11:40<07:03,  1.69it/s]

tensor(2.4980, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1157/1874 [11:41<07:05,  1.68it/s]

tensor(2.3307, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1158/1874 [11:41<07:01,  1.70it/s]

tensor(2.4135, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1159/1874 [11:42<06:59,  1.70it/s]

tensor(2.0984, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1160/1874 [11:42<06:58,  1.71it/s]

tensor(2.7959, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1161/1874 [11:43<06:58,  1.70it/s]

tensor(1.7147, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1162/1874 [11:44<06:58,  1.70it/s]

tensor(3.3932, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1163/1874 [11:44<06:59,  1.70it/s]

tensor(1.9355, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1164/1874 [11:45<07:00,  1.69it/s]

tensor(2.8579, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1165/1874 [11:45<07:00,  1.69it/s]

tensor(2.5745, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1166/1874 [11:46<07:00,  1.69it/s]

tensor(2.4516, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1167/1874 [11:47<06:58,  1.69it/s]

tensor(2.4517, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1168/1874 [11:47<07:00,  1.68it/s]

tensor(2.3032, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1169/1874 [11:48<06:59,  1.68it/s]

tensor(2.4789, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1170/1874 [11:48<06:58,  1.68it/s]

tensor(2.1059, device='cuda:0', grad_fn=<AddBackward0>)


 62%|██████▏   | 1171/1874 [11:49<06:57,  1.68it/s]

tensor(2.3155, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1172/1874 [11:50<06:54,  1.69it/s]

tensor(2.1809, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1173/1874 [11:50<06:51,  1.71it/s]

tensor(2.2010, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1174/1874 [11:51<06:51,  1.70it/s]

tensor(2.1713, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1175/1874 [11:51<06:50,  1.70it/s]

tensor(2.1303, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1176/1874 [11:52<06:50,  1.70it/s]

tensor(2.4458, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1177/1874 [11:52<06:49,  1.70it/s]

tensor(1.9452, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1178/1874 [11:53<06:50,  1.70it/s]

tensor(2.6341, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1179/1874 [11:54<06:50,  1.69it/s]

tensor(2.3639, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1180/1874 [11:54<06:48,  1.70it/s]

tensor(2.4277, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1181/1874 [11:55<06:49,  1.69it/s]

tensor(2.5648, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1182/1874 [11:55<06:46,  1.70it/s]

tensor(2.0483, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1183/1874 [11:56<06:45,  1.70it/s]

tensor(2.5070, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1184/1874 [11:57<06:44,  1.70it/s]

tensor(2.4111, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1185/1874 [11:57<06:53,  1.66it/s]

tensor(2.1083, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1186/1874 [11:58<06:55,  1.65it/s]

tensor(2.9154, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1187/1874 [11:58<06:57,  1.64it/s]

tensor(2.2579, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1188/1874 [11:59<06:58,  1.64it/s]

tensor(2.5345, device='cuda:0', grad_fn=<AddBackward0>)


 63%|██████▎   | 1189/1874 [12:00<06:57,  1.64it/s]

tensor(2.1093, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▎   | 1190/1874 [12:00<06:58,  1.63it/s]

tensor(2.7570, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▎   | 1191/1874 [12:01<06:58,  1.63it/s]

tensor(2.0601, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▎   | 1192/1874 [12:01<06:53,  1.65it/s]

tensor(2.7015, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▎   | 1193/1874 [12:02<06:50,  1.66it/s]

tensor(2.1407, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▎   | 1194/1874 [12:03<06:54,  1.64it/s]

tensor(2.4136, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1195/1874 [12:03<06:51,  1.65it/s]

tensor(2.2314, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1196/1874 [12:04<06:47,  1.66it/s]

tensor(2.5205, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1197/1874 [12:04<06:43,  1.68it/s]

tensor(2.1670, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1198/1874 [12:05<06:44,  1.67it/s]

tensor(2.4736, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1199/1874 [12:06<06:40,  1.68it/s]

tensor(2.0620, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1200/1874 [12:06<06:42,  1.67it/s]

tensor(2.7008, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1201/1874 [12:07<06:46,  1.65it/s]

tensor(2.0890, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1202/1874 [12:07<06:42,  1.67it/s]

tensor(2.7348, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1203/1874 [12:08<06:41,  1.67it/s]

tensor(1.9881, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1204/1874 [12:09<06:42,  1.66it/s]

tensor(2.4003, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1205/1874 [12:09<06:44,  1.65it/s]

tensor(2.1740, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1206/1874 [12:10<06:40,  1.67it/s]

tensor(2.7482, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1207/1874 [12:10<06:36,  1.68it/s]

tensor(1.9763, device='cuda:0', grad_fn=<AddBackward0>)


 64%|██████▍   | 1208/1874 [12:11<06:41,  1.66it/s]

tensor(2.6088, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1209/1874 [12:12<06:45,  1.64it/s]

tensor(2.2295, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1210/1874 [12:12<06:47,  1.63it/s]

tensor(2.4088, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1211/1874 [12:13<06:49,  1.62it/s]

tensor(2.2668, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1212/1874 [12:14<06:42,  1.64it/s]

tensor(2.3483, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1213/1874 [12:14<06:39,  1.66it/s]

tensor(2.3004, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1214/1874 [12:15<06:36,  1.66it/s]

tensor(2.3713, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1215/1874 [12:15<06:34,  1.67it/s]

tensor(2.2532, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1216/1874 [12:16<06:32,  1.67it/s]

tensor(2.3163, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1217/1874 [12:17<06:31,  1.68it/s]

tensor(2.4873, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▍   | 1218/1874 [12:17<06:35,  1.66it/s]

tensor(2.6856, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1219/1874 [12:18<06:34,  1.66it/s]

tensor(2.2980, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1220/1874 [12:18<06:32,  1.67it/s]

tensor(2.8966, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1221/1874 [12:19<06:32,  1.66it/s]

tensor(2.1934, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1222/1874 [12:20<06:36,  1.64it/s]

tensor(2.7049, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1223/1874 [12:21<08:05,  1.34it/s]

tensor(2.2179, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1224/1874 [12:21<07:41,  1.41it/s]

tensor(2.7499, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1225/1874 [12:22<07:25,  1.46it/s]

tensor(2.1738, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1226/1874 [12:23<07:12,  1.50it/s]

tensor(2.7478, device='cuda:0', grad_fn=<AddBackward0>)


 65%|██████▌   | 1227/1874 [12:23<06:58,  1.55it/s]

tensor(2.4792, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1228/1874 [12:24<06:55,  1.55it/s]

tensor(2.4762, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1229/1874 [12:24<06:51,  1.57it/s]

tensor(2.6651, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1230/1874 [12:25<06:48,  1.58it/s]

tensor(2.2200, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1231/1874 [12:26<06:40,  1.61it/s]

tensor(2.6031, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1232/1874 [12:26<06:33,  1.63it/s]

tensor(2.3125, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1233/1874 [12:27<06:27,  1.65it/s]

tensor(2.5417, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1234/1874 [12:27<06:32,  1.63it/s]

tensor(2.4657, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1235/1874 [12:28<06:34,  1.62it/s]

tensor(2.5056, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1236/1874 [12:29<06:29,  1.64it/s]

tensor(2.2104, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1237/1874 [12:29<06:25,  1.65it/s]

tensor(2.5209, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1238/1874 [12:30<06:22,  1.66it/s]

tensor(2.2266, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1239/1874 [12:30<06:18,  1.68it/s]

tensor(2.4834, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1240/1874 [12:31<06:15,  1.69it/s]

tensor(2.1376, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▌   | 1241/1874 [12:32<06:13,  1.69it/s]

tensor(2.3532, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▋   | 1242/1874 [12:32<06:19,  1.67it/s]

tensor(2.3521, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▋   | 1243/1874 [12:33<06:24,  1.64it/s]

tensor(2.4011, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▋   | 1244/1874 [12:33<06:28,  1.62it/s]

tensor(2.4996, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▋   | 1245/1874 [12:34<06:23,  1.64it/s]

tensor(2.2731, device='cuda:0', grad_fn=<AddBackward0>)


 66%|██████▋   | 1246/1874 [12:35<06:25,  1.63it/s]

tensor(2.6069, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1247/1874 [12:35<06:24,  1.63it/s]

tensor(2.5745, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1248/1874 [12:36<06:24,  1.63it/s]

tensor(2.9224, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1249/1874 [12:36<06:20,  1.64it/s]

tensor(2.4312, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1250/1874 [12:37<06:22,  1.63it/s]

tensor(2.6703, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1251/1874 [12:38<06:20,  1.64it/s]

tensor(2.4559, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1252/1874 [12:38<06:21,  1.63it/s]

tensor(3.0145, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1253/1874 [12:39<06:15,  1.65it/s]

tensor(3.1647, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1254/1874 [12:40<06:15,  1.65it/s]

tensor(3.1274, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1255/1874 [12:40<06:12,  1.66it/s]

tensor(3.0227, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1256/1874 [12:41<06:14,  1.65it/s]

tensor(3.1392, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1257/1874 [12:41<06:10,  1.66it/s]

tensor(3.6837, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1258/1874 [12:42<06:15,  1.64it/s]

tensor(2.9215, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1259/1874 [12:43<06:13,  1.65it/s]

tensor(3.0434, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1260/1874 [12:43<06:15,  1.64it/s]

tensor(3.7889, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1261/1874 [12:44<06:18,  1.62it/s]

tensor(2.2265, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1262/1874 [12:44<06:12,  1.64it/s]

tensor(4.0455, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1263/1874 [12:45<06:07,  1.66it/s]

tensor(3.1747, device='cuda:0', grad_fn=<AddBackward0>)


 67%|██████▋   | 1264/1874 [12:46<06:06,  1.66it/s]

tensor(2.6036, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1265/1874 [12:46<06:06,  1.66it/s]

tensor(2.5731, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1266/1874 [12:47<06:00,  1.68it/s]

tensor(2.3725, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1267/1874 [12:47<06:02,  1.68it/s]

tensor(2.2678, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1268/1874 [12:48<05:59,  1.69it/s]

tensor(2.5214, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1269/1874 [12:49<05:59,  1.68it/s]

tensor(2.3359, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1270/1874 [12:49<06:00,  1.67it/s]

tensor(2.2812, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1271/1874 [12:50<05:59,  1.68it/s]

tensor(2.1119, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1272/1874 [12:50<05:58,  1.68it/s]

tensor(2.3195, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1273/1874 [12:51<05:57,  1.68it/s]

tensor(2.2753, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1274/1874 [12:52<05:56,  1.68it/s]

tensor(2.2938, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1275/1874 [12:52<05:56,  1.68it/s]

tensor(2.3221, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1276/1874 [12:53<05:55,  1.68it/s]

tensor(2.2650, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1277/1874 [12:53<05:54,  1.68it/s]

tensor(2.3519, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1278/1874 [12:54<05:54,  1.68it/s]

tensor(2.3748, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1279/1874 [12:55<05:54,  1.68it/s]

tensor(2.2061, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1280/1874 [12:55<05:53,  1.68it/s]

tensor(2.0690, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1281/1874 [12:56<05:53,  1.68it/s]

tensor(2.3433, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1282/1874 [12:56<05:52,  1.68it/s]

tensor(2.2675, device='cuda:0', grad_fn=<AddBackward0>)


 68%|██████▊   | 1283/1874 [12:57<05:53,  1.67it/s]

tensor(2.1792, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▊   | 1284/1874 [12:57<05:52,  1.67it/s]

tensor(2.3714, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▊   | 1285/1874 [12:58<05:51,  1.68it/s]

tensor(2.1788, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▊   | 1286/1874 [12:59<05:50,  1.68it/s]

tensor(2.4543, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▊   | 1287/1874 [12:59<05:49,  1.68it/s]

tensor(2.3503, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▊   | 1288/1874 [13:00<05:49,  1.68it/s]

tensor(2.1745, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1289/1874 [13:00<05:47,  1.68it/s]

tensor(2.2364, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1290/1874 [13:01<05:45,  1.69it/s]

tensor(2.2108, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1291/1874 [13:02<05:43,  1.70it/s]

tensor(2.3514, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1292/1874 [13:02<05:40,  1.71it/s]

tensor(2.2013, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1293/1874 [13:03<05:39,  1.71it/s]

tensor(2.3699, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1294/1874 [13:03<05:40,  1.70it/s]

tensor(2.2771, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1295/1874 [13:04<05:41,  1.70it/s]

tensor(2.2482, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1296/1874 [13:05<05:39,  1.70it/s]

tensor(2.3463, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1297/1874 [13:05<05:40,  1.70it/s]

tensor(2.4259, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1298/1874 [13:06<05:38,  1.70it/s]

tensor(2.2683, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1299/1874 [13:06<05:39,  1.70it/s]

tensor(2.3345, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1300/1874 [13:07<05:37,  1.70it/s]

tensor(2.2609, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1301/1874 [13:08<05:41,  1.68it/s]

tensor(2.1666, device='cuda:0', grad_fn=<AddBackward0>)


 69%|██████▉   | 1302/1874 [13:08<05:42,  1.67it/s]

tensor(2.2976, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1303/1874 [13:09<05:39,  1.68it/s]

tensor(2.4772, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1304/1874 [13:09<05:38,  1.68it/s]

tensor(2.0491, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1305/1874 [13:10<05:36,  1.69it/s]

tensor(2.4434, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1306/1874 [13:10<05:34,  1.70it/s]

tensor(2.3540, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1307/1874 [13:11<05:33,  1.70it/s]

tensor(2.2854, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1308/1874 [13:12<05:33,  1.70it/s]

tensor(2.3490, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1309/1874 [13:12<05:34,  1.69it/s]

tensor(1.9625, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1310/1874 [13:13<05:33,  1.69it/s]

tensor(2.7218, device='cuda:0', grad_fn=<AddBackward0>)


 70%|██████▉   | 1311/1874 [13:13<05:34,  1.68it/s]

tensor(1.9794, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1312/1874 [13:14<05:33,  1.69it/s]

tensor(2.2163, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1313/1874 [13:15<05:32,  1.69it/s]

tensor(2.6792, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1314/1874 [13:15<05:32,  1.68it/s]

tensor(1.8187, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1315/1874 [13:16<05:32,  1.68it/s]

tensor(2.5678, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1316/1874 [13:16<05:32,  1.68it/s]

tensor(2.2427, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1317/1874 [13:17<05:35,  1.66it/s]

tensor(2.4533, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1318/1874 [13:18<05:35,  1.66it/s]

tensor(2.2569, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1319/1874 [13:18<05:32,  1.67it/s]

tensor(2.5446, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1320/1874 [13:19<05:30,  1.67it/s]

tensor(1.9969, device='cuda:0', grad_fn=<AddBackward0>)


 70%|███████   | 1321/1874 [13:19<05:28,  1.68it/s]

tensor(3.1443, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1322/1874 [13:20<05:27,  1.68it/s]

tensor(2.3657, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1323/1874 [13:21<05:25,  1.69it/s]

tensor(1.9305, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1324/1874 [13:21<05:26,  1.68it/s]

tensor(2.9287, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1325/1874 [13:22<05:26,  1.68it/s]

tensor(2.0035, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1326/1874 [13:22<05:23,  1.69it/s]

tensor(2.7901, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1327/1874 [13:23<05:23,  1.69it/s]

tensor(2.1064, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1328/1874 [13:24<05:23,  1.69it/s]

tensor(2.2244, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1329/1874 [13:24<05:21,  1.69it/s]

tensor(2.2189, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1330/1874 [13:25<05:20,  1.70it/s]

tensor(1.7160, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1331/1874 [13:25<05:18,  1.70it/s]

tensor(3.1160, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1332/1874 [13:26<05:19,  1.70it/s]

tensor(1.5006, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1333/1874 [13:26<05:19,  1.69it/s]

tensor(2.1918, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1334/1874 [13:27<05:17,  1.70it/s]

tensor(1.4663, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████   | 1335/1874 [13:28<05:16,  1.70it/s]

tensor(1.8373, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████▏  | 1336/1874 [13:28<05:16,  1.70it/s]

tensor(1.7407, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████▏  | 1337/1874 [13:29<05:15,  1.70it/s]

tensor(1.6854, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████▏  | 1338/1874 [13:29<05:14,  1.70it/s]

tensor(1.7525, device='cuda:0', grad_fn=<AddBackward0>)


 71%|███████▏  | 1339/1874 [13:30<05:14,  1.70it/s]

tensor(1.6396, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1340/1874 [13:31<05:13,  1.70it/s]

tensor(1.5051, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1341/1874 [13:31<05:14,  1.69it/s]

tensor(2.8782, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1342/1874 [13:32<05:14,  1.69it/s]

tensor(1.3829, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1343/1874 [13:32<05:15,  1.68it/s]

tensor(1.8780, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1344/1874 [13:33<05:14,  1.68it/s]

tensor(2.2077, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1345/1874 [13:34<05:15,  1.67it/s]

tensor(1.4439, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1346/1874 [13:34<05:19,  1.65it/s]

tensor(2.2013, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1347/1874 [13:35<05:23,  1.63it/s]

tensor(1.5117, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1348/1874 [13:35<05:19,  1.65it/s]

tensor(2.4011, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1349/1874 [13:36<05:17,  1.65it/s]

tensor(1.8457, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1350/1874 [13:37<05:13,  1.67it/s]

tensor(2.6338, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1351/1874 [13:37<05:12,  1.68it/s]

tensor(2.1780, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1352/1874 [13:38<05:09,  1.69it/s]

tensor(2.4311, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1353/1874 [13:38<05:08,  1.69it/s]

tensor(1.9190, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1354/1874 [13:39<05:08,  1.68it/s]

tensor(2.2057, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1355/1874 [13:40<05:08,  1.68it/s]

tensor(1.8084, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1356/1874 [13:40<05:08,  1.68it/s]

tensor(2.1980, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1357/1874 [13:41<05:07,  1.68it/s]

tensor(2.6151, device='cuda:0', grad_fn=<AddBackward0>)


 72%|███████▏  | 1358/1874 [13:41<05:07,  1.68it/s]

tensor(2.0279, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1359/1874 [13:42<05:10,  1.66it/s]

tensor(2.1489, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1360/1874 [13:43<05:08,  1.66it/s]

tensor(2.4449, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1361/1874 [13:43<05:07,  1.67it/s]

tensor(1.9960, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1362/1874 [13:44<05:06,  1.67it/s]

tensor(2.2716, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1363/1874 [13:44<05:04,  1.68it/s]

tensor(2.0849, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1364/1874 [13:45<05:04,  1.68it/s]

tensor(2.5549, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1365/1874 [13:46<05:02,  1.68it/s]

tensor(2.3383, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1366/1874 [13:46<05:02,  1.68it/s]

tensor(2.2911, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1367/1874 [13:47<05:06,  1.66it/s]

tensor(2.2877, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1368/1874 [13:47<05:05,  1.66it/s]

tensor(2.2791, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1369/1874 [13:48<05:04,  1.66it/s]

tensor(2.8494, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1370/1874 [13:49<05:01,  1.67it/s]

tensor(2.3684, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1371/1874 [13:49<05:01,  1.67it/s]

tensor(2.2509, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1372/1874 [13:50<04:59,  1.68it/s]

tensor(2.6029, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1373/1874 [13:50<04:58,  1.68it/s]

tensor(2.0656, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1374/1874 [13:51<04:55,  1.69it/s]

tensor(2.4329, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1375/1874 [13:52<04:55,  1.69it/s]

tensor(2.5557, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1376/1874 [13:52<04:53,  1.70it/s]

tensor(2.2538, device='cuda:0', grad_fn=<AddBackward0>)


 73%|███████▎  | 1377/1874 [13:53<04:53,  1.69it/s]

tensor(2.6192, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▎  | 1378/1874 [13:53<04:52,  1.70it/s]

tensor(2.6091, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▎  | 1379/1874 [13:54<04:50,  1.70it/s]

tensor(2.1704, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▎  | 1380/1874 [13:54<04:52,  1.69it/s]

tensor(2.4854, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▎  | 1381/1874 [13:55<04:51,  1.69it/s]

tensor(1.8605, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▎  | 1382/1874 [13:56<04:55,  1.67it/s]

tensor(2.3835, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1383/1874 [13:56<04:57,  1.65it/s]

tensor(2.8532, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1384/1874 [13:57<05:01,  1.62it/s]

tensor(2.4046, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1385/1874 [13:58<04:59,  1.63it/s]

tensor(2.0447, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1386/1874 [13:58<05:00,  1.62it/s]

tensor(2.5419, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1387/1874 [13:59<05:00,  1.62it/s]

tensor(2.0399, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1388/1874 [13:59<05:01,  1.61it/s]

tensor(2.6899, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1389/1874 [14:00<05:01,  1.61it/s]

tensor(2.2612, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1390/1874 [14:01<04:56,  1.63it/s]

tensor(2.7636, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1391/1874 [14:01<04:56,  1.63it/s]

tensor(2.2713, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1392/1874 [14:02<04:54,  1.64it/s]

tensor(2.5041, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1393/1874 [14:02<04:51,  1.65it/s]

tensor(2.1649, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1394/1874 [14:03<04:48,  1.66it/s]

tensor(2.4479, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1395/1874 [14:04<04:47,  1.67it/s]

tensor(2.3160, device='cuda:0', grad_fn=<AddBackward0>)


 74%|███████▍  | 1396/1874 [14:04<04:46,  1.67it/s]

tensor(2.4516, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1397/1874 [14:05<04:44,  1.68it/s]

tensor(2.2556, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1398/1874 [14:05<04:46,  1.66it/s]

tensor(2.2497, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1399/1874 [14:06<04:47,  1.65it/s]

tensor(2.6318, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1400/1874 [14:07<04:49,  1.64it/s]

tensor(2.0459, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1401/1874 [14:07<04:54,  1.60it/s]

tensor(2.8130, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1402/1874 [14:08<04:48,  1.63it/s]

tensor(2.1384, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1403/1874 [14:09<04:45,  1.65it/s]

tensor(2.5659, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1404/1874 [14:09<04:41,  1.67it/s]

tensor(2.1982, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▍  | 1405/1874 [14:10<04:41,  1.67it/s]

tensor(2.4246, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1406/1874 [14:10<04:44,  1.64it/s]

tensor(2.4456, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1407/1874 [14:11<04:42,  1.65it/s]

tensor(2.3814, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1408/1874 [14:12<04:42,  1.65it/s]

tensor(2.2843, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1409/1874 [14:12<04:39,  1.67it/s]

tensor(2.6409, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1410/1874 [14:13<04:40,  1.66it/s]

tensor(1.9396, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1411/1874 [14:13<04:36,  1.67it/s]

tensor(2.8527, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1412/1874 [14:14<04:39,  1.65it/s]

tensor(2.2445, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1413/1874 [14:15<04:36,  1.67it/s]

tensor(2.5668, device='cuda:0', grad_fn=<AddBackward0>)


 75%|███████▌  | 1414/1874 [14:15<04:33,  1.68it/s]

tensor(2.2502, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1415/1874 [14:16<04:37,  1.66it/s]

tensor(2.4712, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1416/1874 [14:16<04:39,  1.64it/s]

tensor(2.4982, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1417/1874 [14:17<04:41,  1.62it/s]

tensor(2.4524, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1418/1874 [14:18<04:36,  1.65it/s]

tensor(2.3115, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1419/1874 [14:18<04:39,  1.63it/s]

tensor(2.5212, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1420/1874 [14:19<04:35,  1.65it/s]

tensor(2.2759, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1421/1874 [14:19<04:34,  1.65it/s]

tensor(2.4127, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1422/1874 [14:20<04:32,  1.66it/s]

tensor(2.2719, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1423/1874 [14:21<04:30,  1.67it/s]

tensor(2.2154, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1424/1874 [14:21<04:32,  1.65it/s]

tensor(2.7062, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1425/1874 [14:22<04:33,  1.64it/s]

tensor(2.3639, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1426/1874 [14:22<04:34,  1.63it/s]

tensor(2.2124, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1427/1874 [14:23<04:35,  1.62it/s]

tensor(2.8855, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▌  | 1428/1874 [14:24<04:36,  1.61it/s]

tensor(2.4371, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▋  | 1429/1874 [14:24<04:36,  1.61it/s]

tensor(2.4656, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▋  | 1430/1874 [14:25<04:35,  1.61it/s]

tensor(2.4350, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▋  | 1431/1874 [14:26<04:34,  1.61it/s]

tensor(2.3190, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▋  | 1432/1874 [14:26<04:35,  1.60it/s]

tensor(2.0622, device='cuda:0', grad_fn=<AddBackward0>)


 76%|███████▋  | 1433/1874 [14:27<04:35,  1.60it/s]

tensor(2.7521, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1434/1874 [14:27<04:35,  1.59it/s]

tensor(2.3158, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1435/1874 [14:28<04:36,  1.59it/s]

tensor(2.2935, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1436/1874 [14:29<04:35,  1.59it/s]

tensor(2.7119, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1437/1874 [14:29<04:34,  1.59it/s]

tensor(2.5511, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1438/1874 [14:30<04:32,  1.60it/s]

tensor(2.0743, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1439/1874 [14:31<04:30,  1.61it/s]

tensor(2.9327, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1440/1874 [14:31<04:30,  1.61it/s]

tensor(2.0004, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1441/1874 [14:32<04:31,  1.60it/s]

tensor(3.0267, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1442/1874 [14:32<04:25,  1.63it/s]

tensor(2.1011, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1443/1874 [14:33<04:20,  1.65it/s]

tensor(2.2576, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1444/1874 [14:34<04:22,  1.64it/s]

tensor(2.0859, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1445/1874 [14:34<04:23,  1.63it/s]

tensor(2.8554, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1446/1874 [14:35<04:23,  1.62it/s]

tensor(2.1669, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1447/1874 [14:35<04:19,  1.64it/s]

tensor(2.2898, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1448/1874 [14:36<04:16,  1.66it/s]

tensor(2.6602, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1449/1874 [14:37<04:17,  1.65it/s]

tensor(2.4813, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1450/1874 [14:37<04:16,  1.65it/s]

tensor(1.9238, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1451/1874 [14:38<04:14,  1.66it/s]

tensor(2.4911, device='cuda:0', grad_fn=<AddBackward0>)


 77%|███████▋  | 1452/1874 [14:38<04:12,  1.67it/s]

tensor(2.2865, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1453/1874 [14:39<04:11,  1.67it/s]

tensor(2.1434, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1454/1874 [14:40<04:09,  1.68it/s]

tensor(2.2018, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1455/1874 [14:40<04:08,  1.68it/s]

tensor(2.8175, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1456/1874 [14:41<04:08,  1.68it/s]

tensor(2.3409, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1457/1874 [14:41<04:06,  1.69it/s]

tensor(2.5578, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1458/1874 [14:42<04:06,  1.69it/s]

tensor(2.1990, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1459/1874 [14:43<04:05,  1.69it/s]

tensor(2.4869, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1460/1874 [14:43<04:04,  1.69it/s]

tensor(2.4287, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1461/1874 [14:44<04:08,  1.66it/s]

tensor(2.1949, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1462/1874 [14:44<04:11,  1.64it/s]

tensor(3.1271, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1463/1874 [14:45<04:14,  1.62it/s]

tensor(2.2405, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1464/1874 [14:46<04:14,  1.61it/s]

tensor(2.4260, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1465/1874 [14:46<04:15,  1.60it/s]

tensor(2.2818, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1466/1874 [14:47<04:12,  1.62it/s]

tensor(2.4633, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1467/1874 [14:48<04:12,  1.61it/s]

tensor(2.3098, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1468/1874 [14:48<04:12,  1.61it/s]

tensor(2.3593, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1469/1874 [14:49<04:12,  1.61it/s]

tensor(2.7232, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1470/1874 [14:49<04:12,  1.60it/s]

tensor(2.1523, device='cuda:0', grad_fn=<AddBackward0>)


 78%|███████▊  | 1471/1874 [14:50<04:10,  1.61it/s]

tensor(2.3971, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▊  | 1472/1874 [14:51<04:09,  1.61it/s]

tensor(2.1233, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▊  | 1473/1874 [14:51<04:09,  1.61it/s]

tensor(2.7107, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▊  | 1474/1874 [14:52<04:06,  1.62it/s]

tensor(2.0315, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▊  | 1475/1874 [14:53<04:04,  1.63it/s]

tensor(2.6810, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1476/1874 [14:53<04:05,  1.62it/s]

tensor(2.5540, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1477/1874 [14:54<04:06,  1.61it/s]

tensor(2.4111, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1478/1874 [14:54<04:05,  1.61it/s]

tensor(2.4071, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1479/1874 [14:55<04:07,  1.59it/s]

tensor(2.5080, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1480/1874 [14:56<04:03,  1.62it/s]

tensor(2.4547, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1481/1874 [14:57<04:56,  1.32it/s]

tensor(2.2325, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1482/1874 [14:57<04:38,  1.41it/s]

tensor(1.9293, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1483/1874 [14:58<04:25,  1.47it/s]

tensor(2.8730, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1484/1874 [14:59<04:14,  1.53it/s]

tensor(2.5932, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1485/1874 [14:59<04:12,  1.54it/s]

tensor(3.0542, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1486/1874 [15:00<04:09,  1.56it/s]

tensor(3.1167, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1487/1874 [15:00<04:06,  1.57it/s]

tensor(2.4637, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1488/1874 [15:01<04:03,  1.58it/s]

tensor(2.3874, device='cuda:0', grad_fn=<AddBackward0>)


 79%|███████▉  | 1489/1874 [15:02<03:58,  1.61it/s]

tensor(2.5690, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1490/1874 [15:02<03:54,  1.64it/s]

tensor(1.8514, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1491/1874 [15:03<03:50,  1.66it/s]

tensor(3.1997, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1492/1874 [15:03<03:52,  1.64it/s]

tensor(1.9962, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1493/1874 [15:04<03:49,  1.66it/s]

tensor(2.5824, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1494/1874 [15:05<03:51,  1.64it/s]

tensor(2.2358, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1495/1874 [15:05<03:52,  1.63it/s]

tensor(2.7263, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1496/1874 [15:06<03:48,  1.65it/s]

tensor(2.6350, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1497/1874 [15:06<03:49,  1.64it/s]

tensor(2.5156, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1498/1874 [15:07<03:50,  1.63it/s]

tensor(2.3948, device='cuda:0', grad_fn=<AddBackward0>)


 80%|███████▉  | 1499/1874 [15:08<03:52,  1.61it/s]

tensor(2.0440, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1500/1874 [15:08<03:52,  1.61it/s]

tensor(2.5563, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1501/1874 [15:09<03:52,  1.61it/s]

tensor(2.6283, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1502/1874 [15:10<03:47,  1.63it/s]

tensor(2.1898, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1503/1874 [15:10<03:47,  1.63it/s]

tensor(2.2319, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1504/1874 [15:11<03:48,  1.62it/s]

tensor(2.5597, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1505/1874 [15:11<03:49,  1.61it/s]

tensor(1.9079, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1506/1874 [15:12<03:50,  1.60it/s]

tensor(2.2370, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1507/1874 [15:13<03:45,  1.62it/s]

tensor(2.5423, device='cuda:0', grad_fn=<AddBackward0>)


 80%|████████  | 1508/1874 [15:13<03:44,  1.63it/s]

tensor(2.2487, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1509/1874 [15:14<03:45,  1.62it/s]

tensor(2.0780, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1510/1874 [15:15<03:46,  1.61it/s]

tensor(3.3285, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1511/1874 [15:15<03:47,  1.60it/s]

tensor(2.2419, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1512/1874 [15:16<03:46,  1.60it/s]

tensor(2.8045, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1513/1874 [15:16<03:45,  1.60it/s]

tensor(2.2729, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1514/1874 [15:17<03:43,  1.61it/s]

tensor(2.8735, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1515/1874 [15:18<03:39,  1.64it/s]

tensor(1.9714, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1516/1874 [15:18<03:39,  1.63it/s]

tensor(2.7100, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1517/1874 [15:19<03:39,  1.63it/s]

tensor(1.9749, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1518/1874 [15:19<03:38,  1.63it/s]

tensor(2.4426, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1519/1874 [15:20<03:40,  1.61it/s]

tensor(2.1418, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1520/1874 [15:21<03:39,  1.62it/s]

tensor(2.5914, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1521/1874 [15:21<03:38,  1.61it/s]

tensor(2.0185, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████  | 1522/1874 [15:22<03:37,  1.62it/s]

tensor(2.2721, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████▏ | 1523/1874 [15:23<03:33,  1.64it/s]

tensor(2.2666, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████▏ | 1524/1874 [15:23<03:34,  1.63it/s]

tensor(2.5266, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████▏ | 1525/1874 [15:24<03:31,  1.65it/s]

tensor(2.0207, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████▏ | 1526/1874 [15:24<03:33,  1.63it/s]

tensor(2.1778, device='cuda:0', grad_fn=<AddBackward0>)


 81%|████████▏ | 1527/1874 [15:25<03:29,  1.65it/s]

tensor(2.3900, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1528/1874 [15:26<03:26,  1.67it/s]

tensor(2.1778, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1529/1874 [15:26<03:24,  1.69it/s]

tensor(2.4691, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1530/1874 [15:27<03:23,  1.69it/s]

tensor(3.1652, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1531/1874 [15:27<03:22,  1.70it/s]

tensor(2.3771, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1532/1874 [15:28<03:22,  1.69it/s]

tensor(2.0876, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1533/1874 [15:28<03:21,  1.69it/s]

tensor(2.0747, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1534/1874 [15:29<03:24,  1.66it/s]

tensor(2.6423, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1535/1874 [15:30<03:23,  1.67it/s]

tensor(2.2188, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1536/1874 [15:30<03:21,  1.68it/s]

tensor(1.8517, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1537/1874 [15:31<03:20,  1.68it/s]

tensor(2.2569, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1538/1874 [15:31<03:22,  1.66it/s]

tensor(2.3825, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1539/1874 [15:32<03:21,  1.66it/s]

tensor(2.5743, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1540/1874 [15:33<03:20,  1.66it/s]

tensor(2.6319, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1541/1874 [15:33<03:19,  1.67it/s]

tensor(2.3535, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1542/1874 [15:34<03:21,  1.65it/s]

tensor(2.3033, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1543/1874 [15:35<03:22,  1.63it/s]

tensor(2.4538, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1544/1874 [15:35<03:21,  1.64it/s]

tensor(2.6635, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1545/1874 [15:36<03:18,  1.66it/s]

tensor(2.1939, device='cuda:0', grad_fn=<AddBackward0>)


 82%|████████▏ | 1546/1874 [15:36<03:19,  1.65it/s]

tensor(2.9716, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1547/1874 [15:37<03:18,  1.64it/s]

tensor(2.0757, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1548/1874 [15:38<03:18,  1.64it/s]

tensor(2.4175, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1549/1874 [15:38<03:16,  1.66it/s]

tensor(2.3829, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1550/1874 [15:39<03:16,  1.65it/s]

tensor(2.5467, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1551/1874 [15:39<03:17,  1.63it/s]

tensor(2.6700, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1552/1874 [15:40<03:15,  1.65it/s]

tensor(2.3670, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1553/1874 [15:41<03:13,  1.66it/s]

tensor(2.0280, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1554/1874 [15:41<03:13,  1.65it/s]

tensor(3.0485, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1555/1874 [15:42<03:15,  1.64it/s]

tensor(2.3900, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1556/1874 [15:42<03:14,  1.64it/s]

tensor(2.5269, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1557/1874 [15:43<03:11,  1.65it/s]

tensor(2.3950, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1558/1874 [15:44<03:12,  1.64it/s]

tensor(2.4157, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1559/1874 [15:44<03:13,  1.63it/s]

tensor(2.3842, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1560/1874 [15:45<03:12,  1.63it/s]

tensor(2.3326, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1561/1874 [15:45<03:12,  1.62it/s]

tensor(2.3870, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1562/1874 [15:46<03:12,  1.62it/s]

tensor(2.3415, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1563/1874 [15:47<03:12,  1.62it/s]

tensor(2.0419, device='cuda:0', grad_fn=<AddBackward0>)


 83%|████████▎ | 1564/1874 [15:47<03:09,  1.64it/s]

tensor(2.4383, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▎ | 1565/1874 [15:48<03:09,  1.63it/s]

tensor(2.2160, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▎ | 1566/1874 [15:49<03:10,  1.62it/s]

tensor(2.4839, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▎ | 1567/1874 [15:49<03:09,  1.62it/s]

tensor(1.9301, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▎ | 1568/1874 [15:50<03:09,  1.62it/s]

tensor(2.4305, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▎ | 1569/1874 [15:50<03:08,  1.62it/s]

tensor(2.2328, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▍ | 1570/1874 [15:51<03:08,  1.62it/s]

tensor(1.8826, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▍ | 1571/1874 [15:52<03:09,  1.60it/s]

tensor(2.5645, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▍ | 1572/1874 [15:52<03:06,  1.62it/s]

tensor(2.4276, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▍ | 1573/1874 [15:53<03:03,  1.64it/s]

tensor(2.5325, device='cuda:0', grad_fn=<AddBackward0>)


 84%|████████▍ | 1574/1874 [15:53<03:01,  1.66it/s]

tensor(2.2257, device='cuda:0', grad_fn=<AddBackward0>)


In [None]:
img, label = next(hand_loader_iter)

In [None]:
label