In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch  

In [2]:
print(torch.__version__)

1.9.1


In [3]:
data = pd.read_csv('triplets_all.csv')
N = 878
J = torch.zeros((N, N))
for i in range(data.shape[0]):
    a, b = map(int, data.ids[i].split('_'))
    c = int(data.intersize[i])
    if a != b:
        J[a][b] = c
        J[b][a] = c
print(J)

tensor([[0., 5., 2.,  ..., 0., 0., 0.],
        [5., 0., 8.,  ..., 0., 0., 0.],
        [2., 8., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 3.],
        [0., 0., 0.,  ..., 0., 3., 0.]])


In [4]:
p = torch.zeros((N, N))
for i in range(N):
    for j in range(N):
        if i != j:
            p[i][j] = 1

In [5]:
import torch
from torch.nn import Module, Linear
from torch.nn.functional import linear


def Binarize(tensor,quant_mode='det'):
    if quant_mode=='det':
        return tensor.sign()
    if quant_mode=='bin':
        return (tensor>=0).type(type(tensor))*2-1
    else:
        return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1)


class BNNLinear(Linear):
    def __init__(self, *kargs, **kwargs):
        super(BNNLinear, self).__init__(*kargs, **kwargs)
        self.register_buffer('weight_org', self.weight.data.clone())

    def forward(self, input):
        input.data=Binarize(input.data)
            
        self.weight.data=Binarize(self.weight_org)
        out = linear(input, self.weight)

        out[out == -1] = 0

        # if not self.bias is None:
        #     self.bias.org=self.bias.data.clone()
        #     out += self.bias.view(1, -1).expand_as(out.reshape(1, 16)).reshape(16)

        return out

In [6]:
import torch.nn as nn
import torch

class BNNCaffenet(nn.Module):

    def __init__(self):
        super(BNNCaffenet, self).__init__()
 
        self.features = nn.Sequential(
            # nn.BatchNorm1d(1),
            # nn.Hardtanh(inplace=True),
            BNNLinear(1, 878 * 10000),
        )

    def forward(self, x):
        return self.features(x)


    def init_w(self):
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)
        return

def bnn_caffenet():
    return BNNCaffenet()

In [7]:
import os
import numpy as np
from torch import save, no_grad
from torch import tensor
import torch
from tqdm import tqdm
import shutil


class BnnClassifier():
    def __init__(self, model, device=None):
        super().__init__()
        self.model = model
        self.device = device

    def train_step(self, criterion, optimizer):
        losses = []
        data = torch.ones(1)
        target = J.type(torch.LongTensor)
        mn = 100000000
        for i in tqdm(range(100000), total=100000):
            output = self.model(data)
            loss = criterion(output, target)
            if loss < mn:
                print(loss)
#                 print(output)
            mn = min(mn, loss)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            for p in self.model.modules():
                if hasattr(p, 'weight_org'):
                    p.weight.data.copy_(p.weight_org)
            optimizer.step()
            for p in self.model.modules():
                if hasattr(p, 'weight_org'):
                    p.weight_org.data.copy_(p.weight.data.clamp_(-1,1))
        return losses

    def train(self, criterion, optimizer, epochs, scheduler):

        best_accuracy = 0.

        losses = []
        accuracies = []

        self.model.train()
        epoch_losses = self.train_step(criterion, optimizer)
        losses += epoch_losses
        epoch_losses = np.array(epoch_losses)
        lr = optimizer.param_groups[0]['lr']  
        if scheduler:     
            scheduler.step()
        

        return

In [8]:
import torch
import importlib

def myCustomLoss(my_outputs, my_labels):
    a = torch.reshape(my_outputs, (878, 10000))
    tmp = (a @ a.T - my_labels) * p
    return abs(tmp).sum()

device = torch.device('cpu')
torch.manual_seed(0)

model = BNNCaffenet()
model.to(device)

classification = BnnClassifier(model, device)

criterion = myCustomLoss

if hasattr(model, 'init_w'):
    model.init_w()

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1)

scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [80, 150],
        gamma=0.1)

classification.train(criterion, optimizer, 300, scheduler)

  0%|          | 99/100000 [00:44<12:00:20,  2.31it/s]

tensor(98592448., grad_fn=<SumBackward0>)


  0%|          | 100/100000 [00:45<12:02:09,  2.31it/s]

tensor(94588896., grad_fn=<SumBackward0>)


  0%|          | 101/100000 [00:45<12:00:02,  2.31it/s]

tensor(90605552., grad_fn=<SumBackward0>)


  0%|          | 102/100000 [00:46<11:58:28,  2.32it/s]

tensor(86826032., grad_fn=<SumBackward0>)


  0%|          | 103/100000 [00:46<11:59:23,  2.31it/s]

tensor(83195128., grad_fn=<SumBackward0>)


  0%|          | 104/100000 [00:46<11:57:51,  2.32it/s]

tensor(79620424., grad_fn=<SumBackward0>)


  0%|          | 105/100000 [00:47<11:56:38,  2.32it/s]

tensor(76237400., grad_fn=<SumBackward0>)


  0%|          | 106/100000 [00:47<11:54:52,  2.33it/s]

tensor(72963696., grad_fn=<SumBackward0>)


  0%|          | 107/100000 [00:48<11:54:42,  2.33it/s]

tensor(69818304., grad_fn=<SumBackward0>)


  0%|          | 108/100000 [00:48<11:54:28,  2.33it/s]

tensor(66781724., grad_fn=<SumBackward0>)


  0%|          | 109/100000 [00:49<11:55:34,  2.33it/s]

tensor(63772868., grad_fn=<SumBackward0>)


  0%|          | 110/100000 [00:49<11:54:42,  2.33it/s]

tensor(61024408., grad_fn=<SumBackward0>)


  0%|          | 111/100000 [00:49<11:54:35,  2.33it/s]

tensor(58270572., grad_fn=<SumBackward0>)


  0%|          | 112/100000 [00:50<11:54:32,  2.33it/s]

tensor(55673180., grad_fn=<SumBackward0>)


  0%|          | 113/100000 [00:50<11:54:36,  2.33it/s]

tensor(53110932., grad_fn=<SumBackward0>)


  0%|          | 114/100000 [00:51<11:54:23,  2.33it/s]

tensor(50655464., grad_fn=<SumBackward0>)


  0%|          | 115/100000 [00:51<11:54:32,  2.33it/s]

tensor(48347248., grad_fn=<SumBackward0>)


  0%|          | 116/100000 [00:52<11:56:00,  2.32it/s]

tensor(46070868., grad_fn=<SumBackward0>)


  0%|          | 117/100000 [00:52<12:05:00,  2.30it/s]

tensor(43963372., grad_fn=<SumBackward0>)


  0%|          | 118/100000 [00:52<12:05:44,  2.29it/s]

tensor(41866832., grad_fn=<SumBackward0>)


  0%|          | 119/100000 [00:53<12:21:28,  2.25it/s]

tensor(39899188., grad_fn=<SumBackward0>)


  0%|          | 120/100000 [00:53<12:27:55,  2.23it/s]

tensor(37951964., grad_fn=<SumBackward0>)


  0%|          | 121/100000 [00:54<12:19:53,  2.25it/s]

tensor(36143168., grad_fn=<SumBackward0>)


  0%|          | 122/100000 [00:54<12:45:23,  2.17it/s]

tensor(34377276., grad_fn=<SumBackward0>)


  0%|          | 123/100000 [00:55<13:06:05,  2.12it/s]

tensor(32715472., grad_fn=<SumBackward0>)


  0%|          | 124/100000 [00:55<12:44:17,  2.18it/s]

tensor(31061284., grad_fn=<SumBackward0>)


  0%|          | 125/100000 [00:56<12:27:47,  2.23it/s]

tensor(29504040., grad_fn=<SumBackward0>)


  0%|          | 126/100000 [00:56<12:15:48,  2.26it/s]

tensor(28011820., grad_fn=<SumBackward0>)


  0%|          | 127/100000 [00:56<12:07:34,  2.29it/s]

tensor(26581136., grad_fn=<SumBackward0>)


  0%|          | 128/100000 [00:57<12:02:18,  2.30it/s]

tensor(25220066., grad_fn=<SumBackward0>)


  0%|          | 129/100000 [00:57<12:52:58,  2.15it/s]

tensor(23902484., grad_fn=<SumBackward0>)


  0%|          | 130/100000 [00:58<12:57:52,  2.14it/s]

tensor(22662900., grad_fn=<SumBackward0>)


  0%|          | 131/100000 [00:58<12:39:23,  2.19it/s]

tensor(21475402., grad_fn=<SumBackward0>)


  0%|          | 132/100000 [00:59<12:41:53,  2.18it/s]

tensor(20315372., grad_fn=<SumBackward0>)


  0%|          | 133/100000 [00:59<12:31:46,  2.21it/s]

tensor(19230936., grad_fn=<SumBackward0>)


  0%|          | 134/100000 [01:00<12:26:10,  2.23it/s]

tensor(18199622., grad_fn=<SumBackward0>)


  0%|          | 135/100000 [01:00<12:56:55,  2.14it/s]

tensor(17220362., grad_fn=<SumBackward0>)


  0%|          | 136/100000 [01:01<12:38:16,  2.19it/s]

tensor(16283730., grad_fn=<SumBackward0>)


  0%|          | 137/100000 [01:01<12:28:40,  2.22it/s]

tensor(15378760., grad_fn=<SumBackward0>)


  0%|          | 138/100000 [01:01<12:21:17,  2.25it/s]

tensor(14541224., grad_fn=<SumBackward0>)


  0%|          | 139/100000 [01:02<12:15:54,  2.26it/s]

tensor(13743800., grad_fn=<SumBackward0>)


  0%|          | 140/100000 [01:02<12:49:26,  2.16it/s]

tensor(12957448., grad_fn=<SumBackward0>)


  0%|          | 141/100000 [01:03<12:54:42,  2.15it/s]

tensor(12233560., grad_fn=<SumBackward0>)


  0%|          | 142/100000 [01:03<12:38:57,  2.19it/s]

tensor(11547516., grad_fn=<SumBackward0>)


  0%|          | 143/100000 [01:04<12:32:19,  2.21it/s]

tensor(10905564., grad_fn=<SumBackward0>)


  0%|          | 144/100000 [01:04<12:27:13,  2.23it/s]

tensor(10277824., grad_fn=<SumBackward0>)


  0%|          | 145/100000 [01:05<12:19:36,  2.25it/s]

tensor(9715702., grad_fn=<SumBackward0>)


  0%|          | 146/100000 [01:05<12:16:35,  2.26it/s]

tensor(9188658., grad_fn=<SumBackward0>)


  0%|          | 147/100000 [01:06<12:08:08,  2.29it/s]

tensor(8655760., grad_fn=<SumBackward0>)


  0%|          | 148/100000 [01:06<12:01:42,  2.31it/s]

tensor(8183436., grad_fn=<SumBackward0>)


  0%|          | 149/100000 [01:06<11:58:12,  2.32it/s]

tensor(7720836., grad_fn=<SumBackward0>)


  0%|          | 150/100000 [01:07<12:17:58,  2.26it/s]

tensor(7306498., grad_fn=<SumBackward0>)


  0%|          | 151/100000 [01:07<12:22:06,  2.24it/s]

tensor(6917582., grad_fn=<SumBackward0>)


  0%|          | 152/100000 [01:08<12:25:43,  2.23it/s]

tensor(6551780., grad_fn=<SumBackward0>)


  0%|          | 153/100000 [01:08<12:17:22,  2.26it/s]

tensor(6209014., grad_fn=<SumBackward0>)


  0%|          | 154/100000 [01:09<12:13:02,  2.27it/s]

tensor(5886684., grad_fn=<SumBackward0>)


  0%|          | 155/100000 [01:09<12:17:38,  2.26it/s]

tensor(5588458., grad_fn=<SumBackward0>)


  0%|          | 156/100000 [01:10<12:21:30,  2.24it/s]

tensor(5320210., grad_fn=<SumBackward0>)


  0%|          | 157/100000 [01:10<12:16:35,  2.26it/s]

tensor(5058164., grad_fn=<SumBackward0>)


  0%|          | 158/100000 [01:10<12:13:13,  2.27it/s]

tensor(4825588., grad_fn=<SumBackward0>)


  0%|          | 159/100000 [01:11<12:18:06,  2.25it/s]

tensor(4612612., grad_fn=<SumBackward0>)


  0%|          | 160/100000 [01:11<12:23:54,  2.24it/s]

tensor(4404934., grad_fn=<SumBackward0>)


  0%|          | 161/100000 [01:12<12:19:00,  2.25it/s]

tensor(4219972., grad_fn=<SumBackward0>)


  0%|          | 162/100000 [01:12<12:19:16,  2.25it/s]

tensor(4051496., grad_fn=<SumBackward0>)


  0%|          | 163/100000 [01:13<12:18:32,  2.25it/s]

tensor(3900264., grad_fn=<SumBackward0>)


  0%|          | 164/100000 [01:13<12:19:57,  2.25it/s]

tensor(3762742., grad_fn=<SumBackward0>)


  0%|          | 165/100000 [01:13<12:19:54,  2.25it/s]

tensor(3629182., grad_fn=<SumBackward0>)


  0%|          | 166/100000 [01:14<12:22:54,  2.24it/s]

tensor(3512460., grad_fn=<SumBackward0>)


  0%|          | 167/100000 [01:14<12:23:03,  2.24it/s]

tensor(3400716., grad_fn=<SumBackward0>)


  0%|          | 168/100000 [01:15<12:25:13,  2.23it/s]

tensor(3301868., grad_fn=<SumBackward0>)


  0%|          | 169/100000 [01:15<12:23:33,  2.24it/s]

tensor(3209460., grad_fn=<SumBackward0>)


  0%|          | 170/100000 [01:16<12:29:29,  2.22it/s]

tensor(3123758., grad_fn=<SumBackward0>)


  0%|          | 171/100000 [01:16<12:24:53,  2.23it/s]

tensor(3040052., grad_fn=<SumBackward0>)


  0%|          | 172/100000 [01:17<12:28:12,  2.22it/s]

tensor(2966498., grad_fn=<SumBackward0>)


  0%|          | 173/100000 [01:17<12:20:58,  2.25it/s]

tensor(2898182., grad_fn=<SumBackward0>)


  0%|          | 174/100000 [01:18<12:19:11,  2.25it/s]

tensor(2836272., grad_fn=<SumBackward0>)


  0%|          | 175/100000 [01:18<12:16:44,  2.26it/s]

tensor(2775582., grad_fn=<SumBackward0>)


  0%|          | 176/100000 [01:18<12:22:28,  2.24it/s]

tensor(2723470., grad_fn=<SumBackward0>)


  0%|          | 177/100000 [01:19<12:23:46,  2.24it/s]

tensor(2671366., grad_fn=<SumBackward0>)


  0%|          | 178/100000 [01:19<12:26:51,  2.23it/s]

tensor(2620248., grad_fn=<SumBackward0>)


  0%|          | 179/100000 [01:20<12:12:03,  2.27it/s]

tensor(2576716., grad_fn=<SumBackward0>)


  0%|          | 180/100000 [01:20<12:36:11,  2.20it/s]

tensor(2530766., grad_fn=<SumBackward0>)


  0%|          | 181/100000 [01:21<12:22:12,  2.24it/s]

tensor(2488332., grad_fn=<SumBackward0>)


  0%|          | 182/100000 [01:21<12:33:06,  2.21it/s]

tensor(2452894., grad_fn=<SumBackward0>)


  0%|          | 183/100000 [01:22<13:00:49,  2.13it/s]

tensor(2414076., grad_fn=<SumBackward0>)


  0%|          | 184/100000 [01:22<13:19:35,  2.08it/s]

tensor(2380228., grad_fn=<SumBackward0>)


  0%|          | 185/100000 [01:23<13:02:57,  2.12it/s]

tensor(2346074., grad_fn=<SumBackward0>)


  0%|          | 186/100000 [01:23<12:47:38,  2.17it/s]

tensor(2312174., grad_fn=<SumBackward0>)


  0%|          | 187/100000 [01:23<12:40:41,  2.19it/s]

tensor(2281266., grad_fn=<SumBackward0>)


  0%|          | 188/100000 [01:24<12:35:04,  2.20it/s]

tensor(2253456., grad_fn=<SumBackward0>)


  0%|          | 189/100000 [01:24<12:24:47,  2.23it/s]

tensor(2224244., grad_fn=<SumBackward0>)


  0%|          | 190/100000 [01:25<12:27:22,  2.23it/s]

tensor(2199096., grad_fn=<SumBackward0>)


  0%|          | 191/100000 [01:25<12:23:14,  2.24it/s]

tensor(2173902., grad_fn=<SumBackward0>)


  0%|          | 192/100000 [01:26<12:17:33,  2.26it/s]

tensor(2149334., grad_fn=<SumBackward0>)


  0%|          | 193/100000 [01:26<12:12:04,  2.27it/s]

tensor(2123788., grad_fn=<SumBackward0>)


  0%|          | 194/100000 [01:27<12:11:13,  2.27it/s]

tensor(2100772., grad_fn=<SumBackward0>)


  0%|          | 195/100000 [01:27<12:13:24,  2.27it/s]

tensor(2078210., grad_fn=<SumBackward0>)


  0%|          | 196/100000 [01:27<12:13:49,  2.27it/s]

tensor(2057826., grad_fn=<SumBackward0>)


  0%|          | 197/100000 [01:28<12:30:40,  2.22it/s]

tensor(2038494., grad_fn=<SumBackward0>)


  0%|          | 198/100000 [01:28<12:27:27,  2.23it/s]

tensor(2016232., grad_fn=<SumBackward0>)


  0%|          | 199/100000 [01:29<12:55:34,  2.14it/s]

tensor(1996398., grad_fn=<SumBackward0>)


  0%|          | 200/100000 [01:29<12:46:12,  2.17it/s]

tensor(1978022., grad_fn=<SumBackward0>)


  0%|          | 201/100000 [01:30<12:42:12,  2.18it/s]

tensor(1960700., grad_fn=<SumBackward0>)


  0%|          | 202/100000 [01:30<12:39:01,  2.19it/s]

tensor(1941850., grad_fn=<SumBackward0>)


  0%|          | 203/100000 [01:31<12:40:47,  2.19it/s]

tensor(1924950., grad_fn=<SumBackward0>)


  0%|          | 204/100000 [01:31<12:39:29,  2.19it/s]

tensor(1905174., grad_fn=<SumBackward0>)


  0%|          | 205/100000 [01:32<12:27:25,  2.23it/s]

tensor(1887248., grad_fn=<SumBackward0>)


  0%|          | 206/100000 [01:32<12:31:45,  2.21it/s]

tensor(1870496., grad_fn=<SumBackward0>)


  0%|          | 207/100000 [01:32<12:30:17,  2.22it/s]

tensor(1853734., grad_fn=<SumBackward0>)


  0%|          | 208/100000 [01:33<12:20:53,  2.24it/s]

tensor(1836382., grad_fn=<SumBackward0>)


  0%|          | 209/100000 [01:33<12:17:01,  2.26it/s]

tensor(1820060., grad_fn=<SumBackward0>)


  0%|          | 210/100000 [01:34<12:16:16,  2.26it/s]

tensor(1802994., grad_fn=<SumBackward0>)


  0%|          | 211/100000 [01:34<12:15:00,  2.26it/s]

tensor(1788938., grad_fn=<SumBackward0>)


  0%|          | 212/100000 [01:35<12:21:46,  2.24it/s]

tensor(1772562., grad_fn=<SumBackward0>)


  0%|          | 213/100000 [01:35<12:28:33,  2.22it/s]

tensor(1757542., grad_fn=<SumBackward0>)


  0%|          | 214/100000 [01:36<12:23:09,  2.24it/s]

tensor(1743412., grad_fn=<SumBackward0>)


  0%|          | 215/100000 [01:36<12:36:05,  2.20it/s]

tensor(1727848., grad_fn=<SumBackward0>)


  0%|          | 216/100000 [01:37<12:43:15,  2.18it/s]

tensor(1713020., grad_fn=<SumBackward0>)


  0%|          | 217/100000 [01:37<12:52:17,  2.15it/s]

tensor(1699846., grad_fn=<SumBackward0>)


  0%|          | 218/100000 [01:37<12:42:13,  2.18it/s]

tensor(1685752., grad_fn=<SumBackward0>)


  0%|          | 219/100000 [01:38<12:32:19,  2.21it/s]

tensor(1673294., grad_fn=<SumBackward0>)


  0%|          | 220/100000 [01:38<12:36:24,  2.20it/s]

tensor(1660896., grad_fn=<SumBackward0>)


  0%|          | 221/100000 [01:39<12:32:40,  2.21it/s]

tensor(1647270., grad_fn=<SumBackward0>)


  0%|          | 222/100000 [01:39<12:37:33,  2.20it/s]

tensor(1634042., grad_fn=<SumBackward0>)


  0%|          | 223/100000 [01:40<12:38:18,  2.19it/s]

tensor(1623070., grad_fn=<SumBackward0>)


  0%|          | 224/100000 [01:40<12:33:05,  2.21it/s]

tensor(1609780., grad_fn=<SumBackward0>)


  0%|          | 225/100000 [01:41<12:40:19,  2.19it/s]

tensor(1597998., grad_fn=<SumBackward0>)


  0%|          | 226/100000 [01:41<12:39:44,  2.19it/s]

tensor(1584782., grad_fn=<SumBackward0>)


  0%|          | 227/100000 [01:42<12:38:27,  2.19it/s]

tensor(1572824., grad_fn=<SumBackward0>)


  0%|          | 228/100000 [01:42<12:37:37,  2.19it/s]

tensor(1560836., grad_fn=<SumBackward0>)


  0%|          | 229/100000 [01:42<12:36:00,  2.20it/s]

tensor(1550962., grad_fn=<SumBackward0>)


  0%|          | 230/100000 [01:43<12:33:12,  2.21it/s]

tensor(1538714., grad_fn=<SumBackward0>)


  0%|          | 231/100000 [01:43<12:29:26,  2.22it/s]

tensor(1528672., grad_fn=<SumBackward0>)


  0%|          | 232/100000 [01:44<12:23:17,  2.24it/s]

tensor(1518412., grad_fn=<SumBackward0>)


  0%|          | 233/100000 [01:44<12:37:45,  2.19it/s]

tensor(1507546., grad_fn=<SumBackward0>)


  0%|          | 234/100000 [01:45<12:37:17,  2.20it/s]

tensor(1498382., grad_fn=<SumBackward0>)


  0%|          | 235/100000 [01:45<12:34:39,  2.20it/s]

tensor(1488650., grad_fn=<SumBackward0>)


  0%|          | 236/100000 [01:46<12:29:54,  2.22it/s]

tensor(1479694., grad_fn=<SumBackward0>)


  0%|          | 237/100000 [01:46<12:39:02,  2.19it/s]

tensor(1468674., grad_fn=<SumBackward0>)


  0%|          | 238/100000 [01:47<12:33:35,  2.21it/s]

tensor(1460298., grad_fn=<SumBackward0>)


  0%|          | 239/100000 [01:47<12:27:01,  2.23it/s]

tensor(1450500., grad_fn=<SumBackward0>)


  0%|          | 240/100000 [01:47<12:44:15,  2.18it/s]

tensor(1442678., grad_fn=<SumBackward0>)


  0%|          | 241/100000 [01:48<13:29:54,  2.05it/s]

tensor(1433646., grad_fn=<SumBackward0>)


  0%|          | 242/100000 [01:48<13:04:35,  2.12it/s]

tensor(1425458., grad_fn=<SumBackward0>)


  0%|          | 243/100000 [01:49<12:43:04,  2.18it/s]

tensor(1418082., grad_fn=<SumBackward0>)


  0%|          | 244/100000 [01:49<12:28:58,  2.22it/s]

tensor(1408892., grad_fn=<SumBackward0>)


  0%|          | 245/100000 [01:50<12:26:41,  2.23it/s]

tensor(1399610., grad_fn=<SumBackward0>)


  0%|          | 246/100000 [01:50<12:28:48,  2.22it/s]

tensor(1392586., grad_fn=<SumBackward0>)


  0%|          | 247/100000 [01:51<12:19:16,  2.25it/s]

tensor(1383506., grad_fn=<SumBackward0>)


  0%|          | 248/100000 [01:51<12:18:15,  2.25it/s]

tensor(1377634., grad_fn=<SumBackward0>)


  0%|          | 249/100000 [01:52<12:34:39,  2.20it/s]

tensor(1371352., grad_fn=<SumBackward0>)


  0%|          | 250/100000 [01:52<12:50:41,  2.16it/s]

tensor(1362688., grad_fn=<SumBackward0>)


  0%|          | 251/100000 [01:52<12:37:49,  2.19it/s]

tensor(1354044., grad_fn=<SumBackward0>)


  0%|          | 252/100000 [01:53<12:27:24,  2.22it/s]

tensor(1347368., grad_fn=<SumBackward0>)


  0%|          | 253/100000 [01:53<12:54:54,  2.15it/s]

tensor(1342020., grad_fn=<SumBackward0>)


  0%|          | 254/100000 [01:54<12:36:08,  2.20it/s]

tensor(1332970., grad_fn=<SumBackward0>)


  0%|          | 255/100000 [01:54<12:21:51,  2.24it/s]

tensor(1326864., grad_fn=<SumBackward0>)


  0%|          | 256/100000 [01:55<12:17:28,  2.25it/s]

tensor(1321182., grad_fn=<SumBackward0>)


  0%|          | 257/100000 [01:55<12:23:26,  2.24it/s]

tensor(1313010., grad_fn=<SumBackward0>)


  0%|          | 258/100000 [01:56<13:26:24,  2.06it/s]

tensor(1308532., grad_fn=<SumBackward0>)


  0%|          | 259/100000 [01:56<13:02:07,  2.13it/s]

tensor(1301070., grad_fn=<SumBackward0>)


  0%|          | 260/100000 [01:57<12:39:44,  2.19it/s]

tensor(1294568., grad_fn=<SumBackward0>)


  0%|          | 261/100000 [01:57<12:23:23,  2.24it/s]

tensor(1290632., grad_fn=<SumBackward0>)


  0%|          | 262/100000 [01:57<12:12:04,  2.27it/s]

tensor(1283800., grad_fn=<SumBackward0>)


  0%|          | 263/100000 [01:58<12:04:34,  2.29it/s]

tensor(1277430., grad_fn=<SumBackward0>)


  0%|          | 264/100000 [01:58<11:58:33,  2.31it/s]

tensor(1271116., grad_fn=<SumBackward0>)


  0%|          | 265/100000 [01:59<11:54:23,  2.33it/s]

tensor(1265474., grad_fn=<SumBackward0>)


  0%|          | 266/100000 [01:59<11:52:14,  2.33it/s]

tensor(1259196., grad_fn=<SumBackward0>)


  0%|          | 267/100000 [02:00<11:50:46,  2.34it/s]

tensor(1253572., grad_fn=<SumBackward0>)


  0%|          | 268/100000 [02:00<11:49:28,  2.34it/s]

tensor(1249008., grad_fn=<SumBackward0>)


  0%|          | 269/100000 [02:00<11:49:08,  2.34it/s]

tensor(1243940., grad_fn=<SumBackward0>)


  0%|          | 270/100000 [02:01<11:48:07,  2.35it/s]

tensor(1239962., grad_fn=<SumBackward0>)


  0%|          | 271/100000 [02:01<11:47:39,  2.35it/s]

tensor(1232962., grad_fn=<SumBackward0>)


  0%|          | 272/100000 [02:02<11:47:45,  2.35it/s]

tensor(1230360., grad_fn=<SumBackward0>)


  0%|          | 273/100000 [02:02<11:47:01,  2.35it/s]

tensor(1226704., grad_fn=<SumBackward0>)


  0%|          | 274/100000 [02:03<11:46:47,  2.35it/s]

tensor(1220668., grad_fn=<SumBackward0>)


  0%|          | 275/100000 [02:03<11:56:49,  2.32it/s]

tensor(1215626., grad_fn=<SumBackward0>)


  0%|          | 276/100000 [02:03<12:04:07,  2.30it/s]

tensor(1210256., grad_fn=<SumBackward0>)


  0%|          | 277/100000 [02:04<12:02:02,  2.30it/s]

tensor(1206654., grad_fn=<SumBackward0>)


  0%|          | 278/100000 [02:04<11:56:57,  2.32it/s]

tensor(1202168., grad_fn=<SumBackward0>)


  0%|          | 279/100000 [02:05<12:34:34,  2.20it/s]

tensor(1197772., grad_fn=<SumBackward0>)


  0%|          | 280/100000 [02:05<12:56:40,  2.14it/s]

tensor(1192294., grad_fn=<SumBackward0>)


  0%|          | 281/100000 [02:06<12:43:52,  2.18it/s]

tensor(1187638., grad_fn=<SumBackward0>)


  0%|          | 282/100000 [02:06<12:41:51,  2.18it/s]

tensor(1183244., grad_fn=<SumBackward0>)


  0%|          | 283/100000 [02:07<12:35:01,  2.20it/s]

tensor(1179436., grad_fn=<SumBackward0>)


  0%|          | 284/100000 [02:07<12:29:49,  2.22it/s]

tensor(1177206., grad_fn=<SumBackward0>)


  0%|          | 285/100000 [02:08<13:38:37,  2.03it/s]

tensor(1172804., grad_fn=<SumBackward0>)


  0%|          | 286/100000 [02:08<13:08:11,  2.11it/s]

tensor(1168654., grad_fn=<SumBackward0>)


  0%|          | 287/100000 [02:09<12:51:48,  2.15it/s]

tensor(1163766., grad_fn=<SumBackward0>)


  0%|          | 288/100000 [02:09<13:03:32,  2.12it/s]

tensor(1158956., grad_fn=<SumBackward0>)


  0%|          | 289/100000 [02:09<12:50:26,  2.16it/s]

tensor(1155514., grad_fn=<SumBackward0>)


  0%|          | 290/100000 [02:10<12:38:37,  2.19it/s]

tensor(1151480., grad_fn=<SumBackward0>)


  0%|          | 291/100000 [02:10<12:35:27,  2.20it/s]

tensor(1147204., grad_fn=<SumBackward0>)


  0%|          | 292/100000 [02:11<12:32:07,  2.21it/s]

tensor(1143974., grad_fn=<SumBackward0>)


  0%|          | 293/100000 [02:11<12:33:31,  2.21it/s]

tensor(1139500., grad_fn=<SumBackward0>)


  0%|          | 294/100000 [02:12<12:35:08,  2.20it/s]

tensor(1136750., grad_fn=<SumBackward0>)


  0%|          | 295/100000 [02:12<12:32:51,  2.21it/s]

tensor(1131762., grad_fn=<SumBackward0>)


  0%|          | 296/100000 [02:13<12:28:24,  2.22it/s]

tensor(1129108., grad_fn=<SumBackward0>)


  0%|          | 297/100000 [02:13<12:24:36,  2.23it/s]

tensor(1127072., grad_fn=<SumBackward0>)


  0%|          | 298/100000 [02:13<12:24:03,  2.23it/s]

tensor(1124132., grad_fn=<SumBackward0>)


  0%|          | 299/100000 [02:14<12:30:01,  2.22it/s]

tensor(1121204., grad_fn=<SumBackward0>)


  0%|          | 300/100000 [02:14<12:33:41,  2.20it/s]

tensor(1118644., grad_fn=<SumBackward0>)


  0%|          | 301/100000 [02:15<12:32:35,  2.21it/s]

tensor(1115384., grad_fn=<SumBackward0>)


  0%|          | 302/100000 [02:15<12:27:26,  2.22it/s]

tensor(1111772., grad_fn=<SumBackward0>)


  0%|          | 303/100000 [02:16<12:22:15,  2.24it/s]

tensor(1109420., grad_fn=<SumBackward0>)


  0%|          | 304/100000 [02:16<12:18:52,  2.25it/s]

tensor(1105532., grad_fn=<SumBackward0>)


  0%|          | 305/100000 [02:17<13:00:09,  2.13it/s]

tensor(1102946., grad_fn=<SumBackward0>)


  0%|          | 306/100000 [02:17<13:16:52,  2.09it/s]

tensor(1099550., grad_fn=<SumBackward0>)


  0%|          | 307/100000 [02:18<12:51:21,  2.15it/s]

tensor(1099028., grad_fn=<SumBackward0>)


  0%|          | 308/100000 [02:18<12:31:45,  2.21it/s]

tensor(1094118., grad_fn=<SumBackward0>)


  0%|          | 309/100000 [02:18<12:17:04,  2.25it/s]

tensor(1089982., grad_fn=<SumBackward0>)


  0%|          | 310/100000 [02:19<12:06:38,  2.29it/s]

tensor(1089306., grad_fn=<SumBackward0>)


  0%|          | 311/100000 [02:19<11:59:51,  2.31it/s]

tensor(1086206., grad_fn=<SumBackward0>)


  0%|          | 312/100000 [02:20<11:54:21,  2.33it/s]

tensor(1083660., grad_fn=<SumBackward0>)


  0%|          | 313/100000 [02:20<11:50:03,  2.34it/s]

tensor(1080104., grad_fn=<SumBackward0>)


  0%|          | 314/100000 [02:21<11:48:06,  2.35it/s]

tensor(1078826., grad_fn=<SumBackward0>)


  0%|          | 315/100000 [02:21<11:45:36,  2.35it/s]

tensor(1075052., grad_fn=<SumBackward0>)


  0%|          | 316/100000 [02:21<11:44:16,  2.36it/s]

tensor(1072312., grad_fn=<SumBackward0>)


  0%|          | 317/100000 [02:22<11:48:56,  2.34it/s]

tensor(1069660., grad_fn=<SumBackward0>)


  0%|          | 318/100000 [02:22<11:47:04,  2.35it/s]

tensor(1066882., grad_fn=<SumBackward0>)


  0%|          | 319/100000 [02:23<11:45:32,  2.35it/s]

tensor(1065438., grad_fn=<SumBackward0>)


  0%|          | 320/100000 [02:23<11:45:06,  2.36it/s]

tensor(1060322., grad_fn=<SumBackward0>)


  0%|          | 321/100000 [02:24<11:46:39,  2.35it/s]

tensor(1058680., grad_fn=<SumBackward0>)


  0%|          | 322/100000 [02:24<11:45:38,  2.35it/s]

tensor(1057546., grad_fn=<SumBackward0>)


  0%|          | 323/100000 [02:24<11:44:53,  2.36it/s]

tensor(1055746., grad_fn=<SumBackward0>)


  0%|          | 324/100000 [02:25<11:43:57,  2.36it/s]

tensor(1051276., grad_fn=<SumBackward0>)


  0%|          | 325/100000 [02:25<11:43:03,  2.36it/s]

tensor(1050966., grad_fn=<SumBackward0>)


  0%|          | 326/100000 [02:26<11:43:00,  2.36it/s]

tensor(1047316., grad_fn=<SumBackward0>)


  0%|          | 327/100000 [02:26<11:42:24,  2.37it/s]

tensor(1045198., grad_fn=<SumBackward0>)


  0%|          | 328/100000 [02:27<11:41:56,  2.37it/s]

tensor(1041850., grad_fn=<SumBackward0>)


  0%|          | 329/100000 [02:27<11:42:41,  2.36it/s]

tensor(1039040., grad_fn=<SumBackward0>)


  0%|          | 330/100000 [02:27<11:43:16,  2.36it/s]

tensor(1038500., grad_fn=<SumBackward0>)


  0%|          | 331/100000 [02:28<11:46:50,  2.35it/s]

tensor(1035236., grad_fn=<SumBackward0>)


  0%|          | 332/100000 [02:28<11:45:27,  2.35it/s]

tensor(1033322., grad_fn=<SumBackward0>)


  0%|          | 333/100000 [02:29<11:43:58,  2.36it/s]

tensor(1030898., grad_fn=<SumBackward0>)


  0%|          | 334/100000 [02:29<11:42:55,  2.36it/s]

tensor(1029582., grad_fn=<SumBackward0>)


  0%|          | 335/100000 [02:30<11:43:08,  2.36it/s]

tensor(1026134., grad_fn=<SumBackward0>)


  0%|          | 336/100000 [02:30<11:42:26,  2.36it/s]

tensor(1023526., grad_fn=<SumBackward0>)


  0%|          | 337/100000 [02:30<11:41:40,  2.37it/s]

tensor(1022546., grad_fn=<SumBackward0>)


  0%|          | 338/100000 [02:31<11:41:50,  2.37it/s]

tensor(1022384., grad_fn=<SumBackward0>)


  0%|          | 339/100000 [02:31<11:41:23,  2.37it/s]

tensor(1018462., grad_fn=<SumBackward0>)


  0%|          | 340/100000 [02:32<11:41:42,  2.37it/s]

tensor(1016186., grad_fn=<SumBackward0>)


  0%|          | 341/100000 [02:32<11:42:21,  2.36it/s]

tensor(1014516., grad_fn=<SumBackward0>)


  0%|          | 342/100000 [02:32<11:41:48,  2.37it/s]

tensor(1010934., grad_fn=<SumBackward0>)


  0%|          | 343/100000 [02:33<11:41:45,  2.37it/s]

tensor(1009552., grad_fn=<SumBackward0>)


  0%|          | 344/100000 [02:33<11:41:58,  2.37it/s]

tensor(1007984., grad_fn=<SumBackward0>)


  0%|          | 345/100000 [02:34<11:41:28,  2.37it/s]

tensor(1006168., grad_fn=<SumBackward0>)


  0%|          | 346/100000 [02:34<11:41:38,  2.37it/s]

tensor(1004996., grad_fn=<SumBackward0>)


  0%|          | 347/100000 [02:35<11:42:05,  2.37it/s]

tensor(1003544., grad_fn=<SumBackward0>)


  0%|          | 348/100000 [02:35<11:42:58,  2.36it/s]

tensor(1001180., grad_fn=<SumBackward0>)


  0%|          | 349/100000 [02:35<11:42:54,  2.36it/s]

tensor(999310., grad_fn=<SumBackward0>)


  0%|          | 350/100000 [02:36<11:43:20,  2.36it/s]

tensor(997632., grad_fn=<SumBackward0>)


  0%|          | 351/100000 [02:36<11:42:32,  2.36it/s]

tensor(994672., grad_fn=<SumBackward0>)


  0%|          | 352/100000 [02:37<11:43:29,  2.36it/s]

tensor(992296., grad_fn=<SumBackward0>)


  0%|          | 353/100000 [02:37<11:43:26,  2.36it/s]

tensor(991738., grad_fn=<SumBackward0>)


  0%|          | 356/100000 [02:38<11:42:07,  2.37it/s]

tensor(988306., grad_fn=<SumBackward0>)


  0%|          | 357/100000 [02:39<11:41:42,  2.37it/s]

tensor(984544., grad_fn=<SumBackward0>)


  0%|          | 358/100000 [02:39<11:41:54,  2.37it/s]

tensor(980446., grad_fn=<SumBackward0>)


  0%|          | 359/100000 [02:40<11:42:21,  2.36it/s]

tensor(977102., grad_fn=<SumBackward0>)


  0%|          | 360/100000 [02:40<11:42:04,  2.37it/s]

tensor(974102., grad_fn=<SumBackward0>)


  0%|          | 361/100000 [02:40<11:42:39,  2.36it/s]

tensor(971116., grad_fn=<SumBackward0>)


  0%|          | 362/100000 [02:41<11:42:41,  2.36it/s]

tensor(968468., grad_fn=<SumBackward0>)


  0%|          | 363/100000 [02:41<11:42:03,  2.37it/s]

tensor(967068., grad_fn=<SumBackward0>)


  0%|          | 364/100000 [02:42<11:42:42,  2.36it/s]

tensor(964920., grad_fn=<SumBackward0>)


  0%|          | 365/100000 [02:42<11:44:28,  2.36it/s]

tensor(962218., grad_fn=<SumBackward0>)


  0%|          | 366/100000 [02:43<11:43:22,  2.36it/s]

tensor(960514., grad_fn=<SumBackward0>)


  0%|          | 367/100000 [02:43<11:43:25,  2.36it/s]

tensor(957976., grad_fn=<SumBackward0>)


  0%|          | 368/100000 [02:43<11:43:25,  2.36it/s]

tensor(955080., grad_fn=<SumBackward0>)


  0%|          | 369/100000 [02:44<11:42:52,  2.36it/s]

tensor(953760., grad_fn=<SumBackward0>)


  0%|          | 370/100000 [02:44<11:42:44,  2.36it/s]

tensor(951358., grad_fn=<SumBackward0>)


  0%|          | 371/100000 [02:45<11:42:59,  2.36it/s]

tensor(949462., grad_fn=<SumBackward0>)


  0%|          | 372/100000 [02:45<11:42:32,  2.36it/s]

tensor(947776., grad_fn=<SumBackward0>)


  0%|          | 373/100000 [02:46<11:42:52,  2.36it/s]

tensor(945710., grad_fn=<SumBackward0>)


  0%|          | 374/100000 [02:46<11:43:02,  2.36it/s]

tensor(944642., grad_fn=<SumBackward0>)


  0%|          | 375/100000 [02:46<11:42:46,  2.36it/s]

tensor(942618., grad_fn=<SumBackward0>)


  0%|          | 376/100000 [02:47<11:42:38,  2.36it/s]

tensor(941610., grad_fn=<SumBackward0>)


  0%|          | 377/100000 [02:47<11:43:02,  2.36it/s]

tensor(939988., grad_fn=<SumBackward0>)


  0%|          | 378/100000 [02:48<11:43:11,  2.36it/s]

tensor(937674., grad_fn=<SumBackward0>)


  0%|          | 379/100000 [02:48<11:42:58,  2.36it/s]

tensor(936478., grad_fn=<SumBackward0>)


  0%|          | 380/100000 [02:49<11:43:22,  2.36it/s]

tensor(935106., grad_fn=<SumBackward0>)


  0%|          | 381/100000 [02:49<11:51:17,  2.33it/s]

tensor(933606., grad_fn=<SumBackward0>)


  0%|          | 382/100000 [02:49<11:49:15,  2.34it/s]

tensor(932034., grad_fn=<SumBackward0>)


  0%|          | 383/100000 [02:50<11:47:43,  2.35it/s]

tensor(930902., grad_fn=<SumBackward0>)


  0%|          | 384/100000 [02:50<11:46:09,  2.35it/s]

tensor(929700., grad_fn=<SumBackward0>)


  0%|          | 385/100000 [02:51<11:45:38,  2.35it/s]

tensor(929150., grad_fn=<SumBackward0>)


  0%|          | 386/100000 [02:51<11:45:38,  2.35it/s]

tensor(928198., grad_fn=<SumBackward0>)


  0%|          | 387/100000 [02:52<11:44:27,  2.36it/s]

tensor(926528., grad_fn=<SumBackward0>)


  0%|          | 388/100000 [02:52<11:44:24,  2.36it/s]

tensor(925126., grad_fn=<SumBackward0>)


  0%|          | 389/100000 [02:52<11:43:48,  2.36it/s]

tensor(924278., grad_fn=<SumBackward0>)


  0%|          | 390/100000 [02:53<11:43:48,  2.36it/s]

tensor(923480., grad_fn=<SumBackward0>)


  0%|          | 391/100000 [02:53<11:43:47,  2.36it/s]

tensor(922126., grad_fn=<SumBackward0>)


  0%|          | 392/100000 [02:54<11:44:23,  2.36it/s]

tensor(920982., grad_fn=<SumBackward0>)


  0%|          | 393/100000 [02:54<11:43:59,  2.36it/s]

tensor(919636., grad_fn=<SumBackward0>)


  0%|          | 394/100000 [02:54<11:45:09,  2.35it/s]

tensor(919020., grad_fn=<SumBackward0>)


  0%|          | 395/100000 [02:55<11:45:13,  2.35it/s]

tensor(917580., grad_fn=<SumBackward0>)


  0%|          | 396/100000 [02:55<11:44:58,  2.35it/s]

tensor(916600., grad_fn=<SumBackward0>)


  0%|          | 397/100000 [02:56<11:44:56,  2.35it/s]

tensor(916086., grad_fn=<SumBackward0>)


  0%|          | 398/100000 [02:56<11:44:58,  2.35it/s]

tensor(915350., grad_fn=<SumBackward0>)


  0%|          | 399/100000 [02:57<11:45:01,  2.35it/s]

tensor(914308., grad_fn=<SumBackward0>)


  0%|          | 400/100000 [02:57<11:44:31,  2.36it/s]

tensor(913510., grad_fn=<SumBackward0>)


  0%|          | 401/100000 [02:57<11:44:20,  2.36it/s]

tensor(912862., grad_fn=<SumBackward0>)


  0%|          | 402/100000 [02:58<11:44:11,  2.36it/s]

tensor(911302., grad_fn=<SumBackward0>)


  0%|          | 403/100000 [02:58<11:45:04,  2.35it/s]

tensor(910486., grad_fn=<SumBackward0>)


  0%|          | 404/100000 [02:59<11:45:28,  2.35it/s]

tensor(909448., grad_fn=<SumBackward0>)


  0%|          | 405/100000 [02:59<11:44:52,  2.35it/s]

tensor(909388., grad_fn=<SumBackward0>)


  0%|          | 406/100000 [03:00<11:45:16,  2.35it/s]

tensor(907634., grad_fn=<SumBackward0>)


  0%|          | 407/100000 [03:00<11:45:09,  2.35it/s]

tensor(906712., grad_fn=<SumBackward0>)


  0%|          | 408/100000 [03:00<11:44:44,  2.36it/s]

tensor(905398., grad_fn=<SumBackward0>)


  0%|          | 409/100000 [03:01<11:46:50,  2.35it/s]

tensor(904870., grad_fn=<SumBackward0>)


  0%|          | 410/100000 [03:01<11:46:49,  2.35it/s]

tensor(904622., grad_fn=<SumBackward0>)


  0%|          | 411/100000 [03:02<11:46:08,  2.35it/s]

tensor(902870., grad_fn=<SumBackward0>)


  0%|          | 412/100000 [03:02<11:46:05,  2.35it/s]

tensor(902562., grad_fn=<SumBackward0>)


  0%|          | 413/100000 [03:03<11:46:11,  2.35it/s]

tensor(902066., grad_fn=<SumBackward0>)


  0%|          | 414/100000 [03:03<11:46:10,  2.35it/s]

tensor(901084., grad_fn=<SumBackward0>)


  0%|          | 415/100000 [03:03<11:46:08,  2.35it/s]

tensor(899662., grad_fn=<SumBackward0>)


  0%|          | 416/100000 [03:04<11:46:47,  2.35it/s]

tensor(899052., grad_fn=<SumBackward0>)


  0%|          | 417/100000 [03:04<11:47:52,  2.34it/s]

tensor(897942., grad_fn=<SumBackward0>)


  0%|          | 418/100000 [03:05<11:47:38,  2.35it/s]

tensor(896886., grad_fn=<SumBackward0>)


  0%|          | 419/100000 [03:05<11:47:18,  2.35it/s]

tensor(895860., grad_fn=<SumBackward0>)


  0%|          | 420/100000 [03:06<11:47:07,  2.35it/s]

tensor(895246., grad_fn=<SumBackward0>)


  0%|          | 421/100000 [03:06<11:46:56,  2.35it/s]

tensor(894762., grad_fn=<SumBackward0>)


  0%|          | 422/100000 [03:06<11:51:50,  2.33it/s]

tensor(893692., grad_fn=<SumBackward0>)


  0%|          | 423/100000 [03:07<11:50:04,  2.34it/s]

tensor(893270., grad_fn=<SumBackward0>)


  0%|          | 424/100000 [03:07<11:48:23,  2.34it/s]

tensor(892100., grad_fn=<SumBackward0>)


  0%|          | 426/100000 [03:08<11:47:12,  2.35it/s]

tensor(891284., grad_fn=<SumBackward0>)


  0%|          | 427/100000 [03:09<11:47:05,  2.35it/s]

tensor(889870., grad_fn=<SumBackward0>)


  0%|          | 428/100000 [03:09<11:46:50,  2.35it/s]

tensor(889734., grad_fn=<SumBackward0>)


  0%|          | 429/100000 [03:09<11:46:19,  2.35it/s]

tensor(888676., grad_fn=<SumBackward0>)


  0%|          | 430/100000 [03:10<11:45:25,  2.35it/s]

tensor(888260., grad_fn=<SumBackward0>)


  0%|          | 431/100000 [03:10<11:45:39,  2.35it/s]

tensor(887556., grad_fn=<SumBackward0>)


  0%|          | 432/100000 [03:11<11:45:46,  2.35it/s]

tensor(886850., grad_fn=<SumBackward0>)


  0%|          | 433/100000 [03:11<11:45:29,  2.35it/s]

tensor(886730., grad_fn=<SumBackward0>)


  0%|          | 434/100000 [03:12<11:45:42,  2.35it/s]

tensor(885718., grad_fn=<SumBackward0>)


  0%|          | 435/100000 [03:12<11:45:39,  2.35it/s]

tensor(885558., grad_fn=<SumBackward0>)


  0%|          | 436/100000 [03:12<11:45:37,  2.35it/s]

tensor(885144., grad_fn=<SumBackward0>)


  0%|          | 437/100000 [03:13<11:46:08,  2.35it/s]

tensor(884600., grad_fn=<SumBackward0>)


  0%|          | 438/100000 [03:13<11:47:06,  2.35it/s]

tensor(883166., grad_fn=<SumBackward0>)


  0%|          | 439/100000 [03:14<11:50:03,  2.34it/s]

tensor(882510., grad_fn=<SumBackward0>)


  0%|          | 440/100000 [03:14<11:48:32,  2.34it/s]

tensor(882274., grad_fn=<SumBackward0>)


  0%|          | 441/100000 [03:15<11:47:51,  2.34it/s]

tensor(881574., grad_fn=<SumBackward0>)


  0%|          | 442/100000 [03:15<11:47:47,  2.34it/s]

tensor(880814., grad_fn=<SumBackward0>)


  0%|          | 443/100000 [03:15<11:49:18,  2.34it/s]

tensor(879876., grad_fn=<SumBackward0>)


  0%|          | 444/100000 [03:16<11:48:45,  2.34it/s]

tensor(878746., grad_fn=<SumBackward0>)


  0%|          | 445/100000 [03:16<11:48:26,  2.34it/s]

tensor(878592., grad_fn=<SumBackward0>)


  0%|          | 446/100000 [03:17<11:48:46,  2.34it/s]

tensor(877262., grad_fn=<SumBackward0>)


  0%|          | 447/100000 [03:17<11:48:11,  2.34it/s]

tensor(876756., grad_fn=<SumBackward0>)


  0%|          | 448/100000 [03:17<11:47:41,  2.34it/s]

tensor(876122., grad_fn=<SumBackward0>)


  0%|          | 449/100000 [03:18<11:47:54,  2.34it/s]

tensor(875384., grad_fn=<SumBackward0>)


  0%|          | 451/100000 [03:19<11:48:55,  2.34it/s]

tensor(874390., grad_fn=<SumBackward0>)


  0%|          | 453/100000 [03:20<11:47:18,  2.35it/s]

tensor(872954., grad_fn=<SumBackward0>)


  0%|          | 454/100000 [03:20<12:58:27,  2.13it/s]

tensor(872810., grad_fn=<SumBackward0>)


  0%|          | 455/100000 [03:21<12:50:56,  2.15it/s]

tensor(872522., grad_fn=<SumBackward0>)


  0%|          | 456/100000 [03:21<12:31:24,  2.21it/s]

tensor(871676., grad_fn=<SumBackward0>)


  0%|          | 457/100000 [03:22<12:18:03,  2.25it/s]

tensor(871402., grad_fn=<SumBackward0>)


  0%|          | 458/100000 [03:22<12:08:45,  2.28it/s]

tensor(870784., grad_fn=<SumBackward0>)


  0%|          | 459/100000 [03:22<12:02:05,  2.30it/s]

tensor(870020., grad_fn=<SumBackward0>)


  0%|          | 460/100000 [03:23<11:59:19,  2.31it/s]

tensor(868988., grad_fn=<SumBackward0>)


  0%|          | 463/100000 [03:24<11:49:18,  2.34it/s]

tensor(868088., grad_fn=<SumBackward0>)


  0%|          | 464/100000 [03:24<11:48:25,  2.34it/s]

tensor(868010., grad_fn=<SumBackward0>)


  0%|          | 468/100000 [03:26<11:45:31,  2.35it/s]

tensor(867690., grad_fn=<SumBackward0>)


  0%|          | 470/100000 [03:27<11:45:38,  2.35it/s]

tensor(867372., grad_fn=<SumBackward0>)


  0%|          | 471/100000 [03:27<11:45:33,  2.35it/s]

tensor(867230., grad_fn=<SumBackward0>)


  0%|          | 472/100000 [03:28<11:45:58,  2.35it/s]

tensor(865586., grad_fn=<SumBackward0>)


  0%|          | 473/100000 [03:28<11:46:21,  2.35it/s]

tensor(865174., grad_fn=<SumBackward0>)


  0%|          | 475/100000 [03:29<11:45:40,  2.35it/s]

tensor(864412., grad_fn=<SumBackward0>)


  0%|          | 476/100000 [03:30<11:46:28,  2.35it/s]

tensor(864042., grad_fn=<SumBackward0>)


  0%|          | 477/100000 [03:30<11:46:46,  2.35it/s]

tensor(863992., grad_fn=<SumBackward0>)


  0%|          | 478/100000 [03:30<11:47:07,  2.35it/s]

tensor(863680., grad_fn=<SumBackward0>)


  0%|          | 479/100000 [03:31<11:47:00,  2.35it/s]

tensor(863306., grad_fn=<SumBackward0>)


  0%|          | 480/100000 [03:31<11:47:46,  2.34it/s]

tensor(862128., grad_fn=<SumBackward0>)


  0%|          | 481/100000 [03:32<11:47:42,  2.34it/s]

tensor(861986., grad_fn=<SumBackward0>)


  0%|          | 483/100000 [03:33<11:46:45,  2.35it/s]

tensor(861238., grad_fn=<SumBackward0>)


  0%|          | 485/100000 [03:33<11:46:34,  2.35it/s]

tensor(860458., grad_fn=<SumBackward0>)


  0%|          | 486/100000 [03:34<11:46:38,  2.35it/s]

tensor(860062., grad_fn=<SumBackward0>)


  0%|          | 487/100000 [03:34<11:46:50,  2.35it/s]

tensor(859542., grad_fn=<SumBackward0>)


  0%|          | 488/100000 [03:35<11:49:46,  2.34it/s]

tensor(858570., grad_fn=<SumBackward0>)


  0%|          | 489/100000 [03:35<11:49:17,  2.34it/s]

tensor(858488., grad_fn=<SumBackward0>)


  0%|          | 490/100000 [03:36<11:48:42,  2.34it/s]

tensor(858030., grad_fn=<SumBackward0>)


  0%|          | 491/100000 [03:36<11:48:35,  2.34it/s]

tensor(857322., grad_fn=<SumBackward0>)


  0%|          | 492/100000 [03:36<11:48:09,  2.34it/s]

tensor(856854., grad_fn=<SumBackward0>)


  0%|          | 493/100000 [03:37<11:47:55,  2.34it/s]

tensor(856448., grad_fn=<SumBackward0>)


  0%|          | 494/100000 [03:37<11:47:43,  2.34it/s]

tensor(855696., grad_fn=<SumBackward0>)


  0%|          | 495/100000 [03:38<11:47:54,  2.34it/s]

tensor(855124., grad_fn=<SumBackward0>)


  0%|          | 496/100000 [03:38<11:47:43,  2.34it/s]

tensor(854674., grad_fn=<SumBackward0>)


  0%|          | 498/100000 [03:39<11:47:11,  2.34it/s]

tensor(853380., grad_fn=<SumBackward0>)


  0%|          | 499/100000 [03:39<11:46:45,  2.35it/s]

tensor(853152., grad_fn=<SumBackward0>)


  0%|          | 500/100000 [03:40<11:47:02,  2.35it/s]

tensor(852816., grad_fn=<SumBackward0>)


  1%|          | 502/100000 [03:41<11:46:25,  2.35it/s]

tensor(851336., grad_fn=<SumBackward0>)


  1%|          | 505/100000 [03:42<11:47:34,  2.34it/s]

tensor(850644., grad_fn=<SumBackward0>)


  1%|          | 506/100000 [03:42<11:47:16,  2.34it/s]

tensor(850134., grad_fn=<SumBackward0>)


  1%|          | 507/100000 [03:43<11:47:23,  2.34it/s]

tensor(849426., grad_fn=<SumBackward0>)


  1%|          | 508/100000 [03:43<11:47:23,  2.34it/s]

tensor(848290., grad_fn=<SumBackward0>)


  1%|          | 510/100000 [03:44<11:47:08,  2.34it/s]

tensor(847914., grad_fn=<SumBackward0>)


  1%|          | 516/100000 [03:47<12:48:29,  2.16it/s]

tensor(847460., grad_fn=<SumBackward0>)


  1%|          | 517/100000 [03:47<12:37:03,  2.19it/s]

tensor(847254., grad_fn=<SumBackward0>)


  1%|          | 518/100000 [03:48<12:34:49,  2.20it/s]

tensor(846506., grad_fn=<SumBackward0>)


  1%|          | 520/100000 [03:49<12:33:59,  2.20it/s]

tensor(846290., grad_fn=<SumBackward0>)


  1%|          | 521/100000 [03:49<12:30:55,  2.21it/s]

tensor(845736., grad_fn=<SumBackward0>)


  1%|          | 522/100000 [03:50<12:40:21,  2.18it/s]

tensor(845080., grad_fn=<SumBackward0>)


  1%|          | 523/100000 [03:50<12:35:17,  2.20it/s]

tensor(844412., grad_fn=<SumBackward0>)


  1%|          | 524/100000 [03:51<12:31:49,  2.21it/s]

tensor(843888., grad_fn=<SumBackward0>)


  1%|          | 526/100000 [03:51<12:29:02,  2.21it/s]

tensor(843288., grad_fn=<SumBackward0>)


  1%|          | 527/100000 [03:52<12:27:03,  2.22it/s]

tensor(843004., grad_fn=<SumBackward0>)


  1%|          | 528/100000 [03:52<12:20:27,  2.24it/s]

tensor(842552., grad_fn=<SumBackward0>)


  1%|          | 530/100000 [03:53<12:21:10,  2.24it/s]

tensor(841916., grad_fn=<SumBackward0>)


  1%|          | 531/100000 [03:54<12:44:02,  2.17it/s]

tensor(841262., grad_fn=<SumBackward0>)


  1%|          | 532/100000 [03:54<12:34:33,  2.20it/s]

tensor(840968., grad_fn=<SumBackward0>)


  1%|          | 533/100000 [03:55<12:30:57,  2.21it/s]

tensor(840714., grad_fn=<SumBackward0>)


  1%|          | 535/100000 [03:56<12:49:27,  2.15it/s]

tensor(840616., grad_fn=<SumBackward0>)


  1%|          | 536/100000 [03:56<12:39:43,  2.18it/s]

tensor(839922., grad_fn=<SumBackward0>)


  1%|          | 537/100000 [03:57<12:31:56,  2.20it/s]

tensor(839534., grad_fn=<SumBackward0>)


  1%|          | 538/100000 [03:57<12:31:09,  2.21it/s]

tensor(838724., grad_fn=<SumBackward0>)


  1%|          | 539/100000 [03:57<12:50:49,  2.15it/s]

tensor(838386., grad_fn=<SumBackward0>)


  1%|          | 541/100000 [03:58<13:12:50,  2.09it/s]

tensor(838166., grad_fn=<SumBackward0>)


  1%|          | 542/100000 [03:59<13:22:38,  2.07it/s]

tensor(837652., grad_fn=<SumBackward0>)


  1%|          | 543/100000 [03:59<13:17:08,  2.08it/s]

tensor(837042., grad_fn=<SumBackward0>)


  1%|          | 544/100000 [04:00<13:02:30,  2.12it/s]

tensor(836948., grad_fn=<SumBackward0>)


  1%|          | 545/100000 [04:00<12:49:39,  2.15it/s]

tensor(836904., grad_fn=<SumBackward0>)


  1%|          | 546/100000 [04:01<12:47:59,  2.16it/s]

tensor(835858., grad_fn=<SumBackward0>)


  1%|          | 548/100000 [04:02<12:47:53,  2.16it/s]

tensor(835402., grad_fn=<SumBackward0>)


  1%|          | 549/100000 [04:02<12:44:02,  2.17it/s]

tensor(834946., grad_fn=<SumBackward0>)


  1%|          | 550/100000 [04:03<12:41:32,  2.18it/s]

tensor(834720., grad_fn=<SumBackward0>)


  1%|          | 552/100000 [04:04<12:46:29,  2.16it/s]

tensor(834460., grad_fn=<SumBackward0>)


  1%|          | 553/100000 [04:04<12:38:08,  2.19it/s]

tensor(834148., grad_fn=<SumBackward0>)


  1%|          | 554/100000 [04:04<12:41:56,  2.18it/s]

tensor(833472., grad_fn=<SumBackward0>)


  1%|          | 555/100000 [04:05<12:52:22,  2.15it/s]

tensor(833132., grad_fn=<SumBackward0>)


  1%|          | 556/100000 [04:05<12:41:25,  2.18it/s]

tensor(832558., grad_fn=<SumBackward0>)


  1%|          | 558/100000 [04:06<12:43:26,  2.17it/s]

tensor(831864., grad_fn=<SumBackward0>)


  1%|          | 559/100000 [04:07<12:41:28,  2.18it/s]

tensor(831548., grad_fn=<SumBackward0>)


  1%|          | 561/100000 [04:08<12:34:56,  2.20it/s]

tensor(830690., grad_fn=<SumBackward0>)


  1%|          | 562/100000 [04:08<12:31:14,  2.21it/s]

tensor(830642., grad_fn=<SumBackward0>)


  1%|          | 565/100000 [04:09<12:46:55,  2.16it/s]

tensor(830204., grad_fn=<SumBackward0>)


  1%|          | 566/100000 [04:10<13:20:11,  2.07it/s]

tensor(829588., grad_fn=<SumBackward0>)


  1%|          | 567/100000 [04:10<13:00:01,  2.12it/s]

tensor(829120., grad_fn=<SumBackward0>)


  1%|          | 568/100000 [04:11<12:42:55,  2.17it/s]

tensor(828520., grad_fn=<SumBackward0>)


  1%|          | 569/100000 [04:11<12:36:07,  2.19it/s]

tensor(828356., grad_fn=<SumBackward0>)


  1%|          | 570/100000 [04:12<12:33:36,  2.20it/s]

tensor(828290., grad_fn=<SumBackward0>)


  1%|          | 571/100000 [04:12<12:40:37,  2.18it/s]

tensor(827558., grad_fn=<SumBackward0>)


  1%|          | 572/100000 [04:13<12:33:54,  2.20it/s]

tensor(827382., grad_fn=<SumBackward0>)


  1%|          | 573/100000 [04:13<12:20:39,  2.24it/s]

tensor(826638., grad_fn=<SumBackward0>)


  1%|          | 575/100000 [04:14<12:05:17,  2.28it/s]

tensor(826566., grad_fn=<SumBackward0>)


  1%|          | 576/100000 [04:14<12:01:41,  2.30it/s]

tensor(826238., grad_fn=<SumBackward0>)


  1%|          | 577/100000 [04:15<11:58:57,  2.30it/s]

tensor(825738., grad_fn=<SumBackward0>)


  1%|          | 578/100000 [04:15<11:57:10,  2.31it/s]

tensor(825680., grad_fn=<SumBackward0>)


  1%|          | 579/100000 [04:16<11:58:13,  2.31it/s]

tensor(825424., grad_fn=<SumBackward0>)


  1%|          | 580/100000 [04:16<11:56:45,  2.31it/s]

tensor(824686., grad_fn=<SumBackward0>)


  1%|          | 581/100000 [04:17<11:55:23,  2.32it/s]

tensor(824542., grad_fn=<SumBackward0>)


  1%|          | 582/100000 [04:17<11:54:52,  2.32it/s]

tensor(824416., grad_fn=<SumBackward0>)


  1%|          | 583/100000 [04:17<11:54:15,  2.32it/s]

tensor(824212., grad_fn=<SumBackward0>)


  1%|          | 584/100000 [04:18<11:53:21,  2.32it/s]

tensor(823500., grad_fn=<SumBackward0>)


  1%|          | 585/100000 [04:18<11:52:24,  2.33it/s]

tensor(823266., grad_fn=<SumBackward0>)


  1%|          | 586/100000 [04:19<11:52:07,  2.33it/s]

tensor(822886., grad_fn=<SumBackward0>)


  1%|          | 587/100000 [04:19<11:51:47,  2.33it/s]

tensor(822722., grad_fn=<SumBackward0>)


  1%|          | 588/100000 [04:20<11:51:48,  2.33it/s]

tensor(822662., grad_fn=<SumBackward0>)


  1%|          | 589/100000 [04:20<11:52:53,  2.32it/s]

tensor(822552., grad_fn=<SumBackward0>)


  1%|          | 590/100000 [04:20<11:52:43,  2.32it/s]

tensor(821864., grad_fn=<SumBackward0>)


  1%|          | 591/100000 [04:21<11:51:55,  2.33it/s]

tensor(821840., grad_fn=<SumBackward0>)


  1%|          | 592/100000 [04:21<11:51:38,  2.33it/s]

tensor(820992., grad_fn=<SumBackward0>)


  1%|          | 593/100000 [04:22<11:52:21,  2.33it/s]

tensor(820896., grad_fn=<SumBackward0>)


  1%|          | 594/100000 [04:22<11:51:44,  2.33it/s]

tensor(820386., grad_fn=<SumBackward0>)


  1%|          | 596/100000 [04:23<11:51:28,  2.33it/s]

tensor(820280., grad_fn=<SumBackward0>)


  1%|          | 597/100000 [04:23<11:50:59,  2.33it/s]

tensor(820260., grad_fn=<SumBackward0>)


  1%|          | 598/100000 [04:24<11:51:47,  2.33it/s]

tensor(819902., grad_fn=<SumBackward0>)


  1%|          | 599/100000 [04:24<11:52:09,  2.33it/s]

tensor(819152., grad_fn=<SumBackward0>)


  1%|          | 601/100000 [04:25<11:51:35,  2.33it/s]

tensor(819118., grad_fn=<SumBackward0>)


  1%|          | 602/100000 [04:26<11:52:04,  2.33it/s]

tensor(818250., grad_fn=<SumBackward0>)


  1%|          | 604/100000 [04:26<11:52:07,  2.33it/s]

tensor(817604., grad_fn=<SumBackward0>)


  1%|          | 606/100000 [04:27<11:51:33,  2.33it/s]

tensor(816926., grad_fn=<SumBackward0>)


  1%|          | 607/100000 [04:28<11:51:52,  2.33it/s]

tensor(816342., grad_fn=<SumBackward0>)


  1%|          | 611/100000 [04:29<11:50:48,  2.33it/s]

tensor(816328., grad_fn=<SumBackward0>)


  1%|          | 612/100000 [04:30<11:51:14,  2.33it/s]

tensor(815674., grad_fn=<SumBackward0>)


  1%|          | 613/100000 [04:30<11:51:34,  2.33it/s]

tensor(814974., grad_fn=<SumBackward0>)


  1%|          | 614/100000 [04:31<11:51:54,  2.33it/s]

tensor(814786., grad_fn=<SumBackward0>)


  1%|          | 615/100000 [04:31<11:51:23,  2.33it/s]

tensor(814542., grad_fn=<SumBackward0>)


  1%|          | 616/100000 [04:32<11:52:42,  2.32it/s]

tensor(814204., grad_fn=<SumBackward0>)


  1%|          | 618/100000 [04:32<11:52:06,  2.33it/s]

tensor(813912., grad_fn=<SumBackward0>)


  1%|          | 619/100000 [04:33<11:52:16,  2.33it/s]

tensor(813182., grad_fn=<SumBackward0>)


  1%|          | 626/100000 [04:36<11:55:54,  2.31it/s]

tensor(812578., grad_fn=<SumBackward0>)


  1%|          | 627/100000 [04:36<11:54:43,  2.32it/s]

tensor(812562., grad_fn=<SumBackward0>)


  1%|          | 635/100000 [04:40<11:51:49,  2.33it/s]

tensor(812336., grad_fn=<SumBackward0>)


  1%|          | 637/100000 [04:41<12:03:45,  2.29it/s]

tensor(811666., grad_fn=<SumBackward0>)


  1%|          | 639/100000 [04:42<12:05:26,  2.28it/s]

tensor(811562., grad_fn=<SumBackward0>)


  1%|          | 640/100000 [04:42<12:06:19,  2.28it/s]

tensor(811510., grad_fn=<SumBackward0>)


  1%|          | 641/100000 [04:42<12:06:18,  2.28it/s]

tensor(811018., grad_fn=<SumBackward0>)


  1%|          | 644/100000 [04:44<12:03:35,  2.29it/s]

tensor(810774., grad_fn=<SumBackward0>)


  1%|          | 646/100000 [04:45<12:03:22,  2.29it/s]

tensor(809928., grad_fn=<SumBackward0>)


  1%|          | 650/100000 [04:46<12:02:18,  2.29it/s]

tensor(809714., grad_fn=<SumBackward0>)


  1%|          | 651/100000 [04:47<12:02:36,  2.29it/s]

tensor(809684., grad_fn=<SumBackward0>)


  1%|          | 652/100000 [04:47<12:02:27,  2.29it/s]

tensor(808988., grad_fn=<SumBackward0>)


  1%|          | 653/100000 [04:48<12:02:30,  2.29it/s]

tensor(808940., grad_fn=<SumBackward0>)


  1%|          | 654/100000 [04:48<12:02:15,  2.29it/s]

tensor(808474., grad_fn=<SumBackward0>)


  1%|          | 659/100000 [04:50<12:02:08,  2.29it/s]

tensor(808402., grad_fn=<SumBackward0>)


  1%|          | 660/100000 [04:51<12:02:59,  2.29it/s]

tensor(808356., grad_fn=<SumBackward0>)


  1%|          | 661/100000 [04:51<12:02:21,  2.29it/s]

tensor(808038., grad_fn=<SumBackward0>)


  1%|          | 663/100000 [04:52<12:01:23,  2.30it/s]

tensor(807946., grad_fn=<SumBackward0>)


  1%|          | 664/100000 [04:52<12:01:22,  2.30it/s]

tensor(807778., grad_fn=<SumBackward0>)


  1%|          | 667/100000 [04:54<12:00:49,  2.30it/s]

tensor(807522., grad_fn=<SumBackward0>)


  1%|          | 668/100000 [04:54<12:01:33,  2.29it/s]

tensor(807066., grad_fn=<SumBackward0>)


  1%|          | 669/100000 [04:55<12:01:22,  2.29it/s]

tensor(806734., grad_fn=<SumBackward0>)


  1%|          | 672/100000 [04:56<12:00:57,  2.30it/s]

tensor(806550., grad_fn=<SumBackward0>)


  1%|          | 673/100000 [04:56<12:01:11,  2.30it/s]

tensor(806174., grad_fn=<SumBackward0>)


  1%|          | 674/100000 [04:57<12:02:40,  2.29it/s]

tensor(805892., grad_fn=<SumBackward0>)


  1%|          | 675/100000 [04:57<12:02:00,  2.29it/s]

tensor(805614., grad_fn=<SumBackward0>)


  1%|          | 676/100000 [04:58<12:01:52,  2.29it/s]

tensor(805444., grad_fn=<SumBackward0>)


  1%|          | 677/100000 [04:58<12:01:53,  2.29it/s]

tensor(804742., grad_fn=<SumBackward0>)


  1%|          | 678/100000 [04:59<12:01:33,  2.29it/s]

tensor(804482., grad_fn=<SumBackward0>)


  1%|          | 679/100000 [04:59<12:01:11,  2.30it/s]

tensor(803882., grad_fn=<SumBackward0>)


  1%|          | 680/100000 [04:59<12:01:40,  2.29it/s]

tensor(803592., grad_fn=<SumBackward0>)


  1%|          | 684/100000 [05:01<11:59:47,  2.30it/s]

tensor(803168., grad_fn=<SumBackward0>)


  1%|          | 686/100000 [05:02<11:59:08,  2.30it/s]

tensor(802454., grad_fn=<SumBackward0>)


  1%|          | 688/100000 [05:03<12:00:50,  2.30it/s]

tensor(802452., grad_fn=<SumBackward0>)


  1%|          | 690/100000 [05:04<11:59:04,  2.30it/s]

tensor(802212., grad_fn=<SumBackward0>)


  1%|          | 691/100000 [05:04<11:58:39,  2.30it/s]

tensor(801814., grad_fn=<SumBackward0>)


  1%|          | 692/100000 [05:05<11:58:42,  2.30it/s]

tensor(801418., grad_fn=<SumBackward0>)


  1%|          | 693/100000 [05:05<11:59:33,  2.30it/s]

tensor(801404., grad_fn=<SumBackward0>)


  1%|          | 694/100000 [05:06<11:59:33,  2.30it/s]

tensor(801224., grad_fn=<SumBackward0>)


  1%|          | 696/100000 [05:06<12:00:33,  2.30it/s]

tensor(800890., grad_fn=<SumBackward0>)


  1%|          | 697/100000 [05:07<12:00:13,  2.30it/s]

tensor(800518., grad_fn=<SumBackward0>)


  1%|          | 698/100000 [05:07<12:00:07,  2.30it/s]

tensor(800306., grad_fn=<SumBackward0>)


  1%|          | 701/100000 [05:09<11:58:46,  2.30it/s]

tensor(799920., grad_fn=<SumBackward0>)


  1%|          | 704/100000 [05:10<11:58:06,  2.30it/s]

tensor(799544., grad_fn=<SumBackward0>)


  1%|          | 706/100000 [05:11<11:58:51,  2.30it/s]

tensor(799000., grad_fn=<SumBackward0>)


  1%|          | 707/100000 [05:11<11:59:36,  2.30it/s]

tensor(798656., grad_fn=<SumBackward0>)


  1%|          | 709/100000 [05:12<11:59:00,  2.30it/s]

tensor(798054., grad_fn=<SumBackward0>)


  1%|          | 710/100000 [05:12<11:59:24,  2.30it/s]

tensor(798052., grad_fn=<SumBackward0>)


  1%|          | 711/100000 [05:13<11:59:32,  2.30it/s]

tensor(797390., grad_fn=<SumBackward0>)


  1%|          | 712/100000 [05:13<12:03:04,  2.29it/s]

tensor(797324., grad_fn=<SumBackward0>)


  1%|          | 714/100000 [05:14<12:04:44,  2.28it/s]

tensor(796936., grad_fn=<SumBackward0>)


  1%|          | 715/100000 [05:15<12:03:04,  2.29it/s]

tensor(796888., grad_fn=<SumBackward0>)


  1%|          | 716/100000 [05:15<12:03:10,  2.29it/s]

tensor(796760., grad_fn=<SumBackward0>)


  1%|          | 717/100000 [05:16<12:03:18,  2.29it/s]

tensor(796208., grad_fn=<SumBackward0>)


  1%|          | 719/100000 [05:16<12:01:19,  2.29it/s]

tensor(796168., grad_fn=<SumBackward0>)


  1%|          | 721/100000 [05:17<11:59:33,  2.30it/s]

tensor(795758., grad_fn=<SumBackward0>)


  1%|          | 724/100000 [05:19<11:59:53,  2.30it/s]

tensor(795252., grad_fn=<SumBackward0>)


  1%|          | 725/100000 [05:19<11:59:55,  2.30it/s]

tensor(794752., grad_fn=<SumBackward0>)


  1%|          | 726/100000 [05:19<12:00:37,  2.30it/s]

tensor(794508., grad_fn=<SumBackward0>)


  1%|          | 727/100000 [05:20<12:09:37,  2.27it/s]

tensor(794468., grad_fn=<SumBackward0>)


  1%|          | 728/100000 [05:20<12:53:13,  2.14it/s]

tensor(794028., grad_fn=<SumBackward0>)


  1%|          | 730/100000 [05:21<12:27:43,  2.21it/s]

tensor(793784., grad_fn=<SumBackward0>)


  1%|          | 731/100000 [05:22<12:20:25,  2.23it/s]

tensor(793314., grad_fn=<SumBackward0>)


  1%|          | 732/100000 [05:22<12:22:52,  2.23it/s]

tensor(793260., grad_fn=<SumBackward0>)


  1%|          | 734/100000 [05:23<12:10:31,  2.26it/s]

tensor(792892., grad_fn=<SumBackward0>)


  1%|          | 736/100000 [05:24<12:04:23,  2.28it/s]

tensor(792484., grad_fn=<SumBackward0>)


  1%|          | 737/100000 [05:24<12:03:04,  2.29it/s]

tensor(792262., grad_fn=<SumBackward0>)


  1%|          | 739/100000 [05:25<12:00:43,  2.30it/s]

tensor(791532., grad_fn=<SumBackward0>)


  1%|          | 742/100000 [05:27<11:59:54,  2.30it/s]

tensor(791454., grad_fn=<SumBackward0>)


  1%|          | 743/100000 [05:27<12:00:07,  2.30it/s]

tensor(791130., grad_fn=<SumBackward0>)


  1%|          | 745/100000 [05:28<12:00:19,  2.30it/s]

tensor(790454., grad_fn=<SumBackward0>)


  1%|          | 747/100000 [05:29<11:59:24,  2.30it/s]

tensor(790344., grad_fn=<SumBackward0>)


  1%|          | 748/100000 [05:29<11:58:18,  2.30it/s]

tensor(789924., grad_fn=<SumBackward0>)


  1%|          | 749/100000 [05:30<11:58:03,  2.30it/s]

tensor(789498., grad_fn=<SumBackward0>)


  1%|          | 752/100000 [05:31<11:59:57,  2.30it/s]

tensor(788960., grad_fn=<SumBackward0>)


  1%|          | 753/100000 [05:31<12:01:04,  2.29it/s]

tensor(788834., grad_fn=<SumBackward0>)


  1%|          | 754/100000 [05:32<12:00:06,  2.30it/s]

tensor(788372., grad_fn=<SumBackward0>)


  1%|          | 760/100000 [05:34<11:57:47,  2.30it/s]

tensor(788222., grad_fn=<SumBackward0>)


  1%|          | 761/100000 [05:35<11:57:41,  2.30it/s]

tensor(787934., grad_fn=<SumBackward0>)


  1%|          | 764/100000 [05:36<12:00:15,  2.30it/s]

tensor(787456., grad_fn=<SumBackward0>)


  1%|          | 765/100000 [05:37<12:00:50,  2.29it/s]

tensor(787392., grad_fn=<SumBackward0>)


  1%|          | 766/100000 [05:37<12:00:02,  2.30it/s]

tensor(786958., grad_fn=<SumBackward0>)


  1%|          | 768/100000 [05:38<12:00:58,  2.29it/s]

tensor(786916., grad_fn=<SumBackward0>)


  1%|          | 770/100000 [05:39<12:01:19,  2.29it/s]

tensor(786566., grad_fn=<SumBackward0>)


  1%|          | 772/100000 [05:40<12:02:17,  2.29it/s]

tensor(785704., grad_fn=<SumBackward0>)


  1%|          | 774/100000 [05:40<12:01:42,  2.29it/s]

tensor(784930., grad_fn=<SumBackward0>)


  1%|          | 776/100000 [05:41<12:00:23,  2.30it/s]

tensor(784910., grad_fn=<SumBackward0>)


  1%|          | 777/100000 [05:42<12:02:13,  2.29it/s]

tensor(784888., grad_fn=<SumBackward0>)


  1%|          | 778/100000 [05:42<12:02:54,  2.29it/s]

tensor(784494., grad_fn=<SumBackward0>)


  1%|          | 779/100000 [05:43<12:02:49,  2.29it/s]

tensor(784234., grad_fn=<SumBackward0>)


  1%|          | 781/100000 [05:44<12:01:21,  2.29it/s]

tensor(784174., grad_fn=<SumBackward0>)


  1%|          | 782/100000 [05:44<12:01:40,  2.29it/s]

tensor(783498., grad_fn=<SumBackward0>)


  1%|          | 784/100000 [05:45<12:00:30,  2.30it/s]

tensor(783460., grad_fn=<SumBackward0>)


  1%|          | 786/100000 [05:46<12:00:08,  2.30it/s]

tensor(783440., grad_fn=<SumBackward0>)


  1%|          | 787/100000 [05:46<11:59:33,  2.30it/s]

tensor(783096., grad_fn=<SumBackward0>)


  1%|          | 790/100000 [05:47<11:59:00,  2.30it/s]

tensor(782848., grad_fn=<SumBackward0>)


  1%|          | 791/100000 [05:48<11:59:56,  2.30it/s]

tensor(782610., grad_fn=<SumBackward0>)


  1%|          | 794/100000 [05:49<11:59:26,  2.30it/s]

tensor(782422., grad_fn=<SumBackward0>)


  1%|          | 800/100000 [05:52<12:00:08,  2.30it/s]

tensor(782286., grad_fn=<SumBackward0>)


  1%|          | 802/100000 [05:53<11:59:38,  2.30it/s]

tensor(782076., grad_fn=<SumBackward0>)


  1%|          | 804/100000 [05:54<11:59:13,  2.30it/s]

tensor(781870., grad_fn=<SumBackward0>)


  1%|          | 807/100000 [05:55<11:57:36,  2.30it/s]

tensor(781658., grad_fn=<SumBackward0>)


  1%|          | 815/100000 [05:58<11:57:59,  2.30it/s]

tensor(781380., grad_fn=<SumBackward0>)


  1%|          | 817/100000 [05:59<11:58:46,  2.30it/s]

tensor(781084., grad_fn=<SumBackward0>)


  1%|          | 821/100000 [06:01<11:58:40,  2.30it/s]

tensor(781076., grad_fn=<SumBackward0>)


  1%|          | 823/100000 [06:02<11:58:39,  2.30it/s]

tensor(780964., grad_fn=<SumBackward0>)


  1%|          | 824/100000 [06:02<11:59:06,  2.30it/s]

tensor(780884., grad_fn=<SumBackward0>)


  1%|          | 826/100000 [06:03<11:59:57,  2.30it/s]

tensor(780286., grad_fn=<SumBackward0>)


  1%|          | 829/100000 [06:04<12:09:08,  2.27it/s]

tensor(780148., grad_fn=<SumBackward0>)


  1%|          | 830/100000 [06:05<12:08:11,  2.27it/s]

tensor(779658., grad_fn=<SumBackward0>)


  1%|          | 831/100000 [06:05<12:05:38,  2.28it/s]

tensor(779490., grad_fn=<SumBackward0>)


  1%|          | 832/100000 [06:06<12:03:50,  2.28it/s]

tensor(779366., grad_fn=<SumBackward0>)


  1%|          | 833/100000 [06:06<12:01:43,  2.29it/s]

tensor(778716., grad_fn=<SumBackward0>)


  1%|          | 839/100000 [06:09<11:58:09,  2.30it/s]

tensor(778226., grad_fn=<SumBackward0>)


  1%|          | 842/100000 [06:10<11:58:09,  2.30it/s]

tensor(778180., grad_fn=<SumBackward0>)


  1%|          | 843/100000 [06:11<11:59:10,  2.30it/s]

tensor(778038., grad_fn=<SumBackward0>)


  1%|          | 845/100000 [06:11<12:00:05,  2.29it/s]

tensor(777886., grad_fn=<SumBackward0>)


  1%|          | 847/100000 [06:12<12:00:21,  2.29it/s]

tensor(777660., grad_fn=<SumBackward0>)


  1%|          | 848/100000 [06:13<12:00:46,  2.29it/s]

tensor(777586., grad_fn=<SumBackward0>)


  1%|          | 850/100000 [06:14<11:59:31,  2.30it/s]

tensor(777480., grad_fn=<SumBackward0>)


  1%|          | 851/100000 [06:14<12:01:26,  2.29it/s]

tensor(777414., grad_fn=<SumBackward0>)


  1%|          | 852/100000 [06:14<12:02:26,  2.29it/s]

tensor(775864., grad_fn=<SumBackward0>)


  1%|          | 859/100000 [06:18<12:00:10,  2.29it/s]

tensor(775422., grad_fn=<SumBackward0>)


  1%|          | 861/100000 [06:18<12:00:28,  2.29it/s]

tensor(774772., grad_fn=<SumBackward0>)


  1%|          | 867/100000 [06:21<11:59:02,  2.30it/s]

tensor(774710., grad_fn=<SumBackward0>)


  1%|          | 870/100000 [06:22<12:00:15,  2.29it/s]

tensor(774654., grad_fn=<SumBackward0>)


  1%|          | 872/100000 [06:23<11:59:36,  2.30it/s]

tensor(774410., grad_fn=<SumBackward0>)


  1%|          | 873/100000 [06:24<12:00:04,  2.29it/s]

tensor(774024., grad_fn=<SumBackward0>)


  1%|          | 875/100000 [06:24<12:00:08,  2.29it/s]

tensor(773524., grad_fn=<SumBackward0>)


  1%|          | 883/100000 [06:28<11:59:28,  2.30it/s]

tensor(773388., grad_fn=<SumBackward0>)


  1%|          | 884/100000 [06:28<11:59:51,  2.29it/s]

tensor(773150., grad_fn=<SumBackward0>)


  1%|          | 885/100000 [06:29<12:00:26,  2.29it/s]

tensor(772578., grad_fn=<SumBackward0>)


  1%|          | 886/100000 [06:29<12:00:05,  2.29it/s]

tensor(772548., grad_fn=<SumBackward0>)


  1%|          | 887/100000 [06:30<11:59:32,  2.30it/s]

tensor(772062., grad_fn=<SumBackward0>)


  1%|          | 889/100000 [06:31<11:58:50,  2.30it/s]

tensor(772016., grad_fn=<SumBackward0>)


  1%|          | 891/100000 [06:31<11:59:00,  2.30it/s]

tensor(771874., grad_fn=<SumBackward0>)


  1%|          | 892/100000 [06:32<11:58:58,  2.30it/s]

tensor(771634., grad_fn=<SumBackward0>)


  1%|          | 893/100000 [06:32<11:58:26,  2.30it/s]

tensor(771448., grad_fn=<SumBackward0>)


  1%|          | 897/100000 [06:34<11:58:13,  2.30it/s]

tensor(771276., grad_fn=<SumBackward0>)


  1%|          | 898/100000 [06:35<11:58:24,  2.30it/s]

tensor(771140., grad_fn=<SumBackward0>)


  1%|          | 899/100000 [06:35<11:58:24,  2.30it/s]

tensor(770880., grad_fn=<SumBackward0>)


  1%|          | 903/100000 [06:37<11:58:26,  2.30it/s]

tensor(770502., grad_fn=<SumBackward0>)


  1%|          | 905/100000 [06:38<12:00:04,  2.29it/s]

tensor(769762., grad_fn=<SumBackward0>)


  1%|          | 907/100000 [06:38<11:59:59,  2.29it/s]

tensor(769756., grad_fn=<SumBackward0>)


  1%|          | 908/100000 [06:39<11:59:46,  2.29it/s]

tensor(769636., grad_fn=<SumBackward0>)


  1%|          | 910/100000 [06:40<11:59:23,  2.30it/s]

tensor(769198., grad_fn=<SumBackward0>)


  1%|          | 914/100000 [06:41<11:59:10,  2.30it/s]

tensor(768794., grad_fn=<SumBackward0>)


  1%|          | 915/100000 [06:42<12:00:08,  2.29it/s]

tensor(768038., grad_fn=<SumBackward0>)


  1%|          | 921/100000 [06:45<11:59:37,  2.29it/s]

tensor(767834., grad_fn=<SumBackward0>)


  1%|          | 926/100000 [06:47<12:00:28,  2.29it/s]

tensor(767634., grad_fn=<SumBackward0>)


  1%|          | 927/100000 [06:47<12:00:48,  2.29it/s]

tensor(767428., grad_fn=<SumBackward0>)


  1%|          | 928/100000 [06:48<12:00:39,  2.29it/s]

tensor(767068., grad_fn=<SumBackward0>)


  1%|          | 931/100000 [06:49<11:58:48,  2.30it/s]

tensor(766342., grad_fn=<SumBackward0>)


  1%|          | 936/100000 [06:51<11:59:41,  2.29it/s]

tensor(765936., grad_fn=<SumBackward0>)


  1%|          | 937/100000 [06:52<11:59:08,  2.30it/s]

tensor(765836., grad_fn=<SumBackward0>)


  1%|          | 938/100000 [06:52<11:59:48,  2.29it/s]

tensor(765658., grad_fn=<SumBackward0>)


  1%|          | 940/100000 [06:53<11:58:57,  2.30it/s]

tensor(765492., grad_fn=<SumBackward0>)


  1%|          | 941/100000 [06:53<11:58:57,  2.30it/s]

tensor(765410., grad_fn=<SumBackward0>)


  1%|          | 942/100000 [06:54<11:59:34,  2.29it/s]

tensor(764854., grad_fn=<SumBackward0>)


  1%|          | 945/100000 [06:55<11:58:20,  2.30it/s]

tensor(764796., grad_fn=<SumBackward0>)


  1%|          | 946/100000 [06:55<11:58:28,  2.30it/s]

tensor(764502., grad_fn=<SumBackward0>)


  1%|          | 954/100000 [06:59<11:57:56,  2.30it/s]

tensor(764034., grad_fn=<SumBackward0>)


  1%|          | 957/100000 [07:00<11:58:07,  2.30it/s]

tensor(763354., grad_fn=<SumBackward0>)


  1%|          | 959/100000 [07:01<11:57:14,  2.30it/s]

tensor(763130., grad_fn=<SumBackward0>)


  1%|          | 961/100000 [07:02<11:56:37,  2.30it/s]

tensor(763058., grad_fn=<SumBackward0>)


  1%|          | 962/100000 [07:02<11:56:52,  2.30it/s]

tensor(762616., grad_fn=<SumBackward0>)


  1%|          | 969/100000 [07:05<11:58:13,  2.30it/s]

tensor(762572., grad_fn=<SumBackward0>)


  1%|          | 971/100000 [07:06<11:59:24,  2.29it/s]

tensor(762516., grad_fn=<SumBackward0>)


  1%|          | 974/100000 [07:08<12:01:19,  2.29it/s]

tensor(762400., grad_fn=<SumBackward0>)


  1%|          | 975/100000 [07:08<12:01:44,  2.29it/s]

tensor(761630., grad_fn=<SumBackward0>)


  1%|          | 976/100000 [07:08<12:00:25,  2.29it/s]

tensor(761394., grad_fn=<SumBackward0>)


  1%|          | 977/100000 [07:09<11:59:33,  2.29it/s]

tensor(761024., grad_fn=<SumBackward0>)


  1%|          | 981/100000 [07:11<11:58:58,  2.30it/s]

tensor(760876., grad_fn=<SumBackward0>)


  1%|          | 982/100000 [07:11<11:58:30,  2.30it/s]

tensor(760766., grad_fn=<SumBackward0>)


  1%|          | 984/100000 [07:12<11:58:07,  2.30it/s]

tensor(760418., grad_fn=<SumBackward0>)


  1%|          | 988/100000 [07:14<12:02:16,  2.28it/s]

tensor(760312., grad_fn=<SumBackward0>)


  1%|          | 990/100000 [07:15<11:59:43,  2.29it/s]

tensor(760020., grad_fn=<SumBackward0>)


  1%|          | 992/100000 [07:15<11:54:11,  2.31it/s]

tensor(759522., grad_fn=<SumBackward0>)


  1%|          | 998/100000 [07:18<11:52:32,  2.32it/s]

tensor(759488., grad_fn=<SumBackward0>)


  1%|          | 1006/100000 [07:22<12:45:02,  2.16it/s]

tensor(759264., grad_fn=<SumBackward0>)


  1%|          | 1008/100000 [07:23<12:17:46,  2.24it/s]

tensor(759168., grad_fn=<SumBackward0>)


  1%|          | 1009/100000 [07:23<12:09:02,  2.26it/s]

tensor(758898., grad_fn=<SumBackward0>)


  1%|          | 1011/100000 [07:24<11:58:50,  2.30it/s]

tensor(758698., grad_fn=<SumBackward0>)


  1%|          | 1016/100000 [07:26<11:50:56,  2.32it/s]

tensor(758538., grad_fn=<SumBackward0>)


  1%|          | 1021/100000 [07:28<11:49:56,  2.32it/s]

tensor(758158., grad_fn=<SumBackward0>)


  1%|          | 1022/100000 [07:29<11:51:15,  2.32it/s]

tensor(757792., grad_fn=<SumBackward0>)


  1%|          | 1023/100000 [07:29<11:51:13,  2.32it/s]

tensor(757198., grad_fn=<SumBackward0>)


  1%|          | 1035/100000 [07:34<11:48:31,  2.33it/s]

tensor(757082., grad_fn=<SumBackward0>)


  1%|          | 1038/100000 [07:36<11:49:10,  2.33it/s]

tensor(756984., grad_fn=<SumBackward0>)


  1%|          | 1039/100000 [07:36<11:49:38,  2.32it/s]

tensor(756944., grad_fn=<SumBackward0>)


  1%|          | 1040/100000 [07:36<11:49:52,  2.32it/s]

tensor(756336., grad_fn=<SumBackward0>)


  1%|          | 1041/100000 [07:37<11:50:46,  2.32it/s]

tensor(755940., grad_fn=<SumBackward0>)


  1%|          | 1046/100000 [07:39<11:49:40,  2.32it/s]

tensor(755418., grad_fn=<SumBackward0>)


  1%|          | 1051/100000 [07:41<11:49:35,  2.32it/s]

tensor(755348., grad_fn=<SumBackward0>)


  1%|          | 1052/100000 [07:42<11:49:54,  2.32it/s]

tensor(754654., grad_fn=<SumBackward0>)


  1%|          | 1058/100000 [07:44<11:57:52,  2.30it/s]

tensor(754498., grad_fn=<SumBackward0>)


  1%|          | 1060/100000 [07:45<11:55:10,  2.31it/s]

tensor(754276., grad_fn=<SumBackward0>)


  1%|          | 1061/100000 [07:45<11:53:51,  2.31it/s]

tensor(754208., grad_fn=<SumBackward0>)


  1%|          | 1066/100000 [07:48<11:50:26,  2.32it/s]

tensor(753828., grad_fn=<SumBackward0>)


  1%|          | 1071/100000 [07:50<11:50:16,  2.32it/s]

tensor(753606., grad_fn=<SumBackward0>)


  1%|          | 1072/100000 [07:50<11:49:54,  2.32it/s]

tensor(753254., grad_fn=<SumBackward0>)


  1%|          | 1077/100000 [07:52<11:50:39,  2.32it/s]

tensor(753202., grad_fn=<SumBackward0>)


  1%|          | 1080/100000 [07:54<11:52:03,  2.32it/s]

tensor(753180., grad_fn=<SumBackward0>)


  1%|          | 1081/100000 [07:54<11:52:49,  2.31it/s]

tensor(752474., grad_fn=<SumBackward0>)


  1%|          | 1084/100000 [07:55<11:52:43,  2.31it/s]

tensor(752160., grad_fn=<SumBackward0>)


  1%|          | 1087/100000 [07:57<11:52:54,  2.31it/s]

tensor(751838., grad_fn=<SumBackward0>)


  1%|          | 1092/100000 [07:59<11:52:10,  2.31it/s]

tensor(751676., grad_fn=<SumBackward0>)


  1%|          | 1094/100000 [08:00<11:51:29,  2.32it/s]

tensor(751666., grad_fn=<SumBackward0>)


  1%|          | 1096/100000 [08:01<11:52:42,  2.31it/s]

tensor(751470., grad_fn=<SumBackward0>)


  1%|          | 1100/100000 [08:02<11:51:50,  2.32it/s]

tensor(751326., grad_fn=<SumBackward0>)


  1%|          | 1101/100000 [08:03<11:52:49,  2.31it/s]

tensor(751136., grad_fn=<SumBackward0>)


  1%|          | 1102/100000 [08:03<11:54:18,  2.31it/s]

tensor(751068., grad_fn=<SumBackward0>)


  1%|          | 1103/100000 [08:04<11:53:31,  2.31it/s]

tensor(751018., grad_fn=<SumBackward0>)


  1%|          | 1104/100000 [08:04<11:53:21,  2.31it/s]

tensor(750970., grad_fn=<SumBackward0>)


  1%|          | 1105/100000 [08:04<11:53:18,  2.31it/s]

tensor(750656., grad_fn=<SumBackward0>)


  1%|          | 1107/100000 [08:05<11:51:59,  2.31it/s]

tensor(750614., grad_fn=<SumBackward0>)


  1%|          | 1111/100000 [08:07<11:51:49,  2.32it/s]

tensor(750366., grad_fn=<SumBackward0>)


  1%|          | 1114/100000 [08:08<11:51:26,  2.32it/s]

tensor(750030., grad_fn=<SumBackward0>)


  1%|          | 1116/100000 [08:09<11:52:09,  2.31it/s]

tensor(749820., grad_fn=<SumBackward0>)


  1%|          | 1119/100000 [08:10<11:52:11,  2.31it/s]

tensor(749802., grad_fn=<SumBackward0>)


  1%|          | 1125/100000 [08:13<11:52:56,  2.31it/s]

tensor(749662., grad_fn=<SumBackward0>)


  1%|          | 1127/100000 [08:14<11:55:52,  2.30it/s]

tensor(749452., grad_fn=<SumBackward0>)


  1%|          | 1129/100000 [08:15<11:54:14,  2.31it/s]

tensor(749292., grad_fn=<SumBackward0>)


  1%|          | 1132/100000 [08:16<11:52:18,  2.31it/s]

tensor(749128., grad_fn=<SumBackward0>)


  1%|          | 1133/100000 [08:17<11:52:04,  2.31it/s]

tensor(748736., grad_fn=<SumBackward0>)


  1%|          | 1137/100000 [08:18<11:53:07,  2.31it/s]

tensor(748570., grad_fn=<SumBackward0>)


  1%|          | 1140/100000 [08:20<11:52:28,  2.31it/s]

tensor(748428., grad_fn=<SumBackward0>)


  1%|          | 1142/100000 [08:20<11:51:41,  2.32it/s]

tensor(748322., grad_fn=<SumBackward0>)


  1%|          | 1144/100000 [08:21<11:51:59,  2.31it/s]

tensor(748282., grad_fn=<SumBackward0>)


  1%|          | 1145/100000 [08:22<11:51:54,  2.31it/s]

tensor(748212., grad_fn=<SumBackward0>)


  1%|          | 1152/100000 [08:25<11:50:57,  2.32it/s]

tensor(748038., grad_fn=<SumBackward0>)


  1%|          | 1157/100000 [08:27<11:50:56,  2.32it/s]

tensor(747350., grad_fn=<SumBackward0>)


  1%|          | 1158/100000 [08:27<11:52:05,  2.31it/s]

tensor(747054., grad_fn=<SumBackward0>)


  1%|          | 1160/100000 [08:28<11:51:19,  2.32it/s]

tensor(746508., grad_fn=<SumBackward0>)


  1%|          | 1161/100000 [08:29<11:52:16,  2.31it/s]

tensor(746244., grad_fn=<SumBackward0>)


  1%|          | 1163/100000 [08:30<11:50:55,  2.32it/s]

tensor(746212., grad_fn=<SumBackward0>)


  1%|          | 1172/100000 [08:33<11:50:45,  2.32it/s]

tensor(746192., grad_fn=<SumBackward0>)


  1%|          | 1173/100000 [08:34<11:51:55,  2.31it/s]

tensor(745832., grad_fn=<SumBackward0>)


  1%|          | 1181/100000 [08:37<11:57:08,  2.30it/s]

tensor(745814., grad_fn=<SumBackward0>)


  1%|          | 1182/100000 [08:38<11:56:28,  2.30it/s]

tensor(745750., grad_fn=<SumBackward0>)


  1%|          | 1183/100000 [08:38<11:55:17,  2.30it/s]

tensor(745582., grad_fn=<SumBackward0>)


  1%|          | 1187/100000 [08:40<11:51:43,  2.31it/s]

tensor(745342., grad_fn=<SumBackward0>)


  1%|          | 1193/100000 [08:43<11:51:27,  2.31it/s]

tensor(744762., grad_fn=<SumBackward0>)


  1%|          | 1201/100000 [08:46<11:52:02,  2.31it/s]

tensor(744536., grad_fn=<SumBackward0>)


  1%|          | 1204/100000 [08:47<11:52:18,  2.31it/s]

tensor(744314., grad_fn=<SumBackward0>)


  1%|          | 1205/100000 [08:48<11:52:31,  2.31it/s]

tensor(744004., grad_fn=<SumBackward0>)


  1%|          | 1206/100000 [08:48<11:53:44,  2.31it/s]

tensor(743656., grad_fn=<SumBackward0>)


  1%|          | 1214/100000 [08:52<11:51:45,  2.31it/s]

tensor(743162., grad_fn=<SumBackward0>)


  1%|          | 1215/100000 [08:52<11:52:12,  2.31it/s]

tensor(742968., grad_fn=<SumBackward0>)


  1%|          | 1217/100000 [08:53<11:50:49,  2.32it/s]

tensor(742804., grad_fn=<SumBackward0>)


  1%|          | 1220/100000 [08:54<11:50:01,  2.32it/s]

tensor(742732., grad_fn=<SumBackward0>)


  1%|          | 1221/100000 [08:55<11:50:46,  2.32it/s]

tensor(742576., grad_fn=<SumBackward0>)


  1%|          | 1223/100000 [08:55<11:49:46,  2.32it/s]

tensor(742224., grad_fn=<SumBackward0>)


  1%|          | 1229/100000 [08:58<11:50:23,  2.32it/s]

tensor(741688., grad_fn=<SumBackward0>)


  1%|          | 1238/100000 [09:02<11:50:26,  2.32it/s]

tensor(741344., grad_fn=<SumBackward0>)


  1%|          | 1242/100000 [09:04<11:50:44,  2.32it/s]

tensor(740988., grad_fn=<SumBackward0>)


  1%|          | 1247/100000 [09:06<11:50:57,  2.32it/s]

tensor(740958., grad_fn=<SumBackward0>)


  1%|          | 1249/100000 [09:07<11:51:11,  2.31it/s]

tensor(740726., grad_fn=<SumBackward0>)


  1%|▏         | 1252/100000 [09:08<11:49:10,  2.32it/s]

tensor(740720., grad_fn=<SumBackward0>)


  1%|▏         | 1254/100000 [09:09<11:49:32,  2.32it/s]

tensor(740606., grad_fn=<SumBackward0>)


  1%|▏         | 1255/100000 [09:09<11:55:00,  2.30it/s]

tensor(740558., grad_fn=<SumBackward0>)


  1%|▏         | 1256/100000 [09:10<11:53:34,  2.31it/s]

tensor(740534., grad_fn=<SumBackward0>)


  1%|▏         | 1261/100000 [09:12<11:50:48,  2.32it/s]

tensor(740380., grad_fn=<SumBackward0>)


  1%|▏         | 1263/100000 [09:13<11:50:14,  2.32it/s]

tensor(740212., grad_fn=<SumBackward0>)


  1%|▏         | 1268/100000 [09:15<11:49:51,  2.32it/s]

tensor(740178., grad_fn=<SumBackward0>)


  1%|▏         | 1269/100000 [09:15<11:50:16,  2.32it/s]

tensor(740036., grad_fn=<SumBackward0>)


  1%|▏         | 1270/100000 [09:16<11:50:28,  2.32it/s]

tensor(739610., grad_fn=<SumBackward0>)


  1%|▏         | 1271/100000 [09:16<11:50:09,  2.32it/s]

tensor(739606., grad_fn=<SumBackward0>)


  1%|▏         | 1278/100000 [09:19<11:50:10,  2.32it/s]

tensor(739294., grad_fn=<SumBackward0>)


  1%|▏         | 1279/100000 [09:20<11:50:24,  2.32it/s]

tensor(738572., grad_fn=<SumBackward0>)


  1%|▏         | 1283/100000 [09:22<12:52:32,  2.13it/s]

tensor(738364., grad_fn=<SumBackward0>)


  1%|▏         | 1290/100000 [09:25<11:54:57,  2.30it/s]

tensor(737816., grad_fn=<SumBackward0>)


  1%|▏         | 1291/100000 [09:25<11:53:24,  2.31it/s]

tensor(737810., grad_fn=<SumBackward0>)


  1%|▏         | 1301/100000 [09:29<11:51:38,  2.31it/s]

tensor(737776., grad_fn=<SumBackward0>)


  1%|▏         | 1303/100000 [09:30<11:50:44,  2.31it/s]

tensor(737702., grad_fn=<SumBackward0>)


  1%|▏         | 1304/100000 [09:31<11:50:39,  2.31it/s]

tensor(737368., grad_fn=<SumBackward0>)


  1%|▏         | 1308/100000 [09:32<11:49:48,  2.32it/s]

tensor(736670., grad_fn=<SumBackward0>)


  1%|▏         | 1337/100000 [09:45<11:50:59,  2.31it/s]

tensor(736242., grad_fn=<SumBackward0>)


  1%|▏         | 1338/100000 [09:45<11:51:16,  2.31it/s]

tensor(736176., grad_fn=<SumBackward0>)


  1%|▏         | 1339/100000 [09:46<11:51:33,  2.31it/s]

tensor(735898., grad_fn=<SumBackward0>)


  1%|▏         | 1340/100000 [09:46<11:51:18,  2.31it/s]

tensor(735358., grad_fn=<SumBackward0>)


  1%|▏         | 1345/100000 [09:48<11:51:34,  2.31it/s]

tensor(735074., grad_fn=<SumBackward0>)


  1%|▏         | 1354/100000 [09:52<11:50:36,  2.31it/s]

tensor(734984., grad_fn=<SumBackward0>)


  1%|▏         | 1357/100000 [09:54<11:51:06,  2.31it/s]

tensor(734934., grad_fn=<SumBackward0>)


  1%|▏         | 1361/100000 [09:55<11:51:32,  2.31it/s]

tensor(734718., grad_fn=<SumBackward0>)


  1%|▏         | 1366/100000 [09:58<11:51:39,  2.31it/s]

tensor(734234., grad_fn=<SumBackward0>)


  1%|▏         | 1368/100000 [09:58<11:52:18,  2.31it/s]

tensor(733870., grad_fn=<SumBackward0>)


  1%|▏         | 1379/100000 [10:03<11:52:25,  2.31it/s]

tensor(733800., grad_fn=<SumBackward0>)


  1%|▏         | 1380/100000 [10:04<11:52:58,  2.31it/s]

tensor(733618., grad_fn=<SumBackward0>)


  1%|▏         | 1381/100000 [10:04<11:52:16,  2.31it/s]

tensor(733544., grad_fn=<SumBackward0>)


  1%|▏         | 1383/100000 [10:05<11:51:51,  2.31it/s]

tensor(733000., grad_fn=<SumBackward0>)


  1%|▏         | 1386/100000 [10:06<11:50:47,  2.31it/s]

tensor(732726., grad_fn=<SumBackward0>)


  1%|▏         | 1388/100000 [10:07<11:50:43,  2.31it/s]

tensor(732626., grad_fn=<SumBackward0>)


  1%|▏         | 1396/100000 [10:11<11:51:26,  2.31it/s]

tensor(732608., grad_fn=<SumBackward0>)


  1%|▏         | 1397/100000 [10:11<11:51:19,  2.31it/s]

tensor(732096., grad_fn=<SumBackward0>)


  1%|▏         | 1402/100000 [10:13<11:50:28,  2.31it/s]

tensor(731790., grad_fn=<SumBackward0>)


  1%|▏         | 1407/100000 [10:15<11:50:04,  2.31it/s]

tensor(731430., grad_fn=<SumBackward0>)


  1%|▏         | 1410/100000 [10:17<11:50:08,  2.31it/s]

tensor(731020., grad_fn=<SumBackward0>)


  1%|▏         | 1413/100000 [10:18<11:50:53,  2.31it/s]

tensor(730858., grad_fn=<SumBackward0>)


  1%|▏         | 1416/100000 [10:19<11:49:54,  2.31it/s]

tensor(730856., grad_fn=<SumBackward0>)


  1%|▏         | 1418/100000 [10:20<11:49:33,  2.32it/s]

tensor(730636., grad_fn=<SumBackward0>)


  1%|▏         | 1419/100000 [10:20<11:50:02,  2.31it/s]

tensor(730392., grad_fn=<SumBackward0>)


  1%|▏         | 1420/100000 [10:21<11:49:39,  2.32it/s]

tensor(730126., grad_fn=<SumBackward0>)


  1%|▏         | 1424/100000 [10:23<11:50:06,  2.31it/s]

tensor(729926., grad_fn=<SumBackward0>)


  1%|▏         | 1439/100000 [10:29<11:51:23,  2.31it/s]

tensor(729680., grad_fn=<SumBackward0>)


  1%|▏         | 1440/100000 [10:30<11:54:31,  2.30it/s]

tensor(729564., grad_fn=<SumBackward0>)


  1%|▏         | 1441/100000 [10:30<11:53:39,  2.30it/s]

tensor(729488., grad_fn=<SumBackward0>)


  1%|▏         | 1446/100000 [10:32<11:52:40,  2.30it/s]

tensor(729392., grad_fn=<SumBackward0>)


  1%|▏         | 1447/100000 [10:33<11:52:47,  2.30it/s]

tensor(729220., grad_fn=<SumBackward0>)


  1%|▏         | 1450/100000 [10:34<11:51:34,  2.31it/s]

tensor(729018., grad_fn=<SumBackward0>)


  1%|▏         | 1452/100000 [10:35<11:51:02,  2.31it/s]

tensor(728888., grad_fn=<SumBackward0>)


  1%|▏         | 1454/100000 [10:36<11:50:49,  2.31it/s]

tensor(728778., grad_fn=<SumBackward0>)


  1%|▏         | 1459/100000 [10:38<11:50:35,  2.31it/s]

tensor(728608., grad_fn=<SumBackward0>)


  1%|▏         | 1460/100000 [10:38<11:51:05,  2.31it/s]

tensor(728592., grad_fn=<SumBackward0>)


  1%|▏         | 1461/100000 [10:39<11:51:41,  2.31it/s]

tensor(728420., grad_fn=<SumBackward0>)


  1%|▏         | 1464/100000 [10:40<11:50:48,  2.31it/s]

tensor(728098., grad_fn=<SumBackward0>)


  1%|▏         | 1472/100000 [10:43<11:50:08,  2.31it/s]

tensor(727944., grad_fn=<SumBackward0>)


  1%|▏         | 1473/100000 [10:44<11:51:03,  2.31it/s]

tensor(727902., grad_fn=<SumBackward0>)


  2%|▏         | 1516/100000 [11:02<11:55:31,  2.29it/s]

tensor(727874., grad_fn=<SumBackward0>)


  2%|▏         | 1517/100000 [11:03<11:56:49,  2.29it/s]

tensor(727832., grad_fn=<SumBackward0>)


  2%|▏         | 1519/100000 [11:04<11:55:45,  2.29it/s]

tensor(727530., grad_fn=<SumBackward0>)


  2%|▏         | 1521/100000 [11:05<11:54:56,  2.30it/s]

tensor(727526., grad_fn=<SumBackward0>)


  2%|▏         | 1522/100000 [11:05<11:54:51,  2.30it/s]

tensor(727314., grad_fn=<SumBackward0>)


  2%|▏         | 1524/100000 [11:06<11:54:39,  2.30it/s]

tensor(727230., grad_fn=<SumBackward0>)


  2%|▏         | 1526/100000 [11:07<11:54:28,  2.30it/s]

tensor(727044., grad_fn=<SumBackward0>)


  2%|▏         | 1530/100000 [11:09<11:53:57,  2.30it/s]

tensor(726716., grad_fn=<SumBackward0>)


  2%|▏         | 1531/100000 [11:09<11:53:34,  2.30it/s]

tensor(726104., grad_fn=<SumBackward0>)


  2%|▏         | 1544/100000 [11:15<11:54:24,  2.30it/s]

tensor(725994., grad_fn=<SumBackward0>)


  2%|▏         | 1546/100000 [11:16<11:53:20,  2.30it/s]

tensor(725728., grad_fn=<SumBackward0>)


  2%|▏         | 1549/100000 [11:17<11:52:30,  2.30it/s]

tensor(725468., grad_fn=<SumBackward0>)


  2%|▏         | 1551/100000 [11:18<11:53:04,  2.30it/s]

tensor(725090., grad_fn=<SumBackward0>)


  2%|▏         | 1554/100000 [11:19<11:52:36,  2.30it/s]

tensor(724872., grad_fn=<SumBackward0>)


  2%|▏         | 1555/100000 [11:19<11:52:48,  2.30it/s]

tensor(724780., grad_fn=<SumBackward0>)


  2%|▏         | 1556/100000 [11:20<12:07:59,  2.25it/s]

tensor(724708., grad_fn=<SumBackward0>)


  2%|▏         | 1557/100000 [11:20<13:14:59,  2.06it/s]

tensor(724560., grad_fn=<SumBackward0>)


  2%|▏         | 1563/100000 [11:23<12:12:58,  2.24it/s]

tensor(724518., grad_fn=<SumBackward0>)


  2%|▏         | 1565/100000 [11:24<12:01:32,  2.27it/s]

tensor(724514., grad_fn=<SumBackward0>)


  2%|▏         | 1566/100000 [11:25<11:59:39,  2.28it/s]

tensor(724330., grad_fn=<SumBackward0>)


  2%|▏         | 1567/100000 [11:25<11:57:18,  2.29it/s]

tensor(724066., grad_fn=<SumBackward0>)


  2%|▏         | 1573/100000 [11:28<11:53:28,  2.30it/s]

tensor(723862., grad_fn=<SumBackward0>)


  2%|▏         | 1576/100000 [11:29<11:52:58,  2.30it/s]

tensor(723842., grad_fn=<SumBackward0>)


  2%|▏         | 1578/100000 [11:30<11:52:55,  2.30it/s]

tensor(723514., grad_fn=<SumBackward0>)


  2%|▏         | 1579/100000 [11:30<11:52:30,  2.30it/s]

tensor(723210., grad_fn=<SumBackward0>)


  2%|▏         | 1580/100000 [11:31<11:52:22,  2.30it/s]

tensor(723194., grad_fn=<SumBackward0>)


  2%|▏         | 1584/100000 [11:32<11:53:11,  2.30it/s]

tensor(722224., grad_fn=<SumBackward0>)


  2%|▏         | 1609/100000 [11:43<11:52:21,  2.30it/s]

tensor(722036., grad_fn=<SumBackward0>)


  2%|▏         | 1613/100000 [11:45<11:53:14,  2.30it/s]

tensor(722012., grad_fn=<SumBackward0>)


  2%|▏         | 1617/100000 [11:47<11:52:01,  2.30it/s]

tensor(721888., grad_fn=<SumBackward0>)


  2%|▏         | 1620/100000 [11:48<11:59:34,  2.28it/s]

tensor(721796., grad_fn=<SumBackward0>)


  2%|▏         | 1634/100000 [11:54<12:00:26,  2.28it/s]

tensor(721760., grad_fn=<SumBackward0>)


  2%|▏         | 1676/100000 [12:13<11:57:29,  2.28it/s]

tensor(720392., grad_fn=<SumBackward0>)


  2%|▏         | 1683/100000 [12:16<11:56:53,  2.29it/s]

tensor(720092., grad_fn=<SumBackward0>)


  2%|▏         | 1685/100000 [12:17<11:56:47,  2.29it/s]

tensor(720018., grad_fn=<SumBackward0>)


  2%|▏         | 1697/100000 [12:22<11:57:04,  2.28it/s]

tensor(718990., grad_fn=<SumBackward0>)


  2%|▏         | 1700/100000 [12:23<11:56:26,  2.29it/s]

tensor(718832., grad_fn=<SumBackward0>)


  2%|▏         | 1706/100000 [12:26<11:53:22,  2.30it/s]

tensor(718382., grad_fn=<SumBackward0>)


  2%|▏         | 1714/100000 [12:29<11:44:54,  2.32it/s]

tensor(718286., grad_fn=<SumBackward0>)


  2%|▏         | 1721/100000 [12:32<11:45:19,  2.32it/s]

tensor(717896., grad_fn=<SumBackward0>)


  2%|▏         | 1724/100000 [12:33<11:45:27,  2.32it/s]

tensor(717874., grad_fn=<SumBackward0>)


  2%|▏         | 1725/100000 [12:34<11:46:06,  2.32it/s]

tensor(717690., grad_fn=<SumBackward0>)


  2%|▏         | 1729/100000 [12:36<11:45:46,  2.32it/s]

tensor(717682., grad_fn=<SumBackward0>)


  2%|▏         | 1734/100000 [12:38<11:46:08,  2.32it/s]

tensor(716972., grad_fn=<SumBackward0>)


  2%|▏         | 1748/100000 [12:44<11:46:21,  2.32it/s]

tensor(716648., grad_fn=<SumBackward0>)


  2%|▏         | 1751/100000 [12:45<11:55:10,  2.29it/s]

tensor(716548., grad_fn=<SumBackward0>)


  2%|▏         | 1754/100000 [12:46<11:54:54,  2.29it/s]

tensor(716180., grad_fn=<SumBackward0>)


  2%|▏         | 1755/100000 [12:47<11:55:23,  2.29it/s]

tensor(715974., grad_fn=<SumBackward0>)


  2%|▏         | 1757/100000 [12:48<11:53:38,  2.29it/s]

tensor(715722., grad_fn=<SumBackward0>)


  2%|▏         | 1762/100000 [12:50<11:53:41,  2.29it/s]

tensor(715444., grad_fn=<SumBackward0>)


  2%|▏         | 1769/100000 [12:53<11:53:20,  2.30it/s]

tensor(715022., grad_fn=<SumBackward0>)


  2%|▏         | 1837/100000 [13:23<12:17:00,  2.22it/s]

tensor(714500., grad_fn=<SumBackward0>)


  2%|▏         | 1838/100000 [13:23<12:09:46,  2.24it/s]

tensor(714016., grad_fn=<SumBackward0>)


  2%|▏         | 1842/100000 [13:25<12:03:57,  2.26it/s]

tensor(713720., grad_fn=<SumBackward0>)


  2%|▏         | 1844/100000 [13:26<12:02:11,  2.27it/s]

tensor(713594., grad_fn=<SumBackward0>)


  2%|▏         | 1845/100000 [13:26<12:02:31,  2.26it/s]

tensor(712866., grad_fn=<SumBackward0>)


  2%|▏         | 1864/100000 [13:35<11:58:32,  2.28it/s]

tensor(712846., grad_fn=<SumBackward0>)


  2%|▏         | 1866/100000 [13:36<11:58:04,  2.28it/s]

tensor(712620., grad_fn=<SumBackward0>)


  2%|▏         | 1868/100000 [13:37<11:57:14,  2.28it/s]

tensor(712510., grad_fn=<SumBackward0>)


  2%|▏         | 1875/100000 [13:40<11:59:00,  2.27it/s]

tensor(712090., grad_fn=<SumBackward0>)


  2%|▏         | 1876/100000 [13:40<11:59:34,  2.27it/s]

tensor(712058., grad_fn=<SumBackward0>)


  2%|▏         | 1877/100000 [13:40<11:59:00,  2.27it/s]

tensor(711804., grad_fn=<SumBackward0>)


  2%|▏         | 1886/100000 [13:44<11:55:43,  2.28it/s]

tensor(711488., grad_fn=<SumBackward0>)


  2%|▏         | 1891/100000 [13:47<11:56:07,  2.28it/s]

tensor(711404., grad_fn=<SumBackward0>)


  2%|▏         | 1901/100000 [13:51<11:57:11,  2.28it/s]

tensor(711150., grad_fn=<SumBackward0>)


  2%|▏         | 1902/100000 [13:51<11:57:22,  2.28it/s]

tensor(711112., grad_fn=<SumBackward0>)


  2%|▏         | 1906/100000 [13:53<11:55:52,  2.28it/s]

tensor(710966., grad_fn=<SumBackward0>)


  2%|▏         | 1912/100000 [13:56<11:52:48,  2.29it/s]

tensor(710824., grad_fn=<SumBackward0>)


  2%|▏         | 1914/100000 [13:57<11:48:18,  2.31it/s]

tensor(710504., grad_fn=<SumBackward0>)


  2%|▏         | 1919/100000 [13:59<11:45:41,  2.32it/s]

tensor(710376., grad_fn=<SumBackward0>)


  2%|▏         | 1982/100000 [14:26<12:04:35,  2.25it/s]

tensor(709966., grad_fn=<SumBackward0>)


  2%|▏         | 1983/100000 [14:27<11:58:51,  2.27it/s]

tensor(709764., grad_fn=<SumBackward0>)


  2%|▏         | 1984/100000 [14:27<11:54:31,  2.29it/s]

tensor(709154., grad_fn=<SumBackward0>)


  2%|▏         | 1989/100000 [14:29<11:47:08,  2.31it/s]

tensor(709040., grad_fn=<SumBackward0>)


  2%|▏         | 1990/100000 [14:30<11:47:03,  2.31it/s]

tensor(708806., grad_fn=<SumBackward0>)


  2%|▏         | 1998/100000 [14:33<11:45:49,  2.31it/s]

tensor(708626., grad_fn=<SumBackward0>)


  2%|▏         | 1999/100000 [14:34<11:46:03,  2.31it/s]

tensor(708486., grad_fn=<SumBackward0>)


  2%|▏         | 2004/100000 [14:36<11:44:41,  2.32it/s]

tensor(708198., grad_fn=<SumBackward0>)


  2%|▏         | 2008/100000 [14:38<11:47:53,  2.31it/s]

tensor(708142., grad_fn=<SumBackward0>)


  2%|▏         | 2015/100000 [14:41<11:46:03,  2.31it/s]

tensor(708086., grad_fn=<SumBackward0>)


  2%|▏         | 2016/100000 [14:41<11:51:59,  2.29it/s]

tensor(707610., grad_fn=<SumBackward0>)


  2%|▏         | 2020/100000 [14:43<11:47:17,  2.31it/s]

tensor(707490., grad_fn=<SumBackward0>)


  2%|▏         | 2021/100000 [14:43<11:47:17,  2.31it/s]

tensor(707292., grad_fn=<SumBackward0>)


  2%|▏         | 2029/100000 [14:47<11:57:25,  2.28it/s]

tensor(707250., grad_fn=<SumBackward0>)


  2%|▏         | 2031/100000 [14:48<11:59:03,  2.27it/s]

tensor(706980., grad_fn=<SumBackward0>)


  2%|▏         | 2035/100000 [14:49<11:58:46,  2.27it/s]

tensor(706654., grad_fn=<SumBackward0>)


  2%|▏         | 2037/100000 [14:50<11:59:40,  2.27it/s]

tensor(706558., grad_fn=<SumBackward0>)


  2%|▏         | 2040/100000 [14:52<12:00:25,  2.27it/s]

tensor(706550., grad_fn=<SumBackward0>)


  2%|▏         | 2044/100000 [14:53<11:58:12,  2.27it/s]

tensor(706300., grad_fn=<SumBackward0>)


  2%|▏         | 2046/100000 [14:54<11:57:27,  2.28it/s]

tensor(705858., grad_fn=<SumBackward0>)


  2%|▏         | 2052/100000 [14:57<11:55:47,  2.28it/s]

tensor(705544., grad_fn=<SumBackward0>)


  2%|▏         | 2123/100000 [15:28<11:54:04,  2.28it/s]

tensor(704962., grad_fn=<SumBackward0>)


  2%|▏         | 2126/100000 [15:30<11:55:15,  2.28it/s]

tensor(704886., grad_fn=<SumBackward0>)


  2%|▏         | 2128/100000 [15:30<11:55:06,  2.28it/s]

tensor(704648., grad_fn=<SumBackward0>)


  2%|▏         | 2135/100000 [15:34<11:55:41,  2.28it/s]

tensor(704516., grad_fn=<SumBackward0>)


  2%|▏         | 2136/100000 [15:34<11:55:41,  2.28it/s]

tensor(704232., grad_fn=<SumBackward0>)


  2%|▏         | 2144/100000 [15:37<11:55:27,  2.28it/s]

tensor(703850., grad_fn=<SumBackward0>)


  2%|▏         | 2149/100000 [15:40<11:54:59,  2.28it/s]

tensor(703830., grad_fn=<SumBackward0>)


  2%|▏         | 2150/100000 [15:40<11:55:47,  2.28it/s]

tensor(703666., grad_fn=<SumBackward0>)


  2%|▏         | 2155/100000 [15:42<11:55:07,  2.28it/s]

tensor(703548., grad_fn=<SumBackward0>)


  2%|▏         | 2160/100000 [15:44<11:54:42,  2.28it/s]

tensor(703264., grad_fn=<SumBackward0>)


  2%|▏         | 2163/100000 [15:46<11:55:27,  2.28it/s]

tensor(703236., grad_fn=<SumBackward0>)


  2%|▏         | 2164/100000 [15:46<11:56:24,  2.28it/s]

tensor(703104., grad_fn=<SumBackward0>)


  2%|▏         | 2166/100000 [15:47<12:04:16,  2.25it/s]

tensor(702664., grad_fn=<SumBackward0>)


  2%|▏         | 2168/100000 [15:48<11:59:42,  2.27it/s]

tensor(702510., grad_fn=<SumBackward0>)


  2%|▏         | 2169/100000 [15:48<11:58:44,  2.27it/s]

tensor(702070., grad_fn=<SumBackward0>)


  2%|▏         | 2240/100000 [16:20<11:52:40,  2.29it/s]

tensor(701994., grad_fn=<SumBackward0>)


  2%|▏         | 2242/100000 [16:20<11:56:24,  2.27it/s]

tensor(701958., grad_fn=<SumBackward0>)


  2%|▏         | 2257/100000 [16:27<11:50:56,  2.29it/s]

tensor(701052., grad_fn=<SumBackward0>)


  2%|▏         | 2258/100000 [16:27<11:51:34,  2.29it/s]

tensor(700604., grad_fn=<SumBackward0>)


  2%|▏         | 2259/100000 [16:28<11:51:06,  2.29it/s]

tensor(700002., grad_fn=<SumBackward0>)


  2%|▏         | 2278/100000 [16:36<11:53:10,  2.28it/s]

tensor(699788., grad_fn=<SumBackward0>)


  2%|▏         | 2279/100000 [16:37<11:55:46,  2.28it/s]

tensor(699620., grad_fn=<SumBackward0>)


  2%|▏         | 2281/100000 [16:38<11:53:52,  2.28it/s]

tensor(699506., grad_fn=<SumBackward0>)


  2%|▏         | 2288/100000 [16:41<11:56:02,  2.27it/s]

tensor(699500., grad_fn=<SumBackward0>)


  2%|▏         | 2290/100000 [16:41<11:55:01,  2.28it/s]

tensor(699488., grad_fn=<SumBackward0>)


  2%|▏         | 2339/100000 [17:03<11:53:33,  2.28it/s]

tensor(699442., grad_fn=<SumBackward0>)


  2%|▏         | 2340/100000 [17:03<11:53:28,  2.28it/s]

tensor(699262., grad_fn=<SumBackward0>)


  2%|▏         | 2344/100000 [17:05<11:52:48,  2.28it/s]

tensor(698812., grad_fn=<SumBackward0>)


  2%|▏         | 2349/100000 [17:07<11:53:24,  2.28it/s]

tensor(698270., grad_fn=<SumBackward0>)


  2%|▏         | 2367/100000 [17:15<11:54:11,  2.28it/s]

tensor(698108., grad_fn=<SumBackward0>)


  2%|▏         | 2384/100000 [17:23<12:30:51,  2.17it/s]

tensor(697032., grad_fn=<SumBackward0>)


  2%|▏         | 2388/100000 [17:25<11:56:51,  2.27it/s]

tensor(696738., grad_fn=<SumBackward0>)


  2%|▏         | 2390/100000 [17:26<11:51:29,  2.29it/s]

tensor(696734., grad_fn=<SumBackward0>)


  2%|▏         | 2392/100000 [17:27<11:48:55,  2.29it/s]

tensor(696298., grad_fn=<SumBackward0>)


  2%|▏         | 2400/100000 [17:30<11:46:38,  2.30it/s]

tensor(696194., grad_fn=<SumBackward0>)


  2%|▏         | 2404/100000 [17:32<11:50:42,  2.29it/s]

tensor(696184., grad_fn=<SumBackward0>)


  2%|▏         | 2405/100000 [17:32<11:49:50,  2.29it/s]

tensor(695680., grad_fn=<SumBackward0>)


  2%|▏         | 2406/100000 [17:33<11:48:45,  2.29it/s]

tensor(695606., grad_fn=<SumBackward0>)


  2%|▏         | 2487/100000 [18:08<11:47:13,  2.30it/s]

tensor(695584., grad_fn=<SumBackward0>)


  2%|▏         | 2490/100000 [18:09<11:46:28,  2.30it/s]

tensor(695496., grad_fn=<SumBackward0>)


  2%|▏         | 2494/100000 [18:11<11:49:00,  2.29it/s]

tensor(695202., grad_fn=<SumBackward0>)


  2%|▏         | 2496/100000 [18:12<11:48:57,  2.29it/s]

tensor(695080., grad_fn=<SumBackward0>)


  3%|▎         | 2502/100000 [18:14<11:48:56,  2.29it/s]

tensor(694596., grad_fn=<SumBackward0>)


  3%|▎         | 2504/100000 [18:15<11:48:37,  2.29it/s]

tensor(693454., grad_fn=<SumBackward0>)


  3%|▎         | 2514/100000 [18:20<11:45:53,  2.30it/s]

tensor(693416., grad_fn=<SumBackward0>)


  3%|▎         | 2517/100000 [18:21<11:46:54,  2.30it/s]

tensor(693404., grad_fn=<SumBackward0>)


  3%|▎         | 2518/100000 [18:21<11:48:21,  2.29it/s]

tensor(692990., grad_fn=<SumBackward0>)


  3%|▎         | 2519/100000 [18:22<11:48:21,  2.29it/s]

tensor(692792., grad_fn=<SumBackward0>)


  3%|▎         | 2521/100000 [18:23<11:48:37,  2.29it/s]

tensor(692618., grad_fn=<SumBackward0>)


  3%|▎         | 2561/100000 [18:40<11:47:11,  2.30it/s]

tensor(692210., grad_fn=<SumBackward0>)


  3%|▎         | 2564/100000 [18:41<11:47:46,  2.29it/s]

tensor(691942., grad_fn=<SumBackward0>)


  3%|▎         | 2601/100000 [18:58<11:45:51,  2.30it/s]

tensor(691470., grad_fn=<SumBackward0>)


  3%|▎         | 2618/100000 [19:05<11:41:10,  2.31it/s]

tensor(691442., grad_fn=<SumBackward0>)


  3%|▎         | 2619/100000 [19:05<11:41:12,  2.31it/s]

tensor(691134., grad_fn=<SumBackward0>)


  3%|▎         | 2620/100000 [19:06<11:42:18,  2.31it/s]

tensor(690894., grad_fn=<SumBackward0>)


  3%|▎         | 2669/100000 [19:27<11:43:25,  2.31it/s]

tensor(690720., grad_fn=<SumBackward0>)


  3%|▎         | 2670/100000 [19:28<11:43:09,  2.31it/s]

tensor(690448., grad_fn=<SumBackward0>)


  3%|▎         | 2676/100000 [19:30<11:41:58,  2.31it/s]

tensor(690190., grad_fn=<SumBackward0>)


  3%|▎         | 2679/100000 [19:32<11:42:00,  2.31it/s]

tensor(689856., grad_fn=<SumBackward0>)


  3%|▎         | 2691/100000 [19:37<11:43:02,  2.31it/s]

tensor(689802., grad_fn=<SumBackward0>)


  3%|▎         | 2697/100000 [19:40<11:42:10,  2.31it/s]

tensor(689786., grad_fn=<SumBackward0>)


  3%|▎         | 2698/100000 [19:40<11:42:22,  2.31it/s]

tensor(689588., grad_fn=<SumBackward0>)


  3%|▎         | 2701/100000 [19:41<11:41:56,  2.31it/s]

tensor(689410., grad_fn=<SumBackward0>)


  3%|▎         | 2704/100000 [19:43<11:41:53,  2.31it/s]

tensor(688976., grad_fn=<SumBackward0>)


  3%|▎         | 2716/100000 [19:48<11:42:37,  2.31it/s]

tensor(688940., grad_fn=<SumBackward0>)


  3%|▎         | 2727/100000 [19:53<11:42:35,  2.31it/s]

tensor(688702., grad_fn=<SumBackward0>)


  3%|▎         | 2770/100000 [20:11<11:41:54,  2.31it/s]

tensor(688470., grad_fn=<SumBackward0>)


  3%|▎         | 2772/100000 [20:12<11:41:21,  2.31it/s]

tensor(688454., grad_fn=<SumBackward0>)


  3%|▎         | 2779/100000 [20:15<11:43:50,  2.30it/s]

tensor(688286., grad_fn=<SumBackward0>)


  3%|▎         | 2780/100000 [20:16<11:43:48,  2.30it/s]

tensor(688184., grad_fn=<SumBackward0>)


  3%|▎         | 2786/100000 [20:18<11:40:51,  2.31it/s]

tensor(687738., grad_fn=<SumBackward0>)


  3%|▎         | 2793/100000 [20:21<11:39:34,  2.32it/s]

tensor(687498., grad_fn=<SumBackward0>)


  3%|▎         | 2856/100000 [20:49<11:47:23,  2.29it/s]

tensor(687440., grad_fn=<SumBackward0>)


  3%|▎         | 2857/100000 [20:49<11:47:43,  2.29it/s]

tensor(686754., grad_fn=<SumBackward0>)


  3%|▎         | 2865/100000 [20:53<11:52:23,  2.27it/s]

tensor(686514., grad_fn=<SumBackward0>)


  3%|▎         | 2867/100000 [20:54<11:51:56,  2.27it/s]

tensor(686414., grad_fn=<SumBackward0>)


  3%|▎         | 2869/100000 [20:55<11:46:58,  2.29it/s]

tensor(686394., grad_fn=<SumBackward0>)


  3%|▎         | 2870/100000 [20:55<11:45:55,  2.29it/s]

tensor(686308., grad_fn=<SumBackward0>)


  3%|▎         | 2874/100000 [20:57<11:42:05,  2.31it/s]

tensor(686250., grad_fn=<SumBackward0>)


  3%|▎         | 2877/100000 [20:58<11:42:40,  2.30it/s]

tensor(686116., grad_fn=<SumBackward0>)


  3%|▎         | 2878/100000 [20:58<11:42:42,  2.30it/s]

tensor(685696., grad_fn=<SumBackward0>)


  3%|▎         | 2886/100000 [21:02<11:42:03,  2.31it/s]

tensor(685574., grad_fn=<SumBackward0>)


  3%|▎         | 2889/100000 [21:03<11:42:43,  2.30it/s]

tensor(685186., grad_fn=<SumBackward0>)


  3%|▎         | 2986/100000 [21:46<11:43:09,  2.30it/s]

tensor(685066., grad_fn=<SumBackward0>)


  3%|▎         | 2987/100000 [21:46<11:43:31,  2.30it/s]

tensor(684896., grad_fn=<SumBackward0>)


  3%|▎         | 2990/100000 [21:48<11:42:42,  2.30it/s]

tensor(684830., grad_fn=<SumBackward0>)


  3%|▎         | 2991/100000 [21:48<11:44:07,  2.30it/s]

tensor(684556., grad_fn=<SumBackward0>)


  3%|▎         | 2992/100000 [21:48<11:43:57,  2.30it/s]

tensor(684434., grad_fn=<SumBackward0>)


  3%|▎         | 2994/100000 [21:49<11:44:04,  2.30it/s]

tensor(684164., grad_fn=<SumBackward0>)


  3%|▎         | 3026/100000 [22:03<11:42:30,  2.30it/s]

tensor(684080., grad_fn=<SumBackward0>)


  3%|▎         | 3032/100000 [22:06<11:41:23,  2.30it/s]

tensor(683990., grad_fn=<SumBackward0>)


  3%|▎         | 3033/100000 [22:06<11:40:20,  2.31it/s]

tensor(683892., grad_fn=<SumBackward0>)


  3%|▎         | 3037/100000 [22:08<11:37:33,  2.32it/s]

tensor(683884., grad_fn=<SumBackward0>)


  3%|▎         | 3041/100000 [22:10<11:38:32,  2.31it/s]

tensor(683762., grad_fn=<SumBackward0>)


  3%|▎         | 3044/100000 [22:11<11:39:37,  2.31it/s]

tensor(683554., grad_fn=<SumBackward0>)


  3%|▎         | 3046/100000 [22:12<11:39:33,  2.31it/s]

tensor(683198., grad_fn=<SumBackward0>)


  3%|▎         | 3047/100000 [22:12<11:40:05,  2.31it/s]

tensor(683170., grad_fn=<SumBackward0>)


  3%|▎         | 3055/100000 [22:16<11:39:54,  2.31it/s]

tensor(683134., grad_fn=<SumBackward0>)


  3%|▎         | 3057/100000 [22:17<11:40:15,  2.31it/s]

tensor(682648., grad_fn=<SumBackward0>)


  3%|▎         | 3064/100000 [22:20<11:39:55,  2.31it/s]

tensor(682472., grad_fn=<SumBackward0>)


  3%|▎         | 3152/100000 [22:58<11:38:36,  2.31it/s]

tensor(682166., grad_fn=<SumBackward0>)


  3%|▎         | 3153/100000 [22:58<11:39:09,  2.31it/s]

tensor(682154., grad_fn=<SumBackward0>)


  3%|▎         | 3157/100000 [23:00<11:38:20,  2.31it/s]

tensor(682050., grad_fn=<SumBackward0>)


  3%|▎         | 3186/100000 [23:13<11:54:12,  2.26it/s]

tensor(681688., grad_fn=<SumBackward0>)


  3%|▎         | 3189/100000 [23:14<11:51:51,  2.27it/s]

tensor(681446., grad_fn=<SumBackward0>)


  3%|▎         | 3196/100000 [23:17<11:46:08,  2.28it/s]

tensor(681424., grad_fn=<SumBackward0>)


  3%|▎         | 3203/100000 [23:20<12:17:58,  2.19it/s]

tensor(681004., grad_fn=<SumBackward0>)


  3%|▎         | 3208/100000 [23:23<12:49:41,  2.10it/s]

tensor(680988., grad_fn=<SumBackward0>)


  3%|▎         | 3209/100000 [23:23<12:29:38,  2.15it/s]

tensor(680926., grad_fn=<SumBackward0>)


  3%|▎         | 3210/100000 [23:24<12:16:07,  2.19it/s]

tensor(680876., grad_fn=<SumBackward0>)


  3%|▎         | 3211/100000 [23:24<12:06:56,  2.22it/s]

tensor(680550., grad_fn=<SumBackward0>)


  3%|▎         | 3217/100000 [23:27<11:45:10,  2.29it/s]

tensor(680346., grad_fn=<SumBackward0>)


  3%|▎         | 3298/100000 [24:02<11:38:49,  2.31it/s]

tensor(680104., grad_fn=<SumBackward0>)


  3%|▎         | 3309/100000 [24:07<11:39:08,  2.30it/s]

tensor(680038., grad_fn=<SumBackward0>)


  3%|▎         | 3330/100000 [24:16<11:39:21,  2.30it/s]

tensor(679406., grad_fn=<SumBackward0>)


  3%|▎         | 3335/100000 [24:18<11:38:56,  2.31it/s]

tensor(678926., grad_fn=<SumBackward0>)


  3%|▎         | 3336/100000 [24:18<11:39:21,  2.30it/s]

tensor(678848., grad_fn=<SumBackward0>)


  3%|▎         | 3347/100000 [24:23<11:38:06,  2.31it/s]

tensor(678562., grad_fn=<SumBackward0>)


  3%|▎         | 3348/100000 [24:23<11:38:35,  2.31it/s]

tensor(678432., grad_fn=<SumBackward0>)


  3%|▎         | 3349/100000 [24:24<11:39:20,  2.30it/s]

tensor(678376., grad_fn=<SumBackward0>)


  3%|▎         | 3350/100000 [24:24<11:39:16,  2.30it/s]

tensor(678258., grad_fn=<SumBackward0>)


  3%|▎         | 3351/100000 [24:25<11:39:40,  2.30it/s]

tensor(678078., grad_fn=<SumBackward0>)


  3%|▎         | 3365/100000 [24:31<11:39:07,  2.30it/s]

tensor(678054., grad_fn=<SumBackward0>)


  3%|▎         | 3368/100000 [24:32<11:39:49,  2.30it/s]

tensor(678020., grad_fn=<SumBackward0>)


  3%|▎         | 3373/100000 [24:34<11:37:16,  2.31it/s]

tensor(677934., grad_fn=<SumBackward0>)


  3%|▎         | 3375/100000 [24:35<11:38:16,  2.31it/s]

tensor(677828., grad_fn=<SumBackward0>)


  3%|▎         | 3475/100000 [25:19<11:43:48,  2.29it/s]

tensor(677808., grad_fn=<SumBackward0>)


  3%|▎         | 3477/100000 [25:20<12:48:01,  2.09it/s]

tensor(677600., grad_fn=<SumBackward0>)


  3%|▎         | 3478/100000 [25:21<14:12:43,  1.89it/s]

tensor(677352., grad_fn=<SumBackward0>)


  3%|▎         | 3483/100000 [25:23<12:33:07,  2.14it/s]

tensor(677244., grad_fn=<SumBackward0>)


  3%|▎         | 3485/100000 [25:24<12:08:19,  2.21it/s]

tensor(677206., grad_fn=<SumBackward0>)


  3%|▎         | 3488/100000 [25:25<11:52:31,  2.26it/s]

tensor(677176., grad_fn=<SumBackward0>)


  3%|▎         | 3489/100000 [25:26<11:50:43,  2.26it/s]

tensor(676910., grad_fn=<SumBackward0>)


  3%|▎         | 3492/100000 [25:27<11:46:50,  2.28it/s]

tensor(676728., grad_fn=<SumBackward0>)


  3%|▎         | 3493/100000 [25:28<11:46:28,  2.28it/s]

tensor(676682., grad_fn=<SumBackward0>)


  4%|▎         | 3502/100000 [25:32<11:44:58,  2.28it/s]

tensor(676640., grad_fn=<SumBackward0>)


  4%|▎         | 3506/100000 [25:33<11:42:46,  2.29it/s]

tensor(675984., grad_fn=<SumBackward0>)


  4%|▎         | 3608/100000 [26:18<11:44:01,  2.28it/s]

tensor(675856., grad_fn=<SumBackward0>)


  4%|▎         | 3612/100000 [26:20<11:43:06,  2.28it/s]

tensor(675760., grad_fn=<SumBackward0>)


  4%|▎         | 3617/100000 [26:22<11:43:04,  2.28it/s]

tensor(675600., grad_fn=<SumBackward0>)


  4%|▎         | 3619/100000 [26:23<11:43:51,  2.28it/s]

tensor(675322., grad_fn=<SumBackward0>)


  4%|▎         | 3627/100000 [26:26<11:43:49,  2.28it/s]

tensor(674928., grad_fn=<SumBackward0>)


  4%|▎         | 3670/100000 [26:45<11:45:02,  2.28it/s]

tensor(674730., grad_fn=<SumBackward0>)


  4%|▎         | 3680/100000 [26:50<11:45:38,  2.28it/s]

tensor(674466., grad_fn=<SumBackward0>)


  4%|▎         | 3681/100000 [26:50<11:46:00,  2.27it/s]

tensor(674200., grad_fn=<SumBackward0>)


  4%|▎         | 3742/100000 [27:17<11:48:43,  2.26it/s]

tensor(674144., grad_fn=<SumBackward0>)


  4%|▎         | 3745/100000 [27:18<11:46:49,  2.27it/s]

tensor(673916., grad_fn=<SumBackward0>)


  4%|▎         | 3749/100000 [27:20<11:48:03,  2.27it/s]

tensor(672728., grad_fn=<SumBackward0>)


  4%|▍         | 3788/100000 [27:37<11:31:17,  2.32it/s]

tensor(672594., grad_fn=<SumBackward0>)


  4%|▍         | 3791/100000 [27:38<11:32:09,  2.32it/s]

tensor(672014., grad_fn=<SumBackward0>)


  4%|▍         | 3867/100000 [28:11<11:34:47,  2.31it/s]

tensor(671968., grad_fn=<SumBackward0>)


  4%|▍         | 3869/100000 [28:12<11:35:22,  2.30it/s]

tensor(671712., grad_fn=<SumBackward0>)


  4%|▍         | 3896/100000 [28:24<11:34:09,  2.31it/s]

tensor(671484., grad_fn=<SumBackward0>)


  4%|▍         | 3899/100000 [28:25<11:34:12,  2.31it/s]

tensor(671482., grad_fn=<SumBackward0>)


  4%|▍         | 3901/100000 [28:26<11:34:24,  2.31it/s]

tensor(671350., grad_fn=<SumBackward0>)


  4%|▍         | 3902/100000 [28:26<11:34:42,  2.31it/s]

tensor(671242., grad_fn=<SumBackward0>)


  4%|▍         | 3905/100000 [28:28<11:33:50,  2.31it/s]

tensor(671232., grad_fn=<SumBackward0>)


  4%|▍         | 3907/100000 [28:29<11:33:22,  2.31it/s]

tensor(671192., grad_fn=<SumBackward0>)


  4%|▍         | 3909/100000 [28:30<11:33:11,  2.31it/s]

tensor(671108., grad_fn=<SumBackward0>)


  4%|▍         | 3910/100000 [28:30<11:33:53,  2.31it/s]

tensor(670782., grad_fn=<SumBackward0>)


  4%|▍         | 3923/100000 [28:36<11:35:36,  2.30it/s]

tensor(670380., grad_fn=<SumBackward0>)


  4%|▍         | 4010/100000 [29:14<11:34:46,  2.30it/s]

tensor(670272., grad_fn=<SumBackward0>)


  4%|▍         | 4015/100000 [29:16<11:34:17,  2.30it/s]

tensor(670262., grad_fn=<SumBackward0>)


  4%|▍         | 4022/100000 [29:19<11:34:07,  2.30it/s]

tensor(670252., grad_fn=<SumBackward0>)


  4%|▍         | 4024/100000 [29:20<11:34:33,  2.30it/s]

tensor(669908., grad_fn=<SumBackward0>)


  4%|▍         | 4025/100000 [29:20<12:13:00,  2.18it/s]

tensor(669340., grad_fn=<SumBackward0>)


  4%|▍         | 4028/100000 [29:22<13:16:06,  2.01it/s]

tensor(669186., grad_fn=<SumBackward0>)


  4%|▍         | 4070/100000 [29:40<11:43:23,  2.27it/s]

tensor(669060., grad_fn=<SumBackward0>)


  4%|▍         | 4076/100000 [29:43<11:42:54,  2.27it/s]

tensor(668938., grad_fn=<SumBackward0>)


  4%|▍         | 4130/100000 [30:06<11:44:09,  2.27it/s]

tensor(668848., grad_fn=<SumBackward0>)


  4%|▍         | 4131/100000 [30:07<11:43:46,  2.27it/s]

tensor(668452., grad_fn=<SumBackward0>)


  4%|▍         | 4132/100000 [30:07<11:43:11,  2.27it/s]

tensor(668360., grad_fn=<SumBackward0>)


  4%|▍         | 4151/100000 [30:16<11:40:20,  2.28it/s]

tensor(668326., grad_fn=<SumBackward0>)


  4%|▍         | 4152/100000 [30:16<11:40:45,  2.28it/s]

tensor(667694., grad_fn=<SumBackward0>)


  4%|▍         | 4156/100000 [30:18<11:40:40,  2.28it/s]

tensor(667586., grad_fn=<SumBackward0>)


  4%|▍         | 4161/100000 [30:20<11:39:14,  2.28it/s]

tensor(667142., grad_fn=<SumBackward0>)


  4%|▍         | 4166/100000 [30:22<11:39:24,  2.28it/s]

tensor(666998., grad_fn=<SumBackward0>)


  4%|▍         | 4177/100000 [30:27<11:36:07,  2.29it/s]

tensor(666832., grad_fn=<SumBackward0>)


  4%|▍         | 4250/100000 [30:59<11:34:52,  2.30it/s]

tensor(666602., grad_fn=<SumBackward0>)


  4%|▍         | 4252/100000 [31:00<11:35:56,  2.29it/s]

tensor(666238., grad_fn=<SumBackward0>)


  4%|▍         | 4264/100000 [31:05<11:39:37,  2.28it/s]

tensor(665958., grad_fn=<SumBackward0>)


  4%|▍         | 4295/100000 [31:18<11:30:51,  2.31it/s]

tensor(665876., grad_fn=<SumBackward0>)


  4%|▍         | 4312/100000 [31:26<11:34:05,  2.30it/s]

tensor(665814., grad_fn=<SumBackward0>)


  4%|▍         | 4313/100000 [31:27<11:33:46,  2.30it/s]

tensor(665460., grad_fn=<SumBackward0>)


  4%|▍         | 4346/100000 [31:41<11:33:22,  2.30it/s]

tensor(665372., grad_fn=<SumBackward0>)


  4%|▍         | 4355/100000 [31:45<11:32:01,  2.30it/s]

tensor(664806., grad_fn=<SumBackward0>)


  4%|▍         | 4424/100000 [32:15<11:32:13,  2.30it/s]

tensor(664798., grad_fn=<SumBackward0>)


  4%|▍         | 4425/100000 [32:16<11:32:14,  2.30it/s]

tensor(664700., grad_fn=<SumBackward0>)


  4%|▍         | 4427/100000 [32:16<11:32:36,  2.30it/s]

tensor(664570., grad_fn=<SumBackward0>)


  4%|▍         | 4429/100000 [32:17<11:32:44,  2.30it/s]

tensor(664470., grad_fn=<SumBackward0>)


  4%|▍         | 4436/100000 [32:20<11:32:54,  2.30it/s]

tensor(664320., grad_fn=<SumBackward0>)


  4%|▍         | 4437/100000 [32:21<11:32:57,  2.30it/s]

tensor(664272., grad_fn=<SumBackward0>)


  4%|▍         | 4438/100000 [32:21<11:32:49,  2.30it/s]

tensor(664042., grad_fn=<SumBackward0>)


  4%|▍         | 4441/100000 [32:22<11:32:38,  2.30it/s]

tensor(663786., grad_fn=<SumBackward0>)


  4%|▍         | 4464/100000 [32:32<11:30:57,  2.30it/s]

tensor(663562., grad_fn=<SumBackward0>)


  4%|▍         | 4469/100000 [32:35<11:30:36,  2.31it/s]

tensor(663450., grad_fn=<SumBackward0>)


  4%|▍         | 4470/100000 [32:35<11:31:07,  2.30it/s]

tensor(663084., grad_fn=<SumBackward0>)


  4%|▍         | 4476/100000 [32:38<11:36:09,  2.29it/s]

tensor(662948., grad_fn=<SumBackward0>)


  5%|▍         | 4506/100000 [32:51<11:30:35,  2.30it/s]

tensor(662838., grad_fn=<SumBackward0>)


  5%|▍         | 4507/100000 [32:51<11:31:14,  2.30it/s]

tensor(662830., grad_fn=<SumBackward0>)


  5%|▍         | 4510/100000 [32:52<11:31:52,  2.30it/s]

tensor(662722., grad_fn=<SumBackward0>)


  5%|▍         | 4513/100000 [32:54<11:31:21,  2.30it/s]

tensor(662636., grad_fn=<SumBackward0>)


  5%|▍         | 4514/100000 [32:54<11:31:34,  2.30it/s]

tensor(661978., grad_fn=<SumBackward0>)


  5%|▍         | 4522/100000 [32:58<11:30:58,  2.30it/s]

tensor(661740., grad_fn=<SumBackward0>)


  5%|▍         | 4629/100000 [33:45<11:30:02,  2.30it/s]

tensor(661690., grad_fn=<SumBackward0>)


  5%|▍         | 4630/100000 [33:45<11:29:48,  2.30it/s]

tensor(661502., grad_fn=<SumBackward0>)


  5%|▍         | 4653/100000 [33:55<11:29:25,  2.30it/s]

tensor(661420., grad_fn=<SumBackward0>)


  5%|▍         | 4654/100000 [33:55<11:30:53,  2.30it/s]

tensor(661388., grad_fn=<SumBackward0>)


  5%|▍         | 4655/100000 [33:56<11:31:07,  2.30it/s]

tensor(661236., grad_fn=<SumBackward0>)


  5%|▍         | 4657/100000 [33:57<11:29:57,  2.30it/s]

tensor(661060., grad_fn=<SumBackward0>)


  5%|▍         | 4658/100000 [33:57<11:30:10,  2.30it/s]

tensor(660664., grad_fn=<SumBackward0>)


  5%|▍         | 4661/100000 [33:58<11:29:57,  2.30it/s]

tensor(660420., grad_fn=<SumBackward0>)


  5%|▍         | 4665/100000 [34:00<11:29:23,  2.30it/s]

tensor(659892., grad_fn=<SumBackward0>)


  5%|▍         | 4744/100000 [34:35<11:37:35,  2.28it/s]

tensor(659870., grad_fn=<SumBackward0>)


  5%|▍         | 4745/100000 [34:35<11:37:32,  2.28it/s]

tensor(659818., grad_fn=<SumBackward0>)


  5%|▍         | 4766/100000 [34:44<11:34:35,  2.29it/s]

tensor(659496., grad_fn=<SumBackward0>)


  5%|▍         | 4768/100000 [34:45<11:35:18,  2.28it/s]

tensor(659072., grad_fn=<SumBackward0>)


  5%|▍         | 4780/100000 [34:50<11:34:15,  2.29it/s]

tensor(658988., grad_fn=<SumBackward0>)


  5%|▍         | 4803/100000 [35:01<11:34:13,  2.29it/s]

tensor(658732., grad_fn=<SumBackward0>)


  5%|▍         | 4809/100000 [35:03<11:44:21,  2.25it/s]

tensor(658394., grad_fn=<SumBackward0>)


  5%|▍         | 4838/100000 [35:16<11:34:18,  2.28it/s]

tensor(658038., grad_fn=<SumBackward0>)


  5%|▍         | 4840/100000 [35:17<11:35:36,  2.28it/s]

tensor(657962., grad_fn=<SumBackward0>)


  5%|▍         | 4844/100000 [35:19<11:34:31,  2.28it/s]

tensor(657858., grad_fn=<SumBackward0>)


  5%|▍         | 4845/100000 [35:19<11:35:00,  2.28it/s]

tensor(657354., grad_fn=<SumBackward0>)


  5%|▍         | 4962/100000 [36:10<11:27:29,  2.30it/s]

tensor(657296., grad_fn=<SumBackward0>)


  5%|▍         | 4965/100000 [36:11<11:27:54,  2.30it/s]

tensor(657242., grad_fn=<SumBackward0>)


  5%|▍         | 4966/100000 [36:12<11:28:04,  2.30it/s]

tensor(657054., grad_fn=<SumBackward0>)


  5%|▍         | 4967/100000 [36:12<11:27:17,  2.30it/s]

tensor(656584., grad_fn=<SumBackward0>)


  5%|▍         | 4979/100000 [36:17<11:25:51,  2.31it/s]

tensor(656532., grad_fn=<SumBackward0>)


  5%|▌         | 5030/100000 [36:40<11:27:01,  2.30it/s]

tensor(656498., grad_fn=<SumBackward0>)


  5%|▌         | 5037/100000 [36:43<11:26:43,  2.30it/s]

tensor(656452., grad_fn=<SumBackward0>)


  5%|▌         | 5061/100000 [36:53<11:26:35,  2.30it/s]

tensor(656108., grad_fn=<SumBackward0>)


  5%|▌         | 5064/100000 [36:54<11:26:02,  2.31it/s]

tensor(656080., grad_fn=<SumBackward0>)


  5%|▌         | 5066/100000 [36:55<11:26:00,  2.31it/s]

tensor(656064., grad_fn=<SumBackward0>)


  5%|▌         | 5093/100000 [37:07<11:27:35,  2.30it/s]

tensor(655618., grad_fn=<SumBackward0>)


  5%|▌         | 5098/100000 [37:09<11:27:41,  2.30it/s]

tensor(655252., grad_fn=<SumBackward0>)


  5%|▌         | 5100/100000 [37:10<11:27:23,  2.30it/s]

tensor(654990., grad_fn=<SumBackward0>)


  5%|▌         | 5101/100000 [37:10<11:27:43,  2.30it/s]

tensor(654640., grad_fn=<SumBackward0>)


  5%|▌         | 5122/100000 [37:20<11:26:08,  2.30it/s]

tensor(654450., grad_fn=<SumBackward0>)


  5%|▌         | 5123/100000 [37:20<11:41:50,  2.25it/s]

tensor(653680., grad_fn=<SumBackward0>)


  5%|▌         | 5235/100000 [38:09<11:23:56,  2.31it/s]

tensor(653538., grad_fn=<SumBackward0>)


  5%|▌         | 5237/100000 [38:10<11:25:02,  2.31it/s]

tensor(652994., grad_fn=<SumBackward0>)


  5%|▌         | 5240/100000 [38:11<11:25:25,  2.30it/s]

tensor(652714., grad_fn=<SumBackward0>)


  5%|▌         | 5263/100000 [38:21<11:23:31,  2.31it/s]

tensor(652604., grad_fn=<SumBackward0>)


  5%|▌         | 5290/100000 [38:33<11:28:35,  2.29it/s]

tensor(652284., grad_fn=<SumBackward0>)


  5%|▌         | 5315/100000 [38:44<11:29:04,  2.29it/s]

tensor(651916., grad_fn=<SumBackward0>)


  5%|▌         | 5323/100000 [38:47<11:28:56,  2.29it/s]

tensor(651706., grad_fn=<SumBackward0>)


  5%|▌         | 5368/100000 [39:07<11:28:44,  2.29it/s]

tensor(651112., grad_fn=<SumBackward0>)


  5%|▌         | 5371/100000 [39:08<11:28:25,  2.29it/s]

tensor(651084., grad_fn=<SumBackward0>)


  5%|▌         | 5372/100000 [39:09<11:28:19,  2.29it/s]

tensor(650824., grad_fn=<SumBackward0>)


  5%|▌         | 5374/100000 [39:10<11:27:47,  2.29it/s]

tensor(650690., grad_fn=<SumBackward0>)


  5%|▌         | 5394/100000 [39:18<11:27:34,  2.29it/s]

tensor(650608., grad_fn=<SumBackward0>)


  5%|▌         | 5441/100000 [39:40<11:32:49,  2.27it/s]

tensor(650532., grad_fn=<SumBackward0>)


  5%|▌         | 5450/100000 [39:43<11:33:04,  2.27it/s]

tensor(650110., grad_fn=<SumBackward0>)


  5%|▌         | 5497/100000 [40:04<11:31:00,  2.28it/s]

tensor(649804., grad_fn=<SumBackward0>)


  5%|▌         | 5498/100000 [40:05<11:31:15,  2.28it/s]

tensor(649794., grad_fn=<SumBackward0>)


  5%|▌         | 5499/100000 [40:05<11:30:54,  2.28it/s]

tensor(649682., grad_fn=<SumBackward0>)


  6%|▌         | 5518/100000 [40:13<11:30:26,  2.28it/s]

tensor(649482., grad_fn=<SumBackward0>)


  6%|▌         | 5520/100000 [40:14<11:31:06,  2.28it/s]

tensor(649236., grad_fn=<SumBackward0>)


  6%|▌         | 5521/100000 [40:15<11:31:37,  2.28it/s]

tensor(648792., grad_fn=<SumBackward0>)


  6%|▌         | 5546/100000 [40:26<11:29:38,  2.28it/s]

tensor(648650., grad_fn=<SumBackward0>)


  6%|▌         | 5587/100000 [40:44<11:29:43,  2.28it/s]

tensor(648564., grad_fn=<SumBackward0>)


  6%|▌         | 5629/100000 [41:02<11:30:09,  2.28it/s]

tensor(648510., grad_fn=<SumBackward0>)


  6%|▌         | 5632/100000 [41:03<11:30:01,  2.28it/s]

tensor(648304., grad_fn=<SumBackward0>)


  6%|▌         | 5634/100000 [41:04<11:30:13,  2.28it/s]

tensor(647934., grad_fn=<SumBackward0>)


  6%|▌         | 5636/100000 [41:05<11:30:59,  2.28it/s]

tensor(647566., grad_fn=<SumBackward0>)


  6%|▌         | 5655/100000 [41:13<11:30:36,  2.28it/s]

tensor(647396., grad_fn=<SumBackward0>)


  6%|▌         | 5657/100000 [41:14<11:32:38,  2.27it/s]

tensor(647252., grad_fn=<SumBackward0>)


  6%|▌         | 5659/100000 [41:15<11:32:27,  2.27it/s]

tensor(647112., grad_fn=<SumBackward0>)


  6%|▌         | 5661/100000 [41:16<11:31:18,  2.27it/s]

tensor(647072., grad_fn=<SumBackward0>)


  6%|▌         | 5699/100000 [41:33<11:30:17,  2.28it/s]

tensor(646794., grad_fn=<SumBackward0>)


  6%|▌         | 5782/100000 [42:09<11:19:58,  2.31it/s]

tensor(646790., grad_fn=<SumBackward0>)


  6%|▌         | 5783/100000 [42:09<11:20:21,  2.31it/s]

tensor(646576., grad_fn=<SumBackward0>)


  6%|▌         | 5784/100000 [42:10<11:20:40,  2.31it/s]

tensor(646462., grad_fn=<SumBackward0>)


  6%|▌         | 5786/100000 [42:11<11:20:09,  2.31it/s]

tensor(645852., grad_fn=<SumBackward0>)


  6%|▌         | 5823/100000 [42:27<11:30:59,  2.27it/s]

tensor(645538., grad_fn=<SumBackward0>)


  6%|▌         | 5824/100000 [42:28<11:31:29,  2.27it/s]

tensor(645498., grad_fn=<SumBackward0>)


  6%|▌         | 5828/100000 [42:29<11:33:26,  2.26it/s]

tensor(644864., grad_fn=<SumBackward0>)


  6%|▌         | 5848/100000 [42:38<11:30:10,  2.27it/s]

tensor(644410., grad_fn=<SumBackward0>)


  6%|▌         | 5922/100000 [43:11<11:28:58,  2.28it/s]

tensor(644158., grad_fn=<SumBackward0>)


  6%|▌         | 5943/100000 [43:20<11:55:29,  2.19it/s]

tensor(643742., grad_fn=<SumBackward0>)


  6%|▌         | 5945/100000 [43:21<14:30:32,  1.80it/s]

tensor(643676., grad_fn=<SumBackward0>)


  6%|▌         | 5965/100000 [43:30<11:27:23,  2.28it/s]

tensor(643438., grad_fn=<SumBackward0>)


  6%|▌         | 5983/100000 [43:38<11:27:32,  2.28it/s]

tensor(643434., grad_fn=<SumBackward0>)


  6%|▌         | 5984/100000 [43:39<11:27:55,  2.28it/s]

tensor(643376., grad_fn=<SumBackward0>)


  6%|▌         | 6018/100000 [43:53<11:27:48,  2.28it/s]

tensor(642858., grad_fn=<SumBackward0>)


  6%|▌         | 6055/100000 [44:10<11:22:29,  2.29it/s]

tensor(642310., grad_fn=<SumBackward0>)


  6%|▌         | 6058/100000 [44:11<11:20:56,  2.30it/s]

tensor(642214., grad_fn=<SumBackward0>)


  6%|▌         | 6146/100000 [44:49<11:29:22,  2.27it/s]

tensor(641818., grad_fn=<SumBackward0>)


  6%|▌         | 6198/100000 [45:12<11:17:17,  2.31it/s]

tensor(641762., grad_fn=<SumBackward0>)


  6%|▌         | 6210/100000 [45:17<11:39:03,  2.24it/s]

tensor(641620., grad_fn=<SumBackward0>)


  6%|▌         | 6211/100000 [45:18<11:33:44,  2.25it/s]

tensor(641066., grad_fn=<SumBackward0>)


  6%|▌         | 6212/100000 [45:18<11:29:06,  2.27it/s]

tensor(641030., grad_fn=<SumBackward0>)


  6%|▌         | 6245/100000 [45:33<11:23:35,  2.29it/s]

tensor(640932., grad_fn=<SumBackward0>)


  6%|▌         | 6246/100000 [45:33<11:24:55,  2.28it/s]

tensor(640726., grad_fn=<SumBackward0>)


  6%|▌         | 6248/100000 [45:34<11:23:23,  2.29it/s]

tensor(640290., grad_fn=<SumBackward0>)


  6%|▌         | 6249/100000 [45:35<11:23:43,  2.29it/s]

tensor(640230., grad_fn=<SumBackward0>)


  6%|▋         | 6278/100000 [45:47<11:21:04,  2.29it/s]

tensor(639762., grad_fn=<SumBackward0>)


  6%|▋         | 6279/100000 [45:48<11:20:56,  2.29it/s]

tensor(639462., grad_fn=<SumBackward0>)


  6%|▋         | 6293/100000 [45:54<11:21:41,  2.29it/s]

tensor(638986., grad_fn=<SumBackward0>)


  6%|▋         | 6325/100000 [46:08<11:21:30,  2.29it/s]

tensor(638798., grad_fn=<SumBackward0>)


  6%|▋         | 6326/100000 [46:08<11:21:58,  2.29it/s]

tensor(638458., grad_fn=<SumBackward0>)


  6%|▋         | 6327/100000 [46:09<11:22:26,  2.29it/s]

tensor(638054., grad_fn=<SumBackward0>)


  6%|▋         | 6403/100000 [46:42<11:20:32,  2.29it/s]

tensor(637934., grad_fn=<SumBackward0>)


  6%|▋         | 6444/100000 [47:00<11:20:55,  2.29it/s]

tensor(637438., grad_fn=<SumBackward0>)


  6%|▋         | 6446/100000 [47:01<11:21:03,  2.29it/s]

tensor(637032., grad_fn=<SumBackward0>)


  6%|▋         | 6448/100000 [47:02<11:21:41,  2.29it/s]

tensor(636688., grad_fn=<SumBackward0>)


  7%|▋         | 6507/100000 [47:28<11:20:44,  2.29it/s]

tensor(636160., grad_fn=<SumBackward0>)


  7%|▋         | 6576/100000 [47:58<11:19:17,  2.29it/s]

tensor(634980., grad_fn=<SumBackward0>)


  7%|▋         | 6672/100000 [48:40<11:16:33,  2.30it/s]

tensor(634922., grad_fn=<SumBackward0>)


  7%|▋         | 6687/100000 [48:47<11:16:50,  2.30it/s]

tensor(634662., grad_fn=<SumBackward0>)


  7%|▋         | 6710/100000 [48:57<11:17:57,  2.29it/s]

tensor(634542., grad_fn=<SumBackward0>)


  7%|▋         | 6734/100000 [49:07<11:17:01,  2.30it/s]

tensor(634280., grad_fn=<SumBackward0>)


  7%|▋         | 6746/100000 [49:12<11:16:58,  2.30it/s]

tensor(634072., grad_fn=<SumBackward0>)


  7%|▋         | 6749/100000 [49:14<11:17:06,  2.30it/s]

tensor(633686., grad_fn=<SumBackward0>)


  7%|▋         | 6770/100000 [49:23<11:49:35,  2.19it/s]

tensor(633500., grad_fn=<SumBackward0>)


  7%|▋         | 6771/100000 [49:24<11:41:40,  2.21it/s]

tensor(633034., grad_fn=<SumBackward0>)


  7%|▋         | 6773/100000 [49:25<11:32:14,  2.24it/s]

tensor(632740., grad_fn=<SumBackward0>)


  7%|▋         | 6810/100000 [49:41<11:20:20,  2.28it/s]

tensor(632370., grad_fn=<SumBackward0>)


  7%|▋         | 6895/100000 [50:18<11:19:36,  2.28it/s]

tensor(632276., grad_fn=<SumBackward0>)


  7%|▋         | 6919/100000 [50:29<11:13:24,  2.30it/s]

tensor(632196., grad_fn=<SumBackward0>)


  7%|▋         | 6995/100000 [51:02<11:15:36,  2.29it/s]

tensor(631410., grad_fn=<SumBackward0>)


  7%|▋         | 6997/100000 [51:02<11:15:37,  2.29it/s]

tensor(630912., grad_fn=<SumBackward0>)


  7%|▋         | 6998/100000 [51:03<11:17:49,  2.29it/s]

tensor(630508., grad_fn=<SumBackward0>)


  7%|▋         | 7157/100000 [52:13<11:14:42,  2.29it/s]

tensor(630134., grad_fn=<SumBackward0>)


  7%|▋         | 7350/100000 [53:37<11:10:56,  2.30it/s]

tensor(629564., grad_fn=<SumBackward0>)


  7%|▋         | 7396/100000 [53:57<11:09:39,  2.30it/s]

tensor(629514., grad_fn=<SumBackward0>)


  7%|▋         | 7415/100000 [54:05<11:10:24,  2.30it/s]

tensor(629278., grad_fn=<SumBackward0>)


  7%|▋         | 7419/100000 [54:07<11:11:01,  2.30it/s]

tensor(628470., grad_fn=<SumBackward0>)


  7%|▋         | 7447/100000 [54:19<11:12:36,  2.29it/s]

tensor(628360., grad_fn=<SumBackward0>)


  8%|▊         | 7500/100000 [54:42<11:10:11,  2.30it/s]

tensor(627966., grad_fn=<SumBackward0>)


  8%|▊         | 7576/100000 [55:15<11:09:32,  2.30it/s]

tensor(627768., grad_fn=<SumBackward0>)


  8%|▊         | 7595/100000 [55:24<11:27:25,  2.24it/s]

tensor(627704., grad_fn=<SumBackward0>)


  8%|▊         | 7598/100000 [55:26<11:16:03,  2.28it/s]

tensor(627690., grad_fn=<SumBackward0>)


  8%|▊         | 7600/100000 [55:26<11:13:25,  2.29it/s]

tensor(627110., grad_fn=<SumBackward0>)


  8%|▊         | 7648/100000 [55:47<11:09:27,  2.30it/s]

tensor(626792., grad_fn=<SumBackward0>)


  9%|▉         | 8765/100000 [1:03:59<11:04:29,  2.29it/s]

tensor(626130., grad_fn=<SumBackward0>)


  9%|▉         | 8774/100000 [1:04:03<11:04:39,  2.29it/s]

tensor(625536., grad_fn=<SumBackward0>)


  9%|▉         | 8783/100000 [1:04:06<11:05:12,  2.29it/s]

tensor(625412., grad_fn=<SumBackward0>)


  9%|▉         | 8791/100000 [1:04:10<11:03:32,  2.29it/s]

tensor(625158., grad_fn=<SumBackward0>)


  9%|▉         | 8792/100000 [1:04:10<11:04:47,  2.29it/s]

tensor(624622., grad_fn=<SumBackward0>)


  9%|▉         | 8801/100000 [1:04:14<11:05:55,  2.28it/s]

tensor(624468., grad_fn=<SumBackward0>)


  9%|▉         | 8803/100000 [1:04:15<11:05:11,  2.29it/s]

tensor(623216., grad_fn=<SumBackward0>)


  9%|▉         | 8804/100000 [1:04:16<11:05:28,  2.28it/s]

tensor(623124., grad_fn=<SumBackward0>)


  9%|▉         | 8810/100000 [1:04:18<11:03:34,  2.29it/s]

tensor(622744., grad_fn=<SumBackward0>)


  9%|▉         | 8817/100000 [1:04:21<10:59:42,  2.30it/s]

tensor(622730., grad_fn=<SumBackward0>)


  9%|▉         | 8819/100000 [1:04:22<11:01:52,  2.30it/s]

tensor(621590., grad_fn=<SumBackward0>)


  9%|▉         | 8831/100000 [1:04:27<10:59:26,  2.30it/s]

tensor(620964., grad_fn=<SumBackward0>)


  9%|▉         | 8832/100000 [1:04:28<10:59:53,  2.30it/s]

tensor(620292., grad_fn=<SumBackward0>)


  9%|▉         | 8835/100000 [1:04:29<10:58:34,  2.31it/s]

tensor(619852., grad_fn=<SumBackward0>)


  9%|▉         | 8841/100000 [1:04:32<10:57:49,  2.31it/s]

tensor(619424., grad_fn=<SumBackward0>)


  9%|▉         | 8845/100000 [1:04:33<10:58:52,  2.31it/s]

tensor(619336., grad_fn=<SumBackward0>)


  9%|▉         | 8848/100000 [1:04:35<10:59:09,  2.30it/s]

tensor(618968., grad_fn=<SumBackward0>)


  9%|▉         | 8849/100000 [1:04:35<10:59:51,  2.30it/s]

tensor(618590., grad_fn=<SumBackward0>)


  9%|▉         | 8854/100000 [1:04:37<10:59:48,  2.30it/s]

tensor(617746., grad_fn=<SumBackward0>)


  9%|▉         | 8863/100000 [1:04:41<10:59:05,  2.30it/s]

tensor(617390., grad_fn=<SumBackward0>)


  9%|▉         | 8865/100000 [1:04:42<11:00:19,  2.30it/s]

tensor(617340., grad_fn=<SumBackward0>)


  9%|▉         | 8867/100000 [1:04:43<11:00:36,  2.30it/s]

tensor(617316., grad_fn=<SumBackward0>)


  9%|▉         | 8868/100000 [1:04:43<11:00:13,  2.30it/s]

tensor(616596., grad_fn=<SumBackward0>)


  9%|▉         | 8870/100000 [1:04:44<11:01:13,  2.30it/s]

tensor(616492., grad_fn=<SumBackward0>)


  9%|▉         | 8873/100000 [1:04:46<11:00:43,  2.30it/s]

tensor(616362., grad_fn=<SumBackward0>)


  9%|▉         | 8874/100000 [1:04:46<11:00:36,  2.30it/s]

tensor(616360., grad_fn=<SumBackward0>)


  9%|▉         | 8875/100000 [1:04:47<11:00:58,  2.30it/s]

tensor(615724., grad_fn=<SumBackward0>)


  9%|▉         | 8880/100000 [1:04:49<11:00:38,  2.30it/s]

tensor(614540., grad_fn=<SumBackward0>)


  9%|▉         | 8887/100000 [1:04:52<11:00:32,  2.30it/s]

tensor(614538., grad_fn=<SumBackward0>)


  9%|▉         | 8890/100000 [1:04:53<10:59:31,  2.30it/s]

tensor(614060., grad_fn=<SumBackward0>)


  9%|▉         | 8891/100000 [1:04:53<10:59:45,  2.30it/s]

tensor(613848., grad_fn=<SumBackward0>)


  9%|▉         | 8896/100000 [1:04:56<10:59:24,  2.30it/s]

tensor(613210., grad_fn=<SumBackward0>)


  9%|▉         | 8897/100000 [1:04:56<10:59:41,  2.30it/s]

tensor(612012., grad_fn=<SumBackward0>)


  9%|▉         | 8909/100000 [1:05:01<10:59:48,  2.30it/s]

tensor(611980., grad_fn=<SumBackward0>)


  9%|▉         | 8910/100000 [1:05:02<11:00:06,  2.30it/s]

tensor(611972., grad_fn=<SumBackward0>)


  9%|▉         | 8911/100000 [1:05:02<10:59:59,  2.30it/s]

tensor(611212., grad_fn=<SumBackward0>)


  9%|▉         | 8921/100000 [1:05:07<10:59:13,  2.30it/s]

tensor(610380., grad_fn=<SumBackward0>)


  9%|▉         | 8923/100000 [1:05:07<11:00:56,  2.30it/s]

tensor(609402., grad_fn=<SumBackward0>)


  9%|▉         | 8939/100000 [1:05:14<11:03:53,  2.29it/s]

tensor(608974., grad_fn=<SumBackward0>)


  9%|▉         | 8952/100000 [1:05:20<11:59:44,  2.11it/s]

tensor(608558., grad_fn=<SumBackward0>)


  9%|▉         | 8965/100000 [1:05:26<11:11:36,  2.26it/s]

tensor(607534., grad_fn=<SumBackward0>)


  9%|▉         | 8975/100000 [1:05:31<11:00:29,  2.30it/s]

tensor(607432., grad_fn=<SumBackward0>)


  9%|▉         | 8989/100000 [1:05:37<10:59:26,  2.30it/s]

tensor(607226., grad_fn=<SumBackward0>)


  9%|▉         | 9004/100000 [1:05:43<10:59:40,  2.30it/s]

tensor(606190., grad_fn=<SumBackward0>)


  9%|▉         | 9023/100000 [1:05:52<10:59:37,  2.30it/s]

tensor(605602., grad_fn=<SumBackward0>)


  9%|▉         | 9048/100000 [1:06:02<10:58:45,  2.30it/s]

tensor(605566., grad_fn=<SumBackward0>)


  9%|▉         | 9055/100000 [1:06:05<11:01:46,  2.29it/s]

tensor(605466., grad_fn=<SumBackward0>)


  9%|▉         | 9062/100000 [1:06:09<11:01:53,  2.29it/s]

tensor(605426., grad_fn=<SumBackward0>)


 10%|▉         | 9611/100000 [1:10:10<10:58:37,  2.29it/s]

tensor(604724., grad_fn=<SumBackward0>)


 10%|▉         | 9612/100000 [1:10:10<10:59:03,  2.29it/s]

tensor(604496., grad_fn=<SumBackward0>)


 10%|▉         | 9632/100000 [1:10:19<10:59:26,  2.28it/s]

tensor(604454., grad_fn=<SumBackward0>)


 10%|▉         | 9647/100000 [1:10:26<11:04:13,  2.27it/s]

tensor(604292., grad_fn=<SumBackward0>)


 10%|▉         | 9668/100000 [1:10:35<11:03:41,  2.27it/s]

tensor(603906., grad_fn=<SumBackward0>)


 10%|▉         | 9671/100000 [1:10:36<11:08:24,  2.25it/s]

tensor(603844., grad_fn=<SumBackward0>)


 10%|▉         | 9679/100000 [1:10:40<10:53:11,  2.30it/s]

tensor(602138., grad_fn=<SumBackward0>)


 23%|██▎       | 22809/100000 [2:46:45<9:20:55,  2.29it/s] 

tensor(601790., grad_fn=<SumBackward0>)


 23%|██▎       | 23029/100000 [2:48:21<9:20:08,  2.29it/s] 

tensor(601704., grad_fn=<SumBackward0>)


 23%|██▎       | 23351/100000 [2:50:43<9:16:36,  2.30it/s] 

tensor(601496., grad_fn=<SumBackward0>)


 24%|██▍       | 24031/100000 [2:55:42<9:10:11,  2.30it/s] 

tensor(601168., grad_fn=<SumBackward0>)


 24%|██▍       | 24165/100000 [2:56:40<9:16:44,  2.27it/s]

tensor(601052., grad_fn=<SumBackward0>)


 24%|██▍       | 24448/100000 [2:58:45<9:09:27,  2.29it/s] 

tensor(601006., grad_fn=<SumBackward0>)


 25%|██▍       | 24816/100000 [3:01:29<9:07:41,  2.29it/s] 

tensor(600888., grad_fn=<SumBackward0>)


 25%|██▍       | 24889/100000 [3:02:01<9:06:37,  2.29it/s]

tensor(600640., grad_fn=<SumBackward0>)


 25%|██▌       | 25470/100000 [3:06:15<9:01:24,  2.29it/s] 

tensor(600616., grad_fn=<SumBackward0>)


 26%|██▌       | 25535/100000 [3:06:44<9:01:08,  2.29it/s]

tensor(600110., grad_fn=<SumBackward0>)


 26%|██▌       | 25831/100000 [3:08:53<8:57:27,  2.30it/s] 

tensor(599886., grad_fn=<SumBackward0>)


 26%|██▌       | 25860/100000 [3:09:06<8:58:25,  2.29it/s]

tensor(599824., grad_fn=<SumBackward0>)


 26%|██▌       | 26016/100000 [3:10:14<9:03:01,  2.27it/s] 

tensor(599654., grad_fn=<SumBackward0>)


 27%|██▋       | 26909/100000 [3:16:45<8:48:59,  2.30it/s] 

tensor(599306., grad_fn=<SumBackward0>)


 27%|██▋       | 27050/100000 [3:17:47<8:51:07,  2.29it/s] 

tensor(599304., grad_fn=<SumBackward0>)


 27%|██▋       | 27078/100000 [3:17:59<8:51:07,  2.29it/s]

tensor(599216., grad_fn=<SumBackward0>)


 27%|██▋       | 27320/100000 [3:19:46<8:47:56,  2.29it/s] 

tensor(598790., grad_fn=<SumBackward0>)


 27%|██▋       | 27377/100000 [3:20:11<8:53:57,  2.27it/s]

tensor(598730., grad_fn=<SumBackward0>)


 28%|██▊       | 27967/100000 [3:24:30<8:40:55,  2.30it/s] 

tensor(598582., grad_fn=<SumBackward0>)


 28%|██▊       | 28043/100000 [3:25:03<8:43:45,  2.29it/s]

tensor(598494., grad_fn=<SumBackward0>)


 28%|██▊       | 28130/100000 [3:25:42<8:42:37,  2.29it/s] 

tensor(598490., grad_fn=<SumBackward0>)


 28%|██▊       | 28200/100000 [3:26:12<8:42:15,  2.29it/s]

tensor(598026., grad_fn=<SumBackward0>)


 28%|██▊       | 28211/100000 [3:26:17<8:42:21,  2.29it/s]

tensor(597304., grad_fn=<SumBackward0>)


 29%|██▉       | 28869/100000 [3:31:05<8:35:53,  2.30it/s] 

tensor(597282., grad_fn=<SumBackward0>)


 30%|██▉       | 29808/100000 [3:37:57<8:28:24,  2.30it/s] 

tensor(596866., grad_fn=<SumBackward0>)


 31%|███       | 30540/100000 [3:43:17<8:27:05,  2.28it/s] 

tensor(596788., grad_fn=<SumBackward0>)


 31%|███       | 30832/100000 [3:45:26<8:28:22,  2.27it/s] 

tensor(596252., grad_fn=<SumBackward0>)


 31%|███       | 31224/100000 [3:48:18<8:20:04,  2.29it/s]

tensor(596058., grad_fn=<SumBackward0>)


 32%|███▏      | 31861/100000 [3:52:57<8:16:48,  2.29it/s] 

tensor(595990., grad_fn=<SumBackward0>)


 33%|███▎      | 32543/100000 [3:57:57<8:14:18,  2.27it/s] 

tensor(595974., grad_fn=<SumBackward0>)


 33%|███▎      | 32969/100000 [4:01:10<9:03:26,  2.06it/s] 

tensor(595634., grad_fn=<SumBackward0>)


 33%|███▎      | 33044/100000 [4:01:43<8:08:16,  2.29it/s] 

tensor(595008., grad_fn=<SumBackward0>)


 33%|███▎      | 33081/100000 [4:01:59<8:07:41,  2.29it/s]

tensor(594932., grad_fn=<SumBackward0>)


 33%|███▎      | 33105/100000 [4:02:10<8:03:25,  2.31it/s]

tensor(594892., grad_fn=<SumBackward0>)


 34%|███▎      | 33589/100000 [4:05:41<7:59:22,  2.31it/s] 

tensor(594794., grad_fn=<SumBackward0>)


 34%|███▍      | 33806/100000 [4:07:22<9:54:24,  1.86it/s] 

tensor(594492., grad_fn=<SumBackward0>)


 34%|███▍      | 34294/100000 [4:11:11<8:03:36,  2.26it/s] 

tensor(594362., grad_fn=<SumBackward0>)


 35%|███▍      | 34840/100000 [4:15:13<7:59:28,  2.26it/s]

tensor(594324., grad_fn=<SumBackward0>)


 35%|███▍      | 34963/100000 [4:16:08<8:00:30,  2.26it/s]

tensor(594278., grad_fn=<SumBackward0>)


 35%|███▌      | 35011/100000 [4:16:29<8:05:45,  2.23it/s]

tensor(592966., grad_fn=<SumBackward0>)


 40%|███▉      | 39913/100000 [4:52:44<7:23:40,  2.26it/s] 

tensor(592838., grad_fn=<SumBackward0>)


 41%|████      | 40703/100000 [4:58:35<7:20:56,  2.24it/s]

tensor(592370., grad_fn=<SumBackward0>)


 41%|████▏     | 41470/100000 [5:04:17<7:09:34,  2.27it/s]

tensor(592186., grad_fn=<SumBackward0>)


 42%|████▏     | 41918/100000 [5:07:35<7:09:24,  2.25it/s]

tensor(592154., grad_fn=<SumBackward0>)


 42%|████▏     | 42256/100000 [5:10:05<7:04:06,  2.27it/s]

tensor(592036., grad_fn=<SumBackward0>)


 43%|████▎     | 42508/100000 [5:11:57<7:04:18,  2.26it/s]

tensor(591914., grad_fn=<SumBackward0>)


 43%|████▎     | 42540/100000 [5:12:11<7:04:08,  2.26it/s]

tensor(591902., grad_fn=<SumBackward0>)


 43%|████▎     | 42893/100000 [5:14:47<6:58:54,  2.27it/s]

tensor(591860., grad_fn=<SumBackward0>)


 43%|████▎     | 43010/100000 [5:15:39<6:57:52,  2.27it/s]

tensor(591610., grad_fn=<SumBackward0>)


 45%|████▍     | 44934/100000 [5:29:51<6:41:37,  2.29it/s]

tensor(591384., grad_fn=<SumBackward0>)


 45%|████▌     | 45034/100000 [5:30:35<6:41:00,  2.28it/s]

tensor(591308., grad_fn=<SumBackward0>)


 46%|████▌     | 46057/100000 [5:38:06<6:34:14,  2.28it/s]

tensor(591124., grad_fn=<SumBackward0>)


 47%|████▋     | 46875/100000 [5:44:08<6:33:16,  2.25it/s]

tensor(591024., grad_fn=<SumBackward0>)


 49%|████▊     | 48506/100000 [5:56:09<6:17:21,  2.27it/s]

tensor(590850., grad_fn=<SumBackward0>)


 49%|████▉     | 48786/100000 [5:58:12<6:14:57,  2.28it/s]

tensor(590688., grad_fn=<SumBackward0>)


 50%|████▉     | 49835/100000 [6:05:58<6:06:35,  2.28it/s]

tensor(589912., grad_fn=<SumBackward0>)


 53%|█████▎    | 52580/100000 [6:26:12<5:53:34,  2.24it/s]

tensor(589904., grad_fn=<SumBackward0>)


 53%|█████▎    | 52831/100000 [6:28:04<5:51:14,  2.24it/s]

tensor(589852., grad_fn=<SumBackward0>)


 53%|█████▎    | 53272/100000 [6:31:21<5:43:17,  2.27it/s]


KeyboardInterrupt: 