In [99]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import os, sys, pathlib, random, time, pickle, copy
from tqdm import tqdm

In [100]:
# Device used for all tensors and modules in this notebook.
# "cuda:1" was the originally hard-coded choice; it is still preferred when a
# second GPU exists, otherwise fall back to the first GPU and finally to the
# CPU so that the notebook also runs on machines with fewer devices.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

In [101]:
import torch.optim as optim
from torch.utils import data

In [102]:
import nflib
from nflib.flows import SequentialFlow, NormalizingFlow, ActNorm, ActNorm2D, AffineConstantFlow
import nflib.coupling_flows as icf
import nflib.inn_flow as inn
import nflib.res_flow as irf

### Datasets

In [103]:
# cifar_train = transforms.Compose([
#     transforms.RandomCrop(size=32, padding=4),
#     transforms.RandomHorizontalFlip(),
#     transforms.ToTensor(),
#     transforms.Normalize(
#         mean=[0.4914, 0.4822, 0.4465], # mean=[0.5071, 0.4865, 0.4409] for cifar100
#         std=[0.2023, 0.1994, 0.2010], # std=[0.2009, 0.1984, 0.2023] for cifar100
#     ),
# ])

# cifar_test = transforms.Compose([
#     transforms.ToTensor(),
#     transforms.Normalize(
#         mean=[0.4914, 0.4822, 0.4465], # mean=[0.5071, 0.4865, 0.4409] for cifar100
#         std=[0.2023, 0.1994, 0.2010], # std=[0.2009, 0.1984, 0.2023] for cifar100
#     ),
# ])

# train_dataset = datasets.CIFAR10(root="../../../../../_Datasets/cifar10/", train=True, download=True, transform=cifar_train)
# test_dataset = datasets.CIFAR10(root="../../../../../_Datasets/cifar10/", train=False, download=True, transform=cifar_test)

In [104]:
# Per-channel normalisation statistics, shared by the train and test pipelines.
CIFAR100_MEAN = [0.5071, 0.4865, 0.4409]
CIFAR100_STD = [0.2009, 0.1984, 0.2023]
# Dataset location, relative to the notebook's working directory.
CIFAR100_ROOT = "../../../../../_Datasets/cifar100/"

# Training pipeline: random 32x32 crop after 4-pixel padding and random
# horizontal flip, followed by tensor conversion and normalisation.
cifar_train = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),
])

# Test pipeline: no augmentation, only tensor conversion and normalisation.
cifar_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR100_MEAN, std=CIFAR100_STD),
])

train_dataset = datasets.CIFAR100(
    root=CIFAR100_ROOT, train=True, download=True, transform=cifar_train
)
test_dataset = datasets.CIFAR100(
    root=CIFAR100_ROOT, train=False, download=True, transform=cifar_test
)

Files already downloaded and verified
Files already downloaded and verified


In [105]:
# Both loaders use the same batch size and worker count; only the training
# split is shuffled.
loader_kwargs = dict(batch_size=128, num_workers=2)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, **loader_kwargs)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=False, **loader_kwargs)

In [106]:
# Fetch a single training batch for shape checks. The built-in next() is used
# because the iterator's .next() alias is a Python 2 leftover that recent
# PyTorch DataLoader iterators no longer provide.
xx, yy = next(iter(train_loader))

In [107]:
xx.shape

torch.Size([128, 3, 32, 32])

### Model

In [108]:
# Backbone definition: BatchNorm -> ConvResidualFlow blocks, separated by
# InvertiblePooling(2) stages. The channel arguments go 3 -> 12 -> 48 -> 192
# (x4 after each pooling) and the final feature map is declared to Flatten as
# (192, 4, 4), i.e. 3072 values per image.
# The commented-out ActNorm lines are the alternative normalisation that was
# swapped for BatchNorm in this experiment.
actf = irf.Swish
flows = [
    # --- stage 1: 3 channels ---
#     ActNorm2D(3),
    nn.BatchNorm2d(3),
    irf.ConvResidualFlow(3, [32, 32], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    # --- stage 2: 12 channels ---
#     ActNorm2D(12),
    nn.BatchNorm2d(12),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
#     ActNorm2D(12),
    nn.BatchNorm2d(12),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    # --- stage 3: 48 channels ---
#     ActNorm2D(48),
    nn.BatchNorm2d(48),
    irf.ConvResidualFlow(48, [128, 128], kernels=5, activation=actf),
#     ActNorm2D(48),
    nn.BatchNorm2d(48),
    irf.ConvResidualFlow(48, [128, 128], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    # --- stage 4: 192 channels ---
#     ActNorm2D(192),
    nn.BatchNorm2d(192),
    irf.ConvResidualFlow(192, [256, 256], kernels=5, activation=actf),
#     ActNorm2D(192),
    nn.BatchNorm2d(192),
    irf.ConvResidualFlow(192, [256, 256], kernels=5, activation=actf),
    nn.BatchNorm2d(192),
    # --- flatten to a 3072-d feature vector and normalise it ---
    irf.Flatten(img_size=(192, 4, 4)),
#     ActNorm(3072),
#     nn.BatchNorm1d(3072),
#     nn.Linear(3072, 3072, bias=False),
    nn.BatchNorm1d(3072),
        ]

# A plain nn.Sequential is used here; SequentialFlow is the alternative
# container from nflib that was tried and disabled.
# backbone = SequentialFlow(flows)
backbone = nn.Sequential(*flows)

In [109]:
# Move the backbone to the selected device; as the last expression of the cell
# this also displays the module tree.
backbone.to(device)

Sequential(
  (0): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): ConvResidualFlow(
    (resblock): ModuleList(
      (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (1): Swish()
      (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (3): Swish()
      (4): Conv2d(32, 3, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    )
  )
  (2): InvertiblePooling()
  (3): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): ConvResidualFlow(
    (resblock): ModuleList(
      (0): Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (1): Swish()
      (2): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (3): Swish()
      (4): Conv2d(64, 12, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    )
  )
  (5): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ConvResidualFlow(
    (resblock)

In [110]:
# Sanity check: the backbone's output shape next to the number of values in one
# input image (32*32*3), so the two sizes can be compared by eye.
backbone(xx.to(device)).shape, 32*32*3

(torch.Size([128, 3072]), 3072)

In [111]:
# Total number of scalar parameters in the backbone.
print("number of params: ", sum(p.numel() for p in backbone.parameters()))

number of params:  9947519


In [112]:
def get_children(module):
    """Return the leaf sub-modules of ``module`` in depth-first, left-to-right order.

    A module without children is treated as its own single leaf, so the
    returned list is never empty.
    """
    direct = list(module.children())
    if not direct:
        return [module]
    # Concatenate the leaves found under each direct child.
    return [leaf for sub in direct for leaf in get_children(sub)]

def remove_spectral_norm(model):
    """Try to strip spectral normalisation from every weighted leaf layer of ``model``.

    Each leaf module (as returned by ``get_children``) that has a ``weight``
    attribute is passed to ``nn.utils.remove_spectral_norm``. The layer and the
    outcome ("Success" / "Failed") are printed; a failure on one layer does not
    stop the remaining layers from being processed.

    Args:
        model: module whose leaf layers are visited; modified in place.
    """
    for child in get_children(model):
        if not hasattr(child, 'weight'):
            continue
        print("Yes", child)
        try:
            nn.utils.remove_spectral_norm(child)
        except Exception:
            # Best effort: report the layer and carry on. Catching ``Exception``
            # instead of using a bare ``except`` lets KeyboardInterrupt and
            # SystemExit propagate, so the loop can still be interrupted.
            print("Failed")
        else:
            print("Success")

In [113]:
# remove_spectral_norm(backbone)

In [114]:
# Smoke test: run the first training batch through the backbone, print the
# input and output shapes, then stop after that single batch.
for xx, yy in train_loader:
    tt = backbone(xx.to(device))
    print(xx.shape, tt.shape)
    break

torch.Size([128, 3, 32, 32]) torch.Size([128, 3072])


In [137]:
class ConnectedClassifier_Linear(nn.Module):
    """Classifier that softly assigns each input to one of ``num_sets`` linear units.

    The input is projected to ``num_sets`` logits, scaled by ``exp(inv_temp)``
    and soft-maxed into per-set confidences. The output is the
    confidence-weighted mixture of the learnable per-set class scores
    ``cls_weight``.
    """

    def __init__(self,input_dim, num_sets, output_dim, inv_temp=1):
        """
        Args:
            input_dim: size of each input feature vector.
            num_sets: number of linear units (sets).
            output_dim: number of classes.
            inv_temp: initial value of the learnable log-scale applied to the logits.
        """
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_sets = num_sets
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)

        self.linear = nn.Linear(input_dim, num_sets)

        # Random class scores where set k is biased towards class (k mod output_dim).
        scores = torch.randn(num_sets, output_dim)
        set_ids = torch.arange(num_sets)
        scores[set_ids, set_ids % output_dim] = 5
        self.cls_weight = nn.Parameter(scores)

        # Set confidences of the most recent forward pass (None until the first call).
        self.cls_confidence = None

    def forward(self, x, hard=False):
        """Return class scores for ``x``.

        Args:
            x: batch of feature vectors; the last dimension must be ``input_dim``.
            hard: if True the logits are multiplied by 1e5 before the softmax,
                which pushes the set assignment towards one-hot.

        Returns:
            ``confidence @ cls_weight``. The confidences sum to 1 per sample, but
            ``cls_weight`` is used as-is, so the output is a mixture of raw class
            scores rather than a probability vector.
        """
        logits = self.linear(x)*torch.exp(self.inv_temp)
        if hard:
            logits = logits*1e5
        confidence = torch.softmax(logits, dim=1)
        self.cls_confidence = confidence
        return confidence@self.cls_weight

In [138]:
class ConnectedClassifier_SoftKMeans(nn.Module):
    """Classifier that softly assigns each input to its nearest learnable centres.

    Distances from the input to ``num_sets`` centres are scaled, negated and
    soft-maxed into per-centre confidences. The output is the
    confidence-weighted mixture of the learnable per-centre class scores
    ``cls_weight``.
    """

    def __init__(self,input_dim, num_sets, output_dim, inv_temp=1):
        """
        Args:
            input_dim: size of each input feature vector.
            num_sets: number of centres.
            output_dim: number of classes.
            inv_temp: initial value of the learnable log-scale applied to the distances.
        """
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_sets = num_sets
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)

        # Centres start uniformly distributed in [-1, 1).
        self.centers = nn.Parameter(torch.rand(num_sets, input_dim)*2-1)

        # Random class scores where centre k is biased towards class (k mod output_dim).
        init_val = torch.randn(num_sets, output_dim)
        for ns in range(num_sets):
            init_val[ns, ns%output_dim] = 5
        self.cls_weight = nn.Parameter(init_val)

        # Centre confidences of the most recent forward pass (None until the first call).
        self.cls_confidence = None

    def forward(self, x, hard=False):
        """Return class scores for ``x``.

        Args:
            x: batch of feature vectors, shape ``(batch, input_dim)``.
            hard: if True the negated distances are multiplied by 1e5 before the
                softmax, which pushes the assignment towards the nearest centre only.

        Returns:
            ``confidence @ cls_weight``. The confidences sum to 1 per sample, but
            ``cls_weight`` is used as-is, so the output is a mixture of raw class
            scores rather than a probability vector.
        """
        dists = torch.cdist(x, self.centers)
        ### correction to make diagonal of unit square 1 in nD space
        dists = dists/np.sqrt(self.input_dim)
        dists = dists*torch.exp(self.inv_temp)
        if hard:
            x = torch.softmax(-dists*1e5, dim=1)
        else:
            x = torch.softmax(-dists, dim=1)
        self.cls_confidence = x
        c = self.cls_weight
        return x@c

    def set_centroid_to_data_randomly(self, data_loader, model):
        """Initialise every centre from the features of one sample of ``data_loader``.

        Batches are drawn from ``data_loader`` until at least as many samples as
        centres have been seen. Centre ``k`` becomes ``model(sample_k)`` and row
        ``k`` of ``cls_weight`` is re-drawn at random with the entry for the
        sample's label set to 5.

        Args:
            data_loader: iterable of ``(inputs, labels)`` tensor batches.
            model: feature extractor producing ``input_dim``-sized vectors. It is
                called in whatever mode (train/eval) it is currently in.

        Raises:
            ValueError: if ``data_loader`` yields fewer samples than there are
                centres. Previously ``centers`` was silently shrunk, leaving it
                inconsistent with ``cls_weight``.
        """
        num_centers = self.centers.shape[0]

        # Run the extractor on the device it actually lives on instead of relying
        # on a notebook-level ``device`` global.
        try:
            model_device = next(model.parameters()).device
        except (AttributeError, StopIteration):
            model_device = self.centers.device

        feats, labels = [], []
        count = 0
        with torch.no_grad():  # only the values are needed, not the graph
            for xx, yy in data_loader:
                feats.append(model(xx.to(model_device)).detach().cpu())
                labels.append(yy.cpu())
                count += len(xx)
                if count >= num_centers:
                    break

        if count < num_centers:
            raise ValueError(
                f"data_loader provided {count} samples but {num_centers} centers must be initialised"
            )

        feats = torch.cat(feats, dim=0)[:num_centers]
        labels = torch.cat(labels, dim=0)[:num_centers].long()

        self.centers.data = feats.to(self.centers.device)

        # Build the scores on the CPU (same random stream as the constructor),
        # then move them next to the existing parameter.
        init_val = torch.randn(self.num_sets, self.output_dim)#/self.output_dim
        init_val[torch.arange(num_centers), labels] = 5.
        self.cls_weight.data = init_val.to(self.cls_weight.device)

In [139]:
# Classification head placed on top of the 3072-d backbone features.
# Exactly one assignment below is active; the others are alternative
# configurations kept for reference.
#### for cifar 10
# classifier = ConnectedClassifier_SoftKMeans(3072, 100, 10)
# classifier = ConnectedClassifier_Linear(3072, 100, 10)

#### for cifar 100
# classifier = ConnectedClassifier_SoftKMeans(3072, 500, 100, inv_temp=0.8)
# Active choice: 500 linear sets, 100 classes; inv_temp=0 makes the initial
# logit scale exp(0) = 1.
classifier = ConnectedClassifier_Linear(3072, 500, 100, inv_temp=0)
# classifier = ConnectedClassifier_Linear(3072, 500, 100, )

#### for MLP based classification
# classifier = nn.Sequential(nn.Linear(3072, 500), nn.SELU(), nn.Linear(500, 100))

In [140]:
# Move the classifier head to the same device as the backbone.
classifier = classifier.to(device)

In [141]:
# Parameter counts: first line is the backbone, second line is the classifier
# head (both are printed with the same label).
print("number of params: ", sum(p.numel() for p in backbone.parameters()))
print("number of params: ", sum(p.numel() for p in classifier.parameters()))

number of params:  9947519
number of params:  1586501


In [142]:
### Debug the classifier head: feed 10 random 3072-d vectors and keep the
### outputs in `yout` for inspection in the following cells.
yout = classifier(torch.randn(10, 3072).to(device))

In [143]:
# Index of the debug sample inspected here and in the next cell.
i = 0
# Class scores of sample i in ascending order.
yout[i].sort()

torch.return_types.sort(
values=tensor([-9.0281e-02, -6.6435e-02, -5.2912e-02, -4.1353e-02, -3.7196e-02,
        -3.5898e-02, -3.5250e-02, -3.3013e-02, -2.4511e-02, -2.1625e-02,
        -2.0458e-02, -1.9775e-02, -1.4160e-02, -1.2927e-02, -1.0691e-02,
        -8.9936e-03, -5.7959e-03, -5.2504e-03, -6.0032e-04,  4.7937e-05,
         1.0715e-03,  2.5291e-03,  3.1104e-03,  4.5176e-03,  4.7441e-03,
         5.8861e-03,  8.9973e-03,  1.2339e-02,  1.4333e-02,  1.5184e-02,
         1.5313e-02,  1.6670e-02,  2.0348e-02,  2.1683e-02,  2.2022e-02,
         2.4291e-02,  2.7500e-02,  2.8481e-02,  3.0305e-02,  3.0870e-02,
         3.1302e-02,  3.2249e-02,  3.5992e-02,  3.6122e-02,  3.7241e-02,
         3.8588e-02,  4.1107e-02,  4.1294e-02,  4.3851e-02,  4.4012e-02,
         4.4367e-02,  4.5078e-02,  4.8360e-02,  5.2189e-02,  5.3951e-02,
         5.6542e-02,  5.8199e-02,  5.9146e-02,  6.0921e-02,  6.2457e-02,
         6.2961e-02,  6.3202e-02,  6.4325e-02,  6.5087e-02,  6.6035e-02,
         6.6265e-02

In [144]:
# Per-set confidences of sample i from the forward pass above, in ascending order.
classifier.cls_confidence[i].sort()

torch.return_types.sort(
values=tensor([0.0003, 0.0003, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004, 0.0005,
        0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0006, 0.0006,
        0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0006, 0.0007,
        0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007,
        0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0008, 0.0008, 0.0008,
        0.0008, 0.0008, 0.0008, 0.0008, 0.0008, 0.0008, 0.0008, 0.0008, 0.0008,
        0.0008, 0.0008, 0.0008, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009,
        0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009,
        0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009,
        0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010,
        0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010,
        0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0010, 0.0011,
        

In [145]:
# classifier.set_centroid_to_data_randomly(train_loader, backbone)

In [146]:
# Full model: backbone features followed by the classifier head.
model = nn.Sequential(backbone, classifier).to(device)

In [147]:
# Total number of scalar parameters in the combined model.
print("number of params: ", sum(p.numel() for p in model.parameters()))

number of params:  11534020


## Training

In [148]:
 ## Experiment variants tried while searching for a good classifier / output
 ## distribution. `model_name` is used as the checkpoint file name under ./models.
# model_name = 'c100_inv_v1'## using linear+500 units
# model_name = 'c100_inv_v2' ## using dists+500 units
# model_name = 'c100_inv_v3' ## using dists+3072 units
# model_name = 'c100_inv_v4' ## using linear+3072+unnormalized output units
# model_name = 'c100_inv_v5' ## using dists+500+unnormalized output units
model_name = 'c100_inv_v6' ## using linear+500+unnormalized output units

In [149]:
# model_name = 'c10_inv_v0'
# model_name = 'c10_ord_v0'
# model_name = 'c100_inv_v0'
# model_name = 'c100_ord_v0'

In [150]:
# Number of training epochs; also used as the cosine schedule length (T_max).
EPOCHS = 200
criterion = nn.CrossEntropyLoss()
# Alternative optimiser (SGD with momentum and weight decay), kept for reference:
# optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
#                       momentum=0.9, weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
# Cosine learning-rate annealing; stepped once per epoch in the training loop.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

In [151]:
## Following is copied from 
### https://github.com/kuangliu/pytorch-cifar/blob/master/main.py

# Training
def train(epoch):
    """Run one optimisation pass over ``train_loader`` and print a summary line.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``device``. The printed loss is the mean of the per-batch losses and the
    accuracy is computed over all samples seen in the epoch.

    Args:
        epoch: epoch index, used only in the printed summary.
    """
    model.train()
    train_loss, correct, total = 0, 0, 0
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Running statistics for the epoch summary.
        train_loss += loss.item()
        predicted = outputs.max(1).indices
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    print(f"[Train] {epoch} Loss: {train_loss/(batch_idx+1):.3f} | Acc: {100.*correct/total:.3f} {correct}/{total}")

In [152]:
# Best test accuracy (in percent) seen so far; -1 means "nothing evaluated yet".
best_acc = -1
def test(epoch):
    """Evaluate ``model`` on ``test_loader`` and checkpoint it when accuracy improves.

    Uses the notebook-level ``model``, ``criterion``, ``device`` and
    ``model_name``. Prints the mean per-batch loss and the accuracy; when the
    accuracy exceeds the global ``best_acc`` the model weights, accuracy and
    epoch are saved to ``./models/{model_name}.pth`` and ``best_acc`` is updated.

    Args:
        epoch: epoch index, printed and stored in the checkpoint.
    """
    global best_acc
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    print(f"[Test] {epoch} Loss: {test_loss/(batch_idx+1):.3f} | Acc: {100.*correct/total:.3f} {correct}/{total}")

    # Save checkpoint.
    acc = 100.*correct/total
    if acc > best_acc:
        print('Saving..')
        state = {
            'model': model.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        # Create the directory if needed; exist_ok avoids the check-then-create race.
        os.makedirs('models', exist_ok=True)
        torch.save(state, f'./models/{model_name}.pth')
        best_acc = acc

In [153]:
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
resume = False

if resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('./models'), 'Error: no checkpoint directory found!'
    # map_location remaps tensors that were saved on another device (for example
    # a GPU index that does not exist on this machine) onto the current device.
    checkpoint = torch.load(f'./models/{model_name}.pth', map_location=device)
    model.load_state_dict(checkpoint['model'])
    best_acc = checkpoint['acc']
    # NOTE(review): the checkpoint stores only model weights, accuracy and epoch,
    # so optimizer and scheduler state restart from scratch, and training resumes
    # at the saved epoch index itself - confirm this is intended.
    start_epoch = checkpoint['epoch']

In [158]:
### Train the full model (backbone + classifier) end to end

for epoch in range(start_epoch, start_epoch+EPOCHS): ## EPOCHS epochs, starting at start_epoch
    train(epoch)
    test(epoch)
    # Advance the cosine learning-rate schedule once per epoch.
    scheduler.step()

100%|██████████| 391/391 [00:42<00:00,  9.28it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 0 Loss: 3.181 | Acc: 41.898 20949/50000


100%|██████████| 79/79 [00:02<00:00, 26.70it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 0 Loss: 3.011 | Acc: 44.100 4410/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.25it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 1 Loss: 2.644 | Acc: 54.146 27073/50000


100%|██████████| 79/79 [00:02<00:00, 26.81it/s]


[Test] 1 Loss: 2.853 | Acc: 47.470 4747/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.25it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 2 Loss: 2.465 | Acc: 57.926 28963/50000


100%|██████████| 79/79 [00:02<00:00, 26.57it/s]


[Test] 2 Loss: 2.793 | Acc: 49.130 4913/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.23it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 3 Loss: 2.344 | Acc: 60.440 30220/50000


100%|██████████| 79/79 [00:02<00:00, 26.50it/s]


[Test] 3 Loss: 2.759 | Acc: 49.510 4951/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 4 Loss: 2.263 | Acc: 61.686 30843/50000


100%|██████████| 79/79 [00:02<00:00, 26.36it/s]


[Test] 4 Loss: 2.711 | Acc: 50.460 5046/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 5 Loss: 2.193 | Acc: 63.240 31620/50000


100%|██████████| 79/79 [00:02<00:00, 26.58it/s]


[Test] 5 Loss: 2.689 | Acc: 51.040 5104/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 6 Loss: 2.124 | Acc: 64.348 32174/50000


100%|██████████| 79/79 [00:02<00:00, 26.53it/s]


[Test] 6 Loss: 2.670 | Acc: 51.400 5140/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.17it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 7 Loss: 2.067 | Acc: 65.324 32662/50000


100%|██████████| 79/79 [00:02<00:00, 26.40it/s]


[Test] 7 Loss: 2.657 | Acc: 51.440 5144/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.15it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 8 Loss: 2.019 | Acc: 66.080 33040/50000


100%|██████████| 79/79 [00:02<00:00, 26.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 8 Loss: 2.636 | Acc: 51.420 5142/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 9 Loss: 1.972 | Acc: 66.976 33488/50000


100%|██████████| 79/79 [00:02<00:00, 26.39it/s]


[Test] 9 Loss: 2.620 | Acc: 51.920 5192/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 10 Loss: 1.927 | Acc: 67.774 33887/50000


100%|██████████| 79/79 [00:02<00:00, 26.39it/s]


[Test] 10 Loss: 2.621 | Acc: 52.150 5215/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 11 Loss: 1.878 | Acc: 68.596 34298/50000


100%|██████████| 79/79 [00:02<00:00, 26.38it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 11 Loss: 2.618 | Acc: 52.110 5211/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 12 Loss: 1.840 | Acc: 69.314 34657/50000


100%|██████████| 79/79 [00:02<00:00, 26.45it/s]


[Test] 12 Loss: 2.595 | Acc: 52.670 5267/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 13 Loss: 1.809 | Acc: 69.844 34922/50000


100%|██████████| 79/79 [00:02<00:00, 26.56it/s]


[Test] 13 Loss: 2.566 | Acc: 53.180 5318/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.18it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 14 Loss: 1.767 | Acc: 70.560 35280/50000


100%|██████████| 79/79 [00:02<00:00, 26.62it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 14 Loss: 2.566 | Acc: 52.940 5294/10000


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 15 Loss: 1.732 | Acc: 71.126 35563/50000


100%|██████████| 79/79 [00:02<00:00, 26.36it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 15 Loss: 2.560 | Acc: 52.870 5287/10000


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 16 Loss: 1.698 | Acc: 71.758 35879/50000


100%|██████████| 79/79 [00:02<00:00, 26.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 16 Loss: 2.557 | Acc: 53.010 5301/10000


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 17 Loss: 1.673 | Acc: 72.170 36085/50000


100%|██████████| 79/79 [00:02<00:00, 26.45it/s]


[Test] 17 Loss: 2.542 | Acc: 53.290 5329/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 18 Loss: 1.629 | Acc: 73.042 36521/50000


100%|██████████| 79/79 [00:02<00:00, 26.43it/s]


[Test] 18 Loss: 2.520 | Acc: 53.710 5371/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 19 Loss: 1.605 | Acc: 73.288 36644/50000


100%|██████████| 79/79 [00:03<00:00, 26.14it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 19 Loss: 2.545 | Acc: 53.300 5330/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 20 Loss: 1.571 | Acc: 73.886 36943/50000


100%|██████████| 79/79 [00:03<00:00, 26.24it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 20 Loss: 2.515 | Acc: 53.470 5347/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 21 Loss: 1.550 | Acc: 74.316 37158/50000


100%|██████████| 79/79 [00:02<00:00, 26.51it/s]


[Test] 21 Loss: 2.500 | Acc: 53.850 5385/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 22 Loss: 1.509 | Acc: 74.954 37477/50000


100%|██████████| 79/79 [00:02<00:00, 26.35it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 22 Loss: 2.522 | Acc: 53.220 5322/10000


100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 23 Loss: 1.483 | Acc: 75.428 37714/50000


100%|██████████| 79/79 [00:02<00:00, 26.52it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 23 Loss: 2.522 | Acc: 53.740 5374/10000


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 24 Loss: 1.469 | Acc: 75.606 37803/50000


100%|██████████| 79/79 [00:02<00:00, 26.58it/s]


[Test] 24 Loss: 2.489 | Acc: 54.140 5414/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 25 Loss: 1.439 | Acc: 76.062 38031/50000


100%|██████████| 79/79 [00:02<00:00, 26.69it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 25 Loss: 2.509 | Acc: 53.600 5360/10000


100%|██████████| 391/391 [00:42<00:00,  9.18it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 26 Loss: 1.416 | Acc: 76.432 38216/50000


100%|██████████| 79/79 [00:02<00:00, 26.69it/s]


[Test] 26 Loss: 2.469 | Acc: 54.370 5437/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.20it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 27 Loss: 1.388 | Acc: 76.986 38493/50000


100%|██████████| 79/79 [00:03<00:00, 26.12it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 27 Loss: 2.488 | Acc: 54.370 5437/10000


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 28 Loss: 1.355 | Acc: 77.620 38810/50000


100%|██████████| 79/79 [00:02<00:00, 26.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 28 Loss: 2.505 | Acc: 53.840 5384/10000


100%|██████████| 391/391 [00:42<00:00,  9.15it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 29 Loss: 1.336 | Acc: 77.784 38892/50000


100%|██████████| 79/79 [00:02<00:00, 26.46it/s]


[Test] 29 Loss: 2.471 | Acc: 54.500 5450/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.15it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 30 Loss: 1.320 | Acc: 77.982 38991/50000


100%|██████████| 79/79 [00:02<00:00, 26.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 30 Loss: 2.475 | Acc: 54.330 5433/10000


100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 31 Loss: 1.290 | Acc: 78.536 39268/50000


100%|██████████| 79/79 [00:02<00:00, 26.36it/s]


[Test] 31 Loss: 2.454 | Acc: 54.830 5483/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 32 Loss: 1.274 | Acc: 78.894 39447/50000


100%|██████████| 79/79 [00:02<00:00, 26.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 32 Loss: 2.449 | Acc: 54.670 5467/10000


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 33 Loss: 1.243 | Acc: 79.426 39713/50000


100%|██████████| 79/79 [00:02<00:00, 26.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 33 Loss: 2.470 | Acc: 54.130 5413/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 34 Loss: 1.232 | Acc: 79.452 39726/50000


100%|██████████| 79/79 [00:02<00:00, 26.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 34 Loss: 2.471 | Acc: 54.180 5418/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 35 Loss: 1.199 | Acc: 80.120 40060/50000


100%|██████████| 79/79 [00:02<00:00, 26.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 35 Loss: 2.450 | Acc: 54.420 5442/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 36 Loss: 1.196 | Acc: 80.068 40034/50000


100%|██████████| 79/79 [00:02<00:00, 26.34it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 36 Loss: 2.447 | Acc: 54.540 5454/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 37 Loss: 1.178 | Acc: 80.276 40138/50000


100%|██████████| 79/79 [00:02<00:00, 26.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 37 Loss: 2.454 | Acc: 54.260 5426/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 38 Loss: 1.155 | Acc: 80.678 40339/50000


100%|██████████| 79/79 [00:02<00:00, 26.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 38 Loss: 2.446 | Acc: 54.310 5431/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 39 Loss: 1.130 | Acc: 81.144 40572/50000


100%|██████████| 79/79 [00:03<00:00, 26.22it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 39 Loss: 2.462 | Acc: 54.290 5429/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 40 Loss: 1.107 | Acc: 81.626 40813/50000


100%|██████████| 79/79 [00:02<00:00, 26.61it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 40 Loss: 2.467 | Acc: 53.960 5396/10000


100%|██████████| 391/391 [00:42<00:00,  9.18it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 41 Loss: 1.092 | Acc: 81.906 40953/50000


100%|██████████| 79/79 [00:02<00:00, 26.60it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 41 Loss: 2.444 | Acc: 54.340 5434/10000


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 42 Loss: 1.083 | Acc: 81.908 40954/50000


100%|██████████| 79/79 [00:02<00:00, 26.64it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 42 Loss: 2.460 | Acc: 53.990 5399/10000


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 43 Loss: 1.062 | Acc: 82.292 41146/50000


100%|██████████| 79/79 [00:03<00:00, 26.32it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 43 Loss: 2.449 | Acc: 54.390 5439/10000


100%|██████████| 391/391 [00:42<00:00,  9.15it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 44 Loss: 1.039 | Acc: 82.716 41358/50000


100%|██████████| 79/79 [00:02<00:00, 26.66it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 44 Loss: 2.440 | Acc: 54.680 5468/10000


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 45 Loss: 1.033 | Acc: 82.760 41380/50000


100%|██████████| 79/79 [00:02<00:00, 26.68it/s]


[Test] 45 Loss: 2.417 | Acc: 55.000 5500/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.17it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 46 Loss: 1.013 | Acc: 83.132 41566/50000


100%|██████████| 79/79 [00:02<00:00, 26.38it/s]


[Test] 46 Loss: 2.416 | Acc: 55.030 5503/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.15it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 47 Loss: 1.002 | Acc: 83.242 41621/50000


100%|██████████| 79/79 [00:03<00:00, 26.31it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 47 Loss: 2.432 | Acc: 54.580 5458/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 48 Loss: 0.986 | Acc: 83.450 41725/50000


100%|██████████| 79/79 [00:02<00:00, 26.54it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 48 Loss: 2.422 | Acc: 54.700 5470/10000


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 49 Loss: 0.964 | Acc: 83.908 41954/50000


100%|██████████| 79/79 [00:02<00:00, 26.41it/s]


[Test] 49 Loss: 2.410 | Acc: 55.160 5516/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 50 Loss: 0.956 | Acc: 83.966 41983/50000


100%|██████████| 79/79 [00:02<00:00, 26.39it/s]


[Test] 50 Loss: 2.396 | Acc: 55.370 5537/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 51 Loss: 0.945 | Acc: 84.078 42039/50000


100%|██████████| 79/79 [00:02<00:00, 26.46it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 51 Loss: 2.423 | Acc: 54.910 5491/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 52 Loss: 0.926 | Acc: 84.510 42255/50000


100%|██████████| 79/79 [00:02<00:00, 26.35it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 52 Loss: 2.413 | Acc: 55.230 5523/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 53 Loss: 0.912 | Acc: 84.718 42359/50000


100%|██████████| 79/79 [00:02<00:00, 26.37it/s]


[Test] 53 Loss: 2.391 | Acc: 55.450 5545/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 54 Loss: 0.902 | Acc: 84.872 42436/50000


100%|██████████| 79/79 [00:02<00:00, 26.34it/s]


[Test] 54 Loss: 2.385 | Acc: 55.610 5561/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 55 Loss: 0.883 | Acc: 85.262 42631/50000


100%|██████████| 79/79 [00:03<00:00, 26.17it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 55 Loss: 2.401 | Acc: 55.200 5520/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 56 Loss: 0.878 | Acc: 85.288 42644/50000


100%|██████████| 79/79 [00:03<00:00, 26.26it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 56 Loss: 2.378 | Acc: 55.500 5550/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 57 Loss: 0.864 | Acc: 85.474 42737/50000


100%|██████████| 79/79 [00:02<00:00, 26.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 57 Loss: 2.386 | Acc: 55.610 5561/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 58 Loss: 0.847 | Acc: 85.722 42861/50000


100%|██████████| 79/79 [00:03<00:00, 26.29it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 58 Loss: 2.405 | Acc: 55.340 5534/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 59 Loss: 0.837 | Acc: 85.992 42996/50000


100%|██████████| 79/79 [00:03<00:00, 26.28it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 59 Loss: 2.384 | Acc: 55.480 5548/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 60 Loss: 0.822 | Acc: 86.210 43105/50000


100%|██████████| 79/79 [00:03<00:00, 26.15it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 60 Loss: 2.382 | Acc: 55.300 5530/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 61 Loss: 0.812 | Acc: 86.274 43137/50000


100%|██████████| 79/79 [00:03<00:00, 25.80it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 61 Loss: 2.398 | Acc: 55.540 5554/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 62 Loss: 0.809 | Acc: 86.282 43141/50000


100%|██████████| 79/79 [00:03<00:00, 26.25it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 62 Loss: 2.401 | Acc: 55.280 5528/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 63 Loss: 0.792 | Acc: 86.702 43351/50000


100%|██████████| 79/79 [00:03<00:00, 26.29it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 63 Loss: 2.412 | Acc: 55.180 5518/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 64 Loss: 0.781 | Acc: 86.840 43420/50000


100%|██████████| 79/79 [00:02<00:00, 26.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 64 Loss: 2.383 | Acc: 55.540 5554/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 65 Loss: 0.777 | Acc: 86.878 43439/50000


100%|██████████| 79/79 [00:02<00:00, 26.35it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 65 Loss: 2.384 | Acc: 55.260 5526/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 66 Loss: 0.766 | Acc: 87.018 43509/50000


100%|██████████| 79/79 [00:03<00:00, 26.18it/s]


[Test] 66 Loss: 2.381 | Acc: 55.660 5566/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 67 Loss: 0.752 | Acc: 87.324 43662/50000


100%|██████████| 79/79 [00:03<00:00, 26.28it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 67 Loss: 2.373 | Acc: 55.460 5546/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 68 Loss: 0.738 | Acc: 87.458 43729/50000


100%|██████████| 79/79 [00:03<00:00, 26.33it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 68 Loss: 2.393 | Acc: 55.060 5506/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 69 Loss: 0.732 | Acc: 87.646 43823/50000


100%|██████████| 79/79 [00:03<00:00, 26.07it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 69 Loss: 2.408 | Acc: 55.160 5516/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 70 Loss: 0.722 | Acc: 87.720 43860/50000


100%|██████████| 79/79 [00:03<00:00, 26.28it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 70 Loss: 2.391 | Acc: 55.260 5526/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 71 Loss: 0.710 | Acc: 88.012 44006/50000


100%|██████████| 79/79 [00:03<00:00, 26.19it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 71 Loss: 2.377 | Acc: 55.530 5553/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 72 Loss: 0.700 | Acc: 88.106 44053/50000


100%|██████████| 79/79 [00:02<00:00, 26.49it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 72 Loss: 2.386 | Acc: 55.210 5521/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 73 Loss: 0.696 | Acc: 88.222 44111/50000


100%|██████████| 79/79 [00:03<00:00, 26.23it/s]


[Test] 73 Loss: 2.363 | Acc: 56.250 5625/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 74 Loss: 0.691 | Acc: 88.250 44125/50000


100%|██████████| 79/79 [00:02<00:00, 26.34it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 74 Loss: 2.393 | Acc: 55.530 5553/10000


100%|██████████| 391/391 [00:42<00:00,  9.15it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 75 Loss: 0.673 | Acc: 88.624 44312/50000


100%|██████████| 79/79 [00:02<00:00, 26.68it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 75 Loss: 2.372 | Acc: 55.650 5565/10000


100%|██████████| 391/391 [00:42<00:00,  9.17it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 76 Loss: 0.666 | Acc: 88.822 44411/50000


100%|██████████| 79/79 [00:03<00:00, 26.24it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 76 Loss: 2.382 | Acc: 55.670 5567/10000


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 77 Loss: 0.656 | Acc: 88.942 44471/50000


100%|██████████| 79/79 [00:02<00:00, 26.63it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 77 Loss: 2.387 | Acc: 55.500 5550/10000


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 78 Loss: 0.655 | Acc: 88.910 44455/50000


100%|██████████| 79/79 [00:02<00:00, 26.69it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 78 Loss: 2.369 | Acc: 55.720 5572/10000


100%|██████████| 391/391 [00:42<00:00,  9.18it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 79 Loss: 0.647 | Acc: 89.032 44516/50000


100%|██████████| 79/79 [00:02<00:00, 26.68it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 79 Loss: 2.377 | Acc: 55.720 5572/10000


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 80 Loss: 0.641 | Acc: 89.156 44578/50000


100%|██████████| 79/79 [00:02<00:00, 26.62it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 80 Loss: 2.366 | Acc: 55.940 5594/10000


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 81 Loss: 0.628 | Acc: 89.378 44689/50000


100%|██████████| 79/79 [00:02<00:00, 26.61it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 81 Loss: 2.368 | Acc: 56.080 5608/10000


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 82 Loss: 0.620 | Acc: 89.472 44736/50000


100%|██████████| 79/79 [00:02<00:00, 26.65it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 82 Loss: 2.393 | Acc: 55.460 5546/10000


100%|██████████| 391/391 [00:42<00:00,  9.20it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 83 Loss: 0.610 | Acc: 89.624 44812/50000


100%|██████████| 79/79 [00:02<00:00, 26.60it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 83 Loss: 2.370 | Acc: 55.350 5535/10000


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 84 Loss: 0.606 | Acc: 89.682 44841/50000


100%|██████████| 79/79 [00:02<00:00, 26.60it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 84 Loss: 2.359 | Acc: 55.620 5562/10000


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 85 Loss: 0.589 | Acc: 90.048 45024/50000


100%|██████████| 79/79 [00:02<00:00, 26.63it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 85 Loss: 2.372 | Acc: 55.790 5579/10000


100%|██████████| 391/391 [00:42<00:00,  9.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 86 Loss: 0.588 | Acc: 89.984 44992/50000


100%|██████████| 79/79 [00:02<00:00, 26.68it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 86 Loss: 2.372 | Acc: 55.820 5582/10000


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 87 Loss: 0.586 | Acc: 90.064 45032/50000


100%|██████████| 79/79 [00:02<00:00, 26.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 87 Loss: 2.364 | Acc: 55.870 5587/10000


100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 88 Loss: 0.580 | Acc: 90.150 45075/50000


100%|██████████| 79/79 [00:02<00:00, 26.52it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 88 Loss: 2.375 | Acc: 55.780 5578/10000


100%|██████████| 391/391 [00:42<00:00,  9.18it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 89 Loss: 0.569 | Acc: 90.308 45154/50000


100%|██████████| 79/79 [00:02<00:00, 26.67it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 89 Loss: 2.383 | Acc: 55.630 5563/10000


100%|██████████| 391/391 [00:42<00:00,  9.19it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 90 Loss: 0.564 | Acc: 90.386 45193/50000


100%|██████████| 79/79 [00:02<00:00, 26.56it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 90 Loss: 2.381 | Acc: 55.670 5567/10000


100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 91 Loss: 0.560 | Acc: 90.450 45225/50000


100%|██████████| 79/79 [00:03<00:00, 26.27it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 91 Loss: 2.391 | Acc: 55.410 5541/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 92 Loss: 0.550 | Acc: 90.628 45314/50000


100%|██████████| 79/79 [00:02<00:00, 26.35it/s]


[Test] 92 Loss: 2.362 | Acc: 56.330 5633/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 93 Loss: 0.547 | Acc: 90.670 45335/50000


100%|██████████| 79/79 [00:03<00:00, 26.27it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 93 Loss: 2.372 | Acc: 55.840 5584/10000


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 94 Loss: 0.544 | Acc: 90.720 45360/50000


100%|██████████| 79/79 [00:02<00:00, 26.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 94 Loss: 2.366 | Acc: 55.890 5589/10000


100%|██████████| 391/391 [00:42<00:00,  9.14it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 95 Loss: 0.533 | Acc: 90.920 45460/50000


100%|██████████| 79/79 [00:02<00:00, 26.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 95 Loss: 2.380 | Acc: 55.470 5547/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 96 Loss: 0.526 | Acc: 90.966 45483/50000


100%|██████████| 79/79 [00:03<00:00, 26.30it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 96 Loss: 2.380 | Acc: 55.790 5579/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 97 Loss: 0.522 | Acc: 91.052 45526/50000


100%|██████████| 79/79 [00:02<00:00, 26.36it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 97 Loss: 2.378 | Acc: 55.820 5582/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 98 Loss: 0.513 | Acc: 91.276 45638/50000


100%|██████████| 79/79 [00:03<00:00, 26.16it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 98 Loss: 2.362 | Acc: 55.890 5589/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 99 Loss: 0.514 | Acc: 91.222 45611/50000


100%|██████████| 79/79 [00:03<00:00, 26.20it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 99 Loss: 2.358 | Acc: 56.210 5621/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 100 Loss: 0.507 | Acc: 91.338 45669/50000


100%|██████████| 79/79 [00:02<00:00, 26.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 100 Loss: 2.366 | Acc: 55.890 5589/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 101 Loss: 0.503 | Acc: 91.346 45673/50000


100%|██████████| 79/79 [00:03<00:00, 26.25it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 101 Loss: 2.379 | Acc: 55.790 5579/10000


100%|██████████| 391/391 [00:42<00:00,  9.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 102 Loss: 0.493 | Acc: 91.544 45772/50000


100%|██████████| 79/79 [00:03<00:00, 26.04it/s]


[Test] 102 Loss: 2.347 | Acc: 56.360 5636/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.13it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 103 Loss: 0.487 | Acc: 91.670 45835/50000


100%|██████████| 79/79 [00:03<00:00, 26.17it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 103 Loss: 2.348 | Acc: 56.060 5606/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 104 Loss: 0.490 | Acc: 91.558 45779/50000


100%|██████████| 79/79 [00:03<00:00, 26.10it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 104 Loss: 2.350 | Acc: 56.110 5611/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 105 Loss: 0.481 | Acc: 91.770 45885/50000


100%|██████████| 79/79 [00:03<00:00, 26.12it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 105 Loss: 2.364 | Acc: 55.690 5569/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 106 Loss: 0.474 | Acc: 91.820 45910/50000


100%|██████████| 79/79 [00:03<00:00, 26.26it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 106 Loss: 2.357 | Acc: 56.200 5620/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 107 Loss: 0.474 | Acc: 91.842 45921/50000


100%|██████████| 79/79 [00:03<00:00, 26.31it/s]


[Test] 107 Loss: 2.354 | Acc: 56.390 5639/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 108 Loss: 0.472 | Acc: 91.862 45931/50000


100%|██████████| 79/79 [00:03<00:00, 26.26it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 108 Loss: 2.355 | Acc: 56.130 5613/10000


100%|██████████| 391/391 [00:42<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 109 Loss: 0.465 | Acc: 91.964 45982/50000


100%|██████████| 79/79 [00:03<00:00, 26.19it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 109 Loss: 2.353 | Acc: 56.230 5623/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 110 Loss: 0.461 | Acc: 91.974 45987/50000


100%|██████████| 79/79 [00:02<00:00, 26.36it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 110 Loss: 2.372 | Acc: 56.000 5600/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 111 Loss: 0.453 | Acc: 92.170 46085/50000


100%|██████████| 79/79 [00:03<00:00, 26.25it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 111 Loss: 2.356 | Acc: 56.220 5622/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 112 Loss: 0.452 | Acc: 92.162 46081/50000


100%|██████████| 79/79 [00:03<00:00, 26.33it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 112 Loss: 2.362 | Acc: 56.050 5605/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 113 Loss: 0.447 | Acc: 92.272 46136/50000


100%|██████████| 79/79 [00:03<00:00, 26.12it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 113 Loss: 2.369 | Acc: 56.180 5618/10000


100%|██████████| 391/391 [00:42<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 114 Loss: 0.440 | Acc: 92.350 46175/50000


100%|██████████| 79/79 [00:03<00:00, 26.12it/s]


[Test] 114 Loss: 2.363 | Acc: 56.500 5650/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 115 Loss: 0.440 | Acc: 92.392 46196/50000


100%|██████████| 79/79 [00:03<00:00, 26.08it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 115 Loss: 2.354 | Acc: 56.280 5628/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 116 Loss: 0.435 | Acc: 92.420 46210/50000


100%|██████████| 79/79 [00:03<00:00, 26.21it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 116 Loss: 2.358 | Acc: 56.450 5645/10000


100%|██████████| 391/391 [00:42<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 117 Loss: 0.433 | Acc: 92.524 46262/50000


100%|██████████| 79/79 [00:03<00:00, 26.09it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 117 Loss: 2.355 | Acc: 55.840 5584/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 118 Loss: 0.426 | Acc: 92.606 46303/50000


100%|██████████| 79/79 [00:03<00:00, 25.94it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 118 Loss: 2.354 | Acc: 56.320 5632/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 119 Loss: 0.424 | Acc: 92.638 46319/50000


100%|██████████| 79/79 [00:03<00:00, 26.20it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 119 Loss: 2.349 | Acc: 56.500 5650/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 120 Loss: 0.422 | Acc: 92.686 46343/50000


100%|██████████| 79/79 [00:03<00:00, 25.88it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 120 Loss: 2.344 | Acc: 56.400 5640/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 121 Loss: 0.417 | Acc: 92.762 46381/50000


100%|██████████| 79/79 [00:03<00:00, 25.98it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 121 Loss: 2.347 | Acc: 56.330 5633/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 122 Loss: 0.415 | Acc: 92.726 46363/50000


100%|██████████| 79/79 [00:03<00:00, 26.22it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 122 Loss: 2.362 | Acc: 56.340 5634/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 123 Loss: 0.413 | Acc: 92.776 46388/50000


100%|██████████| 79/79 [00:03<00:00, 26.16it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 123 Loss: 2.362 | Acc: 56.350 5635/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 124 Loss: 0.407 | Acc: 92.886 46443/50000


100%|██████████| 79/79 [00:03<00:00, 25.90it/s]


[Test] 124 Loss: 2.354 | Acc: 56.550 5655/10000
Saving..


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 125 Loss: 0.403 | Acc: 92.948 46474/50000


100%|██████████| 79/79 [00:03<00:00, 26.07it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 125 Loss: 2.342 | Acc: 56.450 5645/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 126 Loss: 0.400 | Acc: 93.030 46515/50000


100%|██████████| 79/79 [00:03<00:00, 26.26it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 126 Loss: 2.353 | Acc: 56.400 5640/10000


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 127 Loss: 0.396 | Acc: 93.052 46526/50000


100%|██████████| 79/79 [00:03<00:00, 26.17it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 127 Loss: 2.347 | Acc: 56.540 5654/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 128 Loss: 0.396 | Acc: 93.048 46524/50000


100%|██████████| 79/79 [00:03<00:00, 26.13it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 128 Loss: 2.359 | Acc: 56.370 5637/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 129 Loss: 0.390 | Acc: 93.158 46579/50000


100%|██████████| 79/79 [00:03<00:00, 25.94it/s]


[Test] 129 Loss: 2.338 | Acc: 56.870 5687/10000
Saving..


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 130 Loss: 0.390 | Acc: 93.112 46556/50000


100%|██████████| 79/79 [00:03<00:00, 26.19it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 130 Loss: 2.372 | Acc: 56.150 5615/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 131 Loss: 0.386 | Acc: 93.218 46609/50000


100%|██████████| 79/79 [00:03<00:00, 26.27it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 131 Loss: 2.364 | Acc: 56.380 5638/10000


100%|██████████| 391/391 [00:43<00:00,  9.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 132 Loss: 0.384 | Acc: 93.238 46619/50000


100%|██████████| 79/79 [00:03<00:00, 26.27it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 132 Loss: 2.360 | Acc: 56.280 5628/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 133 Loss: 0.380 | Acc: 93.320 46660/50000


100%|██████████| 79/79 [00:03<00:00, 26.26it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 133 Loss: 2.353 | Acc: 56.770 5677/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 134 Loss: 0.380 | Acc: 93.316 46658/50000


100%|██████████| 79/79 [00:03<00:00, 26.11it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 134 Loss: 2.361 | Acc: 56.360 5636/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 135 Loss: 0.373 | Acc: 93.444 46722/50000


100%|██████████| 79/79 [00:03<00:00, 25.95it/s]


[Test] 135 Loss: 2.337 | Acc: 56.970 5697/10000
Saving..


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 136 Loss: 0.372 | Acc: 93.442 46721/50000


100%|██████████| 79/79 [00:03<00:00, 26.10it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 136 Loss: 2.358 | Acc: 56.460 5646/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 137 Loss: 0.371 | Acc: 93.430 46715/50000


100%|██████████| 79/79 [00:03<00:00, 26.01it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 137 Loss: 2.352 | Acc: 56.790 5679/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 138 Loss: 0.366 | Acc: 93.552 46776/50000


100%|██████████| 79/79 [00:03<00:00, 26.09it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 138 Loss: 2.352 | Acc: 56.680 5668/10000


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 139 Loss: 0.366 | Acc: 93.514 46757/50000


100%|██████████| 79/79 [00:02<00:00, 26.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 139 Loss: 2.354 | Acc: 56.600 5660/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 140 Loss: 0.364 | Acc: 93.544 46772/50000


100%|██████████| 79/79 [00:03<00:00, 26.16it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 140 Loss: 2.348 | Acc: 56.630 5663/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 141 Loss: 0.363 | Acc: 93.590 46795/50000


100%|██████████| 79/79 [00:03<00:00, 26.10it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 141 Loss: 2.354 | Acc: 56.370 5637/10000


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 142 Loss: 0.360 | Acc: 93.616 46808/50000


100%|██████████| 79/79 [00:03<00:00, 25.88it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 142 Loss: 2.352 | Acc: 56.540 5654/10000


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 143 Loss: 0.356 | Acc: 93.726 46863/50000


100%|██████████| 79/79 [00:03<00:00, 26.10it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 143 Loss: 2.347 | Acc: 56.580 5658/10000


100%|██████████| 391/391 [00:42<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 144 Loss: 0.354 | Acc: 93.730 46865/50000


100%|██████████| 79/79 [00:03<00:00, 25.95it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 144 Loss: 2.347 | Acc: 56.740 5674/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 145 Loss: 0.352 | Acc: 93.762 46881/50000


100%|██████████| 79/79 [00:03<00:00, 26.04it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 145 Loss: 2.365 | Acc: 56.380 5638/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 146 Loss: 0.351 | Acc: 93.780 46890/50000


100%|██████████| 79/79 [00:03<00:00, 26.21it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 146 Loss: 2.341 | Acc: 56.790 5679/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 147 Loss: 0.347 | Acc: 93.848 46924/50000


100%|██████████| 79/79 [00:03<00:00, 26.15it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 147 Loss: 2.352 | Acc: 56.460 5646/10000


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 148 Loss: 0.349 | Acc: 93.778 46889/50000


100%|██████████| 79/79 [00:03<00:00, 26.19it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 148 Loss: 2.350 | Acc: 56.650 5665/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 149 Loss: 0.344 | Acc: 93.840 46920/50000


100%|██████████| 79/79 [00:03<00:00, 26.16it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 149 Loss: 2.357 | Acc: 56.570 5657/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 150 Loss: 0.342 | Acc: 93.904 46952/50000


100%|██████████| 79/79 [00:03<00:00, 26.10it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 150 Loss: 2.341 | Acc: 56.830 5683/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 151 Loss: 0.341 | Acc: 93.918 46959/50000


100%|██████████| 79/79 [00:03<00:00, 25.92it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 151 Loss: 2.349 | Acc: 56.780 5678/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 152 Loss: 0.342 | Acc: 93.920 46960/50000


100%|██████████| 79/79 [00:03<00:00, 26.17it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 152 Loss: 2.340 | Acc: 56.890 5689/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 153 Loss: 0.337 | Acc: 94.002 47001/50000


100%|██████████| 79/79 [00:03<00:00, 26.18it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 153 Loss: 2.352 | Acc: 56.790 5679/10000


100%|██████████| 391/391 [00:42<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 154 Loss: 0.336 | Acc: 94.006 47003/50000


100%|██████████| 79/79 [00:03<00:00, 26.13it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 154 Loss: 2.343 | Acc: 56.850 5685/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 155 Loss: 0.336 | Acc: 93.982 46991/50000


100%|██████████| 79/79 [00:03<00:00, 26.18it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 155 Loss: 2.342 | Acc: 56.900 5690/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 156 Loss: 0.332 | Acc: 94.040 47020/50000


100%|██████████| 79/79 [00:03<00:00, 26.00it/s]


[Test] 156 Loss: 2.334 | Acc: 57.140 5714/10000
Saving..


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 157 Loss: 0.328 | Acc: 94.130 47065/50000


100%|██████████| 79/79 [00:03<00:00, 26.14it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 157 Loss: 2.337 | Acc: 56.930 5693/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 158 Loss: 0.330 | Acc: 94.112 47056/50000


100%|██████████| 79/79 [00:03<00:00, 26.15it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 158 Loss: 2.338 | Acc: 56.890 5689/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 159 Loss: 0.329 | Acc: 94.108 47054/50000


100%|██████████| 79/79 [00:03<00:00, 26.22it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 159 Loss: 2.342 | Acc: 57.020 5702/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 160 Loss: 0.326 | Acc: 94.158 47079/50000


100%|██████████| 79/79 [00:03<00:00, 26.23it/s]


[Test] 160 Loss: 2.342 | Acc: 57.250 5725/10000
Saving..


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 161 Loss: 0.327 | Acc: 94.158 47079/50000


100%|██████████| 79/79 [00:03<00:00, 26.23it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 161 Loss: 2.336 | Acc: 56.900 5690/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 162 Loss: 0.326 | Acc: 94.118 47059/50000


100%|██████████| 79/79 [00:03<00:00, 26.17it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 162 Loss: 2.342 | Acc: 56.840 5684/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 163 Loss: 0.324 | Acc: 94.170 47085/50000


100%|██████████| 79/79 [00:03<00:00, 26.14it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 163 Loss: 2.338 | Acc: 56.890 5689/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 164 Loss: 0.323 | Acc: 94.182 47091/50000


100%|██████████| 79/79 [00:03<00:00, 26.26it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 164 Loss: 2.339 | Acc: 56.800 5680/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 165 Loss: 0.321 | Acc: 94.216 47108/50000


100%|██████████| 79/79 [00:03<00:00, 26.07it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 165 Loss: 2.334 | Acc: 56.930 5693/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 166 Loss: 0.321 | Acc: 94.218 47109/50000


100%|██████████| 79/79 [00:03<00:00, 25.95it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 166 Loss: 2.343 | Acc: 56.800 5680/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 167 Loss: 0.320 | Acc: 94.222 47111/50000


100%|██████████| 79/79 [00:03<00:00, 26.08it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 167 Loss: 2.337 | Acc: 57.110 5711/10000


100%|██████████| 391/391 [00:43<00:00,  9.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 168 Loss: 0.319 | Acc: 94.248 47124/50000


100%|██████████| 79/79 [00:03<00:00, 26.23it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 168 Loss: 2.332 | Acc: 56.970 5697/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 169 Loss: 0.318 | Acc: 94.260 47130/50000


100%|██████████| 79/79 [00:03<00:00, 26.22it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 169 Loss: 2.334 | Acc: 57.050 5705/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 170 Loss: 0.316 | Acc: 94.280 47140/50000


100%|██████████| 79/79 [00:03<00:00, 26.09it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 170 Loss: 2.336 | Acc: 56.910 5691/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 171 Loss: 0.315 | Acc: 94.314 47157/50000


100%|██████████| 79/79 [00:03<00:00, 26.06it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 171 Loss: 2.341 | Acc: 57.070 5707/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 172 Loss: 0.315 | Acc: 94.316 47158/50000


100%|██████████| 79/79 [00:03<00:00, 26.10it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 172 Loss: 2.345 | Acc: 56.670 5667/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 173 Loss: 0.314 | Acc: 94.324 47162/50000


100%|██████████| 79/79 [00:03<00:00, 26.09it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 173 Loss: 2.344 | Acc: 57.060 5706/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 174 Loss: 0.312 | Acc: 94.328 47164/50000


100%|██████████| 79/79 [00:03<00:00, 25.82it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 174 Loss: 2.346 | Acc: 56.980 5698/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 175 Loss: 0.313 | Acc: 94.334 47167/50000


100%|██████████| 79/79 [00:03<00:00, 26.02it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 175 Loss: 2.345 | Acc: 56.660 5666/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 176 Loss: 0.314 | Acc: 94.338 47169/50000


100%|██████████| 79/79 [00:03<00:00, 26.26it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 176 Loss: 2.344 | Acc: 56.720 5672/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 177 Loss: 0.312 | Acc: 94.370 47185/50000


100%|██████████| 79/79 [00:03<00:00, 26.16it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 177 Loss: 2.343 | Acc: 56.690 5669/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 178 Loss: 0.310 | Acc: 94.372 47186/50000


100%|██████████| 79/79 [00:03<00:00, 26.00it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 178 Loss: 2.342 | Acc: 56.940 5694/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 179 Loss: 0.310 | Acc: 94.398 47199/50000


100%|██████████| 79/79 [00:03<00:00, 26.18it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 179 Loss: 2.344 | Acc: 56.960 5696/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 180 Loss: 0.312 | Acc: 94.348 47174/50000


100%|██████████| 79/79 [00:03<00:00, 26.20it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 180 Loss: 2.345 | Acc: 56.960 5696/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 181 Loss: 0.309 | Acc: 94.408 47204/50000


100%|██████████| 79/79 [00:03<00:00, 26.19it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 181 Loss: 2.335 | Acc: 56.930 5693/10000


100%|██████████| 391/391 [00:43<00:00,  9.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 182 Loss: 0.310 | Acc: 94.366 47183/50000


100%|██████████| 79/79 [00:03<00:00, 26.18it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 182 Loss: 2.342 | Acc: 56.730 5673/10000


100%|██████████| 391/391 [00:42<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 183 Loss: 0.309 | Acc: 94.378 47189/50000


100%|██████████| 79/79 [00:03<00:00, 26.10it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 183 Loss: 2.335 | Acc: 56.870 5687/10000


100%|██████████| 391/391 [00:42<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 184 Loss: 0.308 | Acc: 94.400 47200/50000


100%|██████████| 79/79 [00:03<00:00, 26.13it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 184 Loss: 2.343 | Acc: 56.770 5677/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 185 Loss: 0.309 | Acc: 94.404 47202/50000


100%|██████████| 79/79 [00:03<00:00, 26.11it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 185 Loss: 2.344 | Acc: 56.930 5693/10000


100%|██████████| 391/391 [00:42<00:00,  9.11it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 186 Loss: 0.308 | Acc: 94.398 47199/50000


100%|██████████| 79/79 [00:03<00:00, 26.15it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 186 Loss: 2.342 | Acc: 56.720 5672/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 187 Loss: 0.310 | Acc: 94.330 47165/50000


100%|██████████| 79/79 [00:03<00:00, 26.15it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 187 Loss: 2.344 | Acc: 56.790 5679/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 188 Loss: 0.309 | Acc: 94.390 47195/50000


100%|██████████| 79/79 [00:03<00:00, 26.24it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 188 Loss: 2.343 | Acc: 56.820 5682/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 192 Loss: 0.307 | Acc: 94.398 47199/50000


100%|██████████| 79/79 [00:03<00:00, 26.12it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 192 Loss: 2.339 | Acc: 57.010 5701/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 193 Loss: 0.307 | Acc: 94.400 47200/50000


100%|██████████| 79/79 [00:03<00:00, 26.16it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 193 Loss: 2.339 | Acc: 56.890 5689/10000


100%|██████████| 391/391 [00:43<00:00,  9.09it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 194 Loss: 0.307 | Acc: 94.434 47217/50000


100%|██████████| 79/79 [00:03<00:00, 26.21it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 194 Loss: 2.341 | Acc: 56.920 5692/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 195 Loss: 0.307 | Acc: 94.402 47201/50000


100%|██████████| 79/79 [00:03<00:00, 26.25it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 195 Loss: 2.335 | Acc: 57.020 5702/10000


100%|██████████| 391/391 [00:43<00:00,  9.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 196 Loss: 0.307 | Acc: 94.416 47208/50000


100%|██████████| 79/79 [00:03<00:00, 26.29it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 196 Loss: 2.340 | Acc: 56.930 5693/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 197 Loss: 0.307 | Acc: 94.404 47202/50000


100%|██████████| 79/79 [00:03<00:00, 26.21it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 197 Loss: 2.340 | Acc: 56.750 5675/10000


100%|██████████| 391/391 [00:42<00:00,  9.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 198 Loss: 0.308 | Acc: 94.400 47200/50000


100%|██████████| 79/79 [00:03<00:00, 26.11it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 198 Loss: 2.339 | Acc: 56.920 5692/10000


 19%|█▉        | 74/391 [00:08<00:34,  9.19it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [159]:
# Display the current value of `best_acc`.
# NOTE(review): presumably the best test accuracy tracked by the training loop
# (defined outside this part of the notebook) — confirm.
best_acc

57.25

In [157]:
# Count the non-zero entries of the tensor `a`.
# NOTE(review): `a` is not defined in the visible part of the notebook and this
# cell's execution count is out of order — likely leftover scratch state; confirm
# where `a` comes from or remove this cell.
torch.count_nonzero(a)

tensor(810)

In [None]:
######## C10

#### non-inv nn with MLP classifier: 85.73 Acc ; using no-spectral init
#### non-inv nn with connected classifier:   Acc ; using no-spectral init
#### inv nn with connected classifier: 84.25 Acc ; spectral normalized

In [33]:
######## C100
#### inv nn with MLP classifier: 59.3 Acc ;
#### non-inv nn with MLP classifier: 54.67 Acc ;

#### inv nn + ConnectedDist: 30.03 Acc; -> v2
#### inv nn + ConnectedLin: 48.79 Acc; -> v1

#### inv nn + ConnectedLin-3072: 46.82 Acc; v3
#### inv nn + ConnectedLin-3072-unnormalized: 55.54 Acc / 54.97 (Hard); v4

#### inv nn + ConnectedLin-500-unnormalized: 57.25 Acc / 56.83 (Hard); v6

In [160]:
# Inspect the classifier's `inv_temp` parameter (its current learned value).
classifier.inv_temp

Parameter containing:
tensor([0.3377], device='cuda:1', requires_grad=True)

In [161]:
# Reload the checkpoint saved under `model_name` and recover the accuracy and
# epoch stored with it.
#
# `map_location=device` remaps the stored tensors onto the device selected at the
# top of the notebook, so the load also works when the GPU the checkpoint was
# saved from is not available (or when running on CPU).
#
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
checkpoint = torch.load(f'./models/{model_name}.pth', map_location=device)
best_acc = checkpoint['acc']
start_epoch = checkpoint['epoch']

# Show what was restored.
best_acc, start_epoch

(57.25, 160)

In [163]:
# Restore the model weights stored under the 'model' key of the loaded checkpoint.
model.load_state_dict(checkpoint['model'])

<All keys matched successfully>

In [None]:
# Split the model into its first two sub-modules: the feature backbone and the
# classifier head, so the cells below can call them separately.
backbone, classifier = model[0], model[1]

### Hard test accuracy with count per classifier

In [165]:
# Hard-assignment accuracy on the test loader, together with a tally of how many
# test samples each classifier set receives.
test_count = 0
test_acc = 0
set_count = torch.zeros(classifier.num_sets).to(device)

model.eval()
with torch.no_grad():  # pure evaluation: no autograd bookkeeping for the whole loop
    for xx, yy in tqdm(test_loader):
        xx = xx.to(device)
        yy = yy.to(device)

        yout = classifier(backbone(xx), hard=True)

        # `cls_confidence` is read right after the forward call; its per-sample
        # argmax over dim 1 is taken as the index of the selected set.
        # (presumably refreshed by the forward pass — confirm in the classifier code)
        winning_set = torch.argmax(classifier.cls_confidence, dim=1)
        set_indx, count = torch.unique(winning_set, return_counts=True)
        set_count[set_indx] += count

        # Compare predicted labels with the targets on the CPU.
        outputs = yout.argmax(dim=1).data.cpu().numpy()
        correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()

        test_acc += correct
        test_count += len(xx)

print(f'Hard Test Acc:{float(test_acc)/test_count*100:.2f}%')
print(set_count.type(torch.long).tolist())

100%|██████████| 79/79 [00:02<00:00, 26.92it/s]

Hard Test Acc:56.83%
[98, 0, 1, 20, 77, 0, 3, 2, 0, 1, 48, 12, 79, 0, 0, 94, 14, 0, 4, 10, 0, 21, 1, 98, 94, 13, 72, 56, 72, 15, 0, 0, 0, 1, 2, 7, 86, 0, 0, 0, 4, 0, 1, 0, 3, 67, 4, 42, 1, 0, 1, 82, 0, 107, 0, 31, 0, 0, 90, 102, 1, 1, 6, 0, 10, 43, 4, 0, 105, 2, 86, 102, 21, 0, 0, 0, 1, 0, 25, 80, 1, 1, 2, 8, 61, 0, 0, 0, 3, 101, 1, 8, 82, 49, 1, 0, 0, 6, 1, 66, 6, 2, 1, 1, 20, 1, 0, 0, 66, 77, 3, 2, 1, 83, 0, 3, 2, 0, 8, 0, 2, 0, 57, 8, 0, 24, 3, 0, 4, 56, 0, 3, 5, 66, 81, 0, 8, 92, 102, 16, 5, 1, 10, 0, 50, 0, 0, 48, 110, 0, 4, 5, 0, 3, 0, 26, 0, 6, 4, 12, 0, 13, 90, 54, 0, 1, 48, 3, 0, 0, 0, 0, 28, 3, 14, 0, 108, 1, 2, 0, 82, 9, 0, 1, 1, 6, 4, 0, 0, 12, 98, 45, 2, 3, 0, 38, 0, 63, 3, 1, 0, 1, 1, 9, 5, 87, 0, 0, 0, 1, 29, 3, 0, 9, 8, 1, 75, 0, 0, 0, 11, 0, 2, 0, 6, 3, 0, 1, 20, 0, 80, 3, 56, 6, 18, 8, 0, 3, 0, 1, 4, 84, 0, 0, 4, 9, 0, 7, 1, 85, 86, 7, 95, 0, 2, 1, 97, 4, 1, 0, 94, 0, 0, 14, 3, 72, 2, 0, 0, 94, 3, 0, 41, 10, 73, 102, 18, 0, 9, 2, 20, 1, 0, 93, 25, 0, 5, 42, 74, 1, 1, 




In [None]:
#### Classifiers that enclose any data:
#### number of classifier sets with a non-zero count in `set_count`, i.e. sets that
#### were the argmax of `cls_confidence` for at least one sample in the loop above.
torch.count_nonzero(set_count)

### Hard train accuracy with count per classifier

In [34]:
# Hard-assignment accuracy on the training loader, together with a tally of how
# many training samples each classifier set receives.
#
# NOTE: the accumulators keep the `test_*` names used by the test-set cell so any
# later cell reading them keeps working; in this cell they hold TRAIN statistics.
# NOTE(review): if `train_loader` wraps `train_dataset` with the random
# crop/flip transform, this measures accuracy on augmented images and varies
# between runs — confirm against the loader definition.
test_count = 0
test_acc = 0
set_count = torch.zeros(classifier.num_sets).to(device)
# Put the model in eval mode explicitly. The test-set cell does this, but this
# cell did not, so its result depended on whichever mode the kernel had left the
# model in (e.g. train mode right after a training epoch).
model.eval()
for xx, yy in tqdm(train_loader):
    xx, yy = xx.to(device), yy.to(device)
    with torch.no_grad():
        yout = classifier(backbone(xx), hard=True)
        # Per-sample argmax of `cls_confidence` over dim 1 is taken as the selected set;
        # unique indices + counts let us add the whole batch to the tally at once.
        set_indx, count = torch.unique(torch.argmax(classifier.cls_confidence, dim=1), return_counts=True)
        set_count[set_indx] += count
    outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
    correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
    test_acc += correct
    test_count += len(xx)

print(f'Hard Train Acc:{float(test_acc)/test_count*100:.2f}%')
print(set_count.type(torch.long).tolist())

100%|██████████| 391/391 [00:29<00:00, 13.04it/s]

Hard Train Acc:91.31%
[0, 0, 0, 0, 4452, 25, 4597, 0, 4948, 0, 0, 0, 0, 162, 0, 0, 0, 0, 0, 4909, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5013, 4926, 0, 56, 0, 5022, 4125, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 5073, 38, 4, 158, 0, 0, 0, 0, 0, 60, 9, 84, 0, 0, 119, 4967, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 284, 0, 0, 0, 0, 0, 888, 0]





In [35]:
#### Classifiers that enclose any data:
#### number of classifier sets with a non-zero count in `set_count`, i.e. sets that
#### were the argmax of `cls_confidence` for at least one sample in the loop above.
torch.count_nonzero(set_count)

tensor(26, device='cuda:1')

In [36]:
#### Classifier with class representation:
#### index of the largest entry along dim 1 of `cls_weight`, one value per row.
#### (assumes rows index classifier sets and columns index classes — TODO confirm)
torch.argmax(classifier.cls_weight, dim=1)

tensor([6, 3, 7, 9, 5, 0, 2, 3, 3, 3, 7, 7, 5, 3, 5, 6, 3, 1, 8, 7, 3, 1, 5, 2,
        3, 3, 3, 1, 6, 2, 1, 1, 5, 6, 9, 9, 2, 3, 4, 8, 3, 9, 8, 3, 5, 9, 9, 8,
        3, 2, 2, 1, 8, 8, 8, 0, 6, 7, 2, 3, 5, 7, 0, 9, 1, 0, 6, 6, 5, 4, 1, 1,
        9, 8, 2, 5, 1, 1, 7, 7, 8, 9, 2, 2, 9, 0, 9, 5, 4, 3, 3, 1, 5, 1, 5, 4,
        5, 9, 8, 5], device='cuda:1')