In [70]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

import os, sys, pathlib, random, time, pickle, copy
from tqdm import tqdm

In [71]:
# Pick the compute device. The second GPU ("cuda:1") is preferred, as in the
# original setup, but fall back to the first GPU and then to the CPU so the
# notebook still runs on machines with fewer (or no) GPUs.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [72]:
import torch.optim as optim
from torch.utils import data

In [73]:
import nflib
from nflib.flows import SequentialFlow, NormalizingFlow, ActNorm, ActNorm2D, AffineConstantFlow
import nflib.coupling_flows as icf
import nflib.inn_flow as inn
import nflib.res_flow as irf

### Datasets

In [74]:
# Per-channel statistics used to standardise CIFAR-10 images.
# For CIFAR-100 the corresponding values noted in this cell were
# mean=[0.5071, 0.4865, 0.4409] and std=[0.2009, 0.1984, 0.2023].
_cifar_normalize = transforms.Normalize(
    mean=[0.4914, 0.4822, 0.4465],
    std=[0.2023, 0.1994, 0.2010],
)

# Training pipeline: random crop (with 4px padding) and horizontal flip
# before tensor conversion and normalisation.
cifar_train = transforms.Compose([
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    _cifar_normalize,
])

# Test pipeline: no augmentation, only tensor conversion and normalisation.
cifar_test = transforms.Compose([
    transforms.ToTensor(),
    _cifar_normalize,
])

# Both splits live under the same root; they are downloaded if missing.
_cifar_root = "../../../../../_Datasets/cifar10/"
train_dataset = datasets.CIFAR10(root=_cifar_root, train=True, download=True, transform=cifar_train)
test_dataset = datasets.CIFAR10(root=_cifar_root, train=False, download=True, transform=cifar_test)

Files already downloaded and verified
Files already downloaded and verified


In [75]:
# Options shared by both loaders; only the training loader shuffles.
_loader_opts = dict(batch_size=128, num_workers=2)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=True, **_loader_opts)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=False, **_loader_opts)

In [76]:
# Fetch a single training batch for shape checks below.
# Use the built-in next(): the iterator's `.next()` method is not available
# on current PyTorch DataLoader iterators.
xx, yy = next(iter(train_loader))

In [77]:
# Display the shape of the sampled image batch.
xx.shape

torch.Size([128, 3, 32, 32])

### Model

In [78]:
actf = irf.Swish


def _norm_res(channels, hidden):
    """Return [BatchNorm2d, ConvResidualFlow] for `channels` feature maps."""
    return [
        nn.BatchNorm2d(channels),
        irf.ConvResidualFlow(channels, hidden, kernels=5, activation=actf),
    ]


# Backbone layout. BatchNorm layers are used throughout; the original cell
# kept ActNorm2D / ActNorm (and a bias-free Linear before the last norm) as
# commented-out alternatives. Channel counts go 3 -> 12 -> 48 -> 192 across
# the three InvertiblePooling(2) layers, and the final Flatten of
# (192, 4, 4) yields 3072 features.
flows = [
    *_norm_res(3, [32, 32]),
    irf.InvertiblePooling(2),
    *_norm_res(12, [64, 64]),
    *_norm_res(12, [64, 64]),
    irf.InvertiblePooling(2),
    *_norm_res(48, [128, 128]),
    *_norm_res(48, [128, 128]),
    irf.InvertiblePooling(2),
    *_norm_res(192, [256, 256]),
    *_norm_res(192, [256, 256]),
    nn.BatchNorm2d(192),
    irf.Flatten(img_size=(192, 4, 4)),
    nn.BatchNorm1d(3072),
]

# A plain nn.Sequential is used here (SequentialFlow(flows) was the
# commented-out alternative).
backbone = nn.Sequential(*flows)

In [79]:
def get_children(module):
    """Return the leaf sub-modules of `module` in depth-first order.

    A module with no children is its own (single) leaf, so the result is
    never empty.
    """
    direct = list(module.children())
    if not direct:
        return [module]
    leaves = []
    for sub in direct:
        leaves.extend(get_children(sub))
    return leaves


def remove_spectral_norm(model):
    """Strip spectral normalisation from every leaf layer of `model`.

    Each leaf that exposes a `weight` attribute is reported with "Yes" and
    passed to `nn.utils.remove_spectral_norm`; the outcome is printed as
    "Success" or "Failed". Layers that simply have no spectral norm attached
    are skipped ("Failed"), while any other error is allowed to propagate
    instead of being silently swallowed.

    Returns None; `model` is modified in place.
    """
    for child in get_children(model):
        if hasattr(child, 'weight'):
            print("Yes", child)
            try:
                nn.utils.remove_spectral_norm(child)
                print("Success")
            except ValueError:
                # Raised by torch when no spectral-norm hook is registered on
                # this layer's weight (e.g. the BatchNorm layers).
                print("Failed")
    return

In [80]:
# Try to strip spectral normalisation from every weighted leaf layer of the
# backbone; the per-layer outcome is printed.
remove_spectral_norm(backbone)

Yes BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Failed
Yes Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes Conv2d(32, 3, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Failed
Yes Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes Conv2d(64, 12, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
Failed
Yes Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes Conv2d(64, 12, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
Success
Yes BatchNorm2d(48, eps=1e-0

In [81]:
# Move the backbone to the selected device (the module repr is the cell output).
backbone.to(device)

Sequential(
  (0): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): ConvResidualFlow(
    (resblock): ModuleList(
      (0): Conv2d(3, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (1): Swish()
      (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (3): Swish()
      (4): Conv2d(32, 3, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    )
  )
  (2): InvertiblePooling()
  (3): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (4): ConvResidualFlow(
    (resblock): ModuleList(
      (0): Conv2d(12, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (1): Swish()
      (2): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (3): Swish()
      (4): Conv2d(64, 12, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    )
  )
  (5): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ConvResidualFlow(
    (resblock)

In [82]:
# Sanity check: compare the backbone's output shape for one batch against
# the number of values in one input image (32*32*3).
backbone(xx.to(device)).shape, 32*32*3

(torch.Size([128, 3072]), 3072)

In [83]:
# Total number of parameter elements in the backbone.
print("number of params: ", sum(p.numel() for p in backbone.parameters()))

number of params:  9947519


In [84]:
# Smoke test: push the first training batch through the backbone and print
# the input and output shapes, then stop.
for xx, yy in train_loader:
    tt = backbone(xx.to(device))
    print(xx.shape, tt.shape)
    break

torch.Size([128, 3, 32, 32]) torch.Size([128, 3072])


In [85]:
class ConnectedClassifier_Linear(nn.Module):
    """Two-stage soft classifier with a linear set-assignment layer.

    Inputs are softly assigned to one of `num_sets` sets by a softmax over a
    linear layer's scores; every set carries its own softmax distribution
    over the `output_dim` classes. The output is the assignment-weighted
    mixture of those per-set class distributions, so each output row sums
    to one.
    """

    def __init__(self,input_dim, num_sets, output_dim, inv_temp=1):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_sets = num_sets
        # Learnable scale applied to the linear scores before the softmax.
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)

        self.linear = nn.Linear(input_dim, num_sets)

        # Per-set class scores: small noise, with set i nudged towards
        # class (i mod output_dim).
        start = torch.randn(num_sets, output_dim)*0.01
        set_ids = torch.arange(num_sets)
        start[set_ids, set_ids % output_dim] = 0.1
        self.cls_weight = nn.Parameter(start)

        # Set-assignment probabilities from the most recent forward pass.
        self.cls_confidence = None

    def forward(self, x, hard=False):
        """Return class probabilities for `x`.

        With `hard=True` the scores are scaled by 1e5 instead of `inv_temp`,
        which makes the set assignment (nearly) one-hot.
        """
        scores = self.linear(x)
        scale = 1e5 if hard else self.inv_temp
        assignment = torch.softmax(scores*scale, dim=1)
        self.cls_confidence = assignment
        per_set_classes = torch.softmax(self.cls_weight, dim=1)
        # Both factors are row-normalised, hence so is their product.
        return assignment@per_set_classes

In [86]:
class ConnectedClassifier_SoftKMeans(nn.Module):
    """Two-stage soft classifier whose sets are defined by learnable centres.

    Inputs are softly assigned to one of `num_sets` centres through a softmax
    over negative (scaled) Euclidean distances; every centre carries its own
    softmax distribution over the `output_dim` classes. The output is the
    assignment-weighted mixture of those distributions, so each output row
    sums to one.
    """

    def __init__(self,input_dim, num_sets, output_dim, inv_temp=1):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_sets = num_sets
        # Learnable scale applied to the negative distances before the softmax.
        self.inv_temp = nn.Parameter(torch.ones(1)*inv_temp)

        # Centres start uniformly in [-1, 1) along every coordinate.
        self.centers = nn.Parameter(torch.rand(num_sets, input_dim)*2-1)

        # Per-centre class scores: small noise, with centre i nudged towards
        # class (i mod output_dim).
        init_val = torch.randn(num_sets, output_dim)*0.01
        for ns in range(num_sets):
            init_val[ns, ns%output_dim] = 0.1
        self.cls_weight = nn.Parameter(init_val)

        # Centre-assignment probabilities from the most recent forward pass.
        self.cls_confidence = None

    def forward(self, x, hard=False):
        """Return class probabilities for a 2-D batch `x`.

        With `hard=True` the distances are scaled by 1e5 instead of
        `inv_temp`, which makes the centre assignment (nearly) one-hot.
        """
        dists = torch.cdist(x, self.centers)
        dists = dists/np.sqrt(self.input_dim) ### correction to make diagonal of unit square 1 in nD space

        if hard:
            x = torch.softmax(-dists*1e5, dim=1)
        else:
            x = torch.softmax(-dists*self.inv_temp, dim=1)
        self.cls_confidence = x
        c = torch.softmax(self.cls_weight, dim=1)
        return x@c ## since both are normalized, it is also normalized

    def set_centroid_to_data_randomly(self, data_loader, model):
        """Re-initialise the centres from the first samples of `data_loader`.

        Batches are passed through `model` until `num_sets` feature vectors
        have been collected; these become the new centres, and each centre's
        class scores are reset to 1/output_dim everywhere except for a 1 at
        the label of the sample it was taken from.

        Inputs are moved to the device of `model`'s first parameter (or to
        the centres' device when `model` has no parameters). The model's
        train/eval mode is left untouched.

        Raises:
            ValueError: if the loader yields fewer than `num_sets` samples or
                the features do not match the centres' dimensionality; the
                classifier is left unchanged in that case.
        """
        num_centers = self.centers.shape[0]

        # Locate the model instead of relying on a notebook-global `device`.
        params = model.parameters() if hasattr(model, "parameters") else iter(())
        first_param = next(iter(params), None)
        model_device = first_param.device if first_param is not None else self.centers.device

        feats, labels = [], []
        count = 0
        with torch.no_grad():  # features are only copied, no graph needed
            for xx, yy in data_loader:
                feats.append(model(xx.to(model_device)).detach().cpu())
                labels.append(yy)
                count += len(xx)
                if count >= num_centers:
                    break

        if count < num_centers:
            raise ValueError(
                f"data_loader provided {count} samples but {num_centers} centers are required"
            )

        yout = torch.cat(feats, dim=0)[:num_centers]
        yy = torch.cat(labels, dim=0)[:num_centers]
        if tuple(yout.shape[1:]) != tuple(self.centers.shape[1:]):
            raise ValueError(
                f"model features have shape {tuple(yout.shape[1:])}, "
                f"expected {tuple(self.centers.shape[1:])}"
            )

        self.centers.data = yout.to(device=self.centers.device, dtype=self.centers.dtype)

        init_val = torch.ones(self.num_sets, self.output_dim)/self.output_dim
        init_val[torch.arange(num_centers), yy.long().cpu()] = 1.
        self.cls_weight.data = init_val.to(self.cls_weight.device)

In [90]:
# Classification head on top of the 3072 backbone features:
# 100 sets (centres) mapped to 10 classes, placed on the selected device.
# Alternatives left commented out in the original cell:
#   ConnectedClassifier_Linear(3072, 100, 10)
#   nn.Sequential(nn.Linear(3072, 100), nn.SELU(), nn.Linear(100, 10))
classifier = ConnectedClassifier_SoftKMeans(input_dim=3072, num_sets=100, output_dim=10).to(device)

In [91]:
# Parameter element counts: first the backbone, then the classifier head.
print("number of params: ", sum(p.numel() for p in backbone.parameters()))
print("number of params: ", sum(p.numel() for p in classifier.parameters()))

number of params:  9947519
number of params:  308201


In [92]:
# classifier.set_centroid_to_data_randomly(train_loader, backbone)

In [93]:
# Full model: backbone followed by the classifier head, on the selected device.
model = nn.Sequential(backbone, classifier).to(device)

In [94]:
# Parameter element count of the combined model.
print("number of params: ", sum(p.numel() for p in model.parameters()))

number of params:  10255720


## Training

In [97]:
# Base name of the checkpoint file written by test() and read when resuming
# ('./models/<model_name>.pth'). It must be defined, otherwise saving a
# checkpoint fails with a NameError on a fresh kernel.
# Other names listed in this cell: 'c10_inv_v0', 'c10_ord_v0_mlp'.
# NOTE(review): all three names were commented out in the original cell, so
# which one belongs to this run is a guess - confirm before training.
model_name = 'c10_ord_v1'

In [98]:
# NOTE(review): the ConnectedClassifier_* heads defined in this notebook
# return outputs that are already softmax-normalised, while
# nn.CrossEntropyLoss applies a log-softmax to its input again. This may be
# why the logged loss starts at about 2.303 (= ln 10) and moves slowly.
# Consider a negative log-likelihood on the log of the outputs - confirm
# before changing, since the commented-out MLP head produces raw scores.
criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr=0.1,
#                       momentum=0.9, weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
# Cosine learning-rate schedule; T_max=200 matches the 200 epochs run by the
# training loop below, which steps the scheduler once per epoch.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

In [99]:
## Following is copied from 
### https://github.com/kuangliu/pytorch-cifar/blob/master/main.py

# Training
def train(epoch):
    """Run one optimisation pass over `train_loader` and print a summary.

    Uses the notebook-level `model`, `optimizer`, `criterion` and `device`.
    `epoch` is only used to label the printed line, which reports the mean
    batch loss and the accuracy over the pass. Returns None.
    """
    model.train()
    running_loss, n_correct, n_seen = 0, 0, 0
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Index of the highest score per sample is the predicted class.
        predicted = outputs.max(1)[1]
        n_seen += targets.size(0)
        n_correct += (predicted == targets).sum().item()

    mean_loss = running_loss/(batch_idx+1)
    accuracy = 100.*n_correct/n_seen
    print(f"[Train] {epoch} Loss: {mean_loss:.3f} | Acc: {accuracy:.3f} {n_correct}/{n_seen}")
    return

In [100]:
# Best test accuracy (in percent) seen so far; -1 means "none yet".
best_acc = -1
def test(epoch):
    """Evaluate on `test_loader`, print a summary, and checkpoint on improvement.

    Uses the notebook-level `model`, `criterion` and `device`. When the
    accuracy beats `best_acc`, the model state, accuracy and epoch are saved
    to './models/<model_name>.pth' and `best_acc` is updated.
    """
    global best_acc
    model.eval()
    loss_sum, n_correct, n_seen = 0, 0, 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)

            loss_sum += criterion(outputs, targets).item()
            # Index of the highest score per sample is the predicted class.
            predicted = outputs.max(1)[1]
            n_seen += targets.size(0)
            n_correct += (predicted == targets).sum().item()

    acc = 100.*n_correct/n_seen
    print(f"[Test] {epoch} Loss: {loss_sum/(batch_idx+1):.3f} | Acc: {acc:.3f} {n_correct}/{n_seen}")

    # Nothing to save unless this is a new best.
    if not acc > best_acc:
        return

    print('Saving..')
    state = dict(model=model.state_dict(), acc=acc, epoch=epoch)
    if not os.path.isdir('models'):
        os.mkdir('models')
    torch.save(state, f'./models/{model_name}.pth')
    best_acc = acc

In [101]:
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
resume = False

if resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('./models'), 'Error: no checkpoint directory found!'
    # map_location places the saved tensors on the current device, so a
    # checkpoint written on one GPU can be restored on another GPU or the CPU.
    checkpoint = torch.load(f'./models/{model_name}.pth', map_location=device)
    model.load_state_dict(checkpoint['model'])
    best_acc = checkpoint['acc']
    # The checkpoint is written after its epoch has finished training and
    # evaluation, so continue with the following epoch instead of repeating it.
    # Only the model weights are restored; the optimizer and scheduler start fresh.
    start_epoch = checkpoint['epoch'] + 1

In [102]:
### Train the full model (backbone + classifier) end to end

# Each epoch: one training pass, one evaluation pass (which may write a
# checkpoint), then one scheduler step.
num_epochs = 200
for epoch in range(start_epoch, start_epoch + num_epochs):
    train(epoch)
    test(epoch)
    scheduler.step()

100%|██████████| 391/391 [00:38<00:00, 10.20it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 0 Loss: 2.303 | Acc: 15.002 7501/50000


100%|██████████| 79/79 [00:02<00:00, 27.76it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 0 Loss: 2.303 | Acc: 27.990 2799/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.21it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 1 Loss: 2.302 | Acc: 27.216 13608/50000


100%|██████████| 79/79 [00:02<00:00, 27.66it/s]


[Test] 1 Loss: 2.302 | Acc: 34.600 3460/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.18it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 2 Loss: 2.302 | Acc: 36.476 18238/50000


100%|██████████| 79/79 [00:02<00:00, 27.69it/s]


[Test] 2 Loss: 2.302 | Acc: 40.450 4045/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.18it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 3 Loss: 2.302 | Acc: 40.384 20192/50000


100%|██████████| 79/79 [00:02<00:00, 27.70it/s]


[Test] 3 Loss: 2.302 | Acc: 47.260 4726/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.16it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 4 Loss: 2.302 | Acc: 48.656 24328/50000


100%|██████████| 79/79 [00:02<00:00, 27.66it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 4 Loss: 2.302 | Acc: 46.820 4682/10000


100%|██████████| 391/391 [00:38<00:00, 10.12it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 5 Loss: 2.302 | Acc: 43.892 21946/50000


100%|██████████| 79/79 [00:02<00:00, 27.68it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 5 Loss: 2.302 | Acc: 42.910 4291/10000


100%|██████████| 391/391 [00:38<00:00, 10.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 6 Loss: 2.302 | Acc: 41.616 20808/50000


100%|██████████| 79/79 [00:02<00:00, 27.62it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 6 Loss: 2.301 | Acc: 42.660 4266/10000


100%|██████████| 391/391 [00:38<00:00, 10.10it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 7 Loss: 2.301 | Acc: 37.888 18944/50000


100%|██████████| 79/79 [00:02<00:00, 27.57it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 7 Loss: 2.300 | Acc: 36.170 3617/10000


100%|██████████| 391/391 [00:38<00:00, 10.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 8 Loss: 2.300 | Acc: 31.622 15811/50000


100%|██████████| 79/79 [00:02<00:00, 27.51it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 8 Loss: 2.299 | Acc: 36.400 3640/10000


100%|██████████| 391/391 [00:38<00:00, 10.08it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 9 Loss: 2.298 | Acc: 34.714 17357/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 9 Loss: 2.298 | Acc: 33.700 3370/10000


100%|██████████| 391/391 [00:38<00:00, 10.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 10 Loss: 2.297 | Acc: 33.208 16604/50000


100%|██████████| 79/79 [00:02<00:00, 27.60it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 10 Loss: 2.295 | Acc: 34.090 3409/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 11 Loss: 2.294 | Acc: 31.858 15929/50000


100%|██████████| 79/79 [00:02<00:00, 27.59it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 11 Loss: 2.293 | Acc: 31.160 3116/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 12 Loss: 2.292 | Acc: 31.070 15535/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 12 Loss: 2.290 | Acc: 31.030 3103/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 13 Loss: 2.288 | Acc: 28.948 14474/50000


100%|██████████| 79/79 [00:02<00:00, 27.52it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 13 Loss: 2.287 | Acc: 30.500 3050/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 14 Loss: 2.286 | Acc: 28.926 14463/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 14 Loss: 2.284 | Acc: 32.660 3266/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 15 Loss: 2.282 | Acc: 28.208 14104/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 15 Loss: 2.281 | Acc: 27.710 2771/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 16 Loss: 2.279 | Acc: 28.274 14137/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 16 Loss: 2.278 | Acc: 28.100 2810/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 17 Loss: 2.276 | Acc: 28.320 14160/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 17 Loss: 2.274 | Acc: 32.470 3247/10000


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 18 Loss: 2.273 | Acc: 28.722 14361/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 18 Loss: 2.271 | Acc: 29.910 2991/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 19 Loss: 2.270 | Acc: 31.710 15855/50000


100%|██████████| 79/79 [00:02<00:00, 27.34it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 19 Loss: 2.268 | Acc: 34.420 3442/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 20 Loss: 2.267 | Acc: 34.652 17326/50000


100%|██████████| 79/79 [00:02<00:00, 27.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 20 Loss: 2.265 | Acc: 36.910 3691/10000


100%|██████████| 391/391 [00:39<00:00, 10.01it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 21 Loss: 2.264 | Acc: 36.066 18033/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 21 Loss: 2.263 | Acc: 38.370 3837/10000


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 22 Loss: 2.261 | Acc: 38.100 19050/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 22 Loss: 2.260 | Acc: 39.800 3980/10000


100%|██████████| 391/391 [00:39<00:00, 10.01it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 23 Loss: 2.258 | Acc: 39.056 19528/50000


100%|██████████| 79/79 [00:02<00:00, 27.33it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 23 Loss: 2.257 | Acc: 40.160 4016/10000


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 24 Loss: 2.256 | Acc: 39.968 19984/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 24 Loss: 2.254 | Acc: 39.600 3960/10000


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 25 Loss: 2.252 | Acc: 40.498 20249/50000


100%|██████████| 79/79 [00:02<00:00, 27.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 25 Loss: 2.250 | Acc: 40.850 4085/10000


100%|██████████| 391/391 [00:39<00:00, 10.01it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 26 Loss: 2.248 | Acc: 40.846 20423/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 26 Loss: 2.246 | Acc: 40.810 4081/10000


100%|██████████| 391/391 [00:39<00:00, 10.01it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 27 Loss: 2.244 | Acc: 40.844 20422/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 27 Loss: 2.243 | Acc: 39.640 3964/10000


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 28 Loss: 2.239 | Acc: 40.646 20323/50000


100%|██████████| 79/79 [00:02<00:00, 27.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 28 Loss: 2.236 | Acc: 40.530 4053/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 29 Loss: 2.233 | Acc: 41.402 20701/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 29 Loss: 2.231 | Acc: 41.240 4124/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 30 Loss: 2.227 | Acc: 41.326 20663/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 30 Loss: 2.224 | Acc: 41.500 4150/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 31 Loss: 2.221 | Acc: 41.412 20706/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 31 Loss: 2.218 | Acc: 41.730 4173/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 32 Loss: 2.214 | Acc: 41.584 20792/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 32 Loss: 2.211 | Acc: 41.110 4111/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 33 Loss: 2.206 | Acc: 42.434 21217/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 33 Loss: 2.203 | Acc: 42.620 4262/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 34 Loss: 2.200 | Acc: 43.236 21618/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 34 Loss: 2.194 | Acc: 43.920 4392/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 35 Loss: 2.192 | Acc: 44.574 22287/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 35 Loss: 2.188 | Acc: 45.220 4522/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 36 Loss: 2.184 | Acc: 45.348 22674/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 36 Loss: 2.182 | Acc: 44.060 4406/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 37 Loss: 2.175 | Acc: 47.050 23525/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]


[Test] 37 Loss: 2.171 | Acc: 51.030 5103/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 38 Loss: 2.166 | Acc: 51.124 25562/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]


[Test] 38 Loss: 2.161 | Acc: 51.810 5181/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 39 Loss: 2.157 | Acc: 54.534 27267/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]


[Test] 39 Loss: 2.152 | Acc: 55.870 5587/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 40 Loss: 2.147 | Acc: 57.428 28714/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]


[Test] 40 Loss: 2.143 | Acc: 57.440 5744/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 41 Loss: 2.138 | Acc: 58.348 29174/50000


100%|██████████| 79/79 [00:02<00:00, 27.56it/s]


[Test] 41 Loss: 2.132 | Acc: 57.920 5792/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 42 Loss: 2.125 | Acc: 59.416 29708/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]


[Test] 42 Loss: 2.118 | Acc: 59.440 5944/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 43 Loss: 2.114 | Acc: 59.872 29936/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 43 Loss: 2.108 | Acc: 59.640 5964/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 44 Loss: 2.103 | Acc: 60.212 30106/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]


[Test] 44 Loss: 2.094 | Acc: 60.740 6074/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 45 Loss: 2.091 | Acc: 60.846 30423/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 45 Loss: 2.087 | Acc: 60.000 6000/10000


100%|██████████| 391/391 [00:38<00:00, 10.07it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 46 Loss: 2.080 | Acc: 60.778 30389/50000


100%|██████████| 79/79 [00:02<00:00, 27.37it/s]


[Test] 46 Loss: 2.072 | Acc: 61.130 6113/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 47 Loss: 2.066 | Acc: 61.580 30790/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]


[Test] 47 Loss: 2.057 | Acc: 61.760 6176/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 48 Loss: 2.054 | Acc: 61.764 30882/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 48 Loss: 2.045 | Acc: 62.450 6245/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 49 Loss: 2.041 | Acc: 62.280 31140/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]


[Test] 49 Loss: 2.031 | Acc: 62.760 6276/10000
Saving..


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 50 Loss: 2.029 | Acc: 62.528 31264/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 50 Loss: 2.024 | Acc: 61.810 6181/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 51 Loss: 2.020 | Acc: 62.300 31150/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 51 Loss: 2.017 | Acc: 61.730 6173/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 52 Loss: 2.009 | Acc: 62.524 31262/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 52 Loss: 2.006 | Acc: 62.100 6210/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 53 Loss: 1.998 | Acc: 62.962 31481/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]


[Test] 53 Loss: 1.991 | Acc: 62.820 6282/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 54 Loss: 1.989 | Acc: 63.156 31578/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 54 Loss: 1.991 | Acc: 61.870 6187/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 55 Loss: 1.978 | Acc: 63.524 31762/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]


[Test] 55 Loss: 1.971 | Acc: 63.610 6361/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 56 Loss: 1.971 | Acc: 63.388 31694/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 56 Loss: 1.972 | Acc: 61.930 6193/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 57 Loss: 1.962 | Acc: 63.340 31670/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 57 Loss: 1.958 | Acc: 63.090 6309/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 58 Loss: 1.952 | Acc: 63.756 31878/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 58 Loss: 1.963 | Acc: 60.950 6095/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 59 Loss: 1.948 | Acc: 63.338 31669/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 59 Loss: 1.945 | Acc: 63.010 6301/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 60 Loss: 1.938 | Acc: 64.048 32024/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]


[Test] 60 Loss: 1.933 | Acc: 64.060 6406/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 61 Loss: 1.931 | Acc: 64.130 32065/50000


100%|██████████| 79/79 [00:02<00:00, 27.51it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 61 Loss: 1.929 | Acc: 63.860 6386/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 62 Loss: 1.922 | Acc: 64.564 32282/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]


[Test] 62 Loss: 1.920 | Acc: 64.460 6446/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 63 Loss: 1.918 | Acc: 64.200 32100/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 63 Loss: 1.925 | Acc: 62.620 6262/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 64 Loss: 1.910 | Acc: 64.564 32282/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 64 Loss: 1.902 | Acc: 64.920 6492/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 65 Loss: 1.902 | Acc: 64.652 32326/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 65 Loss: 1.902 | Acc: 64.370 6437/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 66 Loss: 1.897 | Acc: 64.770 32385/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 66 Loss: 1.900 | Acc: 64.130 6413/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 67 Loss: 1.888 | Acc: 65.240 32620/50000


100%|██████████| 79/79 [00:02<00:00, 27.57it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 67 Loss: 1.888 | Acc: 64.420 6442/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 68 Loss: 1.886 | Acc: 64.686 32343/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 68 Loss: 1.893 | Acc: 63.230 6323/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 69 Loss: 1.877 | Acc: 67.148 33574/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]


[Test] 69 Loss: 1.878 | Acc: 67.860 6786/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 70 Loss: 1.879 | Acc: 67.660 33830/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 70 Loss: 1.876 | Acc: 66.960 6696/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 71 Loss: 1.868 | Acc: 68.638 34319/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 71 Loss: 1.872 | Acc: 67.600 6760/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 72 Loss: 1.865 | Acc: 68.544 34272/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 72 Loss: 1.871 | Acc: 67.260 6726/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 73 Loss: 1.861 | Acc: 68.352 34176/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 73 Loss: 1.862 | Acc: 67.910 6791/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 74 Loss: 1.853 | Acc: 68.968 34484/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]


[Test] 74 Loss: 1.858 | Acc: 68.170 6817/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 75 Loss: 1.850 | Acc: 69.066 34533/50000


100%|██████████| 79/79 [00:02<00:00, 27.36it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 75 Loss: 1.861 | Acc: 67.660 6766/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 76 Loss: 1.846 | Acc: 69.278 34639/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]


[Test] 76 Loss: 1.845 | Acc: 69.670 6967/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 77 Loss: 1.841 | Acc: 69.916 34958/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]


[Test] 77 Loss: 1.837 | Acc: 69.760 6976/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 78 Loss: 1.836 | Acc: 70.320 35160/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 78 Loss: 1.846 | Acc: 68.500 6850/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 79 Loss: 1.834 | Acc: 70.296 35148/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 79 Loss: 1.833 | Acc: 69.520 6952/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 80 Loss: 1.826 | Acc: 70.978 35489/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]


[Test] 80 Loss: 1.828 | Acc: 70.200 7020/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 81 Loss: 1.826 | Acc: 70.832 35416/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 81 Loss: 1.830 | Acc: 69.730 6973/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 82 Loss: 1.822 | Acc: 71.026 35513/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]


[Test] 82 Loss: 1.823 | Acc: 70.580 7058/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 83 Loss: 1.817 | Acc: 71.350 35675/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 83 Loss: 1.831 | Acc: 69.760 6976/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 84 Loss: 1.815 | Acc: 71.438 35719/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 84 Loss: 1.825 | Acc: 69.920 6992/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 85 Loss: 1.812 | Acc: 71.558 35779/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 85 Loss: 1.819 | Acc: 70.190 7019/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 86 Loss: 1.810 | Acc: 71.572 35786/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]


[Test] 86 Loss: 1.812 | Acc: 71.020 7102/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 87 Loss: 1.808 | Acc: 71.582 35791/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]


[Test] 87 Loss: 1.807 | Acc: 71.300 7130/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 88 Loss: 1.803 | Acc: 72.062 36031/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]


[Test] 88 Loss: 1.805 | Acc: 71.480 7148/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 89 Loss: 1.795 | Acc: 72.740 36370/50000


100%|██████████| 79/79 [00:02<00:00, 27.51it/s]


[Test] 89 Loss: 1.805 | Acc: 71.600 7160/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 90 Loss: 1.795 | Acc: 72.634 36317/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 90 Loss: 1.808 | Acc: 70.690 7069/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 91 Loss: 1.796 | Acc: 72.312 36156/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 91 Loss: 1.803 | Acc: 71.230 7123/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 92 Loss: 1.793 | Acc: 72.442 36221/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 92 Loss: 1.804 | Acc: 71.220 7122/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 93 Loss: 1.787 | Acc: 72.906 36453/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]


[Test] 93 Loss: 1.788 | Acc: 72.690 7269/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 94 Loss: 1.783 | Acc: 73.212 36606/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 94 Loss: 1.793 | Acc: 72.020 7202/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 95 Loss: 1.783 | Acc: 73.118 36559/50000


100%|██████████| 79/79 [00:02<00:00, 27.50it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 95 Loss: 1.789 | Acc: 72.330 7233/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 96 Loss: 1.775 | Acc: 73.808 36904/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]


[Test] 96 Loss: 1.781 | Acc: 72.910 7291/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 97 Loss: 1.782 | Acc: 72.790 36395/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 97 Loss: 1.789 | Acc: 71.870 7187/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 98 Loss: 1.775 | Acc: 73.508 36754/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 98 Loss: 1.790 | Acc: 71.380 7138/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 99 Loss: 1.773 | Acc: 73.598 36799/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 99 Loss: 1.786 | Acc: 71.700 7170/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 100 Loss: 1.764 | Acc: 74.462 37231/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 100 Loss: 1.777 | Acc: 72.680 7268/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 101 Loss: 1.764 | Acc: 74.308 37154/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 101 Loss: 1.781 | Acc: 72.150 7215/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 102 Loss: 1.764 | Acc: 74.082 37041/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 102 Loss: 1.774 | Acc: 72.840 7284/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 103 Loss: 1.762 | Acc: 74.272 37136/50000


100%|██████████| 79/79 [00:02<00:00, 27.51it/s]


[Test] 103 Loss: 1.769 | Acc: 73.320 7332/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 104 Loss: 1.762 | Acc: 74.244 37122/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]


[Test] 104 Loss: 1.767 | Acc: 73.370 7337/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 105 Loss: 1.760 | Acc: 74.434 37217/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 105 Loss: 1.777 | Acc: 72.240 7224/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 106 Loss: 1.757 | Acc: 74.498 37249/50000


100%|██████████| 79/79 [00:02<00:00, 27.50it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 106 Loss: 1.771 | Acc: 72.720 7272/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 107 Loss: 1.756 | Acc: 74.504 37252/50000


100%|██████████| 79/79 [00:02<00:00, 27.25it/s]


[Test] 107 Loss: 1.762 | Acc: 73.650 7365/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 108 Loss: 1.750 | Acc: 75.160 37580/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 108 Loss: 1.760 | Acc: 73.770 7377/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 109 Loss: 1.748 | Acc: 75.208 37604/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 109 Loss: 1.757 | Acc: 74.170 7417/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 110 Loss: 1.747 | Acc: 75.246 37623/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 110 Loss: 1.760 | Acc: 73.680 7368/10000


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 111 Loss: 1.744 | Acc: 75.508 37754/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 111 Loss: 1.756 | Acc: 74.150 7415/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 112 Loss: 1.743 | Acc: 75.434 37717/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 112 Loss: 1.755 | Acc: 73.980 7398/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 113 Loss: 1.737 | Acc: 75.980 37990/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 113 Loss: 1.757 | Acc: 73.710 7371/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 114 Loss: 1.738 | Acc: 75.912 37956/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]


[Test] 114 Loss: 1.750 | Acc: 74.350 7435/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 115 Loss: 1.734 | Acc: 76.152 38076/50000


100%|██████████| 79/79 [00:02<00:00, 27.50it/s]


[Test] 115 Loss: 1.749 | Acc: 74.420 7442/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 116 Loss: 1.736 | Acc: 75.972 37986/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 116 Loss: 1.747 | Acc: 74.510 7451/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 117 Loss: 1.732 | Acc: 76.208 38104/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 117 Loss: 1.749 | Acc: 74.290 7429/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 118 Loss: 1.733 | Acc: 76.102 38051/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 118 Loss: 1.745 | Acc: 74.520 7452/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 119 Loss: 1.728 | Acc: 76.488 38244/50000


100%|██████████| 79/79 [00:02<00:00, 27.33it/s]


[Test] 119 Loss: 1.743 | Acc: 74.580 7458/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 120 Loss: 1.728 | Acc: 76.556 38278/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]


[Test] 120 Loss: 1.740 | Acc: 75.110 7511/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 121 Loss: 1.722 | Acc: 77.030 38515/50000


100%|██████████| 79/79 [00:02<00:00, 27.51it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 121 Loss: 1.740 | Acc: 75.030 7503/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 122 Loss: 1.728 | Acc: 76.452 38226/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 122 Loss: 1.742 | Acc: 74.610 7461/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 123 Loss: 1.727 | Acc: 76.374 38187/50000


100%|██████████| 79/79 [00:02<00:00, 27.36it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 123 Loss: 1.743 | Acc: 74.450 7445/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 124 Loss: 1.723 | Acc: 76.766 38383/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 124 Loss: 1.740 | Acc: 74.840 7484/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 125 Loss: 1.719 | Acc: 77.112 38556/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]


[Test] 125 Loss: 1.735 | Acc: 75.320 7532/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 126 Loss: 1.720 | Acc: 77.008 38504/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 126 Loss: 1.735 | Acc: 75.270 7527/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 127 Loss: 1.720 | Acc: 76.874 38437/50000


100%|██████████| 79/79 [00:02<00:00, 27.34it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 127 Loss: 1.736 | Acc: 75.140 7514/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 128 Loss: 1.717 | Acc: 77.252 38626/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 128 Loss: 1.729 | Acc: 75.850 7585/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 129 Loss: 1.715 | Acc: 77.306 38653/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 129 Loss: 1.730 | Acc: 75.740 7574/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 130 Loss: 1.713 | Acc: 77.592 38796/50000


100%|██████████| 79/79 [00:02<00:00, 27.15it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 130 Loss: 1.731 | Acc: 75.470 7547/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 131 Loss: 1.712 | Acc: 77.616 38808/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]


[Test] 131 Loss: 1.726 | Acc: 76.140 7614/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 132 Loss: 1.709 | Acc: 77.876 38938/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 132 Loss: 1.726 | Acc: 75.860 7586/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 133 Loss: 1.710 | Acc: 77.750 38875/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 133 Loss: 1.724 | Acc: 76.110 7611/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 134 Loss: 1.710 | Acc: 77.752 38876/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 134 Loss: 1.726 | Acc: 76.030 7603/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 135 Loss: 1.705 | Acc: 78.250 39125/50000


100%|██████████| 79/79 [00:02<00:00, 27.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 135 Loss: 1.724 | Acc: 76.040 7604/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 136 Loss: 1.708 | Acc: 77.784 38892/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 136 Loss: 1.724 | Acc: 75.920 7592/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 137 Loss: 1.708 | Acc: 77.790 38895/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 137 Loss: 1.730 | Acc: 75.270 7527/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 138 Loss: 1.706 | Acc: 77.880 38940/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 138 Loss: 1.726 | Acc: 75.600 7560/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 139 Loss: 1.706 | Acc: 77.892 38946/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 139 Loss: 1.723 | Acc: 75.970 7597/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 140 Loss: 1.704 | Acc: 78.162 39081/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 140 Loss: 1.722 | Acc: 76.090 7609/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 141 Loss: 1.701 | Acc: 78.410 39205/50000


100%|██████████| 79/79 [00:02<00:00, 27.19it/s]


[Test] 141 Loss: 1.717 | Acc: 76.700 7670/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 142 Loss: 1.701 | Acc: 78.358 39179/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]


[Test] 142 Loss: 1.715 | Acc: 76.770 7677/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 143 Loss: 1.699 | Acc: 78.504 39252/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 143 Loss: 1.720 | Acc: 76.170 7617/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 144 Loss: 1.701 | Acc: 78.306 39153/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 144 Loss: 1.719 | Acc: 76.340 7634/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 145 Loss: 1.700 | Acc: 78.366 39183/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 145 Loss: 1.720 | Acc: 76.100 7610/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 146 Loss: 1.698 | Acc: 78.560 39280/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 146 Loss: 1.719 | Acc: 76.360 7636/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 147 Loss: 1.698 | Acc: 78.516 39258/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 147 Loss: 1.716 | Acc: 76.570 7657/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 148 Loss: 1.696 | Acc: 78.730 39365/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 148 Loss: 1.717 | Acc: 76.370 7637/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 149 Loss: 1.696 | Acc: 78.724 39362/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 149 Loss: 1.717 | Acc: 76.420 7642/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 150 Loss: 1.695 | Acc: 78.776 39388/50000


100%|██████████| 79/79 [00:02<00:00, 27.47it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 150 Loss: 1.717 | Acc: 76.480 7648/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 151 Loss: 1.695 | Acc: 78.790 39395/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 151 Loss: 1.714 | Acc: 76.750 7675/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 152 Loss: 1.696 | Acc: 78.696 39348/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 152 Loss: 1.713 | Acc: 76.700 7670/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 153 Loss: 1.694 | Acc: 78.894 39447/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]


[Test] 153 Loss: 1.712 | Acc: 76.830 7683/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 154 Loss: 1.693 | Acc: 78.942 39471/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 154 Loss: 1.712 | Acc: 76.790 7679/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 155 Loss: 1.691 | Acc: 79.116 39558/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]


[Test] 155 Loss: 1.709 | Acc: 77.080 7708/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 156 Loss: 1.691 | Acc: 79.056 39528/50000


100%|██████████| 79/79 [00:02<00:00, 27.46it/s]


[Test] 156 Loss: 1.709 | Acc: 77.210 7721/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 157 Loss: 1.688 | Acc: 79.404 39702/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]


[Test] 157 Loss: 1.706 | Acc: 77.410 7741/10000
Saving..


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 158 Loss: 1.689 | Acc: 79.348 39674/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 158 Loss: 1.706 | Acc: 77.400 7740/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 159 Loss: 1.688 | Acc: 79.456 39728/50000


100%|██████████| 79/79 [00:02<00:00, 27.52it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 159 Loss: 1.708 | Acc: 77.110 7711/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 160 Loss: 1.686 | Acc: 79.552 39776/50000


100%|██████████| 79/79 [00:02<00:00, 27.44it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 160 Loss: 1.709 | Acc: 77.060 7706/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 161 Loss: 1.688 | Acc: 79.356 39678/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 161 Loss: 1.710 | Acc: 77.000 7700/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 162 Loss: 1.686 | Acc: 79.500 39750/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 162 Loss: 1.707 | Acc: 77.180 7718/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 163 Loss: 1.685 | Acc: 79.690 39845/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 163 Loss: 1.707 | Acc: 77.230 7723/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 164 Loss: 1.686 | Acc: 79.606 39803/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 164 Loss: 1.707 | Acc: 77.130 7713/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 165 Loss: 1.686 | Acc: 79.574 39787/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 165 Loss: 1.709 | Acc: 77.150 7715/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 166 Loss: 1.684 | Acc: 79.784 39892/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 166 Loss: 1.707 | Acc: 77.210 7721/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 167 Loss: 1.683 | Acc: 79.892 39946/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 167 Loss: 1.707 | Acc: 77.260 7726/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 168 Loss: 1.683 | Acc: 79.850 39925/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 168 Loss: 1.707 | Acc: 77.230 7723/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 169 Loss: 1.684 | Acc: 79.790 39895/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 169 Loss: 1.706 | Acc: 77.390 7739/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 170 Loss: 1.683 | Acc: 79.872 39936/50000


100%|██████████| 79/79 [00:02<00:00, 27.37it/s]


[Test] 170 Loss: 1.704 | Acc: 77.510 7751/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 171 Loss: 1.682 | Acc: 79.942 39971/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]


[Test] 171 Loss: 1.703 | Acc: 77.590 7759/10000
Saving..


100%|██████████| 391/391 [00:39<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 172 Loss: 1.682 | Acc: 79.906 39953/50000


100%|██████████| 79/79 [00:02<00:00, 27.43it/s]


[Test] 172 Loss: 1.702 | Acc: 77.710 7771/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 173 Loss: 1.683 | Acc: 79.796 39898/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]


[Test] 173 Loss: 1.701 | Acc: 77.800 7780/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 174 Loss: 1.680 | Acc: 80.072 40036/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 174 Loss: 1.704 | Acc: 77.480 7748/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 175 Loss: 1.681 | Acc: 80.038 40019/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 175 Loss: 1.704 | Acc: 77.530 7753/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 176 Loss: 1.680 | Acc: 80.078 40039/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 176 Loss: 1.702 | Acc: 77.710 7771/10000


100%|██████████| 391/391 [00:39<00:00, 10.02it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 177 Loss: 1.680 | Acc: 80.070 40035/50000


100%|██████████| 79/79 [00:02<00:00, 27.37it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 177 Loss: 1.702 | Acc: 77.630 7763/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 178 Loss: 1.680 | Acc: 80.152 40076/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 178 Loss: 1.701 | Acc: 77.710 7771/10000


100%|██████████| 391/391 [00:38<00:00, 10.03it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 179 Loss: 1.681 | Acc: 79.956 39978/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 179 Loss: 1.703 | Acc: 77.590 7759/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 180 Loss: 1.680 | Acc: 80.078 40039/50000


100%|██████████| 79/79 [00:02<00:00, 27.32it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 180 Loss: 1.702 | Acc: 77.650 7765/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 181 Loss: 1.680 | Acc: 80.104 40052/50000


100%|██████████| 79/79 [00:02<00:00, 27.42it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 181 Loss: 1.701 | Acc: 77.730 7773/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 182 Loss: 1.680 | Acc: 80.068 40034/50000


100%|██████████| 79/79 [00:02<00:00, 27.53it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 182 Loss: 1.702 | Acc: 77.670 7767/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 183 Loss: 1.679 | Acc: 80.198 40099/50000


100%|██████████| 79/79 [00:02<00:00, 27.38it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 183 Loss: 1.701 | Acc: 77.740 7774/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 184 Loss: 1.680 | Acc: 80.108 40054/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]


[Test] 184 Loss: 1.700 | Acc: 77.830 7783/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 185 Loss: 1.679 | Acc: 80.172 40086/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 185 Loss: 1.700 | Acc: 77.880 7788/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 186 Loss: 1.677 | Acc: 80.336 40168/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]


[Test] 186 Loss: 1.700 | Acc: 77.900 7790/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 187 Loss: 1.679 | Acc: 80.192 40096/50000


100%|██████████| 79/79 [00:02<00:00, 27.49it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 187 Loss: 1.700 | Acc: 77.830 7783/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 188 Loss: 1.676 | Acc: 80.462 40231/50000


100%|██████████| 79/79 [00:02<00:00, 27.36it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 188 Loss: 1.700 | Acc: 77.810 7781/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 189 Loss: 1.678 | Acc: 80.302 40151/50000


100%|██████████| 79/79 [00:02<00:00, 27.51it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 189 Loss: 1.701 | Acc: 77.750 7775/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 190 Loss: 1.680 | Acc: 80.070 40035/50000


100%|██████████| 79/79 [00:02<00:00, 27.45it/s]


[Test] 190 Loss: 1.699 | Acc: 77.940 7794/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 191 Loss: 1.676 | Acc: 80.446 40223/50000


100%|██████████| 79/79 [00:02<00:00, 27.41it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 191 Loss: 1.700 | Acc: 77.870 7787/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 192 Loss: 1.678 | Acc: 80.318 40159/50000


100%|██████████| 79/79 [00:02<00:00, 27.40it/s]


[Test] 192 Loss: 1.699 | Acc: 77.950 7795/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 193 Loss: 1.680 | Acc: 80.074 40037/50000


100%|██████████| 79/79 [00:02<00:00, 27.48it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 193 Loss: 1.700 | Acc: 77.880 7788/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 194 Loss: 1.679 | Acc: 80.212 40106/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 194 Loss: 1.700 | Acc: 77.880 7788/10000


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 195 Loss: 1.678 | Acc: 80.274 40137/50000


100%|██████████| 79/79 [00:02<00:00, 27.39it/s]


[Test] 195 Loss: 1.699 | Acc: 78.030 7803/10000
Saving..


100%|██████████| 391/391 [00:38<00:00, 10.06it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 196 Loss: 1.678 | Acc: 80.286 40143/50000


100%|██████████| 79/79 [00:02<00:00, 27.07it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 196 Loss: 1.700 | Acc: 77.930 7793/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 197 Loss: 1.677 | Acc: 80.348 40174/50000


100%|██████████| 79/79 [00:02<00:00, 27.14it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 197 Loss: 1.700 | Acc: 77.850 7785/10000


100%|██████████| 391/391 [00:38<00:00, 10.04it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 198 Loss: 1.678 | Acc: 80.300 40150/50000


100%|██████████| 79/79 [00:02<00:00, 27.21it/s]
  0%|          | 0/391 [00:00<?, ?it/s]

[Test] 198 Loss: 1.700 | Acc: 77.830 7783/10000


100%|██████████| 391/391 [00:38<00:00, 10.05it/s]
  0%|          | 0/79 [00:00<?, ?it/s]

[Train] 199 Loss: 1.677 | Acc: 80.426 40213/50000


100%|██████████| 79/79 [00:02<00:00, 26.95it/s]

[Test] 199 Loss: 1.699 | Acc: 77.950 7795/10000





In [103]:
# Display the best test accuracy (%) tracked so far.
# NOTE(review): presumably updated by the training loop whenever the log prints
# "Saving.." — confirm against the training cell.
best_acc

78.03

In [104]:
# Ends the session here so nothing further in the notebook runs after training.
# NOTE(review): the cells that follow carry lower execution counts
# (In [55]-[69]), so they appear to come from an earlier session — confirm
# before relying on "Run All".
exit(0)

In [68]:
# Archived architecture variant, kept as an inert string literal (never executed):
# ActNorm2D + ConvResidualFlow stages separated by InvertiblePooling(2), ending
# in Flatten(img_size=(192, 4, 4)), i.e. 192*4*4 = 3072 features.
# NOTE(review): the two accuracy columns below are unlabelled — presumably two
# runs or two metrics; confirm.
'''
flows = [
    ActNorm2D(3),
    irf.ConvResidualFlow(3, [32, 32], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    ActNorm2D(12),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    ActNorm2D(48),
    irf.ConvResidualFlow(48, [128], activation=actf),
    irf.ConvResidualFlow(48, [128], activation=actf),
    irf.InvertiblePooling(2),
    ActNorm2D(192),
    irf.ConvResidualFlow(192, [128], activation=actf),
    irf.ConvResidualFlow(192, [128], activation=actf),
    irf.Flatten(img_size=(192, 4, 4))
        ]
'''
### ACC: 76.6? | 69.95 --> Inv + Connected Linear
### ACC: 67.03 | 67.61 --> Ord + Connected Linear


# Archived architecture variant, kept as an inert string literal (never executed):
# ActNorm2D + ConvResidualFlow stages (all with kernels=5) separated by
# InvertiblePooling(2), then Flatten to 192*4*4 = 3072 features and a final
# ResidualFlow(3072, [4096]).
# The accuracy slots below were left empty (no result recorded).
'''
flows = [
    ActNorm2D(3),
    irf.ConvResidualFlow(3, [32, 32], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    ActNorm2D(12),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    ActNorm2D(48),
    irf.ConvResidualFlow(48, [128, 128], kernels=5, activation=actf),
    irf.ConvResidualFlow(48, [128, 128], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
    ActNorm2D(192),
    irf.ConvResidualFlow(192, [256, 256], kernels=5, activation=actf),
    irf.ConvResidualFlow(192, [256, 256], kernels=5, activation=actf),
    irf.Flatten(img_size=(192, 4, 4)),
    irf.ResidualFlow(3072, [4096], activation=actf),
        ]
'''
### ACC:  --> Inv + Connected Linear
### ACC:  --> Ord + Connected Linear

# Archived architecture variant, kept as an inert string literal (never executed):
# BatchNorm2d in front of every ConvResidualFlow (the ActNorm2D alternatives are
# left commented inside the string), InvertiblePooling(2) between resolutions,
# then Flatten to 3072 features followed by BatchNorm1d -> bias-free Linear ->
# BatchNorm1d, wrapped into `backbone = nn.Sequential(*flows)`.
# The accuracy slots below were left empty (no result recorded).
'''
actf = irf.Swish
flows = [
#     ActNorm2D(3),
    nn.BatchNorm2d(3),
    irf.ConvResidualFlow(3, [32, 32], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
#     ActNorm2D(12),
    nn.BatchNorm2d(12),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
#     ActNorm2D(12),
    nn.BatchNorm2d(12),
    irf.ConvResidualFlow(12, [64, 64], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
#     ActNorm2D(48),
    nn.BatchNorm2d(48),
    irf.ConvResidualFlow(48, [128, 128], kernels=5, activation=actf),
#     ActNorm2D(48),
    nn.BatchNorm2d(48),
    irf.ConvResidualFlow(48, [128, 128], kernels=5, activation=actf),
    irf.InvertiblePooling(2),
#     ActNorm2D(192),
    nn.BatchNorm2d(192),
    irf.ConvResidualFlow(192, [256, 256], kernels=5, activation=actf),
#     ActNorm2D(192),
    nn.BatchNorm2d(192),
    irf.ConvResidualFlow(192, [256, 256], kernels=5, activation=actf),
    irf.Flatten(img_size=(192, 4, 4)),
#     ActNorm(3072),
    nn.BatchNorm1d(3072),
    nn.Linear(3072, 3072, bias=False),
    nn.BatchNorm1d(3072),
        ]

backbone = nn.Sequential(*flows)
'''

### ACC:  --> Inv + Connected Distance
### ACC:  --> Ord + Connected Distance
# print() returns None, so the cell ends without echoing a value; presumably
# added to keep the string literals above from being displayed as the cell output.
print()




In [69]:
# Inspect the classifier's `inv_temp` attribute.
# NOTE(review): the recorded run of this cell raised AttributeError because
# `classifier` was a `Sequential` object at that point; `inv_temp` presumably
# exists only on a custom classifier variant — confirm before re-running.
classifier.inv_temp

AttributeError: 'Sequential' object has no attribute 'inv_temp'

In [55]:
# Load the saved checkpoint for `model_name` and read back its 'acc' and
# 'epoch' entries.
# map_location remaps tensors stored in the file onto the configured `device`,
# so the checkpoint still loads when the GPU it was saved from is unavailable.
checkpoint = torch.load(f'./models/{model_name}.pth', map_location=device)
best_acc = checkpoint['acc']
start_epoch = checkpoint['epoch']

# Last expression: shown as the cell output.
best_acc, start_epoch

(82.79, 194)

### Hard test accuracy with count per classifier

In [56]:
# Hard-assignment accuracy on the test set, plus how many samples each
# classifier (set) is selected for.
#
# Run in eval mode explicitly instead of relying on whatever mode the last
# executed cell left behind: the model definition in this notebook uses
# BatchNorm layers, whose output depends on the mode. The previous modes are
# restored afterwards. Assumes `backbone` and `classifier` are nn.Module
# instances.
backbone_was_training, classifier_was_training = backbone.training, classifier.training
backbone.eval()
classifier.eval()

test_count = 0
test_acc = 0
set_count = torch.zeros(classifier.num_sets).to(device)
with torch.no_grad():
    for xx, yy in tqdm(test_loader):
        xx, yy = xx.to(device), yy.to(device)
        yout = classifier(backbone(xx), hard=True)
        # cls_confidence is read from the classifier right after the forward
        # call; its argmax along dim=1 picks one set index per sample.
        set_indx, count = torch.unique(torch.argmax(classifier.cls_confidence, dim=1), return_counts=True)
        set_count[set_indx] += count
        # Compare on-device; only the scalar number of hits is moved to the host.
        outputs = torch.argmax(yout, dim=1)
        correct = (outputs == yy).sum().item()
        test_acc += correct
        test_count += len(xx)

backbone.train(backbone_was_training)
classifier.train(classifier_was_training)

print(f'Hard Test Acc:{float(test_acc)/test_count*100:.2f}%')
print(set_count.type(torch.long).tolist())

100%|██████████| 79/79 [00:02<00:00, 26.95it/s]

Hard Test Acc:82.50%
[266, 0, 0, 2, 0, 991, 1, 0, 0, 0, 0, 0, 0, 972, 0, 0, 0, 0, 16, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19, 0, 0, 2, 0, 0, 0, 826, 5, 7, 1013, 0, 8, 0, 967, 0, 4, 0, 0, 0, 0, 0, 0, 0, 792, 0, 0, 1122, 0, 0, 963, 0, 30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 0, 0, 982, 0, 0, 0, 0, 0, 11, 12, 0, 7, 0, 959]





### Hard train accuracy with count per classifier

In [57]:
# Hard-assignment accuracy on the training set, plus how many samples each
# classifier (set) is selected for. The `test_*` variable names are kept from
# the test-set cell. Note that `train_loader` applies the random crop/flip
# augmentation defined at the top of the notebook, so the figure is measured
# on augmented samples.
#
# Run in eval mode explicitly: in train mode BatchNorm layers use per-batch
# statistics and update their running statistics on every forward pass, even
# under no_grad, so a pass over the training set would silently modify the
# model. The previous modes are restored afterwards. Assumes `backbone` and
# `classifier` are nn.Module instances.
backbone_was_training, classifier_was_training = backbone.training, classifier.training
backbone.eval()
classifier.eval()

test_count = 0
test_acc = 0
set_count = torch.zeros(classifier.num_sets).to(device)
with torch.no_grad():
    for xx, yy in tqdm(train_loader):
        xx, yy = xx.to(device), yy.to(device)
        yout = classifier(backbone(xx), hard=True)
        # cls_confidence is read from the classifier right after the forward
        # call; its argmax along dim=1 picks one set index per sample.
        set_indx, count = torch.unique(torch.argmax(classifier.cls_confidence, dim=1), return_counts=True)
        set_count[set_indx] += count
        # Compare on-device; only the scalar number of hits is moved to the host.
        outputs = torch.argmax(yout, dim=1)
        correct = (outputs == yy).sum().item()
        test_acc += correct
        test_count += len(xx)

backbone.train(backbone_was_training)
classifier.train(classifier_was_training)

print(f'Hard Train Acc:{float(test_acc)/test_count*100:.2f}%')
print(set_count.type(torch.long).tolist())

100%|██████████| 391/391 [00:14<00:00, 27.51it/s]

Hard Train Acc:88.49%
[1285, 0, 0, 2, 0, 4978, 2, 3, 0, 0, 0, 0, 0, 4903, 0, 0, 0, 0, 83, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, 2, 0, 0, 0, 4286, 9, 29, 5067, 0, 28, 0, 4957, 0, 18, 0, 0, 0, 0, 0, 0, 0, 3910, 0, 0, 5219, 0, 0, 4907, 0, 165, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 72, 0, 0, 4921, 0, 0, 0, 0, 0, 50, 27, 0, 34, 0, 4941]





In [58]:
#### Number of classifiers that were selected for at least one sample
set_count.count_nonzero()

tensor(30, device='cuda:1')

In [59]:
#### Class index with the largest weight for each classifier (argmax over dim 1 of cls_weight)
classifier.cls_weight.argmax(dim=1)

tensor([0, 2, 4, 9, 3, 1, 6, 1, 5, 3, 9, 3, 3, 2, 3, 6, 5, 7, 5, 1, 6, 0, 3, 2,
        8, 2, 8, 0, 0, 2, 8, 4, 0, 1, 8, 6, 2, 4, 1, 0, 0, 3, 6, 9, 1, 8, 9, 5,
        7, 5, 0, 4, 8, 2, 9, 9, 7, 3, 4, 8, 8, 2, 0, 7, 2, 0, 0, 9, 0, 3, 9, 6,
        4, 3, 5, 0, 7, 8, 5, 9, 8, 2, 5, 0, 3, 4, 8, 8, 9, 9, 9, 8, 7, 4, 6, 7,
        0, 5, 7, 6], device='cuda:1')

### Analyze per-classifier accuracy

In [None]:
# Per-classifier statistics on the training set:
#   set_count[k] - number of samples for which classifier k is selected
#   set_acc[k]   - number of those samples that were predicted correctly
# (set_acc / set_count therefore gives the accuracy of each classifier).
#
# Run in eval mode explicitly: in train mode BatchNorm layers use per-batch
# statistics and update their running statistics on every forward pass, even
# under no_grad. The previous modes are restored afterwards. Assumes `backbone`
# and `classifier` are nn.Module instances.
backbone_was_training, classifier_was_training = backbone.training, classifier.training
backbone.eval()
classifier.eval()

test_count = 0
test_acc = 0
set_count = torch.zeros(classifier.num_sets).to(device)
set_acc = torch.zeros(classifier.num_sets).to(device)
with torch.no_grad():
    for xx, yy in tqdm(train_loader):
        xx, yy = xx.to(device), yy.to(device)
        yout = classifier(backbone(xx), hard=True)

        # One selected classifier index per sample (argmax along dim=1 of the
        # confidences the classifier exposes after the forward call).
        cls_indx = torch.argmax(classifier.cls_confidence, dim=1)

        # 1.0 where the prediction matches the label, 0.0 otherwise.
        outputs = torch.argmax(yout, dim=1)
        correct = (outputs == yy).to(set_acc.dtype)

        # Scatter-add the whole batch at once instead of indexing the device
        # tensor once per sample in a Python loop.
        set_count.index_add_(0, cls_indx, torch.ones_like(correct))
        set_acc.index_add_(0, cls_indx, correct)

        test_acc += correct.sum().item()
        test_count += len(xx)

backbone.train(backbone_was_training)
classifier.train(classifier_was_training)

print(f'Hard Train Acc:{float(test_acc)/test_count*100:.2f}%')
print(set_count.type(torch.long).tolist())

In [None]:
# set_acc/set_count

In [None]:
# One line per classifier that received at least one sample:
# index, sample count, argmax class of its cls_weight row, accuracy in percent.
counts_per_set = set_count.long().tolist()
# 0/0 yields NaN for classifiers with no samples; those rows are skipped below.
acc_per_set = (set_acc / set_count).tolist()
class_per_set = classifier.cls_weight.argmax(dim=1).tolist()

rows = zip(counts_per_set, acc_per_set, class_per_set)
for i, (cnt, acc, cls) in enumerate(rows):
    if cnt != 0:
        print(f"{i},\t {cnt},\t {cls}\t {acc*100:.2f}%")