In [1]:
import os, sys, pathlib, random, time, pickle, copy, json
import itertools

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets, transforms
# from tqdm.autonotebook import tqdm
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch.optim as optim
from torch.utils import data

In [3]:
from transformers_lib import TransformerBlock, \
        Mixer_TransformerBlock_Encoder, \
        PositionalEncoding

In [4]:
class TinyImageNet_Preload(data.Dataset):
    """Image-folder dataset that can optionally cache every sample in memory.

    Adapted from https://gist.github.com/z-a-f/b862013c0dc2b540cf96a123a6766e54

    The samples are read from ``os.path.join(root, mode)`` through
    ``torchvision.datasets.ImageFolder`` (created without a transform); the
    user-supplied ``transform`` is applied lazily in ``__getitem__`` so that
    random augmentations are re-drawn on every access, even when preloaded.

    Args:
        root: Directory containing one sub-directory per split.
        mode: Name of the split sub-directory (e.g. ``'train'`` or ``'val'``).
        transform: Optional callable applied to each image on access.
        preload: When True, every (image, label) pair is decoded once in
            ``__init__`` and kept in ``self.images`` / ``self.labels``.
    """

    def __init__(self, root, mode='train', transform=None, preload=False):
        super().__init__()
        self.preload = preload
        dataset = datasets.ImageFolder(
            root=os.path.join(root, mode),
            transform=None
        )
        self.transform = transform
        self.images, self.labels = [], []
        print("Dataset Size:",len(dataset))

        if preload:
            for i in tqdm(range(len(dataset))):
                x, y = dataset[i]
                self.images.append(x)
                self.labels.append(y)

        # Kept even when preloading: __len__ and the lazy path both rely on it.
        self.dataset = dataset

    @staticmethod
    def _add_channels(img, total_channels=3):
        """Pad an array image to ``total_channels`` channels on the last axis.

        A trailing axis is appended until the array is 3-D, then the last
        channel is repeated until the last axis has ``total_channels`` entries.
        Arrays that already have enough channels are returned unchanged.
        """
        while len(img.shape) < 3:  # third axis is the channels
            img = np.expand_dims(img, axis=-1)
        while img.shape[-1] < total_channels:
            img = np.concatenate([img, img[:, :, -1:]], axis=-1)
        return img

    def __len__(self):
        """Return the number of samples found by ``ImageFolder``."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if one was given."""
        if self.preload:
            img, lbl = self.images[idx], self.labels[idx]
        else:
            img, lbl = self.dataset[idx]
        if self.transform is not None:
            img = self.transform(img)
        return img, lbl

# Model

In [5]:
class Mixer_ViT_Classifier(nn.Module):
    """Patch-based image classifier built from ``Mixer_TransformerBlock_Encoder`` blocks.

    The image is cut into non-overlapping patches with ``nn.Unfold``, each
    flattened patch is projected to ``hidden_channel`` features, an optional
    positional encoding is applied, the sequence is passed through
    ``num_blocks`` encoder blocks, and the flattened result is mapped to
    ``num_classes`` logits by a single linear layer.

    Args:
        image_dim: ``(C, H, W)`` or ``(H, W)`` (treated as one channel).
        patch_size: ``(ph, pw)``; must divide ``H`` and ``W`` exactly.
        hidden_channel: Feature size of every patch token.
        num_blocks: Number of encoder blocks.
        num_classes: Size of the output layer.
        block_seq_size: Forwarded to the encoder block; when ``None`` or
            smaller than 2 it defaults to the smallest power of two that is
            >= sqrt(number of patches).
        block_mlp_size: Forwarded unchanged to the encoder block.
        forward_expansion: Forwarded unchanged to the encoder block.
        pos_emb: When False the positional encoding is replaced by identity.
        dropout: Forwarded unchanged to the encoder block.
    """

    def __init__(self, image_dim:tuple, patch_size:tuple, hidden_channel:int, num_blocks:int, num_classes:int, block_seq_size:int, block_mlp_size:int, forward_expansion:float=2.0, pos_emb=True, dropout:float=0.0):
        super().__init__()

        self.img_dim = image_dim ### must contain (C, H, W) or (H, W)

        ### find patch dim
        d0, rem0 = divmod(image_dim[-2], patch_size[0])
        d1, rem1 = divmod(image_dim[-1], patch_size[1])
        assert rem0 == 0, "Image must be divisible into patch size"
        assert rem1 == 0, "Image must be divisible into patch size"

        ## a bare (H, W) image has a single implicit channel
        in_channels = image_dim[-3] if len(image_dim) >= 3 else 1
        init_dim = patch_size[0]*patch_size[1]*in_channels ## values per flattened patch
        final_dim = hidden_channel

        self.unfold = nn.Unfold(kernel_size=patch_size, stride=patch_size)
        #### rescale the patches (patch wise image non preserving transform, unlike bilinear interpolation)
        self.channel_change = nn.Linear(init_dim, final_dim)
        print(f"ViT Mixer : Channels per patch -> Initial:{init_dim} Final:{final_dim}")

        self.channel_dim = final_dim
        self.patch_dim = d0*d1 ## number of patches == sequence length

        ## head count: the divisor of channel_dim closest to sqrt(channel_dim)
        f = self.get_factors(self.channel_dim)
        print(f)
        if f:
            fi = np.abs(np.array(f) - np.sqrt(self.channel_dim)).argmin()
            _n_heads = f[fi]
        else:
            ## channel_dim == 1 has no divisor >= 2
            _n_heads = 1

        ## resolve the default BEFORE logging so the log shows the value in use
        if block_seq_size is None or block_seq_size<2:
            block_seq_size = int(2**np.ceil(np.log2(np.sqrt(self.patch_dim))))

        print(f'Sequence len: {self.patch_dim} ; Block size: {block_seq_size}')
        print('Channel dim:', self.channel_dim, 'num heads:',_n_heads)
        print(f'MLP dim: {self.channel_dim} ; Block size: {block_mlp_size}')

        ## NOTE(review): argument meaning is defined in transformers_lib, which is
        ## not visible here; the positional order is kept exactly as before.
        blocks = []
        for i in range(num_blocks):
            L = Mixer_TransformerBlock_Encoder(self.patch_dim, block_seq_size, self.channel_dim, _n_heads, dropout, forward_expansion, nn.GELU, block_mlp_size)
            blocks.append(L)
        self.transformer_blocks = nn.Sequential(*blocks)

        self.linear = nn.Linear(self.patch_dim*self.channel_dim, num_classes)

        ## built unconditionally (as before) so module construction order is unchanged
        self.positional_encoding = PositionalEncoding(self.channel_dim, dropout=0)
        if not pos_emb:
            self.positional_encoding = nn.Identity()


    def get_factors(self, n):
        """Return every divisor of ``n`` in ``[2, n]`` in ascending order (1 is excluded)."""
        facts = []
        for i in range(2, n+1):
            if n%i == 0:
                facts.append(i)
        return facts

    def forward(self, x):
        """Map a batch of images to class logits of shape ``(batch, num_classes)``."""
        bs = x.shape[0]
        if len(self.img_dim) == 2 and x.dim() == 3:
            ## (B, H, W) -> (B, 1, H, W): Unfold needs an explicit channel axis
            x = x.unsqueeze(1)
        x = self.unfold(x).swapaxes(-1, -2)  ## (B, num_patches, values_per_patch)
        x = self.channel_change(x)
        x = self.positional_encoding(x)
        x = self.transformer_blocks(x)
        ## reshape (not view) so a non-contiguous block output cannot raise
        x = self.linear(x.reshape(bs, -1))
        return x

In [6]:
# Default device for interactive cells: first GPU when CUDA is available, CPU otherwise.
# (benchmark() builds its own device from its `cuda` argument.)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [7]:
# Sanity check: display how many CUDA devices PyTorch reports.
torch.cuda.device_count()

2

In [8]:
# Sanity check: display the name PyTorch reports for CUDA device 0.
torch.cuda.get_device_name(0)

'NVIDIA GeForce RTX 3090'

In [9]:
# model = Mixer_ViT_Classifier([3, 32, 32], [1, 1], 64, num_blocks=2, num_classes=10, 
#                             block_seq_size=32, block_mlp_size=None, pos_emb=False).to(device)

In [10]:
# model

In [11]:
# print("number of params: ", sum(p.numel() for p in model.parameters()))

In [12]:
# %timeit model(torch.randn(32, 3, 32, 32).to(device)).mean().backward()

In [13]:
# %timeit model(torch.randn(32, 3, 32, 32).to(device))

In [14]:
# with torch.no_grad():
#     %timeit model(torch.randn(32, 3, 32, 32).to(device))

In [15]:
# model = Mixer_ViT_Classifier([3, 32, 32], [1, 1], 64, num_blocks=4, num_classes=10, 
#                             block_seq_size=1024, block_mlp_size=None, pos_emb=False).to(device)

In [16]:
# model

In [17]:
# print("number of params: ", sum(p.numel() for p in model.parameters()))

In [18]:
# %timeit model(torch.randn(32, 3, 32, 32).to(device)).mean().backward()

In [19]:
# %timeit model(torch.randn(32, 3, 32, 32).to(device))

In [20]:
# with torch.no_grad():
#     %timeit model(torch.randn(32, 3, 32, 32).to(device))

In [21]:
# asas

In [22]:
def experiment_skip(model_name, ep):
    """Tell whether the experiment ``model_name`` already ran for ``ep`` epochs.

    Reads ``./output/benchmark/{model_name}_data.json`` and looks at the epoch
    index stored in the last ``train_stat`` entry.

    Args:
        model_name: Experiment identifier used in the stats file name.
        ep: Total number of epochs the experiment is supposed to run.

    Returns:
        True when the last recorded epoch index is at least ``ep - 1``.
        False when the file is missing, unreadable as JSON, or holds no
        training entries (e.g. a run interrupted while writing), so that the
        experiment is simply run again instead of aborting the sweep.
    """
    filename = f"./output/benchmark/{model_name}_data.json"
    if not os.path.exists(filename):
        return False

    try:
        with open(filename) as f:
            stats = json.load(f)
        ## entries are (epoch, loss, acc); only the epoch index matters here
        last_epoch = stats['train_stat'][-1][0]
    except (json.JSONDecodeError, KeyError, IndexError, TypeError):
        return False

    return last_epoch >= ep-1

In [23]:
def benchmark(dataset:str, patch_size:int, num_layers:int, SEED:int, sparse_att:bool=False, sparse_mlp:bool=False, pos_emb:bool=False, cuda:int=0):
    """Train and evaluate one ``Mixer_ViT_Classifier`` configuration and log the results.

    Per epoch the function trains on the train split, evaluates on the test
    split, rewrites ``./output/benchmark/{model_name}_data.json`` with all
    statistics so far, and overwrites ``./models/benchmark/{model_name}.pth``
    whenever the test accuracy improves. After the last epoch loss and
    accuracy curves are saved under ``./output/benchmark/plots/``.

    Args:
        dataset: One of ``'tiny'``, ``'cifar10'`` or ``'cifar100'``.
        patch_size: Side of the square patches; must be a key of the
            expansion table (16, 8, 4, 2 or 1).
        num_layers: Nominal depth. With ``sparse_att`` it must be even and
            half as many blocks are instantiated.
        SEED: Seed for ``torch`` and ``numpy``.
        sparse_att: Use the reduced sequence block size (power of two >= sqrt(seq_len)).
        sparse_mlp: Use the reduced MLP block size (power of two >= sqrt(hidden size)).
        pos_emb: Enable the positional encoding in the model.
        cuda: Index of the CUDA device to train on.

    Returns:
        None. Returns early when ``experiment_skip`` reports the run as finished.

    Raises:
        ValueError: ``dataset`` is not one of the supported names.
        KeyError: ``patch_size`` is not in the expansion table.
    """
    device = torch.device(f"cuda:{cuda}")

    if sparse_att:
        assert num_layers%2 == 0, 'number of blocks on sparse transformer is (x2)/2 hence it must be even'
        num_layers_ = num_layers//2
    else:
        num_layers_ = num_layers

    ## per-dataset hyper-parameters: (num classes, batch size, epochs, image size)
    dataset_cfg = {
        'tiny':     (200, 256, 400, (3, 64, 64)),
        'cifar10':  (10,  32,  200, (3, 32, 32)),
        'cifar100': (100, 128, 300, (3, 32, 32)),
    }
    if dataset not in dataset_cfg:
        raise ValueError(f"Unknown dataset {dataset!r}; expected one of {sorted(dataset_cfg)}")
    NC, BS, EPOCHS, imsize = dataset_cfg[dataset]

    expansion_dict = {16:1024, 8:256, 4:128, 2:64, 1:64}
    expansion = expansion_dict[patch_size]

    ## The name depends only on the arguments, so finished runs are skipped
    ## before any dataset or model is built.
    _a, _b, _c = 'att', 'mlp', 'nPE'
    if sparse_att: _a = 'sAtt'
    if sparse_mlp: _b = 'sMlp'
    if pos_emb: _c = 'PE'
    model_name = f'01.3_ViT_{_c}_{dataset}_patch{patch_size}_l{num_layers}_{_a}_{_b}_s{SEED}'
    print(f"Model Name: {model_name}")

    if experiment_skip(model_name, EPOCHS):
        print(f'EXPERIMENT DONE... SKIPPING : {model_name}')
        return

    ## every folder written to below must exist before the first epoch ends
    model_dir = './models/benchmark'
    output_dir = './output/benchmark'
    plot_dir = f'{output_dir}/plots'
    for folder in (model_dir, output_dir, plot_dir):
        os.makedirs(folder, exist_ok=True)

    torch.manual_seed(SEED)
    np.random.seed(SEED)

    ##### Data Transforms
    if dataset == 'tiny':
        tiny_norm = transforms.Normalize(mean=[0.5]*3, std=[0.2]*3)
        tiny_train = transforms.Compose([
            transforms.RandAugment(),
            transforms.ToTensor(),
            tiny_norm,
        ])
        tiny_test = transforms.Compose([
            transforms.ToTensor(),
            tiny_norm,
        ])

        train_dataset = TinyImageNet_Preload(root="../../../../../_Datasets/tiny-imagenet-200",
                                     mode='train', transform=tiny_train)
        test_dataset = TinyImageNet_Preload(root="../../../../../_Datasets/tiny-imagenet-200",
                                     mode='val', transform=tiny_test)

    else:
        ## CIFAR-10 and CIFAR-100 share the pipeline; only statistics, class and path differ
        if dataset == 'cifar10':
            mean, std = [0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]
            dataset_cls, data_root = datasets.CIFAR10, "../../../../../_Datasets/cifar10/"
        else:
            mean, std = [0.5071, 0.4865, 0.4409], [0.2009, 0.1984, 0.2023]
            dataset_cls, data_root = datasets.CIFAR100, "../../../../../_Datasets/cifar100/"

        cifar_train = transforms.Compose([
            transforms.RandomCrop(size=32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ])
        cifar_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ])

        train_dataset = dataset_cls(root=data_root, train=True, download=True, transform=cifar_train)
        test_dataset = dataset_cls(root=data_root, train=False, download=True, transform=cifar_test)

    ##### Now create data loaders
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BS, shuffle=True, num_workers=4)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BS, shuffle=False, num_workers=4)

    ### Now create models
    seq_len = (imsize[-1]*imsize[-2])//(patch_size*patch_size)
    mlp_dim = expansion
    print(seq_len, mlp_dim)

    if sparse_att:
        seq_len = int(2**np.ceil(np.log2(np.sqrt(seq_len))))
    if sparse_mlp:
        mlp_dim = int(2**np.ceil(np.log2(np.sqrt(expansion))))

    ## re-seed so the initial weights do not depend on the data setup above
    torch.manual_seed(SEED)
    model = Mixer_ViT_Classifier(imsize,
                                 patch_size=[patch_size]*2,
                                 hidden_channel=expansion,
                                 num_blocks=num_layers_,
                                 num_classes=NC,
                                 block_seq_size=seq_len,
                                 block_mlp_size=mlp_dim,
                                 pos_emb=pos_emb).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print("number of params: ", num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

    STAT ={'train_stat':[], 'test_stat':[], 'params':num_params, }

    ## Following is copied from
    ### https://github.com/kuangliu/pytorch-cifar/blob/master/main.py

    # Training
    def train(epoch):
        """Run one training epoch and append (epoch, mean loss, accuracy %) to STAT."""
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        STAT['train_stat'].append((epoch, train_loss/(batch_idx+1), 100.*correct/total)) ### (Epochs, Loss, Acc)
        print(f"[Train] {epoch} Loss: {train_loss/(batch_idx+1):.3f} | Acc: {100.*correct/total:.3f} {correct}/{total}")
        return

    ## best_acc is deliberately left as a module-level global (as before), so
    ## it stays inspectable from other notebook cells after the call.
    global best_acc
    best_acc = -1
    def test(epoch):
        """Evaluate on the test split, record stats, checkpoint on improvement, dump STAT."""
        global best_acc
        model.eval()
        test_loss = 0
        correct = 0
        total = 0
        time_taken = []
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
                inputs, targets = inputs.to(device), targets.to(device)

                ## CUDA kernels run asynchronously: synchronise on both sides so
                ## the interval covers the forward pass, not just its launch.
                torch.cuda.synchronize(device)
                start = time.perf_counter()

                outputs = model(inputs)

                torch.cuda.synchronize(device)
                time_taken.append(time.perf_counter()-start)

                loss = criterion(outputs, targets)

                test_loss += loss.item()
                _, predicted = outputs.max(1)
                total += targets.size(0)
                correct += predicted.eq(targets).sum().item()

        STAT['test_stat'].append((epoch, test_loss/(batch_idx+1), 100.*correct/total, np.mean(time_taken))) ### (Epochs, Loss, Acc, time)
        print(f"[Test] {epoch} Loss: {test_loss/(batch_idx+1):.3f} | Acc: {100.*correct/total:.3f} {correct}/{total}")

        # Save checkpoint.
        acc = 100.*correct/total
        if acc > best_acc:
            print('Saving..')
            state = {
                'model': model.state_dict(),
                'acc': acc,
                'epoch': epoch
            }
            torch.save(state, f'{model_dir}/{model_name}.pth')
            best_acc = acc

        ## rewritten every epoch so experiment_skip can see the progress
        with open(f"{output_dir}/{model_name}_data.json", 'w') as f:
            json.dump(STAT, f, indent=0)

    ### Train the whole damn thing
    for epoch in range(0, EPOCHS):
        train(epoch)
        test(epoch)
        scheduler.step()

    train_stat = np.array(STAT['train_stat'])
    test_stat = np.array(STAT['test_stat'])

    ## one figure per metric; closed explicitly so sweeps do not pile up figures
    for column, ylabel, suffix in ((1, "Loss", "loss"), (2, "Accuracy", "accs")):
        fig, ax = plt.subplots()
        ax.plot(train_stat[:,column], label='train')
        ax.plot(test_stat[:,column], label='test')
        ax.set_ylabel(ylabel)
        ax.legend()
        fig.savefig(f"{plot_dir}/{model_name}_{suffix}.svg")
        plt.show()
        plt.close(fig)

    del model, optimizer
    return

In [24]:
# benchmark(dataset='tiny', 
#           patch_size=4, num_layers=10, SEED=123, sparse_att=True, sparse_mlp=True, cuda=0
#          )

In [25]:
# ### Automate the benchmark
# ###### for tiny
# cuda_idx = 0
# # for seed in [147, 258, 369]:
# for seed in [147]:
#     for patch_size in [16, 8, 4]:
#         for sparse_attention in [False, True]:
#             for sparse_mlp in [False, True]:
# #                 for nlayers in [6, 10, 14]:
#                 for nlayers in [6]:
#                     print(f'''
#                         Experimenting on Tiny Dataset 
#                         patch:{patch_size},
#                         sparse_att: {sparse_attention},
#                         sparse_mlp: {sparse_mlp},
#                         num_layers : {nlayers},
#                         seed: {seed}
#                     ''')
            
#                     benchmark(dataset='tiny', 
#                               patch_size=patch_size, 
#                               num_layers=nlayers, 
#                               SEED=seed, 
#                               sparse_att=sparse_attention, sparse_mlp=sparse_mlp, 
#                               cuda=cuda_idx
#                              )


In [26]:
# ### Automate the benchmark
# ###### for tiny

# not_working = [
#     (4, False, True, 6),
#     (4, False, False, 10),
#     (4, False, False, 14),
#     (4, False, True, 10),
#     (4, False, True, 14),
#     (4, True, True, 14),
# ]

# cuda_idx = 0
# # for seed in [147, 258, 369]:
# for seed in [147]:
#     for patch_size in [16, 8, 4]:
#         for sparse_attention in [False, True]:
#             for sparse_mlp in [False, True]:
#                 for nlayers in [6, 10, 14]:

#                     print(f'''
#                         Experimenting on Tiny Dataset 
#                         patch:{patch_size},
#                         sparse_att: {sparse_attention},
#                         sparse_mlp: {sparse_mlp},
#                         num_layers : {nlayers},
#                         seed: {seed}
#                     ''')
            
#                 ### check if config is in not_working case
#                     exit = False
#                     for nw in not_working:
#                         if patch_size==nw[0] and \
#                             sparse_attention==nw[1] and \
#                             sparse_mlp==nw[2] and\
#                             nlayers==nw[3]:
                            
#                             exit=True
#                             break
#                     if exit:
#                         print(f'Exiting as the config is in NOT WORKING')
#                         continue


#                     benchmark(dataset='tiny', 
#                               patch_size=patch_size, 
#                               num_layers=nlayers, 
#                               SEED=seed, 
#                               sparse_att=sparse_attention, sparse_mlp=sparse_mlp, 
#                               cuda=cuda_idx
#                              )


In [27]:
### Automate the benchmark
###### for c10

# Configurations to leave out, each given as
# (patch_size, sparse_attention, sparse_mlp, nlayers).
not_working = [
]

cuda_idx = 0

# Cartesian product in the same order as the former nested loops
# (leftmost axis varies slowest).
sweep = itertools.product(
    ['cifar10'],        # dataset
    [147, 258, 369],    # seed
    [12, 8, 4],         # nlayers
    [1],                # patch_size
    [False],            # sparse_attention
    [False, True],      # sparse_mlp
    [False],            # PE
)

for dataset, seed, nlayers, patch_size, sparse_attention, sparse_mlp, PE in sweep:

    print(f'''
                                Experimenting on {dataset} Dataset 
                                patch:{patch_size},
                                sparse_att: {sparse_attention},
                                sparse_mlp: {sparse_mlp},
                                num_layers: {nlayers},
                                pos_embed: {PE},
                                seed: {seed}
                            ''')

    ### check if config is in not_working case
    config = (patch_size, sparse_attention, sparse_mlp, nlayers)
    skip_config = any(config == tuple(nw[:4]) for nw in not_working)
    if skip_config:
        print('Exiting as the config is in NOT WORKING')
        continue

    benchmark(dataset=dataset,
              patch_size=patch_size,
              num_layers=nlayers,
              SEED=seed,
              sparse_att=sparse_attention, sparse_mlp=sparse_mlp,
              pos_emb=PE,
              cuda=cuda_idx
             )


                                Experimenting on cifar10 Dataset 
                                patch:1,
                                sparse_att: False,
                                sparse_mlp: False,
                                num_layers: 12,
                                pos_embed: False,
                                seed: 147
                            
Files already downloaded and verified
Files already downloaded and verified
1024 64
ViT Mixer : Channels per patch -> Initial:3 Final:64
[2, 4, 8, 16, 32, 64]
Sequence len: 1024 ; Block size: 1024
Channel dim: 64 num heads: 8
MLP dim: 64 ; Block size: 64
number of params:  1057290
Model Name: 01.3_ViT_nPE_cifar10_patch1_l12_att_mlp_s147


100%|█████████████████████████████████████████████████| 1563/1563 [10:10<00:00,  2.56it/s]


[Train] 0 Loss: 1.889 | Acc: 34.098 17049/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 0 Loss: 1.513 | Acc: 46.800 4680/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:11<00:00,  2.56it/s]


[Train] 1 Loss: 1.650 | Acc: 41.396 20698/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 1 Loss: 1.433 | Acc: 49.280 4928/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 2 Loss: 1.550 | Acc: 45.232 22616/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 2 Loss: 1.476 | Acc: 50.010 5001/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 3 Loss: 1.477 | Acc: 47.466 23733/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 3 Loss: 1.364 | Acc: 52.670 5267/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 4 Loss: 1.401 | Acc: 50.186 25093/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 4 Loss: 1.312 | Acc: 53.460 5346/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 5 Loss: 1.352 | Acc: 51.824 25912/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 5 Loss: 1.323 | Acc: 53.580 5358/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 6 Loss: 1.307 | Acc: 53.798 26899/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 6 Loss: 1.245 | Acc: 55.990 5599/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 7 Loss: 1.263 | Acc: 55.254 27627/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 7 Loss: 1.196 | Acc: 57.740 5774/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 8 Loss: 1.232 | Acc: 56.410 28205/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 8 Loss: 1.213 | Acc: 57.610 5761/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 9 Loss: 1.203 | Acc: 57.306 28653/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 9 Loss: 1.140 | Acc: 59.870 5987/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 10 Loss: 1.174 | Acc: 58.298 29149/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 10 Loss: 1.128 | Acc: 60.210 6021/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 11 Loss: 1.155 | Acc: 58.888 29444/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 11 Loss: 1.131 | Acc: 60.170 6017/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 12 Loss: 1.129 | Acc: 60.262 30131/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 12 Loss: 1.111 | Acc: 60.570 6057/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 13 Loss: 1.116 | Acc: 60.612 30306/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 13 Loss: 1.127 | Acc: 59.960 5996/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 14 Loss: 1.098 | Acc: 61.274 30637/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 14 Loss: 1.087 | Acc: 61.690 6169/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 15 Loss: 1.081 | Acc: 61.762 30881/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 15 Loss: 1.079 | Acc: 62.470 6247/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 16 Loss: 1.066 | Acc: 62.144 31072/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 16 Loss: 1.063 | Acc: 63.040 6304/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 17 Loss: 1.054 | Acc: 62.704 31352/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 17 Loss: 1.053 | Acc: 62.980 6298/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 18 Loss: 1.038 | Acc: 63.276 31638/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 18 Loss: 1.053 | Acc: 63.240 6324/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 19 Loss: 1.028 | Acc: 63.916 31958/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 19 Loss: 1.051 | Acc: 63.690 6369/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 20 Loss: 1.010 | Acc: 64.294 32147/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 20 Loss: 1.039 | Acc: 63.520 6352/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 21 Loss: 0.998 | Acc: 64.738 32369/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 21 Loss: 1.025 | Acc: 63.960 6396/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 22 Loss: 0.984 | Acc: 65.276 32638/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 22 Loss: 1.053 | Acc: 63.690 6369/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 23 Loss: 0.972 | Acc: 65.834 32917/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 23 Loss: 1.038 | Acc: 63.730 6373/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 24 Loss: 0.962 | Acc: 65.994 32997/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 24 Loss: 1.049 | Acc: 63.920 6392/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 25 Loss: 0.948 | Acc: 66.578 33289/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 25 Loss: 1.032 | Acc: 64.410 6441/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 26 Loss: 0.939 | Acc: 66.720 33360/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.61it/s]


[Test] 26 Loss: 1.052 | Acc: 63.960 6396/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 27 Loss: 0.929 | Acc: 67.260 33630/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 27 Loss: 1.037 | Acc: 64.450 6445/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 28 Loss: 0.915 | Acc: 67.838 33919/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 28 Loss: 1.067 | Acc: 63.540 6354/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 29 Loss: 0.904 | Acc: 68.030 34015/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 29 Loss: 1.030 | Acc: 64.470 6447/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 30 Loss: 0.889 | Acc: 68.530 34265/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 30 Loss: 1.035 | Acc: 64.840 6484/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 31 Loss: 0.879 | Acc: 69.010 34505/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 31 Loss: 1.044 | Acc: 64.390 6439/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 32 Loss: 0.870 | Acc: 69.164 34582/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.59it/s]


[Test] 32 Loss: 1.024 | Acc: 64.900 6490/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 33 Loss: 0.857 | Acc: 69.838 34919/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 33 Loss: 1.028 | Acc: 64.730 6473/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 34 Loss: 0.846 | Acc: 70.156 35078/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 34 Loss: 1.068 | Acc: 64.390 6439/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 35 Loss: 0.837 | Acc: 70.298 35149/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 35 Loss: 1.078 | Acc: 65.100 6510/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 36 Loss: 0.826 | Acc: 70.826 35413/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 36 Loss: 1.054 | Acc: 64.470 6447/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 37 Loss: 0.815 | Acc: 71.256 35628/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 37 Loss: 1.049 | Acc: 65.090 6509/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 38 Loss: 0.802 | Acc: 71.540 35770/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 38 Loss: 1.053 | Acc: 64.710 6471/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 39 Loss: 0.792 | Acc: 72.078 36039/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 39 Loss: 1.074 | Acc: 65.180 6518/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 40 Loss: 0.780 | Acc: 72.444 36222/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 40 Loss: 1.045 | Acc: 65.300 6530/10000
Saving..


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 41 Loss: 0.769 | Acc: 72.850 36425/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 41 Loss: 1.061 | Acc: 65.190 6519/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 42 Loss: 0.760 | Acc: 73.270 36635/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 42 Loss: 1.080 | Acc: 65.020 6502/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 43 Loss: 0.749 | Acc: 73.434 36717/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.61it/s]


[Test] 43 Loss: 1.100 | Acc: 64.630 6463/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:13<00:00,  2.55it/s]


[Train] 44 Loss: 0.738 | Acc: 73.818 36909/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 44 Loss: 1.100 | Acc: 64.530 6453/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 46 Loss: 0.719 | Acc: 74.514 37257/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 46 Loss: 1.118 | Acc: 64.690 6469/10000


 77%|█████████████████████████████████████▌           | 1199/1563 [07:49<02:22,  2.55it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 54 Loss: 0.624 | Acc: 77.740 38870/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 54 Loss: 1.207 | Acc: 64.000 6400/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 55 Loss: 0.612 | Acc: 78.418 39209/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 55 Loss: 1.229 | Acc: 64.330 6433/10000


  3%|█▌                                                 | 47/1563 [00:18<09:54,  2.55it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 63 Loss: 0.527 | Acc: 81.256 40628/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 63 Loss: 1.373 | Acc: 64.010 6401/10000


 94%|█████████████████████████████████████████████▉   | 1466/1563 [09:34<00:38,  2.55it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 71 Loss: 0.452 | Acc: 83.716 41858/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 71 Loss: 1.625 | Acc: 63.180 6318/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 72 Loss: 0.442 | Acc: 84.266 42133/50000


100%|███████████████████████████████████████████████████| 313/313 [00:40<00:00,  7.63it/s]


[Test] 72 Loss: 1.638 | Acc: 63.140 6314/10000


 20%|█████████▊                                        | 308/1563 [02:00<08:14,  2.54it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 80 Loss: 0.370 | Acc: 86.950 43475/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 80 Loss: 1.851 | Acc: 62.060 6206/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 81 Loss: 0.366 | Acc: 86.984 43492/50000


 54%|███████████████████████████▌                       | 169/313 [00:22<00:18,  7.65it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 88 Loss: 0.309 | Acc: 88.988 44494/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 88 Loss: 2.060 | Acc: 62.070 6207/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:11<00:00,  2.55it/s]


[Train] 89 Loss: 0.309 | Acc: 89.048 44524/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 89 Loss: 2.161 | Acc: 61.820 6182/10000


 45%|██████████████████████▎                           | 699/1563 [04:33<05:37,  2.56it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|███████████████████████████████████████████████████| 313/313 [00:40<00:00,  7.63it/s]


[Test] 96 Loss: 2.393 | Acc: 61.740 6174/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 97 Loss: 0.252 | Acc: 90.960 45480/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 97 Loss: 2.457 | Acc: 61.820 6182/10000


 60%|██████████████████████████████▏                   | 942/1563 [06:09<04:02,  2.56it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:11<00:00,  2.56it/s]


[Train] 105 Loss: 0.212 | Acc: 92.426 46213/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 105 Loss: 2.664 | Acc: 61.260 6126/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:11<00:00,  2.55it/s]


[Train] 106 Loss: 0.208 | Acc: 92.680 46340/50000


100%|███████████████████████████████████████████████████| 313/313 [00:40<00:00,  7.64it/s]


[Test] 106 Loss: 2.728 | Acc: 61.590 6159/10000


  4%|██                                                 | 62/1563 [00:24<09:46,  2.56it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 112 Loss: 0.181 | Acc: 93.600 46800/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 112 Loss: 2.994 | Acc: 61.510 6151/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 113 Loss: 0.177 | Acc: 93.764 46882/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 113 Loss: 3.079 | Acc: 61.640 6164/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 114 Loss: 0.172 | Acc: 93.962 46981/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 114 Loss: 3.159 | Acc: 61.540 6154/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 115 Loss: 0.174 | Acc: 93.938 46969/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 115 Loss: 2.987 | Acc: 61.600 6160/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 116 Loss: 0.165 | Acc: 94.180 47090/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 116 Loss: 3.246 | Acc: 61.270 6127/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 117 Loss: 0.161 | Acc: 94.330 47165/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 117 Loss: 3.154 | Acc: 61.170 6117/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 118 Loss: 0.160 | Acc: 94.284 47142/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 118 Loss: 3.139 | Acc: 61.520 6152/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 119 Loss: 0.156 | Acc: 94.598 47299/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 119 Loss: 3.243 | Acc: 61.520 6152/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 120 Loss: 0.154 | Acc: 94.574 47287/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 120 Loss: 3.258 | Acc: 61.040 6104/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 121 Loss: 0.152 | Acc: 94.654 47327/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 121 Loss: 3.322 | Acc: 61.140 6114/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 122 Loss: 0.145 | Acc: 95.016 47508/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 122 Loss: 3.400 | Acc: 61.560 6156/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 123 Loss: 0.140 | Acc: 95.018 47509/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 123 Loss: 3.411 | Acc: 61.330 6133/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 124 Loss: 0.137 | Acc: 95.222 47611/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 124 Loss: 3.397 | Acc: 61.580 6158/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 125 Loss: 0.134 | Acc: 95.260 47630/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 125 Loss: 3.471 | Acc: 61.130 6113/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 126 Loss: 0.130 | Acc: 95.416 47708/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 126 Loss: 3.461 | Acc: 60.930 6093/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 127 Loss: 0.133 | Acc: 95.442 47721/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 127 Loss: 3.549 | Acc: 61.120 6112/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 128 Loss: 0.123 | Acc: 95.738 47869/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 128 Loss: 3.631 | Acc: 61.130 6113/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 129 Loss: 0.123 | Acc: 95.618 47809/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 129 Loss: 3.591 | Acc: 61.150 6115/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 130 Loss: 0.123 | Acc: 95.670 47835/50000


 45%|██████████████████████▉                            | 141/313 [00:18<00:22,  7.63it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 138 Loss: 0.100 | Acc: 96.472 48236/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 138 Loss: 3.989 | Acc: 61.140 6114/10000


 89%|███████████████████████████████████████████▍     | 1385/1563 [09:02<01:09,  2.55it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 145 Loss: 4.155 | Acc: 60.950 6095/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 146 Loss: 0.084 | Acc: 97.214 48607/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 146 Loss: 4.340 | Acc: 61.030 6103/10000


 56%|████████████████████████████▏                     | 881/1563 [05:45<04:28,  2.54it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 154 Loss: 0.071 | Acc: 97.560 48780/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 154 Loss: 4.636 | Acc: 61.020 6102/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 155 Loss: 0.072 | Acc: 97.494 48747/50000


100%|███████████████████████████████████████████████████| 313/313 [00:40<00:00,  7.64it/s]


[Test] 155 Loss: 4.727 | Acc: 60.920 6092/10000


 33%|████████████████▌                                 | 519/1563 [03:23<06:48,  2.55it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 163 Loss: 0.061 | Acc: 97.946 48973/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 163 Loss: 4.983 | Acc: 61.140 6114/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 164 Loss: 0.058 | Acc: 98.078 49039/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.62it/s]


[Test] 164 Loss: 4.957 | Acc: 61.040 6104/10000


 17%|████████▎                                         | 259/1563 [01:41<08:30,  2.55it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 171 Loss: 0.054 | Acc: 98.250 49125/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 171 Loss: 5.162 | Acc: 61.160 6116/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 172 Loss: 0.055 | Acc: 98.192 49096/50000


  8%|███▉                                                | 24/313 [00:03<00:37,  7.64it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

 88%|███████████████████████████████████████████▏     | 1377/1563 [08:59<01:12,  2.56it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:11<00:00,  2.55it/s]


[Train] 180 Loss: 0.046 | Acc: 98.490 49245/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 180 Loss: 5.379 | Acc: 61.120 6112/10000


 55%|███████████████████████████▍                      | 858/1563 [05:36<04:36,  2.55it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 181 Loss: 5.433 | Acc: 60.940 6094/10000


 28%|█████████████▉                                    | 435/1563 [02:50<07:21,  2.56it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 188 Loss: 0.045 | Acc: 98.514 49257/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 188 Loss: 5.491 | Acc: 60.870 6087/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 189 Loss: 0.044 | Acc: 98.554 49277/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 189 Loss: 5.511 | Acc: 60.870 6087/10000


  2%|█▏                                                 | 35/1563 [00:13<09:57,  2.56it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 190 Loss: 0.044 | Acc: 98.558 49279/50000


  3%|█▋                                                  | 10/313 [00:01<00:40,  7.52it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100%|█████████████████████████████████████████████████| 1563/1563 [10:12<00:00,  2.55it/s]


[Train] 197 Loss: 0.042 | Acc: 98.648 49324/50000


100%|███████████████████████████████████████████████████| 313/313 [00:41<00:00,  7.63it/s]


[Test] 197 Loss: 5.532 | Acc: 60.860 6086/10000


100%|█████████████████████████████████████████████████| 1563/1563 [10:11<00:00,  2.55it/s]


[Train] 198 Loss: 0.044 | Acc: 98.574 49287/50000


 45%|██████████████████████▉                            | 141/313 [00:18<00:22,  7.63it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
# ### Automate the benchmark (NOTE: this whole cell is commented out, i.e. disabled)
# ###### for CIFAR-100 (dataset = 'cifar100')

# not_working = [
#     (1, False, False, 4),
#     (1, False, False, 8),
#     (1, False, False, 12),
#     (1, False, True, 4),
#     (1, False, True, 8),
#     (1, False, True, 12),
# ]

# cuda_idx = 0
# dataset = 'cifar100'
# # for seed in [147, 258, 369]:
# for seed in [147]:
#     for patch_size in [1, 2, 4, 8]:
#         for sparse_attention in [False, True]:
#             for sparse_mlp in [False, True]:
#                 for nlayers in [4, 8, 12]:

#                     print(f'''
#                         Experimenting on {dataset} Dataset 
#                         patch:{patch_size},
#                         sparse_att: {sparse_attention},
#                         sparse_mlp: {sparse_mlp},
#                         num_layers : {nlayers},
#                         seed: {seed}
#                     ''')
            
#                     ### skip this config if it is listed in not_working
#                     exit = False
#                     for nw in not_working:
#                         if patch_size==nw[0] and \
#                             sparse_attention==nw[1] and \
#                             sparse_mlp==nw[2] and\
#                             nlayers==nw[3]:
                            
#                             exit=True
#                             break
#                     if exit:
#                         print(f'Exiting as the config is in NOT WORKING')
#                         continue


#                     benchmark(dataset=dataset, 
#                               patch_size=patch_size, 
#                               num_layers=nlayers, 
#                               SEED=seed, 
#                               sparse_att=sparse_attention, sparse_mlp=sparse_mlp, 
#                               cuda=cuda_idx
#                              )

In [None]:
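## CIFAR-10 (c10) works with all patch sizes on a 3090 GPU using a batch size of 64.
## CIFAR-100 (c100) doesn't work with patch size 1 when attention is dense (sparse_att=False; cf. the not_working list).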

00.0_Transformer_replicate_and_SparseMixing_development_v0.ipynb
01.0_Vision_Transformers_ViT_experiment_CIFAR10_v0.ipynb
01.1_Vision_Transformers_mixer_ViT_experiment_CIFAR10_v0.ipynb
01.2_ViT_sparse_mixer_development_tinyImagenet_v0.ipynb
01.3_ViT_sparse_butterfly_tinyImagenet_cifar_benchmark_v0.ipynb
01.3_ViT_sparse_butterfly_tinyImagenet_cifar_benchmark_v1.ipynb
01.3_ViT_sparse_butterfly_tinyImagenet_cifar_benchmark_v2.ipynb
01.4_ViT_sparse_butterfly_tinyImagenet_cifar_benchmark2_v0.ipynb
models
output
__pycache__
tiny_imagenet.py
transformers_lib2_dev.py
transformers_lib3_dev.py
transformers_lib.py
