In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
import glob
from PIL import Image
from sklearn.model_selection import train_test_split
import torchvision
from torchvision import datasets, transforms
from tqdm.notebook import tqdm
from tensorboardX import SummaryWriter
from torch.optim.lr_scheduler import LambdaLR, StepLR

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
class Config:

    batch_size = 64
    epochs = 300
    lr = 1e-3
    
    channel = 3

    height = 32
    width = 32

    data_root = '../dataset/cifar10'

    dropout_rate = 0.1
    attn_dropout = 0

    patch_size = 4
    num_patches = int((height * width) / (patch_size ** 2))

    layers = 12
#     embedding_d = 768
    embedding_d = 512
    mlp_size = 1024
    heads = 8
    
    dimhead = 512

    num_classes = 10
    
    log_f = 100

In [4]:
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
])

In [5]:
trainset = torchvision.datasets.CIFAR10(root=Config.data_root, train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=Config.batch_size,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root=Config.data_root, train=False,
                                       download=True, transform=transforms.ToTensor())
testloader = torch.utils.data.DataLoader(testset, batch_size=Config.batch_size,
                                         shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
class Model(nn.Module):
    
    def __init__(self):
        super(Model, self).__init__()
        
        self.num_patches = (Config.height * Config.width) // Config.patch_size ** 2
        self.patch_d = Config.channel * Config.patch_size * Config.patch_size
        
        self.patch_embedding = PatchEmbedding()
        self.class_token = nn.Parameter(torch.randn(1, 1, Config.embedding_d), requires_grad=True)
        self.position_embedding = nn.Parameter(torch.randn(1, Config.num_patches + 1, Config.embedding_d), requires_grad=True)
        self.cls_token = nn.Parameter(torch.randn(1, 1, Config.dimhead))
        self.dropout = nn.Dropout(Config.dropout_rate)
        
        self.transformer = Transformer()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(Config.dimhead),
            nn.Linear(Config.dimhead, Config.num_classes))
        
        
    def forward(self, x):
        x = self.patch_embedding(x)
        class_token = self.class_token.expand(Config.batch_size, -1, -1)
        x = torch.cat((class_token, x), dim=1)
        x = self.position_embedding + x
        x = self.dropout(x)
        
        x = self.transformer(x)
        
        x = x[:, 0]

        x = self.mlp_head(x)
        
        return x
    
    
class Transformer(nn.Module):
    def __init__(self, dim=Config.dimhead, mlp_dim=512):
        super().__init__()
        
        self.layers = nn.ModuleList([])
        for _ in range(Config.layers):
            self.layers.append(nn.ModuleList([MABlock(), FeedForward()]))
                               
    def forward(self, x):
        
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
            
        return x
    
class MABlock(nn.Module):

    def __init__(self):
        super(MABlock, self).__init__()

        self.layer_norm = nn.LayerNorm(Config.embedding_d)
        self.multihead_attention = nn.MultiheadAttention(Config.embedding_d, Config.heads, Config.attn_dropout, batch_first=True)

    def forward(self, x):
        x = self.layer_norm(x)
        x, _ = self.multihead_attention(x, x, x, need_weights=False)

        return x
        
        
class PatchEmbedding(nn.Module):

    def __init__(self):
        super(PatchEmbedding, self).__init__()

        self.conv = nn.Conv2d(in_channels=Config.channel,
                              out_channels=Config.embedding_d,
                              kernel_size=Config.patch_size,
                              stride=Config.patch_size)

        self.flatten = nn.Flatten(start_dim=2, end_dim=3)

    def forward(self, x):
        x = self.conv(x)  # ([b, 768, 4, 4])
        x = self.flatten(x)  # ([b, 768, 16])

        return x.permute(0, 2, 1)
        
        
class FeedForward(nn.Module):
    
    def __init__(self):
        super(FeedForward, self).__init__()

        self.layer_norm = nn.LayerNorm(Config.embedding_d)
        self.mlp = nn.Sequential(
            nn.Linear(Config.embedding_d, Config.mlp_size),
            nn.GELU(),
            nn.Dropout(Config.dropout_rate),
            nn.Linear(Config.mlp_size, Config.embedding_d),
            nn.Dropout(Config.dropout_rate)
        )

    def forward(self, x):
        x = self.layer_norm(x)
        x = self.mlp(x)

        return x

In [8]:
model = Model().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=Config.lr)
criterion = nn.CrossEntropyLoss()

In [9]:
scheduler = StepLR(optimizer, 5)

In [10]:
warmup_scheduler = LambdaLR(optimizer, lr_lambda=lambda step: min(1.0, step / 10000))
step_scheduler = StepLR(optimizer, step_size=5, gamma=.9)

In [11]:
writer = SummaryWriter()

In [12]:
def test(epoch):

    model.eval()
    
    test_loss = 0
    correct = 0
    total = 0
    
    with torch.no_grad():
        
        for batch_idx, (inputs, targets) in enumerate(tqdm(testloader)):
            
            if Config.batch_size != len(inputs):
                inputs = inputs.repeat(Config.batch_size // inputs.size(0) + 1, 1, 1, 1)[:Config.batch_size]
                targets = targets.repeat(Config.batch_size // targets.size(0) + 1)[:Config.batch_size]
            
            inputs, targets = inputs.to(device), targets.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    acc = 100. * correct / total

    print(f'Epoch {epoch} val loss: {test_loss:.5f}, test acc: {(acc):.5f}')

    return test_loss, acc

In [13]:
def train(epoch):
    
    model.train()
    
    train_loss = 0
    correct = 0
    total = 0
    
    for batch_idx, (inputs, targets) in enumerate(tqdm(trainloader)):
        
        if Config.batch_size != len(inputs):
            inputs = inputs.repeat(Config.batch_size // inputs.size(0) + 1, 1, 1, 1)[:Config.batch_size]
            targets = targets.repeat(Config.batch_size // targets.size(0) + 1)[:Config.batch_size]
        
        inputs, targets = inputs.to(device), targets.to(device)
        
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        
        acc = 100.*correct/total
        
        if batch_idx % Config.log_f == 0:
            print(f'Epoch {epoch}, batch index {batch_idx} || train loss: {train_loss/(batch_idx+1)}, train acc: {acc}')

    return train_loss/(batch_idx+1), acc

In [None]:
for epoch in range(Config.epochs):

    train_loss, train_acc = train(epoch)
    test_loss, test_acc = test(epoch)
    
    scheduler.step()

    writer.add_scalar('train/loss', train_loss, epoch)
    writer.add_scalar('train/acc', train_acc, epoch)
    writer.add_scalar('test/loss', test_loss, epoch)
    writer.add_scalar('test/acc', test_acc, epoch)

  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 0, batch index 0 || train loss: 2.5789201259613037, train acc: 7.8125
Epoch 0, batch index 100 || train loss: 2.471000834266738, train acc: 10.055693069306932
Epoch 0, batch index 200 || train loss: 2.4677635686314523, train acc: 10.051305970149254
Epoch 0, batch index 300 || train loss: 2.464347756186197, train acc: 10.101744186046512
Epoch 0, batch index 400 || train loss: 2.464761009240091, train acc: 9.955579800498754
Epoch 0, batch index 500 || train loss: 2.4635067419139687, train acc: 10.04553393213573
Epoch 0, batch index 600 || train loss: 2.462752358092246, train acc: 10.004159733777039
Epoch 0, batch index 700 || train loss: 2.4627684104799714, train acc: 10.065977175463624


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 0 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 1, batch index 0 || train loss: 2.443060874938965, train acc: 17.1875
Epoch 1, batch index 100 || train loss: 2.461934538170843, train acc: 10.040222772277227
Epoch 1, batch index 200 || train loss: 2.460662644893969, train acc: 9.934701492537313
Epoch 1, batch index 300 || train loss: 2.4621958312798182, train acc: 10.039451827242525
Epoch 1, batch index 400 || train loss: 2.4604383418684885, train acc: 10.045199501246882
Epoch 1, batch index 500 || train loss: 2.4623470967877172, train acc: 10.017465069860279
Epoch 1, batch index 600 || train loss: 2.4624151901873494, train acc: 10.030158069883527
Epoch 1, batch index 700 || train loss: 2.4613290499688554, train acc: 10.061519258202567


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 1 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 2, batch index 0 || train loss: 2.502495527267456, train acc: 3.125
Epoch 2, batch index 100 || train loss: 2.464301571987643, train acc: 9.870049504950495
Epoch 2, batch index 200 || train loss: 2.4616386463393027, train acc: 10.066853233830846
Epoch 2, batch index 300 || train loss: 2.4630681082259778, train acc: 10.122508305647841
Epoch 2, batch index 400 || train loss: 2.461320012584886, train acc: 10.173784289276808
Epoch 2, batch index 500 || train loss: 2.4620709585810374, train acc: 9.98315868263473
Epoch 2, batch index 600 || train loss: 2.4615058200728277, train acc: 10.04575707154742
Epoch 2, batch index 700 || train loss: 2.4644534683091495, train acc: 9.974589871611983


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 2 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 3, batch index 0 || train loss: 2.5045053958892822, train acc: 12.5
Epoch 3, batch index 100 || train loss: 2.461139737969578, train acc: 10.071163366336634
Epoch 3, batch index 200 || train loss: 2.4662613109569644, train acc: 9.872512437810945
Epoch 3, batch index 300 || train loss: 2.4664306640625, train acc: 9.8421926910299
Epoch 3, batch index 400 || train loss: 2.463029032633489, train acc: 9.920511221945137
Epoch 3, batch index 500 || train loss: 2.4656598848735025, train acc: 9.842814371257486
Epoch 3, batch index 600 || train loss: 2.463915765781371, train acc: 9.889767054908486
Epoch 3, batch index 700 || train loss: 2.4637389475540834, train acc: 9.901034236804565


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 3 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 4, batch index 0 || train loss: 2.4989843368530273, train acc: 10.9375
Epoch 4, batch index 100 || train loss: 2.465723450821225, train acc: 9.962871287128714
Epoch 4, batch index 200 || train loss: 2.468480279789635, train acc: 9.864738805970148
Epoch 4, batch index 300 || train loss: 2.466855088737716, train acc: 9.852574750830565
Epoch 4, batch index 400 || train loss: 2.4622817889710613, train acc: 9.963372817955113
Epoch 4, batch index 500 || train loss: 2.461453351669921, train acc: 10.017465069860279
Epoch 4, batch index 600 || train loss: 2.461219683264734, train acc: 10.030158069883527
Epoch 4, batch index 700 || train loss: 2.460922086459934, train acc: 10.050374465049929


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 4 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 5, batch index 0 || train loss: 2.467496156692505, train acc: 12.5
Epoch 5, batch index 100 || train loss: 2.4640786081257433, train acc: 10.009282178217822
Epoch 5, batch index 200 || train loss: 2.469671646753947, train acc: 9.888059701492537
Epoch 5, batch index 300 || train loss: 2.468252645378493, train acc: 9.847383720930232
Epoch 5, batch index 400 || train loss: 2.4636138805427454, train acc: 9.998441396508728
Epoch 5, batch index 500 || train loss: 2.46245513657134, train acc: 10.054890219560878
Epoch 5, batch index 600 || train loss: 2.464117944736449, train acc: 10.019758735440933
Epoch 5, batch index 700 || train loss: 2.4632819899478755, train acc: 9.983505706134094


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 5 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 6, batch index 0 || train loss: 2.4794998168945312, train acc: 14.0625
Epoch 6, batch index 100 || train loss: 2.459561725654224, train acc: 9.854579207920793
Epoch 6, batch index 200 || train loss: 2.465007319379209, train acc: 9.701492537313433
Epoch 6, batch index 300 || train loss: 2.4591628261578835, train acc: 9.940822259136212
Epoch 6, batch index 400 || train loss: 2.4615324530518263, train acc: 10.045199501246882
Epoch 6, batch index 500 || train loss: 2.4625205451143004, train acc: 9.914545908183634
Epoch 6, batch index 600 || train loss: 2.461139289789311, train acc: 9.985960898502496
Epoch 6, batch index 700 || train loss: 2.460537140107529, train acc: 9.983505706134094


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 6 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 7, batch index 0 || train loss: 2.5358774662017822, train acc: 6.25
Epoch 7, batch index 100 || train loss: 2.469434905760359, train acc: 9.761757425742575
Epoch 7, batch index 200 || train loss: 2.4674718249496537, train acc: 9.810323383084578
Epoch 7, batch index 300 || train loss: 2.465337636858918, train acc: 9.914867109634551
Epoch 7, batch index 400 || train loss: 2.464899617240316, train acc: 9.877649625935161
Epoch 7, batch index 500 || train loss: 2.464105183016992, train acc: 9.955089820359282
Epoch 7, batch index 600 || train loss: 2.4643315546921207, train acc: 9.926164725457571
Epoch 7, batch index 700 || train loss: 2.464109370099665, train acc: 9.903263195435093


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 7 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 8, batch index 0 || train loss: 2.3693196773529053, train acc: 6.25
Epoch 8, batch index 100 || train loss: 2.4506696403616726, train acc: 10.442450495049505
Epoch 8, batch index 200 || train loss: 2.457531325259612, train acc: 9.91915422885572
Epoch 8, batch index 300 || train loss: 2.4612124988010953, train acc: 9.93563122923588
Epoch 8, batch index 400 || train loss: 2.462672555238529, train acc: 9.823098503740649
Epoch 8, batch index 500 || train loss: 2.4651040017247916, train acc: 9.808507984031936
Epoch 8, batch index 600 || train loss: 2.4630485874245847, train acc: 9.894966722129784
Epoch 8, batch index 700 || train loss: 2.4628651652288505, train acc: 9.927781740370898


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 8 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 9, batch index 0 || train loss: 2.464365005493164, train acc: 9.375
Epoch 9, batch index 100 || train loss: 2.469321345338727, train acc: 9.699876237623762
Epoch 9, batch index 200 || train loss: 2.4622108390675255, train acc: 9.996890547263682
Epoch 9, batch index 300 || train loss: 2.4649442731344027, train acc: 9.940822259136212
Epoch 9, batch index 400 || train loss: 2.4657941673164654, train acc: 9.89323566084788
Epoch 9, batch index 500 || train loss: 2.4648703591314383, train acc: 9.98939620758483
Epoch 9, batch index 600 || train loss: 2.46284268501396, train acc: 10.035357737104825
Epoch 9, batch index 700 || train loss: 2.4630730352796264, train acc: 10.063748216833096


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 9 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 10, batch index 0 || train loss: 2.5350489616394043, train acc: 0.0
Epoch 10, batch index 100 || train loss: 2.46423614851319, train acc: 9.79269801980198
Epoch 10, batch index 200 || train loss: 2.465148495204413, train acc: 9.701492537313433
Epoch 10, batch index 300 || train loss: 2.46451450819985, train acc: 9.743563122923588
Epoch 10, batch index 400 || train loss: 2.4611357643717247, train acc: 9.959476309226932
Epoch 10, batch index 500 || train loss: 2.4627376602081483, train acc: 9.97068363273453
Epoch 10, batch index 600 || train loss: 2.4627226942192495, train acc: 10.022358569051582
Epoch 10, batch index 700 || train loss: 2.462322543248981, train acc: 10.021398002853067


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 10 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 11, batch index 0 || train loss: 2.5294666290283203, train acc: 4.6875
Epoch 11, batch index 100 || train loss: 2.4688670918492988, train acc: 9.545173267326733
Epoch 11, batch index 200 || train loss: 2.4597835813588764, train acc: 9.849191542288557
Epoch 11, batch index 300 || train loss: 2.4580785634113704, train acc: 9.883720930232558
Epoch 11, batch index 400 || train loss: 2.4608237125035237, train acc: 9.947786783042394
Epoch 11, batch index 500 || train loss: 2.460254979942611, train acc: 9.96444610778443
Epoch 11, batch index 600 || train loss: 2.4604684088670474, train acc: 9.972961730449251
Epoch 11, batch index 700 || train loss: 2.4614338820398958, train acc: 10.037000713266762


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 11 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 12, batch index 0 || train loss: 2.3925740718841553, train acc: 14.0625
Epoch 12, batch index 100 || train loss: 2.4612696737346083, train acc: 9.715346534653465
Epoch 12, batch index 200 || train loss: 2.4621582292205657, train acc: 10.004664179104477
Epoch 12, batch index 300 || train loss: 2.4626352850384885, train acc: 9.987541528239202
Epoch 12, batch index 400 || train loss: 2.463760328411759, train acc: 9.854270573566085
Epoch 12, batch index 500 || train loss: 2.463224722715671, train acc: 9.889595808383234
Epoch 12, batch index 600 || train loss: 2.4638103899265484, train acc: 9.824771214642263
Epoch 12, batch index 700 || train loss: 2.4633528006059806, train acc: 9.849768188302425


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 12 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 13, batch index 0 || train loss: 2.570009708404541, train acc: 6.25
Epoch 13, batch index 100 || train loss: 2.4448027469144007, train acc: 10.426980198019802
Epoch 13, batch index 200 || train loss: 2.4612766533941772, train acc: 9.872512437810945
Epoch 13, batch index 300 || train loss: 2.4612305671273673, train acc: 10.101744186046512
Epoch 13, batch index 400 || train loss: 2.462889859206659, train acc: 9.881546134663342
Epoch 13, batch index 500 || train loss: 2.4627530465344947, train acc: 9.858408183632735
Epoch 13, batch index 600 || train loss: 2.4637296108556863, train acc: 9.874168053244592
Epoch 13, batch index 700 || train loss: 2.46285414899807, train acc: 9.878744650499288


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 13 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 14, batch index 0 || train loss: 2.3583059310913086, train acc: 10.9375
Epoch 14, batch index 100 || train loss: 2.4757670270334375, train acc: 9.545173267326733
Epoch 14, batch index 200 || train loss: 2.4672553432521536, train acc: 9.958022388059701
Epoch 14, batch index 300 || train loss: 2.4668588614543014, train acc: 10.003114617940199
Epoch 14, batch index 400 || train loss: 2.462050349337799, train acc: 10.165991271820449
Epoch 14, batch index 500 || train loss: 2.464051507904144, train acc: 9.98939620758483
Epoch 14, batch index 600 || train loss: 2.4635645407011824, train acc: 9.996360232945092
Epoch 14, batch index 700 || train loss: 2.4638746621435277, train acc: 9.965674037089872


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 14 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 15, batch index 0 || train loss: 2.423712968826294, train acc: 12.5
Epoch 15, batch index 100 || train loss: 2.460726452345895, train acc: 9.622524752475247
Epoch 15, batch index 200 || train loss: 2.4563278463942493, train acc: 10.230099502487562
Epoch 15, batch index 300 || train loss: 2.4575716878884655, train acc: 10.236710963455149
Epoch 15, batch index 400 || train loss: 2.458954193348302, train acc: 10.130922693266832
Epoch 15, batch index 500 || train loss: 2.459111207973457, train acc: 10.107909181636726
Epoch 15, batch index 600 || train loss: 2.4593801748336057, train acc: 10.076955074875208
Epoch 15, batch index 700 || train loss: 2.462527089384245, train acc: 9.983505706134094


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 15 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 16, batch index 0 || train loss: 2.525423526763916, train acc: 6.25
Epoch 16, batch index 100 || train loss: 2.4636208019634283, train acc: 10.117574257425742
Epoch 16, batch index 200 || train loss: 2.468550610898146, train acc: 9.64707711442786
Epoch 16, batch index 300 || train loss: 2.465368204338606, train acc: 9.966777408637874
Epoch 16, batch index 400 || train loss: 2.4654967826500793, train acc: 9.98285536159601
Epoch 16, batch index 500 || train loss: 2.4658279999525483, train acc: 10.011227544910179
Epoch 16, batch index 600 || train loss: 2.464826421610726, train acc: 10.019758735440933
Epoch 16, batch index 700 || train loss: 2.465047689715398, train acc: 9.943384450784594


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 16 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 17, batch index 0 || train loss: 2.4071974754333496, train acc: 14.0625
Epoch 17, batch index 100 || train loss: 2.4603785972784062, train acc: 10.194925742574258
Epoch 17, batch index 200 || train loss: 2.4600669910658652, train acc: 10.167910447761194
Epoch 17, batch index 300 || train loss: 2.4627181651980377, train acc: 9.88891196013289
Epoch 17, batch index 400 || train loss: 2.4635294268553394, train acc: 9.795822942643392
Epoch 17, batch index 500 || train loss: 2.4625689978609064, train acc: 9.870883233532934
Epoch 17, batch index 600 || train loss: 2.4625734902062946, train acc: 9.93136439267887
Epoch 17, batch index 700 || train loss: 2.463475468495433, train acc: 9.952300285306706


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 17 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 18, batch index 0 || train loss: 2.505690336227417, train acc: 7.8125
Epoch 18, batch index 100 || train loss: 2.4698534271504617, train acc: 9.251237623762377
Epoch 18, batch index 200 || train loss: 2.463659043335796, train acc: 9.70926616915423
Epoch 18, batch index 300 || train loss: 2.4624846472692647, train acc: 9.920058139534884
Epoch 18, batch index 400 || train loss: 2.4607070705242586, train acc: 9.904925187032418
Epoch 18, batch index 500 || train loss: 2.4622367898861093, train acc: 9.942614770459082
Epoch 18, batch index 600 || train loss: 2.4610582877712917, train acc: 10.00155990016639
Epoch 18, batch index 700 || train loss: 2.4602553262860223, train acc: 10.001337375178316


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 18 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 19, batch index 0 || train loss: 2.4955101013183594, train acc: 4.6875
Epoch 19, batch index 100 || train loss: 2.4717922423145557, train acc: 9.26670792079208
Epoch 19, batch index 200 || train loss: 2.4683771643472547, train acc: 9.631529850746269
Epoch 19, batch index 300 || train loss: 2.466227867278546, train acc: 9.691652823920267
Epoch 19, batch index 400 || train loss: 2.463272096510243, train acc: 9.947786783042394
Epoch 19, batch index 500 || train loss: 2.464288879059508, train acc: 9.880239520958083
Epoch 19, batch index 600 || train loss: 2.4644939252818485, train acc: 9.944363560732112
Epoch 19, batch index 700 || train loss: 2.463844018723927, train acc: 9.95675820256776


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 19 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 20, batch index 0 || train loss: 2.462003707885742, train acc: 10.9375
Epoch 20, batch index 100 || train loss: 2.4678476281685406, train acc: 9.68440594059406
Epoch 20, batch index 200 || train loss: 2.46656896463081, train acc: 9.926927860696518
Epoch 20, batch index 300 || train loss: 2.4626571205367283, train acc: 10.029069767441861
Epoch 20, batch index 400 || train loss: 2.462437694506752, train acc: 10.06857855361596
Epoch 20, batch index 500 || train loss: 2.459518137092362, train acc: 10.129740518962075
Epoch 20, batch index 600 || train loss: 2.461116654702312, train acc: 10.07175540765391
Epoch 20, batch index 700 || train loss: 2.461282211091481, train acc: 10.045916547788874


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 20 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 21, batch index 0 || train loss: 2.474792718887329, train acc: 10.9375
Epoch 21, batch index 100 || train loss: 2.4584872179692336, train acc: 10.163985148514852
Epoch 21, batch index 200 || train loss: 2.4590548852189857, train acc: 10.043532338308458
Epoch 21, batch index 300 || train loss: 2.4590828822696724, train acc: 9.992732558139535
Epoch 21, batch index 400 || train loss: 2.4606946769200655, train acc: 10.060785536159601
Epoch 21, batch index 500 || train loss: 2.462055152999665, train acc: 9.945733532934131
Epoch 21, batch index 600 || train loss: 2.463704107605082, train acc: 9.892366888519135
Epoch 21, batch index 700 || train loss: 2.4637023246237284, train acc: 9.887660485021398


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 21 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 22, batch index 0 || train loss: 2.5135014057159424, train acc: 10.9375
Epoch 22, batch index 100 || train loss: 2.4554904144589265, train acc: 10.272277227722773
Epoch 22, batch index 200 || train loss: 2.458245690189191, train acc: 10.082400497512438
Epoch 22, batch index 300 || train loss: 2.459799453665648, train acc: 10.065406976744185
Epoch 22, batch index 400 || train loss: 2.458647006764971, train acc: 10.07637157107232
Epoch 22, batch index 500 || train loss: 2.460202558787759, train acc: 10.048652694610778
Epoch 22, batch index 600 || train loss: 2.4623655055009586, train acc: 10.014559068219635
Epoch 22, batch index 700 || train loss: 2.4628712443924496, train acc: 9.97904778887304


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 22 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 23, batch index 0 || train loss: 2.519338846206665, train acc: 6.25
Epoch 23, batch index 100 || train loss: 2.467277689735488, train acc: 9.529702970297029
Epoch 23, batch index 200 || train loss: 2.463755389351157, train acc: 9.584888059701493
Epoch 23, batch index 300 || train loss: 2.4660971552826636, train acc: 9.660506644518273
Epoch 23, batch index 400 || train loss: 2.463257473305871, train acc: 9.706203241895262
Epoch 23, batch index 500 || train loss: 2.46545174307452, train acc: 9.755489021956087
Epoch 23, batch index 600 || train loss: 2.4645130055121296, train acc: 9.790973377703827
Epoch 23, batch index 700 || train loss: 2.4636896367419974, train acc: 9.883202567760343


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 23 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 24, batch index 0 || train loss: 2.3895347118377686, train acc: 9.375
Epoch 24, batch index 100 || train loss: 2.4487730961034795, train acc: 10.380569306930694
Epoch 24, batch index 200 || train loss: 2.448237853263741, train acc: 10.70429104477612
Epoch 24, batch index 300 || train loss: 2.4521466568854957, train acc: 10.506644518272426
Epoch 24, batch index 400 || train loss: 2.4581654327468683, train acc: 10.282886533665835
Epoch 24, batch index 500 || train loss: 2.4599653308738967, train acc: 10.142215568862275
Epoch 24, batch index 600 || train loss: 2.46220125572852, train acc: 10.053556572379367
Epoch 24, batch index 700 || train loss: 2.463377990328126, train acc: 9.970131954350927


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 24 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 25, batch index 0 || train loss: 2.3970892429351807, train acc: 14.0625
Epoch 25, batch index 100 || train loss: 2.452996218558585, train acc: 10.365099009900991
Epoch 25, batch index 200 || train loss: 2.4636078900958767, train acc: 9.965796019900498
Epoch 25, batch index 300 || train loss: 2.456660513862027, train acc: 10.288621262458472
Epoch 25, batch index 400 || train loss: 2.460367865098682, train acc: 10.220542394014963
Epoch 25, batch index 500 || train loss: 2.461334944247248, train acc: 10.132859281437126
Epoch 25, batch index 600 || train loss: 2.460315930069782, train acc: 10.134151414309484
Epoch 25, batch index 700 || train loss: 2.4613956241906965, train acc: 10.043687589158345


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 25 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 26, batch index 0 || train loss: 2.4028632640838623, train acc: 7.8125
Epoch 26, batch index 100 || train loss: 2.4562442680396654, train acc: 10.442450495049505
Epoch 26, batch index 200 || train loss: 2.455263141375869, train acc: 10.191231343283581
Epoch 26, batch index 300 || train loss: 2.4591204533941324, train acc: 10.148463455149502
Epoch 26, batch index 400 || train loss: 2.4610378082256363, train acc: 10.014027431421447
Epoch 26, batch index 500 || train loss: 2.459304915692754, train acc: 9.914545908183634
Epoch 26, batch index 600 || train loss: 2.4601960511453536, train acc: 9.983361064891847
Epoch 26, batch index 700 || train loss: 2.459789830845875, train acc: 9.976818830242511


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 26 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 27, batch index 0 || train loss: 2.566044330596924, train acc: 7.8125
Epoch 27, batch index 100 || train loss: 2.4610756623862993, train acc: 9.668935643564357
Epoch 27, batch index 200 || train loss: 2.457828053194492, train acc: 9.880286069651742
Epoch 27, batch index 300 || train loss: 2.4599101709765057, train acc: 10.070598006644518
Epoch 27, batch index 400 || train loss: 2.4613717529839114, train acc: 10.091957605985037
Epoch 27, batch index 500 || train loss: 2.460962549179138, train acc: 10.02058383233533
Epoch 27, batch index 600 || train loss: 2.463237759277547, train acc: 9.848169717138104
Epoch 27, batch index 700 || train loss: 2.46229977410462, train acc: 9.930010699001427


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 27 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 28, batch index 0 || train loss: 2.3399112224578857, train acc: 15.625
Epoch 28, batch index 100 || train loss: 2.4600351801013, train acc: 10.179455445544555
Epoch 28, batch index 200 || train loss: 2.4670224355820993, train acc: 9.849191542288557
Epoch 28, batch index 300 || train loss: 2.463906362603273, train acc: 10.055024916943522
Epoch 28, batch index 400 || train loss: 2.460711171204907, train acc: 10.111440149625935
Epoch 28, batch index 500 || train loss: 2.4588494600650077, train acc: 10.076721556886227
Epoch 28, batch index 600 || train loss: 2.4598748497478975, train acc: 9.967762063227953
Epoch 28, batch index 700 || train loss: 2.4603522221814207, train acc: 10.012482168330957


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 28 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 29, batch index 0 || train loss: 2.377598285675049, train acc: 10.9375
Epoch 29, batch index 100 || train loss: 2.459911787864005, train acc: 10.148514851485148
Epoch 29, batch index 200 || train loss: 2.4595933506144814, train acc: 10.315609452736318
Epoch 29, batch index 300 || train loss: 2.457032699521594, train acc: 10.231519933554818
Epoch 29, batch index 400 || train loss: 2.455006427598416, train acc: 10.181577306733168
Epoch 29, batch index 500 || train loss: 2.4591236228714446, train acc: 10.142215568862275
Epoch 29, batch index 600 || train loss: 2.4600117865100675, train acc: 10.050956738768718
Epoch 29, batch index 700 || train loss: 2.4618623487959574, train acc: 9.990192582025678


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 29 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 30, batch index 0 || train loss: 2.5476608276367188, train acc: 10.9375
Epoch 30, batch index 100 || train loss: 2.4621759145566733, train acc: 10.535272277227723
Epoch 30, batch index 200 || train loss: 2.4621684005604454, train acc: 10.346703980099502
Epoch 30, batch index 300 || train loss: 2.4668446664398296, train acc: 10.086171096345515
Epoch 30, batch index 400 || train loss: 2.4636376605664108, train acc: 10.193266832917706
Epoch 30, batch index 500 || train loss: 2.460902798913434, train acc: 10.288797405189621
Epoch 30, batch index 600 || train loss: 2.462560085607646, train acc: 10.087354409317804
Epoch 30, batch index 700 || train loss: 2.463242023035395, train acc: 10.065977175463624


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 30 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 31, batch index 0 || train loss: 2.645651340484619, train acc: 6.25
Epoch 31, batch index 100 || train loss: 2.4675155512177116, train acc: 9.514232673267326
Epoch 31, batch index 200 || train loss: 2.465458370559844, train acc: 9.872512437810945
Epoch 31, batch index 300 || train loss: 2.462912306833109, train acc: 9.930440199335548
Epoch 31, batch index 400 || train loss: 2.461357908653202, train acc: 9.967269326683292
Epoch 31, batch index 500 || train loss: 2.461686384654093, train acc: 10.092315369261478
Epoch 31, batch index 600 || train loss: 2.462034920487745, train acc: 10.027558236272878
Epoch 31, batch index 700 || train loss: 2.4621437374773447, train acc: 10.043687589158345


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 31 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 32, batch index 0 || train loss: 2.481961965560913, train acc: 9.375
Epoch 32, batch index 100 || train loss: 2.460397123110176, train acc: 10.334158415841584
Epoch 32, batch index 200 || train loss: 2.4598899540023424, train acc: 9.872512437810945
Epoch 32, batch index 300 || train loss: 2.4637306180111196, train acc: 9.857765780730897
Epoch 32, batch index 400 || train loss: 2.4618603957264202, train acc: 9.955579800498754
Epoch 32, batch index 500 || train loss: 2.463042699886177, train acc: 9.908308383233534
Epoch 32, batch index 600 || train loss: 2.4634037085261795, train acc: 9.889767054908486
Epoch 32, batch index 700 || train loss: 2.4613282643099144, train acc: 9.974589871611983


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 32 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 33, batch index 0 || train loss: 2.3352675437927246, train acc: 17.1875
Epoch 33, batch index 100 || train loss: 2.4664978792171666, train acc: 10.411509900990099
Epoch 33, batch index 200 || train loss: 2.462170822703423, train acc: 10.199004975124378
Epoch 33, batch index 300 || train loss: 2.4623760115664663, train acc: 10.049833887043189
Epoch 33, batch index 400 || train loss: 2.465521139396991, train acc: 10.037406483790523
Epoch 33, batch index 500 || train loss: 2.4645508396887257, train acc: 10.04553393213573
Epoch 33, batch index 600 || train loss: 2.464323039856211, train acc: 10.066555740432612
Epoch 33, batch index 700 || train loss: 2.463177480643214, train acc: 10.065977175463624


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 33 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 34, batch index 0 || train loss: 2.429821252822876, train acc: 10.9375
Epoch 34, batch index 100 || train loss: 2.461538576843715, train acc: 9.900990099009901
Epoch 34, batch index 200 || train loss: 2.458186192299003, train acc: 10.21455223880597
Epoch 34, batch index 300 || train loss: 2.458733917470786, train acc: 10.210755813953488
Epoch 34, batch index 400 || train loss: 2.4587361354780315, train acc: 10.220542394014963
Epoch 34, batch index 500 || train loss: 2.4589713914189746, train acc: 10.135978043912175
Epoch 34, batch index 600 || train loss: 2.458973914731957, train acc: 10.219945923460898
Epoch 34, batch index 700 || train loss: 2.4612782998023803, train acc: 10.106098430813125


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 34 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 35, batch index 0 || train loss: 2.4245824813842773, train acc: 9.375
Epoch 35, batch index 100 || train loss: 2.455518335399061, train acc: 10.535272277227723
Epoch 35, batch index 200 || train loss: 2.4565241289376027, train acc: 10.463308457711443
Epoch 35, batch index 300 || train loss: 2.4603272680428336, train acc: 10.42358803986711
Epoch 35, batch index 400 || train loss: 2.4620604360490073, train acc: 10.271197007481296
Epoch 35, batch index 500 || train loss: 2.4610849811645323, train acc: 10.182759481037925
Epoch 35, batch index 600 || train loss: 2.46213336911257, train acc: 10.113352745424292
Epoch 35, batch index 700 || train loss: 2.4611012534306154, train acc: 10.15290656205421


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 35 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 36, batch index 0 || train loss: 2.413264513015747, train acc: 14.0625
Epoch 36, batch index 100 || train loss: 2.464752364866804, train acc: 10.040222772277227
Epoch 36, batch index 200 || train loss: 2.464502335780889, train acc: 9.973569651741293
Epoch 36, batch index 300 || train loss: 2.460358749592423, train acc: 9.956395348837209
Epoch 36, batch index 400 || train loss: 2.458352418909049, train acc: 9.994544887780549
Epoch 36, batch index 500 || train loss: 2.459676462257218, train acc: 9.97068363273453
Epoch 36, batch index 600 || train loss: 2.460672538014696, train acc: 9.978161397670549
Epoch 36, batch index 700 || train loss: 2.462025964481174, train acc: 9.983505706134094


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 36 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 37, batch index 0 || train loss: 2.46726655960083, train acc: 6.25
Epoch 37, batch index 100 || train loss: 2.468517525361316, train acc: 9.328589108910892
Epoch 37, batch index 200 || train loss: 2.466237504683917, train acc: 9.538246268656716
Epoch 37, batch index 300 || train loss: 2.4624377271265683, train acc: 9.831810631229235
Epoch 37, batch index 400 || train loss: 2.4625827362412527, train acc: 9.803615960099751
Epoch 37, batch index 500 || train loss: 2.463459906225909, train acc: 9.839695608782435
Epoch 37, batch index 600 || train loss: 2.463297976828653, train acc: 9.866368552412645
Epoch 37, batch index 700 || train loss: 2.46168994019273, train acc: 9.945613409415122


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 37 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 38, batch index 0 || train loss: 2.4611589908599854, train acc: 10.9375
Epoch 38, batch index 100 || train loss: 2.4670931513946837, train acc: 10.024752475247524
Epoch 38, batch index 200 || train loss: 2.462214007306455, train acc: 10.004664179104477
Epoch 38, batch index 300 || train loss: 2.4636209525935278, train acc: 9.946013289036545
Epoch 38, batch index 400 || train loss: 2.463559282688131, train acc: 9.967269326683292
Epoch 38, batch index 500 || train loss: 2.462141631844039, train acc: 10.01434630738523
Epoch 38, batch index 600 || train loss: 2.4620735700832626, train acc: 10.017158901830284
Epoch 38, batch index 700 || train loss: 2.4614987128471344, train acc: 10.050374465049929


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 38 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 39, batch index 0 || train loss: 2.461775541305542, train acc: 14.0625
Epoch 39, batch index 100 || train loss: 2.4587873893209022, train acc: 10.411509900990099
Epoch 39, batch index 200 || train loss: 2.46665236250085, train acc: 9.888059701492537
Epoch 39, batch index 300 || train loss: 2.4652641207672827, train acc: 10.127699335548172
Epoch 39, batch index 400 || train loss: 2.464323242406298, train acc: 10.138715710723192
Epoch 39, batch index 500 || train loss: 2.4608861605326333, train acc: 10.179640718562874
Epoch 39, batch index 600 || train loss: 2.4609850619279605, train acc: 10.058756239600665
Epoch 39, batch index 700 || train loss: 2.4599799633706345, train acc: 10.074893009985734


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 39 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 40, batch index 0 || train loss: 2.5220398902893066, train acc: 7.8125
Epoch 40, batch index 100 || train loss: 2.4634613660302493, train acc: 10.581683168316832
Epoch 40, batch index 200 || train loss: 2.465385130981901, train acc: 10.113495024875622
Epoch 40, batch index 300 || train loss: 2.4679367566029495, train acc: 10.003114617940199
Epoch 40, batch index 400 || train loss: 2.4652165849309906, train acc: 10.006234413965087
Epoch 40, batch index 500 || train loss: 2.4634714683372816, train acc: 10.04553393213573
Epoch 40, batch index 600 || train loss: 2.461848513258872, train acc: 10.02495840266223
Epoch 40, batch index 700 || train loss: 2.4627081188087625, train acc: 10.021398002853067


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 40 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 41, batch index 0 || train loss: 2.462698221206665, train acc: 12.5
Epoch 41, batch index 100 || train loss: 2.462041765156359, train acc: 10.024752475247524
Epoch 41, batch index 200 || train loss: 2.4572183943506496, train acc: 10.113495024875622
Epoch 41, batch index 300 || train loss: 2.4563086349702754, train acc: 10.164036544850498
Epoch 41, batch index 400 || train loss: 2.4600346623513465, train acc: 10.146508728179551
Epoch 41, batch index 500 || train loss: 2.4607555285661284, train acc: 10.017465069860279
Epoch 41, batch index 600 || train loss: 2.461562191587121, train acc: 10.037957570715474
Epoch 41, batch index 700 || train loss: 2.462289281646467, train acc: 10.072664051355208


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 41 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 42, batch index 0 || train loss: 2.3851442337036133, train acc: 12.5
Epoch 42, batch index 100 || train loss: 2.465361689577008, train acc: 9.823638613861386
Epoch 42, batch index 200 || train loss: 2.4645922278883443, train acc: 9.926927860696518
Epoch 42, batch index 300 || train loss: 2.463015678317048, train acc: 10.003114617940199
Epoch 42, batch index 400 || train loss: 2.460175448225026, train acc: 10.06857855361596
Epoch 42, batch index 500 || train loss: 2.4636560723691168, train acc: 9.945733532934131
Epoch 42, batch index 600 || train loss: 2.461867999713155, train acc: 10.019758735440933
Epoch 42, batch index 700 || train loss: 2.4613952010912494, train acc: 10.06820613409415


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 42 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 43, batch index 0 || train loss: 2.490438461303711, train acc: 6.25
Epoch 43, batch index 100 || train loss: 2.4564202020664028, train acc: 9.885519801980198
Epoch 43, batch index 200 || train loss: 2.4605454639415836, train acc: 10.284514925373134
Epoch 43, batch index 300 || train loss: 2.464224864478127, train acc: 10.252284053156146
Epoch 43, batch index 400 || train loss: 2.4656569922059552, train acc: 10.15430174563591
Epoch 43, batch index 500 || train loss: 2.4623991239094685, train acc: 10.145334331337326
Epoch 43, batch index 600 || train loss: 2.4616939846965518, train acc: 10.110752911813645
Epoch 43, batch index 700 || train loss: 2.4620532118814986, train acc: 10.06820613409415


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 43 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 44, batch index 0 || train loss: 2.5964834690093994, train acc: 4.6875
Epoch 44, batch index 100 || train loss: 2.4630569231392134, train acc: 10.024752475247524
Epoch 44, batch index 200 || train loss: 2.4646391702528616, train acc: 9.958022388059701
Epoch 44, batch index 300 || train loss: 2.464428570579453, train acc: 10.013496677740864
Epoch 44, batch index 400 || train loss: 2.4632041953745625, train acc: 9.865960099750623
Epoch 44, batch index 500 || train loss: 2.465256336920276, train acc: 9.892714570858283
Epoch 44, batch index 600 || train loss: 2.463850582697229, train acc: 9.907965890183029
Epoch 44, batch index 700 || train loss: 2.463479455289419, train acc: 9.958987161198289


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 44 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 45, batch index 0 || train loss: 2.372812509536743, train acc: 14.0625
Epoch 45, batch index 100 || train loss: 2.457499374257456, train acc: 10.256806930693068
Epoch 45, batch index 200 || train loss: 2.457555789852617, train acc: 10.113495024875622
Epoch 45, batch index 300 || train loss: 2.4634985551485586, train acc: 9.946013289036545
Epoch 45, batch index 400 || train loss: 2.46357973971569, train acc: 9.916614713216958
Epoch 45, batch index 500 || train loss: 2.462507191770329, train acc: 9.867764471057884
Epoch 45, batch index 600 || train loss: 2.463282328080417, train acc: 9.835170549084859
Epoch 45, batch index 700 || train loss: 2.463498692029553, train acc: 9.925552781740372


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 45 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 46, batch index 0 || train loss: 2.6460585594177246, train acc: 4.6875
Epoch 46, batch index 100 || train loss: 2.462415350545751, train acc: 9.746287128712872
Epoch 46, batch index 200 || train loss: 2.4599735879186375, train acc: 9.98134328358209
Epoch 46, batch index 300 || train loss: 2.4626721115999444, train acc: 10.096553156146179
Epoch 46, batch index 400 || train loss: 2.462944610457765, train acc: 10.181577306733168
Epoch 46, batch index 500 || train loss: 2.462839491115121, train acc: 10.117265469061877
Epoch 46, batch index 600 || train loss: 2.462817459852247, train acc: 10.123752079866888
Epoch 46, batch index 700 || train loss: 2.4627057696545176, train acc: 10.061519258202567


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 46 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 47, batch index 0 || train loss: 2.4710323810577393, train acc: 12.5
Epoch 47, batch index 100 || train loss: 2.4567339184260604, train acc: 10.148514851485148
Epoch 47, batch index 200 || train loss: 2.459890128368169, train acc: 9.841417910447761
Epoch 47, batch index 300 || train loss: 2.4618233224482235, train acc: 9.904485049833887
Epoch 47, batch index 400 || train loss: 2.4582530465209276, train acc: 10.185473815461346
Epoch 47, batch index 500 || train loss: 2.4607616460727835, train acc: 10.089196606786427
Epoch 47, batch index 600 || train loss: 2.4605746471544667, train acc: 10.139351081530782
Epoch 47, batch index 700 || train loss: 2.462639073672546, train acc: 10.003566333808845


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 47 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 48, batch index 0 || train loss: 2.5914499759674072, train acc: 7.8125
Epoch 48, batch index 100 || train loss: 2.45918341910485, train acc: 10.566212871287128
Epoch 48, batch index 200 || train loss: 2.4635364202717644, train acc: 10.307835820895523
Epoch 48, batch index 300 || train loss: 2.4649777792616936, train acc: 9.992732558139535
Epoch 48, batch index 400 || train loss: 2.464349065338287, train acc: 10.014027431421447
Epoch 48, batch index 500 || train loss: 2.463697230268619, train acc: 9.951971057884231
Epoch 48, batch index 600 || train loss: 2.4616577855362474, train acc: 9.99896006655574
Epoch 48, batch index 700 || train loss: 2.46077874757764, train acc: 9.992421540656206


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 48 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 49, batch index 0 || train loss: 2.509634494781494, train acc: 7.8125
Epoch 49, batch index 100 || train loss: 2.462549148219647, train acc: 9.854579207920793
Epoch 49, batch index 200 || train loss: 2.459758719401573, train acc: 10.338930348258707
Epoch 49, batch index 300 || train loss: 2.4623367643831577, train acc: 10.257475083056478
Epoch 49, batch index 400 || train loss: 2.460948212188378, train acc: 10.146508728179551
Epoch 49, batch index 500 || train loss: 2.4604606062114356, train acc: 10.185878243512974
Epoch 49, batch index 600 || train loss: 2.4609471228277426, train acc: 10.079554908485857
Epoch 49, batch index 700 || train loss: 2.4610324048791905, train acc: 10.041458630527817


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 49 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 50, batch index 0 || train loss: 2.464236259460449, train acc: 4.6875
Epoch 50, batch index 100 || train loss: 2.458924930874664, train acc: 9.436881188118813
Epoch 50, batch index 200 || train loss: 2.4552672597306286, train acc: 9.856965174129353
Epoch 50, batch index 300 || train loss: 2.4602520244066106, train acc: 9.868147840531561
Epoch 50, batch index 400 || train loss: 2.4618339710996633, train acc: 9.98285536159601
Epoch 50, batch index 500 || train loss: 2.462947998694079, train acc: 9.948852295409182
Epoch 50, batch index 600 || train loss: 2.4609725741102375, train acc: 10.035357737104825
Epoch 50, batch index 700 || train loss: 2.4612164062032007, train acc: 9.99910841654779


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 50 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 51, batch index 0 || train loss: 2.438786745071411, train acc: 9.375
Epoch 51, batch index 100 || train loss: 2.465358401289081, train acc: 10.117574257425742
Epoch 51, batch index 200 || train loss: 2.4667761966363706, train acc: 10.004664179104477
Epoch 51, batch index 300 || train loss: 2.464842812959538, train acc: 10.044642857142858
Epoch 51, batch index 400 || train loss: 2.4621057599559983, train acc: 10.06468204488778
Epoch 51, batch index 500 || train loss: 2.461077842407836, train acc: 9.98315868263473
Epoch 51, batch index 600 || train loss: 2.4582047351386502, train acc: 10.066555740432612
Epoch 51, batch index 700 || train loss: 2.459672176888938, train acc: 10.021398002853067


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 51 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 52, batch index 0 || train loss: 2.430954933166504, train acc: 10.9375
Epoch 52, batch index 100 || train loss: 2.446538191030521, train acc: 10.844678217821782
Epoch 52, batch index 200 || train loss: 2.4544840262303897, train acc: 10.338930348258707
Epoch 52, batch index 300 || train loss: 2.457631751152368, train acc: 10.215946843853821
Epoch 52, batch index 400 || train loss: 2.460537102454321, train acc: 10.185473815461346
Epoch 52, batch index 500 || train loss: 2.459408260391144, train acc: 10.188997005988025
Epoch 52, batch index 600 || train loss: 2.4589215745148367, train acc: 10.136751247920133
Epoch 52, batch index 700 || train loss: 2.461373776750116, train acc: 9.958987161198289


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 52 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 53, batch index 0 || train loss: 2.4232017993927, train acc: 12.5
Epoch 53, batch index 100 || train loss: 2.4573031368822154, train acc: 10.952970297029703
Epoch 53, batch index 200 || train loss: 2.45968782367991, train acc: 10.541044776119403
Epoch 53, batch index 300 || train loss: 2.4601993529107484, train acc: 10.37686877076412
Epoch 53, batch index 400 || train loss: 2.46249595544582, train acc: 10.142612219451372
Epoch 53, batch index 500 || train loss: 2.463277424642902, train acc: 10.101671656686626
Epoch 53, batch index 600 || train loss: 2.4626029763562904, train acc: 10.149750415973378
Epoch 53, batch index 700 || train loss: 2.464269075516117, train acc: 10.079350927246791


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 53 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 54, batch index 0 || train loss: 2.4989230632781982, train acc: 6.25
Epoch 54, batch index 100 || train loss: 2.464098793445247, train acc: 9.637995049504951
Epoch 54, batch index 200 || train loss: 2.4609367835581004, train acc: 10.020211442786069
Epoch 54, batch index 300 || train loss: 2.4635053219589285, train acc: 10.023878737541528
Epoch 54, batch index 400 || train loss: 2.4640615081549284, train acc: 10.06468204488778
Epoch 54, batch index 500 || train loss: 2.4635201904350175, train acc: 10.017465069860279
Epoch 54, batch index 600 || train loss: 2.4619644798970657, train acc: 10.066555740432612
Epoch 54, batch index 700 || train loss: 2.4627029035298866, train acc: 9.97904778887304


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 54 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 55, batch index 0 || train loss: 2.498764753341675, train acc: 10.9375
Epoch 55, batch index 100 || train loss: 2.4592893572136907, train acc: 10.071163366336634
Epoch 55, batch index 200 || train loss: 2.4564285717200285, train acc: 10.074626865671641
Epoch 55, batch index 300 || train loss: 2.457399603536359, train acc: 10.080980066445182
Epoch 55, batch index 400 || train loss: 2.4604666453049964, train acc: 9.869856608478804
Epoch 55, batch index 500 || train loss: 2.461431748376873, train acc: 9.911427145708583
Epoch 55, batch index 600 || train loss: 2.4603714367712595, train acc: 10.022358569051582
Epoch 55, batch index 700 || train loss: 2.461423916415379, train acc: 9.963445078459344


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 55 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 56, batch index 0 || train loss: 2.5194954872131348, train acc: 10.9375
Epoch 56, batch index 100 || train loss: 2.465404423156587, train acc: 9.560643564356436
Epoch 56, batch index 200 || train loss: 2.4649865461226126, train acc: 9.584888059701493
Epoch 56, batch index 300 || train loss: 2.4642787043042356, train acc: 9.857765780730897
Epoch 56, batch index 400 || train loss: 2.4635164398802187, train acc: 9.990648379052368
Epoch 56, batch index 500 || train loss: 2.4642469692610933, train acc: 9.967564870259482
Epoch 56, batch index 600 || train loss: 2.46282191363825, train acc: 9.941763727121463
Epoch 56, batch index 700 || train loss: 2.462096009887064, train acc: 10.014711126961483


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 56 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 57, batch index 0 || train loss: 2.508751153945923, train acc: 9.375
Epoch 57, batch index 100 || train loss: 2.4730433402675214, train acc: 9.746287128712872
Epoch 57, batch index 200 || train loss: 2.466671712362944, train acc: 9.950248756218905
Epoch 57, batch index 300 || train loss: 2.461887160804977, train acc: 10.117317275747508
Epoch 57, batch index 400 || train loss: 2.460690839034958, train acc: 10.177680798004987
Epoch 57, batch index 500 || train loss: 2.4627775855644973, train acc: 10.079840319361278
Epoch 57, batch index 600 || train loss: 2.4632810606139275, train acc: 10.030158069883527
Epoch 57, batch index 700 || train loss: 2.463092579821207, train acc: 10.001337375178316


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 57 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 58, batch index 0 || train loss: 2.437537431716919, train acc: 7.8125
Epoch 58, batch index 100 || train loss: 2.4558587097885587, train acc: 9.854579207920793
Epoch 58, batch index 200 || train loss: 2.4619104020038054, train acc: 9.732587064676617
Epoch 58, batch index 300 || train loss: 2.4604182084929507, train acc: 9.977159468438538
Epoch 58, batch index 400 || train loss: 2.4598724449661904, train acc: 9.998441396508728
Epoch 58, batch index 500 || train loss: 2.459388717681824, train acc: 10.132859281437126
Epoch 58, batch index 600 || train loss: 2.4607768626062327, train acc: 10.07175540765391
Epoch 58, batch index 700 || train loss: 2.4606393148827657, train acc: 10.126159058487875


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 58 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 59, batch index 0 || train loss: 2.527143716812134, train acc: 6.25
Epoch 59, batch index 100 || train loss: 2.464089554135162, train acc: 9.607054455445544
Epoch 59, batch index 200 || train loss: 2.4576611839123625, train acc: 10.17568407960199
Epoch 59, batch index 300 || train loss: 2.459576364371468, train acc: 10.148463455149502
Epoch 59, batch index 400 || train loss: 2.4614076780856697, train acc: 10.041302992518704
Epoch 59, batch index 500 || train loss: 2.461818462360405, train acc: 10.05177145708583
Epoch 59, batch index 600 || train loss: 2.461337434273591, train acc: 10.02495840266223
Epoch 59, batch index 700 || train loss: 2.46174197156147, train acc: 10.037000713266762


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 59 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 60, batch index 0 || train loss: 2.4598305225372314, train acc: 14.0625
Epoch 60, batch index 100 || train loss: 2.467935880812088, train acc: 10.009282178217822
Epoch 60, batch index 200 || train loss: 2.463297350489678, train acc: 10.043532338308458
Epoch 60, batch index 300 || train loss: 2.461376505436691, train acc: 10.200373754152825
Epoch 60, batch index 400 || train loss: 2.4589309829131625, train acc: 10.337437655860349
Epoch 60, batch index 500 || train loss: 2.4597092555192654, train acc: 10.195234530938123
Epoch 60, batch index 600 || train loss: 2.4630394989559536, train acc: 10.121152246256239
Epoch 60, batch index 700 || train loss: 2.4631205554015287, train acc: 10.052603423680457


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 60 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 61, batch index 0 || train loss: 2.5657424926757812, train acc: 9.375
Epoch 61, batch index 100 || train loss: 2.4733602977035067, train acc: 9.668935643564357
Epoch 61, batch index 200 || train loss: 2.4648354207698384, train acc: 9.849191542288557
Epoch 61, batch index 300 || train loss: 2.4621940791804926, train acc: 9.894102990033222
Epoch 61, batch index 400 || train loss: 2.4623077754070635, train acc: 9.959476309226932
Epoch 61, batch index 500 || train loss: 2.461636371955186, train acc: 9.961327345309382
Epoch 61, batch index 600 || train loss: 2.4615437425115143, train acc: 9.936564059900167
Epoch 61, batch index 700 || train loss: 2.4629270105321126, train acc: 9.954529243937232


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 61 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 62, batch index 0 || train loss: 2.4176464080810547, train acc: 10.9375
Epoch 62, batch index 100 || train loss: 2.449186381727162, train acc: 10.241336633663366
Epoch 62, batch index 200 || train loss: 2.452469519714811, train acc: 10.237873134328359
Epoch 62, batch index 300 || train loss: 2.4589478129960374, train acc: 9.992732558139535
Epoch 62, batch index 400 || train loss: 2.4609657891670667, train acc: 10.021820448877806
Epoch 62, batch index 500 || train loss: 2.464157511374194, train acc: 9.99563373253493
Epoch 62, batch index 600 || train loss: 2.4618357283105072, train acc: 10.089954242928453
Epoch 62, batch index 700 || train loss: 2.4622956927594717, train acc: 10.065977175463624


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 62 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 63, batch index 0 || train loss: 2.470952272415161, train acc: 9.375
Epoch 63, batch index 100 || train loss: 2.4670860413277502, train acc: 9.282178217821782
Epoch 63, batch index 200 || train loss: 2.4616968524989797, train acc: 9.444962686567164
Epoch 63, batch index 300 || train loss: 2.461518438155469, train acc: 9.577450166112957
Epoch 63, batch index 400 || train loss: 2.4632257588783704, train acc: 9.682824189526185
Epoch 63, batch index 500 || train loss: 2.4625837522114584, train acc: 9.70870758483034
Epoch 63, batch index 600 || train loss: 2.4624857402680917, train acc: 9.923564891846922
Epoch 63, batch index 700 || train loss: 2.4621734442282337, train acc: 9.983505706134094


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 63 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 64, batch index 0 || train loss: 2.5579607486724854, train acc: 7.8125
Epoch 64, batch index 100 || train loss: 2.476390522305328, train acc: 9.127475247524753
Epoch 64, batch index 200 || train loss: 2.46479570806323, train acc: 9.787002487562189
Epoch 64, batch index 300 || train loss: 2.462424711531579, train acc: 9.987541528239202
Epoch 64, batch index 400 || train loss: 2.4617814785821777, train acc: 10.107543640897756
Epoch 64, batch index 500 || train loss: 2.4630569583641555, train acc: 10.067365269461078
Epoch 64, batch index 600 || train loss: 2.4637888759225857, train acc: 10.02495840266223
Epoch 64, batch index 700 || train loss: 2.463454100612907, train acc: 10.010253209700428


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 64 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 65, batch index 0 || train loss: 2.496778726577759, train acc: 4.6875
Epoch 65, batch index 100 || train loss: 2.460041872345575, train acc: 10.133044554455445
Epoch 65, batch index 200 || train loss: 2.4623192103941047, train acc: 9.958022388059701
Epoch 65, batch index 300 || train loss: 2.4659828569405895, train acc: 9.8421926910299
Epoch 65, batch index 400 || train loss: 2.4644217247380285, train acc: 9.80751246882793
Epoch 65, batch index 500 || train loss: 2.4636305758577146, train acc: 9.824101796407186
Epoch 65, batch index 600 || train loss: 2.463226029162796, train acc: 9.824771214642263
Epoch 65, batch index 700 || train loss: 2.462616734769987, train acc: 9.903263195435093


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 65 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 66, batch index 0 || train loss: 2.5836055278778076, train acc: 1.5625
Epoch 66, batch index 100 || train loss: 2.463563631076624, train acc: 10.179455445544555
Epoch 66, batch index 200 || train loss: 2.469618307417305, train acc: 9.7636815920398
Epoch 66, batch index 300 || train loss: 2.4670547250893424, train acc: 9.961586378737541
Epoch 66, batch index 400 || train loss: 2.4679795988182773, train acc: 9.88544264339152
Epoch 66, batch index 500 || train loss: 2.465074719069247, train acc: 10.03929640718563
Epoch 66, batch index 600 || train loss: 2.464103608282156, train acc: 10.053556572379367
Epoch 66, batch index 700 || train loss: 2.46357659504519, train acc: 10.063748216833096


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 66 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 67, batch index 0 || train loss: 2.505544900894165, train acc: 4.6875
Epoch 67, batch index 100 || train loss: 2.452574614251014, train acc: 10.024752475247524
Epoch 67, batch index 200 || train loss: 2.458097049845985, train acc: 10.113495024875622
Epoch 67, batch index 300 || train loss: 2.4599390742786698, train acc: 10.091362126245848
Epoch 67, batch index 400 || train loss: 2.4593053214865135, train acc: 10.025716957605985
Epoch 67, batch index 500 || train loss: 2.458924898368394, train acc: 10.064246506986027
Epoch 67, batch index 600 || train loss: 2.4586745724701844, train acc: 10.157549916805324
Epoch 67, batch index 700 || train loss: 2.4596563429703218, train acc: 10.037000713266762


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 67 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 68, batch index 0 || train loss: 2.5003068447113037, train acc: 7.8125
Epoch 68, batch index 100 || train loss: 2.4668471081422108, train acc: 10.055693069306932
Epoch 68, batch index 200 || train loss: 2.465846434161438, train acc: 9.880286069651742
Epoch 68, batch index 300 || train loss: 2.468452890846024, train acc: 9.743563122923588
Epoch 68, batch index 400 || train loss: 2.4652734837329895, train acc: 9.928304239401497
Epoch 68, batch index 500 || train loss: 2.4623034600012317, train acc: 10.107909181636726
Epoch 68, batch index 600 || train loss: 2.461522486761287, train acc: 10.17054908485857
Epoch 68, batch index 700 || train loss: 2.4621567668316198, train acc: 10.115014265335235


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 68 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 69, batch index 0 || train loss: 2.426579236984253, train acc: 7.8125
Epoch 69, batch index 100 || train loss: 2.458084410960131, train acc: 9.746287128712872
Epoch 69, batch index 200 || train loss: 2.4647535838891024, train acc: 9.748134328358208
Epoch 69, batch index 300 || train loss: 2.463068587439401, train acc: 9.894102990033222
Epoch 69, batch index 400 || train loss: 2.462269363260626, train acc: 9.955579800498754
Epoch 69, batch index 500 || train loss: 2.462881820168562, train acc: 9.870883233532934
Epoch 69, batch index 600 || train loss: 2.4645496569139986, train acc: 9.894966722129784
Epoch 69, batch index 700 || train loss: 2.4636060232442727, train acc: 9.95675820256776


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 69 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 70, batch index 0 || train loss: 2.383749008178711, train acc: 14.0625
Epoch 70, batch index 100 || train loss: 2.444106928192743, train acc: 10.287747524752476
Epoch 70, batch index 200 || train loss: 2.45321503088842, train acc: 10.230099502487562
Epoch 70, batch index 300 || train loss: 2.45683677647993, train acc: 10.179609634551495
Epoch 70, batch index 400 || train loss: 2.459930468081239, train acc: 10.123129675810473
Epoch 70, batch index 500 || train loss: 2.462781335065465, train acc: 10.03929640718563
Epoch 70, batch index 600 || train loss: 2.463092931693485, train acc: 10.006759567387688
Epoch 70, batch index 700 || train loss: 2.4632910035986364, train acc: 9.996879457917261


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 70 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 71, batch index 0 || train loss: 2.372596502304077, train acc: 12.5
Epoch 71, batch index 100 || train loss: 2.4565592850788986, train acc: 9.962871287128714
Epoch 71, batch index 200 || train loss: 2.4618895042001903, train acc: 9.849191542288557
Epoch 71, batch index 300 || train loss: 2.4613858599995457, train acc: 9.956395348837209
Epoch 71, batch index 400 || train loss: 2.4625362808864906, train acc: 9.963372817955113
Epoch 71, batch index 500 || train loss: 2.463959451683029, train acc: 9.973802395209582
Epoch 71, batch index 600 || train loss: 2.464432787776192, train acc: 9.915765391014975
Epoch 71, batch index 700 || train loss: 2.4624682482911244, train acc: 9.914407988587731


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 71 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 72, batch index 0 || train loss: 2.420977830886841, train acc: 12.5
Epoch 72, batch index 100 || train loss: 2.4644853714669104, train acc: 10.10210396039604
Epoch 72, batch index 200 || train loss: 2.4590715851949816, train acc: 10.338930348258707
Epoch 72, batch index 300 || train loss: 2.458956335866174, train acc: 10.189991694352159
Epoch 72, batch index 400 || train loss: 2.4610597891105974, train acc: 10.06857855361596
Epoch 72, batch index 500 || train loss: 2.4613021357568674, train acc: 10.017465069860279
Epoch 72, batch index 600 || train loss: 2.4620566455377717, train acc: 10.00155990016639
Epoch 72, batch index 700 || train loss: 2.4619988333991865, train acc: 9.97904778887304


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 72 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 73, batch index 0 || train loss: 2.3518359661102295, train acc: 12.5
Epoch 73, batch index 100 || train loss: 2.4507778705936847, train acc: 10.62809405940594
Epoch 73, batch index 200 || train loss: 2.4603668563994603, train acc: 10.183457711442786
Epoch 73, batch index 300 || train loss: 2.460090378194156, train acc: 9.920058139534884
Epoch 73, batch index 400 || train loss: 2.4585354667054746, train acc: 10.115336658354115
Epoch 73, batch index 500 || train loss: 2.459715450595239, train acc: 10.082959081836327
Epoch 73, batch index 600 || train loss: 2.460331164659954, train acc: 9.967762063227953
Epoch 73, batch index 700 || train loss: 2.4623302039338246, train acc: 10.010253209700428


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 73 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 74, batch index 0 || train loss: 2.3584213256835938, train acc: 15.625
Epoch 74, batch index 100 || train loss: 2.4576286585024087, train acc: 10.225866336633663
Epoch 74, batch index 200 || train loss: 2.4603825955841674, train acc: 10.21455223880597
Epoch 74, batch index 300 || train loss: 2.465627037409532, train acc: 9.862956810631228
Epoch 74, batch index 400 || train loss: 2.4623716448310606, train acc: 9.951683291770573
Epoch 74, batch index 500 || train loss: 2.464117057309179, train acc: 9.914545908183634
Epoch 74, batch index 600 || train loss: 2.463496573554498, train acc: 9.881967554076539
Epoch 74, batch index 700 || train loss: 2.4631687101726016, train acc: 9.95675820256776


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 74 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 75, batch index 0 || train loss: 2.3677937984466553, train acc: 14.0625
Epoch 75, batch index 100 || train loss: 2.446972091599266, train acc: 10.334158415841584
Epoch 75, batch index 200 || train loss: 2.4515657104662996, train acc: 10.25342039800995
Epoch 75, batch index 300 || train loss: 2.45441882950919, train acc: 10.184800664451828
Epoch 75, batch index 400 || train loss: 2.4591888346874207, train acc: 10.002337905236908
Epoch 75, batch index 500 || train loss: 2.4597598759238117, train acc: 10.03305888223553
Epoch 75, batch index 600 || train loss: 2.4616698671299684, train acc: 9.933964226289518
Epoch 75, batch index 700 || train loss: 2.462430833239698, train acc: 9.958987161198289


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 75 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 76, batch index 0 || train loss: 2.554163932800293, train acc: 6.25
Epoch 76, batch index 100 || train loss: 2.4680581399709873, train acc: 9.993811881188119
Epoch 76, batch index 200 || train loss: 2.4663060233367617, train acc: 10.121268656716419
Epoch 76, batch index 300 || train loss: 2.4651287235690904, train acc: 9.925249169435215
Epoch 76, batch index 400 || train loss: 2.464227882703938, train acc: 9.877649625935161
Epoch 76, batch index 500 || train loss: 2.4623403173244878, train acc: 10.058008982035927
Epoch 76, batch index 600 || train loss: 2.462649466392403, train acc: 9.99896006655574
Epoch 76, batch index 700 || train loss: 2.462073977425503, train acc: 10.012482168330957


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 76 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 77, batch index 0 || train loss: 2.401890754699707, train acc: 12.5
Epoch 77, batch index 100 || train loss: 2.468749232811503, train acc: 9.947400990099009
Epoch 77, batch index 200 || train loss: 2.4618218846582063, train acc: 10.17568407960199
Epoch 77, batch index 300 || train loss: 2.462961808391584, train acc: 10.127699335548172
Epoch 77, batch index 400 || train loss: 2.463394030668492, train acc: 10.084164588528678
Epoch 77, batch index 500 || train loss: 2.4619303986935797, train acc: 10.117265469061877
Epoch 77, batch index 600 || train loss: 2.4613022958974473, train acc: 10.040557404326123
Epoch 77, batch index 700 || train loss: 2.4604095868479336, train acc: 9.990192582025678


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 77 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 78, batch index 0 || train loss: 2.5488955974578857, train acc: 6.25
Epoch 78, batch index 100 || train loss: 2.460353183274222, train acc: 9.931930693069306
Epoch 78, batch index 200 || train loss: 2.457996835756065, train acc: 10.276741293532337
Epoch 78, batch index 300 || train loss: 2.4577280548323825, train acc: 10.252284053156146
Epoch 78, batch index 400 || train loss: 2.4583611345647873, train acc: 10.212749376558603
Epoch 78, batch index 500 || train loss: 2.4592280173729995, train acc: 10.148453093812375
Epoch 78, batch index 600 || train loss: 2.4583876025855242, train acc: 10.183548252911814
Epoch 78, batch index 700 || train loss: 2.4614372600332306, train acc: 10.081579885877318


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 78 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 79, batch index 0 || train loss: 2.555128335952759, train acc: 10.9375
Epoch 79, batch index 100 || train loss: 2.4618383350938853, train acc: 10.334158415841584
Epoch 79, batch index 200 || train loss: 2.46373574769319, train acc: 10.13681592039801
Epoch 79, batch index 300 || train loss: 2.4637727515642034, train acc: 10.205564784053156
Epoch 79, batch index 400 || train loss: 2.461015783343232, train acc: 10.130922693266832
Epoch 79, batch index 500 || train loss: 2.4623137510227346, train acc: 9.980039920159681
Epoch 79, batch index 600 || train loss: 2.463141933257092, train acc: 9.978161397670549
Epoch 79, batch index 700 || train loss: 2.462697769537802, train acc: 9.970131954350927


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 79 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 80, batch index 0 || train loss: 2.4791879653930664, train acc: 10.9375
Epoch 80, batch index 100 || train loss: 2.4538115062335932, train acc: 10.51980198019802
Epoch 80, batch index 200 || train loss: 2.4588419892894686, train acc: 10.39334577114428
Epoch 80, batch index 300 || train loss: 2.4615005369598286, train acc: 10.169227574750831
Epoch 80, batch index 400 || train loss: 2.4624000148582934, train acc: 10.06468204488778
Epoch 80, batch index 500 || train loss: 2.462053881909795, train acc: 10.03929640718563
Epoch 80, batch index 600 || train loss: 2.46322258498625, train acc: 10.00155990016639
Epoch 80, batch index 700 || train loss: 2.4644100098058943, train acc: 10.021398002853067


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 80 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 81, batch index 0 || train loss: 2.5219008922576904, train acc: 10.9375
Epoch 81, batch index 100 || train loss: 2.460334586625052, train acc: 9.962871287128714
Epoch 81, batch index 200 || train loss: 2.456086838423316, train acc: 10.113495024875622
Epoch 81, batch index 300 || train loss: 2.4565371722478013, train acc: 10.132890365448505
Epoch 81, batch index 400 || train loss: 2.458065564495667, train acc: 10.103647132169575
Epoch 81, batch index 500 || train loss: 2.4582870772736753, train acc: 10.151571856287426
Epoch 81, batch index 600 || train loss: 2.4595573682356595, train acc: 10.076955074875208
Epoch 81, batch index 700 || train loss: 2.4603327885163835, train acc: 9.981276747503566


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 81 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 82, batch index 0 || train loss: 2.411746025085449, train acc: 10.9375
Epoch 82, batch index 100 || train loss: 2.454058562174882, train acc: 10.767326732673267
Epoch 82, batch index 200 || train loss: 2.4559385527425737, train acc: 10.572139303482587
Epoch 82, batch index 300 || train loss: 2.460435288293021, train acc: 10.288621262458472
Epoch 82, batch index 400 || train loss: 2.4629554992304774, train acc: 10.181577306733168
Epoch 82, batch index 500 || train loss: 2.46217580517371, train acc: 10.170284431137725
Epoch 82, batch index 600 || train loss: 2.461943965584982, train acc: 10.035357737104825
Epoch 82, batch index 700 || train loss: 2.463389490878531, train acc: 10.02808487874465


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 82 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 83, batch index 0 || train loss: 2.590090751647949, train acc: 7.8125
Epoch 83, batch index 100 || train loss: 2.464299265700992, train acc: 10.396039603960396
Epoch 83, batch index 200 || train loss: 2.4650454331393266, train acc: 9.965796019900498
Epoch 83, batch index 300 || train loss: 2.463137764471314, train acc: 10.080980066445182
Epoch 83, batch index 400 || train loss: 2.4617091634327037, train acc: 10.06468204488778
Epoch 83, batch index 500 || train loss: 2.462166110912483, train acc: 9.955089820359282
Epoch 83, batch index 600 || train loss: 2.4629550063471233, train acc: 9.939163893510816
Epoch 83, batch index 700 || train loss: 2.4617426694714903, train acc: 10.023626961483595


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 83 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 84, batch index 0 || train loss: 2.442420482635498, train acc: 9.375
Epoch 84, batch index 100 || train loss: 2.4620050085653173, train acc: 10.179455445544555
Epoch 84, batch index 200 || train loss: 2.4605283322025886, train acc: 9.818097014925373
Epoch 84, batch index 300 || train loss: 2.462612365171363, train acc: 9.930440199335548
Epoch 84, batch index 400 || train loss: 2.4623490354961293, train acc: 10.037406483790523
Epoch 84, batch index 500 || train loss: 2.4633640505358607, train acc: 10.048652694610778
Epoch 84, batch index 600 || train loss: 2.4614202436710553, train acc: 10.165349417637271
Epoch 84, batch index 700 || train loss: 2.462563572868641, train acc: 10.06820613409415


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 84 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 85, batch index 0 || train loss: 2.420416831970215, train acc: 10.9375
Epoch 85, batch index 100 || train loss: 2.463691199179923, train acc: 10.163985148514852
Epoch 85, batch index 200 || train loss: 2.4630960599700016, train acc: 9.973569651741293
Epoch 85, batch index 300 || train loss: 2.4619082303538273, train acc: 9.847383720930232
Epoch 85, batch index 400 || train loss: 2.463839061836947, train acc: 9.795822942643392
Epoch 85, batch index 500 || train loss: 2.4629534542441607, train acc: 9.855289421157684
Epoch 85, batch index 600 || train loss: 2.463208031138644, train acc: 9.915765391014975
Epoch 85, batch index 700 || train loss: 2.462000992770882, train acc: 9.932239657631955


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 85 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 86, batch index 0 || train loss: 2.386270523071289, train acc: 12.5
Epoch 86, batch index 100 || train loss: 2.466677960782948, train acc: 10.133044554455445
Epoch 86, batch index 200 || train loss: 2.4635151239176887, train acc: 9.934701492537313
Epoch 86, batch index 300 || train loss: 2.464698489718263, train acc: 10.018687707641195
Epoch 86, batch index 400 || train loss: 2.4652516574336407, train acc: 10.07637157107232
Epoch 86, batch index 500 || train loss: 2.4633531884518924, train acc: 10.082959081836327
Epoch 86, batch index 600 || train loss: 2.463544060108864, train acc: 10.053556572379367
Epoch 86, batch index 700 || train loss: 2.4632388525104387, train acc: 10.097182596291013


  0%|          | 0/157 [00:00<?, ?it/s]

Epoch 86 val loss: 385.68193, test acc: 9.95223


  0%|          | 0/782 [00:00<?, ?it/s]

Epoch 87, batch index 0 || train loss: 2.4388186931610107, train acc: 18.75
Epoch 87, batch index 100 || train loss: 2.4618716098294398, train acc: 10.504331683168317
