# HW3 Image Classification
## We strongly recommend that you run with [Kaggle](https://www.kaggle.com/t/86ca241732c04da99aca6490080bae73) for this homework

If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

# Check GPU Type

In [1]:
!nvidia-smi

Mon Apr 29 15:13:59 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P0              27W / 250W |      0MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                         

# Import Packages

In [2]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import torchvision
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.notebook import tqdm
import random

from torch.utils.tensorboard import SummaryWriter

2024-04-29 15:14:08.361140: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-29 15:14:08.361271: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-29 15:14:08.478514: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

# Transforms
Torchvision provides lots of useful utilities for image preprocessing, data *wrapping* as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [4]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.
    
      transforms.RandomChoice(transforms=[
        # Apply TrivialAugmentWide data augmentation method
        transforms.TrivialAugmentWide(),

        # Return original image
        transforms.Lambda(lambda x: x),
    ],
                            p=[0.95, 0.05]),


    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
])


# Datasets
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [5]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
            
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        
        try:
            label = int(fname.split("/")[-1].split("_")[0]) #TODO win系统改为\\,不然标签全是-1
        except:
            label = -1 # test has no label
            
        return im,label

In [6]:
# files = sorted([os.path.join('/kaggle/input/ml2023spring-hw3/train',x) for x in os.listdir('/kaggle/input/ml2023spring-hw3/train') if x.endswith(".jpg")])
# files[0].split("/")[-1].split("_")[0]

# Model

In [7]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 11)
#             nn.Linear(512*4*4, 1024),
#             nn.ReLU(),
#             nn.Linear(1024, 512),
#             nn.ReLU(),
#             nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)
    
class ResNet50(nn.Module):
    def __init__(self):
        super(ResNet50, self).__init__()
        self.cnn = nn.Sequential(*list(torchvision.models.resnet50(weights=None).children())[:-1])
        num_features = torchvision.models.resnet50(weights=None).fc.in_features
        self.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(num_features, 1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(512, 11)
        )
        
    
    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

# FocalLoss

In [8]:
import torch.nn.functional as F
from torch.autograd import Variable

class FocalLoss(nn.Module):
    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super().__init__()
        if alpha is None:
            self.alpha = Variable(torch.ones(class_num, 1))
        else:
            if isinstance(alpha, Variable):
                self.alpha = alpha
            else:
                self.alpha = Variable(alpha)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average
        
    def forward(self, inputs, targets):
        N = inputs.size(0)
        C = inputs.size(1)
        P = F.softmax(inputs, dim=1)
        
        class_mask = inputs.data.new(N, C).fill_(0)
        class_mask = Variable(class_mask)
        ids = targets.view(-1, 1)
        class_mask.scatter_(1, ids.data, 1.)
        
        if inputs.is_cuda and not self.alpha.is_cuda:
            self.alpha = self.alpha.cuda()
        alpha = self.alpha[ids.data.view(-1)]
        probs = (P*class_mask).sum(1).view(-1, 1)
        
        log_p = probs.log()
        
        batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p
        
        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
            
        return loss

# Configurations

In [9]:
_exp_name = "transform_500epoch_0.1labelsmooth_0.5drop_lrsche_tta"

# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)
# model = torchvision.models.resnet50(weights=None)
# model.fc = nn.Linear(in_features=2048, out_features=11, bias=True) # 原始为一千分类，改为十一分类
# model.to(device)
# model = ResNet50().to(device)

# The number of batch size.
batch_size = 64

# The number of training epochs.
n_epochs = 500

# If no improvement in 'patience' epochs, early stop.
patience = 50

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
# alpha = torch.Tensor([1, 2.317, 0.663, 1.008, 1.172, 0.750, 2.259, 3.55, 1.163, 0.663, 1.402])
# criterion = FocalLoss(11, alpha=alpha)

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5)

# Create a learning rate scheduler that reduces the learning rate when the metric stops improving
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.8, patience=patience/2, threshold=0.05)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-8)

# Focal Loss

# Dataloader

In [10]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset("/kaggle/input/ml2023spring-hw3/train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=6, pin_memory=True)
valid_set = FoodDataset("/kaggle/input/ml2023spring-hw3/valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)



# Start Training

In [11]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0
writer = SummaryWriter('./runs/'+_exp_name)

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()
    # print('training')
    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader, leave=True):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        # imgs = imgs.half()
        # print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    # if valid_acc > best_acc:
    #     with open(f"./{_exp_name}_log.txt","a"):
    #         print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    # else:
    #     with open(f"./{_exp_name}_log.txt","a"):
    #         print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    writer.add_scalar('Loss/train', train_loss, epoch+1)
    writer.add_scalar('Loss/valid', valid_loss, epoch+1)
    writer.add_scalar('Acc/train', train_acc, epoch+1)
    writer.add_scalar('Acc/valid', valid_acc, epoch+1)

    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch+1}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

    # Update learning rate based on the best_acc so far
    scheduler.step(best_acc)

  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 001/500 ] loss = 2.25354, acc = 0.21417


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 001/500 ] loss = 2.06173, acc = 0.29883
Best model found at epoch 1, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 002/500 ] loss = 2.13214, acc = 0.27090


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 002/500 ] loss = 1.98313, acc = 0.33181
Best model found at epoch 2, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 003/500 ] loss = 2.06656, acc = 0.30583


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 003/500 ] loss = 1.94581, acc = 0.35555
Best model found at epoch 3, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 004/500 ] loss = 1.99580, acc = 0.34295


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 004/500 ] loss = 1.85162, acc = 0.43050
Best model found at epoch 4, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 005/500 ] loss = 1.95620, acc = 0.37102


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 005/500 ] loss = 1.88678, acc = 0.40907


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 006/500 ] loss = 1.89067, acc = 0.40466


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 006/500 ] loss = 1.74665, acc = 0.46070
Best model found at epoch 6, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 007/500 ] loss = 1.81983, acc = 0.43939


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 007/500 ] loss = 1.64272, acc = 0.50494
Best model found at epoch 7, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 008/500 ] loss = 1.78005, acc = 0.45561


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 008/500 ] loss = 1.61852, acc = 0.53521
Best model found at epoch 8, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 009/500 ] loss = 1.72839, acc = 0.48637


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 009/500 ] loss = 1.56027, acc = 0.55956
Best model found at epoch 9, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 010/500 ] loss = 1.68986, acc = 0.49771


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 010/500 ] loss = 1.71815, acc = 0.47906


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 011/500 ] loss = 1.64947, acc = 0.52389


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 011/500 ] loss = 1.75050, acc = 0.47109


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 012/500 ] loss = 1.62133, acc = 0.53493


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 012/500 ] loss = 1.56997, acc = 0.54563


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 013/500 ] loss = 1.60932, acc = 0.54080


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 013/500 ] loss = 1.53761, acc = 0.55516


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 014/500 ] loss = 1.56404, acc = 0.55882


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 014/500 ] loss = 1.58025, acc = 0.54209


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 015/500 ] loss = 1.54156, acc = 0.57305


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 015/500 ] loss = 1.47727, acc = 0.58949
Best model found at epoch 15, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 016/500 ] loss = 1.51388, acc = 0.58420


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 016/500 ] loss = 1.47524, acc = 0.59214
Best model found at epoch 16, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 017/500 ] loss = 1.48281, acc = 0.59922


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 017/500 ] loss = 1.55394, acc = 0.55036


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 018/500 ] loss = 1.47460, acc = 0.60569


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 018/500 ] loss = 1.47520, acc = 0.58252


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 019/500 ] loss = 1.45675, acc = 0.61176


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 019/500 ] loss = 1.41743, acc = 0.60662
Best model found at epoch 19, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 020/500 ] loss = 1.43546, acc = 0.62052


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 020/500 ] loss = 1.35718, acc = 0.65220
Best model found at epoch 20, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 021/500 ] loss = 1.42321, acc = 0.62580


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 021/500 ] loss = 1.30765, acc = 0.67020
Best model found at epoch 21, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 022/500 ] loss = 1.40096, acc = 0.63645


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 022/500 ] loss = 1.36393, acc = 0.63052


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 023/500 ] loss = 1.38201, acc = 0.64650


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 023/500 ] loss = 1.42640, acc = 0.60722


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 024/500 ] loss = 1.36035, acc = 0.65555


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 024/500 ] loss = 1.34122, acc = 0.64850


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 025/500 ] loss = 1.33835, acc = 0.66481


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 025/500 ] loss = 1.34164, acc = 0.65145


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 026/500 ] loss = 1.32278, acc = 0.68143


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 026/500 ] loss = 1.44528, acc = 0.61677


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 027/500 ] loss = 1.29730, acc = 0.68143


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 027/500 ] loss = 1.34958, acc = 0.65245


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 028/500 ] loss = 1.29471, acc = 0.69029


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 028/500 ] loss = 1.23976, acc = 0.69448
Best model found at epoch 28, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 029/500 ] loss = 1.27279, acc = 0.69626


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 029/500 ] loss = 1.23563, acc = 0.70792
Best model found at epoch 29, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 030/500 ] loss = 1.28222, acc = 0.69228


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 030/500 ] loss = 1.36684, acc = 0.65638


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 031/500 ] loss = 1.24843, acc = 0.70740


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 031/500 ] loss = 1.25136, acc = 0.69613


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 032/500 ] loss = 1.24429, acc = 0.71069


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 032/500 ] loss = 1.28885, acc = 0.67886


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 033/500 ] loss = 1.24208, acc = 0.70939


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 033/500 ] loss = 1.31339, acc = 0.67676


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 034/500 ] loss = 1.21054, acc = 0.72940


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 034/500 ] loss = 1.32924, acc = 0.64941


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 035/500 ] loss = 1.21695, acc = 0.72363


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 035/500 ] loss = 1.29179, acc = 0.66887


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 036/500 ] loss = 1.19771, acc = 0.73537


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 036/500 ] loss = 1.29627, acc = 0.67527


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 037/500 ] loss = 1.17424, acc = 0.74701


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 037/500 ] loss = 1.23980, acc = 0.70451


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 038/500 ] loss = 1.15415, acc = 0.75478


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 038/500 ] loss = 1.35283, acc = 0.65604


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 039/500 ] loss = 1.16461, acc = 0.74920


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 039/500 ] loss = 1.30754, acc = 0.68372


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 040/500 ] loss = 1.14036, acc = 0.76254


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 040/500 ] loss = 1.20720, acc = 0.72489
Best model found at epoch 40, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 041/500 ] loss = 1.12591, acc = 0.76592


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 041/500 ] loss = 1.39274, acc = 0.62200


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 042/500 ] loss = 1.12441, acc = 0.76881


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 042/500 ] loss = 1.25478, acc = 0.69060


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 043/500 ] loss = 1.11097, acc = 0.77389


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 043/500 ] loss = 1.18179, acc = 0.72441


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 044/500 ] loss = 1.10938, acc = 0.77229


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 044/500 ] loss = 1.28824, acc = 0.68212


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 045/500 ] loss = 1.10243, acc = 0.77846


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 045/500 ] loss = 1.22612, acc = 0.69675


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 046/500 ] loss = 1.08321, acc = 0.78762


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 046/500 ] loss = 1.39047, acc = 0.64779


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 047/500 ] loss = 1.06714, acc = 0.79100


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 047/500 ] loss = 1.14834, acc = 0.74796
Best model found at epoch 47, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 048/500 ] loss = 1.06652, acc = 0.79568


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 048/500 ] loss = 1.21181, acc = 0.71943


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 049/500 ] loss = 1.04362, acc = 0.80434


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 049/500 ] loss = 1.27794, acc = 0.68948


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 050/500 ] loss = 1.04938, acc = 0.79996


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 050/500 ] loss = 1.35703, acc = 0.65850


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 051/500 ] loss = 1.04791, acc = 0.80364


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 051/500 ] loss = 1.20051, acc = 0.72850


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 052/500 ] loss = 1.04372, acc = 0.80374


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 052/500 ] loss = 1.24652, acc = 0.69409


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 053/500 ] loss = 1.03052, acc = 0.80573


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 053/500 ] loss = 1.17985, acc = 0.73437


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 054/500 ] loss = 1.02522, acc = 0.81111


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 054/500 ] loss = 1.22365, acc = 0.72505


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 055/500 ] loss = 1.02246, acc = 0.81230


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 055/500 ] loss = 1.30347, acc = 0.68608


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 056/500 ] loss = 1.00125, acc = 0.82205


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 056/500 ] loss = 1.33631, acc = 0.66077


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 057/500 ] loss = 1.00026, acc = 0.82315


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 057/500 ] loss = 1.17808, acc = 0.73261


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 058/500 ] loss = 0.97486, acc = 0.83499


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 058/500 ] loss = 1.19598, acc = 0.72715


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 059/500 ] loss = 0.99109, acc = 0.82633


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 059/500 ] loss = 1.18958, acc = 0.72937


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 060/500 ] loss = 0.98095, acc = 0.83260


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 060/500 ] loss = 1.21130, acc = 0.71390


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 061/500 ] loss = 0.96195, acc = 0.83768


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 061/500 ] loss = 1.21163, acc = 0.71975


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 062/500 ] loss = 0.96781, acc = 0.83459


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 062/500 ] loss = 1.18383, acc = 0.72941


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 063/500 ] loss = 0.97608, acc = 0.83181


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 063/500 ] loss = 1.26048, acc = 0.68270


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 064/500 ] loss = 0.96805, acc = 0.83420


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 064/500 ] loss = 1.19190, acc = 0.72663


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 065/500 ] loss = 0.95421, acc = 0.83927


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 065/500 ] loss = 1.22809, acc = 0.72498


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 066/500 ] loss = 0.94545, acc = 0.84654


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 066/500 ] loss = 1.16376, acc = 0.73700


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 067/500 ] loss = 0.93485, acc = 0.85330


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 067/500 ] loss = 1.25735, acc = 0.70547


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 068/500 ] loss = 0.92231, acc = 0.85947


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 068/500 ] loss = 1.14124, acc = 0.74721


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 069/500 ] loss = 0.93980, acc = 0.84853


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 069/500 ] loss = 1.45043, acc = 0.62796


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 070/500 ] loss = 0.92193, acc = 0.85589


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 070/500 ] loss = 1.11108, acc = 0.75797
Best model found at epoch 70, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 071/500 ] loss = 0.93808, acc = 0.85072


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 071/500 ] loss = 1.20875, acc = 0.71952


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 072/500 ] loss = 0.91875, acc = 0.85669


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 072/500 ] loss = 1.13904, acc = 0.75696


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 073/500 ] loss = 0.91304, acc = 0.85798


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 073/500 ] loss = 1.46398, acc = 0.64813


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 074/500 ] loss = 0.88406, acc = 0.87321


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 074/500 ] loss = 1.17038, acc = 0.73866


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 075/500 ] loss = 0.88595, acc = 0.86853


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 075/500 ] loss = 1.18584, acc = 0.73099


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 076/500 ] loss = 0.86860, acc = 0.88057


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 076/500 ] loss = 1.11840, acc = 0.76532
Best model found at epoch 76, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 077/500 ] loss = 0.86814, acc = 0.87590


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 077/500 ] loss = 1.18360, acc = 0.72032


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 078/500 ] loss = 0.86648, acc = 0.87709


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 078/500 ] loss = 1.16746, acc = 0.74133


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 079/500 ] loss = 0.88627, acc = 0.86883


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 079/500 ] loss = 1.19193, acc = 0.74200


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 080/500 ] loss = 0.85754, acc = 0.88445


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 080/500 ] loss = 1.13090, acc = 0.75733


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 081/500 ] loss = 0.86934, acc = 0.87689


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 081/500 ] loss = 1.15798, acc = 0.74383


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 082/500 ] loss = 0.85635, acc = 0.88276


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 082/500 ] loss = 1.13304, acc = 0.75095


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 083/500 ] loss = 0.85697, acc = 0.88067


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 083/500 ] loss = 1.24592, acc = 0.70995


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 084/500 ] loss = 0.84985, acc = 0.88724


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 084/500 ] loss = 1.12931, acc = 0.75084


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 085/500 ] loss = 0.83750, acc = 0.88844


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 085/500 ] loss = 1.16452, acc = 0.74291


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 086/500 ] loss = 0.83619, acc = 0.88863


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 086/500 ] loss = 1.12032, acc = 0.76263


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 087/500 ] loss = 0.84817, acc = 0.88684


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 087/500 ] loss = 1.13993, acc = 0.74958


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 088/500 ] loss = 0.84410, acc = 0.88923


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 088/500 ] loss = 1.12491, acc = 0.76069


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 089/500 ] loss = 0.82115, acc = 0.89729


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 089/500 ] loss = 1.27943, acc = 0.70556


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 090/500 ] loss = 0.82474, acc = 0.89550


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 090/500 ] loss = 1.14701, acc = 0.74732


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 091/500 ] loss = 0.82600, acc = 0.89461


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 091/500 ] loss = 1.12445, acc = 0.74988


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 092/500 ] loss = 0.83368, acc = 0.89053


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 092/500 ] loss = 1.23664, acc = 0.71728


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 093/500 ] loss = 0.82484, acc = 0.89391


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 093/500 ] loss = 1.15353, acc = 0.75598


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 094/500 ] loss = 0.82662, acc = 0.89321


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 094/500 ] loss = 1.14958, acc = 0.75669


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 095/500 ] loss = 0.82723, acc = 0.89122


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 095/500 ] loss = 1.29199, acc = 0.70181


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 096/500 ] loss = 0.83205, acc = 0.89062


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 096/500 ] loss = 1.13127, acc = 0.76450


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 097/500 ] loss = 0.82493, acc = 0.89411


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 097/500 ] loss = 1.11099, acc = 0.77238
Best model found at epoch 97, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 098/500 ] loss = 0.81233, acc = 0.89998


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 098/500 ] loss = 1.20742, acc = 0.73451


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 099/500 ] loss = 0.80991, acc = 0.89849


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 099/500 ] loss = 1.20090, acc = 0.73747


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 100/500 ] loss = 0.80381, acc = 0.90137


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 100/500 ] loss = 1.12579, acc = 0.76199


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 101/500 ] loss = 0.79175, acc = 0.90705


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 101/500 ] loss = 1.14147, acc = 0.75361


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 102/500 ] loss = 0.78628, acc = 0.90884


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 102/500 ] loss = 1.09078, acc = 0.77149


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 103/500 ] loss = 0.78961, acc = 0.90625


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 103/500 ] loss = 1.11060, acc = 0.76683


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 104/500 ] loss = 0.78834, acc = 0.91003


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 104/500 ] loss = 1.11592, acc = 0.76395


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 105/500 ] loss = 0.79014, acc = 0.90774


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 105/500 ] loss = 1.11859, acc = 0.75853


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 106/500 ] loss = 0.78802, acc = 0.90605


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 106/500 ] loss = 1.14033, acc = 0.76084


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 107/500 ] loss = 0.78724, acc = 0.90894


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 107/500 ] loss = 1.13064, acc = 0.75415


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 108/500 ] loss = 0.77335, acc = 0.91561


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 108/500 ] loss = 1.14016, acc = 0.75408


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 109/500 ] loss = 0.77817, acc = 0.91182


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 109/500 ] loss = 1.12203, acc = 0.75899


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 110/500 ] loss = 0.78580, acc = 0.90904


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 110/500 ] loss = 1.16232, acc = 0.74127


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 111/500 ] loss = 0.78285, acc = 0.90615


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 111/500 ] loss = 1.07092, acc = 0.77960
Best model found at epoch 111, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 112/500 ] loss = 0.76969, acc = 0.91371


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 112/500 ] loss = 1.09741, acc = 0.76687


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 113/500 ] loss = 0.76670, acc = 0.91531


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 113/500 ] loss = 1.14958, acc = 0.75326


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 114/500 ] loss = 0.76693, acc = 0.91710


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 114/500 ] loss = 1.08621, acc = 0.77645


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 115/500 ] loss = 0.77081, acc = 0.91511


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 115/500 ] loss = 1.13299, acc = 0.75593


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 116/500 ] loss = 0.77300, acc = 0.91162


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 116/500 ] loss = 1.10814, acc = 0.77327


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 117/500 ] loss = 0.76795, acc = 0.91531


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 117/500 ] loss = 1.16096, acc = 0.75118


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 118/500 ] loss = 0.76971, acc = 0.91501


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 118/500 ] loss = 1.11491, acc = 0.76934


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 119/500 ] loss = 0.77085, acc = 0.91511


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 119/500 ] loss = 1.06878, acc = 0.78417
Best model found at epoch 119, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 120/500 ] loss = 0.76681, acc = 0.91451


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 120/500 ] loss = 1.16746, acc = 0.74769


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 121/500 ] loss = 0.75698, acc = 0.91899


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 121/500 ] loss = 1.13551, acc = 0.75433


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 122/500 ] loss = 0.76251, acc = 0.91491


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 122/500 ] loss = 1.10029, acc = 0.77263


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 123/500 ] loss = 0.76374, acc = 0.91650


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 123/500 ] loss = 1.13707, acc = 0.75461


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 124/500 ] loss = 0.76831, acc = 0.91700


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 124/500 ] loss = 1.11942, acc = 0.76064


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 125/500 ] loss = 0.76481, acc = 0.91212


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 125/500 ] loss = 1.13725, acc = 0.75813


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 126/500 ] loss = 0.74861, acc = 0.92188


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 126/500 ] loss = 1.11883, acc = 0.76450


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 127/500 ] loss = 0.74780, acc = 0.92387


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 127/500 ] loss = 1.09698, acc = 0.77117


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 128/500 ] loss = 0.75227, acc = 0.91909


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 128/500 ] loss = 1.32977, acc = 0.69754


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 129/500 ] loss = 0.73558, acc = 0.92665


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 129/500 ] loss = 1.11749, acc = 0.76608


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 130/500 ] loss = 0.73694, acc = 0.92536


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 130/500 ] loss = 1.07487, acc = 0.77505


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 131/500 ] loss = 0.73068, acc = 0.92934


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 131/500 ] loss = 1.06392, acc = 0.78255


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 132/500 ] loss = 0.73824, acc = 0.92615


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 132/500 ] loss = 1.10889, acc = 0.76852


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 133/500 ] loss = 0.73462, acc = 0.92715


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 133/500 ] loss = 1.09254, acc = 0.77076


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 134/500 ] loss = 0.73936, acc = 0.92436


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 134/500 ] loss = 1.13194, acc = 0.75881


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 135/500 ] loss = 0.73654, acc = 0.92615


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 135/500 ] loss = 1.07507, acc = 0.78147


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 136/500 ] loss = 0.73998, acc = 0.92546


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 136/500 ] loss = 1.06879, acc = 0.78367


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 137/500 ] loss = 0.73042, acc = 0.92944


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 137/500 ] loss = 1.09253, acc = 0.77391


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 138/500 ] loss = 0.73075, acc = 0.92516


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 138/500 ] loss = 1.12725, acc = 0.75664


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 139/500 ] loss = 0.73773, acc = 0.92436


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 139/500 ] loss = 1.15338, acc = 0.75296


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 140/500 ] loss = 0.73705, acc = 0.92446


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 140/500 ] loss = 1.08300, acc = 0.77642


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 141/500 ] loss = 0.72971, acc = 0.92844


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 141/500 ] loss = 1.06389, acc = 0.78316


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 142/500 ] loss = 0.72809, acc = 0.92884


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 142/500 ] loss = 1.05465, acc = 0.78753
Best model found at epoch 142, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 143/500 ] loss = 0.72629, acc = 0.93083


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 143/500 ] loss = 1.04844, acc = 0.78060


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 144/500 ] loss = 0.73006, acc = 0.92586


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 144/500 ] loss = 1.07250, acc = 0.78207


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 145/500 ] loss = 0.72908, acc = 0.92785


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 145/500 ] loss = 1.06444, acc = 0.78479


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 146/500 ] loss = 0.72862, acc = 0.92705


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 146/500 ] loss = 1.08290, acc = 0.77908


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 147/500 ] loss = 0.72919, acc = 0.92705


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 147/500 ] loss = 1.07687, acc = 0.77825


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 148/500 ] loss = 0.72393, acc = 0.93063


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 148/500 ] loss = 1.05256, acc = 0.78531


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 149/500 ] loss = 0.72082, acc = 0.93213


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 149/500 ] loss = 1.09760, acc = 0.77469


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 150/500 ] loss = 0.72276, acc = 0.92735


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 150/500 ] loss = 1.10481, acc = 0.76864


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 151/500 ] loss = 0.72589, acc = 0.92924


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 151/500 ] loss = 1.11815, acc = 0.76306


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 152/500 ] loss = 0.71778, acc = 0.93203


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 152/500 ] loss = 1.06057, acc = 0.78342


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 153/500 ] loss = 0.72812, acc = 0.92705


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 153/500 ] loss = 1.05866, acc = 0.78424


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 154/500 ] loss = 0.71401, acc = 0.93412


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 154/500 ] loss = 1.05962, acc = 0.78760
Best model found at epoch 154, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 155/500 ] loss = 0.72518, acc = 0.93033


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 155/500 ] loss = 1.05715, acc = 0.78595


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 156/500 ] loss = 0.72025, acc = 0.92924


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 156/500 ] loss = 1.08861, acc = 0.77789


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 157/500 ] loss = 0.72351, acc = 0.92894


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 157/500 ] loss = 1.08324, acc = 0.77604


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 158/500 ] loss = 0.71021, acc = 0.93352


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 158/500 ] loss = 1.11749, acc = 0.76578


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 159/500 ] loss = 0.72239, acc = 0.93043


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 159/500 ] loss = 1.07420, acc = 0.77828


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 160/500 ] loss = 0.72897, acc = 0.92874


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 160/500 ] loss = 1.06422, acc = 0.79004
Best model found at epoch 160, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 161/500 ] loss = 0.71467, acc = 0.93302


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 161/500 ] loss = 1.05608, acc = 0.78609


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 162/500 ] loss = 0.70730, acc = 0.93561


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 162/500 ] loss = 1.10725, acc = 0.76729


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 163/500 ] loss = 0.72460, acc = 0.92964


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 163/500 ] loss = 1.06469, acc = 0.78504


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 164/500 ] loss = 0.71840, acc = 0.93133


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 164/500 ] loss = 1.07446, acc = 0.77382


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 165/500 ] loss = 0.70913, acc = 0.93680


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 165/500 ] loss = 1.07316, acc = 0.78186


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 166/500 ] loss = 0.70868, acc = 0.93352


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 166/500 ] loss = 1.14681, acc = 0.75854


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 167/500 ] loss = 0.71021, acc = 0.93422


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 167/500 ] loss = 1.08578, acc = 0.78312


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 168/500 ] loss = 0.71158, acc = 0.93392


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 168/500 ] loss = 1.04651, acc = 0.79178
Best model found at epoch 168, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 169/500 ] loss = 0.71290, acc = 0.93183


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 169/500 ] loss = 1.08150, acc = 0.77419


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 170/500 ] loss = 0.70166, acc = 0.93859


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 170/500 ] loss = 1.02476, acc = 0.80283
Best model found at epoch 170, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 171/500 ] loss = 0.69970, acc = 0.93850


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 171/500 ] loss = 1.10416, acc = 0.76809


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 172/500 ] loss = 0.68734, acc = 0.94258


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 172/500 ] loss = 1.07364, acc = 0.77574


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 173/500 ] loss = 0.71253, acc = 0.93173


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 173/500 ] loss = 1.07881, acc = 0.78083


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 174/500 ] loss = 0.69535, acc = 0.93760


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 174/500 ] loss = 1.04718, acc = 0.79438


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 175/500 ] loss = 0.69574, acc = 0.94148


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 175/500 ] loss = 1.06680, acc = 0.78869


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 176/500 ] loss = 0.69322, acc = 0.94138


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 176/500 ] loss = 1.08288, acc = 0.77279


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 177/500 ] loss = 0.69543, acc = 0.93979


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 177/500 ] loss = 1.06933, acc = 0.78077


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 178/500 ] loss = 0.69317, acc = 0.93989


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 178/500 ] loss = 1.07298, acc = 0.78090


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 179/500 ] loss = 0.69363, acc = 0.94029


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 179/500 ] loss = 1.06098, acc = 0.78469


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 180/500 ] loss = 0.69306, acc = 0.93909


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 180/500 ] loss = 1.04057, acc = 0.79632


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 181/500 ] loss = 0.69547, acc = 0.93790


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 181/500 ] loss = 1.06230, acc = 0.78584


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 182/500 ] loss = 0.68778, acc = 0.94078


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 182/500 ] loss = 1.08893, acc = 0.77675


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 183/500 ] loss = 0.68510, acc = 0.94327


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 183/500 ] loss = 1.04916, acc = 0.79203


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 184/500 ] loss = 0.68579, acc = 0.94178


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 184/500 ] loss = 1.06701, acc = 0.78691


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 185/500 ] loss = 0.68963, acc = 0.94387


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 185/500 ] loss = 1.09639, acc = 0.77268


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 186/500 ] loss = 0.68805, acc = 0.94258


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 186/500 ] loss = 1.11655, acc = 0.76763


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 187/500 ] loss = 0.68763, acc = 0.94248


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 187/500 ] loss = 1.06170, acc = 0.78648


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 188/500 ] loss = 0.68643, acc = 0.94218


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 188/500 ] loss = 1.06711, acc = 0.78403


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 189/500 ] loss = 0.68981, acc = 0.93959


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 189/500 ] loss = 1.05289, acc = 0.78862


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 190/500 ] loss = 0.69156, acc = 0.94198


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 190/500 ] loss = 1.06642, acc = 0.78497


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 191/500 ] loss = 0.69506, acc = 0.93959


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 191/500 ] loss = 1.04332, acc = 0.79116


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 192/500 ] loss = 0.68179, acc = 0.94228


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 192/500 ] loss = 1.06483, acc = 0.78472


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 193/500 ] loss = 0.68913, acc = 0.94258


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 193/500 ] loss = 1.06992, acc = 0.78526


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 194/500 ] loss = 0.68239, acc = 0.94367


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 194/500 ] loss = 1.05522, acc = 0.79064


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 195/500 ] loss = 0.67934, acc = 0.94467


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 195/500 ] loss = 1.04900, acc = 0.79013


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 196/500 ] loss = 0.68148, acc = 0.94377


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 196/500 ] loss = 1.04942, acc = 0.80178


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 197/500 ] loss = 0.67555, acc = 0.94705


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 197/500 ] loss = 1.03931, acc = 0.79826


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 198/500 ] loss = 0.67936, acc = 0.94367


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 198/500 ] loss = 1.04373, acc = 0.79639


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 199/500 ] loss = 0.67459, acc = 0.94586


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 199/500 ] loss = 1.03062, acc = 0.80345
Best model found at epoch 199, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 200/500 ] loss = 0.67213, acc = 0.94686


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 200/500 ] loss = 1.06018, acc = 0.79319


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 201/500 ] loss = 0.67179, acc = 0.94616


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 201/500 ] loss = 1.06457, acc = 0.79118


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 202/500 ] loss = 0.67800, acc = 0.94765


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 202/500 ] loss = 1.04224, acc = 0.80073


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 203/500 ] loss = 0.67184, acc = 0.94755


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 203/500 ] loss = 1.03214, acc = 0.79997


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 204/500 ] loss = 0.67043, acc = 0.94666


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 204/500 ] loss = 1.04868, acc = 0.79376


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 205/500 ] loss = 0.65941, acc = 0.95173


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 205/500 ] loss = 1.04365, acc = 0.78952


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 206/500 ] loss = 0.67257, acc = 0.94835


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 206/500 ] loss = 1.03296, acc = 0.80091


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 207/500 ] loss = 0.66626, acc = 0.95104


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 207/500 ] loss = 1.03940, acc = 0.79619


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 208/500 ] loss = 0.67416, acc = 0.94835


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 208/500 ] loss = 1.02999, acc = 0.79810


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 209/500 ] loss = 0.66401, acc = 0.95243


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 209/500 ] loss = 1.04905, acc = 0.79077


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 210/500 ] loss = 0.67106, acc = 0.94785


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 210/500 ] loss = 1.05191, acc = 0.79580


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 211/500 ] loss = 0.66828, acc = 0.94904


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 211/500 ] loss = 1.03945, acc = 0.79906


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 212/500 ] loss = 0.66583, acc = 0.95074


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 212/500 ] loss = 1.04247, acc = 0.79630


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 213/500 ] loss = 0.66496, acc = 0.95133


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 213/500 ] loss = 1.02271, acc = 0.80103


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 214/500 ] loss = 0.65989, acc = 0.95382


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 214/500 ] loss = 1.04108, acc = 0.79415


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 215/500 ] loss = 0.66755, acc = 0.94815


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 215/500 ] loss = 1.03425, acc = 0.79856


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 216/500 ] loss = 0.67041, acc = 0.94775


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 216/500 ] loss = 1.02696, acc = 0.80610
Best model found at epoch 216, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 217/500 ] loss = 0.67282, acc = 0.94556


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 217/500 ] loss = 1.07236, acc = 0.78449


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 218/500 ] loss = 0.67200, acc = 0.94686


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 218/500 ] loss = 1.03156, acc = 0.80445


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 219/500 ] loss = 0.66187, acc = 0.95123


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 219/500 ] loss = 1.02801, acc = 0.80014


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 220/500 ] loss = 0.66591, acc = 0.95024


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 220/500 ] loss = 1.03237, acc = 0.79557


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 221/500 ] loss = 0.66153, acc = 0.95034


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 221/500 ] loss = 1.02120, acc = 0.80066


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 222/500 ] loss = 0.66369, acc = 0.94934


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 222/500 ] loss = 1.01871, acc = 0.80539


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 223/500 ] loss = 0.66349, acc = 0.94944


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 223/500 ] loss = 1.02153, acc = 0.80932
Best model found at epoch 223, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 224/500 ] loss = 0.64731, acc = 0.95810


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 224/500 ] loss = 1.02597, acc = 0.80541


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 225/500 ] loss = 0.65051, acc = 0.95521


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 225/500 ] loss = 1.03137, acc = 0.80178


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 226/500 ] loss = 0.66545, acc = 0.95104


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 226/500 ] loss = 1.01271, acc = 0.80459


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 227/500 ] loss = 0.65172, acc = 0.95442


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 227/500 ] loss = 1.00290, acc = 0.80975
Best model found at epoch 227, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 228/500 ] loss = 0.66282, acc = 0.95024


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 228/500 ] loss = 1.03147, acc = 0.80276


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 229/500 ] loss = 0.65447, acc = 0.95233


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 229/500 ] loss = 1.00715, acc = 0.80850


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 230/500 ] loss = 0.65417, acc = 0.95492


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 230/500 ] loss = 1.02522, acc = 0.80635


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 231/500 ] loss = 0.64698, acc = 0.95780


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 231/500 ] loss = 1.04662, acc = 0.79555


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 232/500 ] loss = 0.65674, acc = 0.95342


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 232/500 ] loss = 1.06629, acc = 0.78750


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 233/500 ] loss = 0.65433, acc = 0.95342


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 233/500 ] loss = 1.00459, acc = 0.80612


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 234/500 ] loss = 0.65000, acc = 0.95531


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 234/500 ] loss = 1.05416, acc = 0.79635


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 235/500 ] loss = 0.65808, acc = 0.95203


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 235/500 ] loss = 1.03380, acc = 0.79824


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 236/500 ] loss = 0.64808, acc = 0.95502


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 236/500 ] loss = 1.02964, acc = 0.79909


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 237/500 ] loss = 0.65287, acc = 0.95293


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 237/500 ] loss = 1.03981, acc = 0.79493


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 238/500 ] loss = 0.64983, acc = 0.95521


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 238/500 ] loss = 1.01197, acc = 0.80284


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 239/500 ] loss = 0.65184, acc = 0.95372


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 239/500 ] loss = 1.03360, acc = 0.79996


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 240/500 ] loss = 0.64733, acc = 0.95631


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 240/500 ] loss = 1.02761, acc = 0.79852


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 241/500 ] loss = 0.64184, acc = 0.95800


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 241/500 ] loss = 1.03732, acc = 0.79552


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 242/500 ] loss = 0.65583, acc = 0.95253


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 242/500 ] loss = 1.01591, acc = 0.80267


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 243/500 ] loss = 0.64803, acc = 0.95601


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 243/500 ] loss = 1.02733, acc = 0.79589


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 244/500 ] loss = 0.64983, acc = 0.95462


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 244/500 ] loss = 1.00794, acc = 0.81533
Best model found at epoch 244, saving model


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 245/500 ] loss = 0.65660, acc = 0.95372


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 245/500 ] loss = 1.00946, acc = 0.80541


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 246/500 ] loss = 0.64163, acc = 0.95790


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 246/500 ] loss = 1.02447, acc = 0.80875


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 247/500 ] loss = 0.64247, acc = 0.95790


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 247/500 ] loss = 1.02539, acc = 0.80231


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 248/500 ] loss = 0.64877, acc = 0.95611


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 248/500 ] loss = 1.01696, acc = 0.80599


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 249/500 ] loss = 0.64138, acc = 0.95840


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 249/500 ] loss = 1.01449, acc = 0.80375


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 250/500 ] loss = 0.64791, acc = 0.95412


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 250/500 ] loss = 1.02350, acc = 0.80786


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 251/500 ] loss = 0.63752, acc = 0.95999


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 251/500 ] loss = 1.01973, acc = 0.80377


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 252/500 ] loss = 0.64183, acc = 0.95810


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 252/500 ] loss = 1.01623, acc = 0.80221


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 253/500 ] loss = 0.63929, acc = 0.95959


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 253/500 ] loss = 1.03567, acc = 0.79118


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 254/500 ] loss = 0.64396, acc = 0.95790


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 254/500 ] loss = 1.00719, acc = 0.81053


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 255/500 ] loss = 0.64305, acc = 0.95482


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 255/500 ] loss = 1.03943, acc = 0.79993


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 256/500 ] loss = 0.64635, acc = 0.95651


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 256/500 ] loss = 1.02817, acc = 0.79692


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 257/500 ] loss = 0.64556, acc = 0.95671


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 257/500 ] loss = 1.01981, acc = 0.80619


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 258/500 ] loss = 0.64092, acc = 0.95760


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 258/500 ] loss = 1.03506, acc = 0.80123


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 259/500 ] loss = 0.63739, acc = 0.95910


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 259/500 ] loss = 1.00533, acc = 0.81181


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 260/500 ] loss = 0.64440, acc = 0.95701


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 260/500 ] loss = 1.01149, acc = 0.81030


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 261/500 ] loss = 0.63517, acc = 0.95959


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 261/500 ] loss = 1.02277, acc = 0.80489


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 262/500 ] loss = 0.63292, acc = 0.96069


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 262/500 ] loss = 1.02148, acc = 0.80203


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 263/500 ] loss = 0.63386, acc = 0.96188


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 263/500 ] loss = 1.01086, acc = 0.81001


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 264/500 ] loss = 0.64913, acc = 0.95332


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 264/500 ] loss = 1.01658, acc = 0.80831


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 265/500 ] loss = 0.63784, acc = 0.95691


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 265/500 ] loss = 1.03465, acc = 0.80144


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 266/500 ] loss = 0.64033, acc = 0.95790


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 266/500 ] loss = 1.01999, acc = 0.80509


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 267/500 ] loss = 0.63937, acc = 0.95800


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 267/500 ] loss = 1.01743, acc = 0.80235


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 268/500 ] loss = 0.63667, acc = 0.95850


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 268/500 ] loss = 1.01782, acc = 0.80327


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 269/500 ] loss = 0.63794, acc = 0.95780


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 269/500 ] loss = 1.02198, acc = 0.80587


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 270/500 ] loss = 0.63454, acc = 0.96148


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 270/500 ] loss = 1.01555, acc = 0.80731


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 271/500 ] loss = 0.63992, acc = 0.95870


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 271/500 ] loss = 1.01361, acc = 0.80436


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 272/500 ] loss = 0.64057, acc = 0.95770


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 272/500 ] loss = 1.01995, acc = 0.80414


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 273/500 ] loss = 0.63261, acc = 0.96188


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 273/500 ] loss = 1.00775, acc = 0.80896


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 274/500 ] loss = 0.63361, acc = 0.96129


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 274/500 ] loss = 1.01108, acc = 0.80968


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 275/500 ] loss = 0.62918, acc = 0.96318


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 275/500 ] loss = 1.01821, acc = 0.80925


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 276/500 ] loss = 0.63216, acc = 0.96129


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 276/500 ] loss = 1.01341, acc = 0.80432


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 277/500 ] loss = 0.63065, acc = 0.96109


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 277/500 ] loss = 1.01629, acc = 0.80064


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 278/500 ] loss = 0.63037, acc = 0.96009


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 278/500 ] loss = 1.00795, acc = 0.80930


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 279/500 ] loss = 0.63444, acc = 0.96059


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 279/500 ] loss = 1.01605, acc = 0.80811


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 280/500 ] loss = 0.62596, acc = 0.96447


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 280/500 ] loss = 1.02028, acc = 0.80404


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 281/500 ] loss = 0.63460, acc = 0.96069


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 281/500 ] loss = 1.01540, acc = 0.80754


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 282/500 ] loss = 0.62648, acc = 0.96467


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 282/500 ] loss = 1.00162, acc = 0.81368


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 283/500 ] loss = 0.62491, acc = 0.96387


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 283/500 ] loss = 1.00660, acc = 0.80701


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 284/500 ] loss = 0.63324, acc = 0.96089


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 284/500 ] loss = 1.02262, acc = 0.80240


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 285/500 ] loss = 0.62758, acc = 0.96198


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 285/500 ] loss = 1.01541, acc = 0.80281


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 286/500 ] loss = 0.63695, acc = 0.95939


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 286/500 ] loss = 1.01015, acc = 0.80719


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 287/500 ] loss = 0.62440, acc = 0.96467


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 287/500 ] loss = 1.01805, acc = 0.81072


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 288/500 ] loss = 0.62946, acc = 0.96109


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 288/500 ] loss = 1.00756, acc = 0.80530


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 289/500 ] loss = 0.62737, acc = 0.96278


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 289/500 ] loss = 0.99661, acc = 0.80968


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 290/500 ] loss = 0.63919, acc = 0.95750


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 290/500 ] loss = 1.01266, acc = 0.80041


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 291/500 ] loss = 0.62310, acc = 0.96348


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 291/500 ] loss = 1.00187, acc = 0.80587


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 292/500 ] loss = 0.63536, acc = 0.95930


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 292/500 ] loss = 1.02241, acc = 0.79826


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 293/500 ] loss = 0.63119, acc = 0.96139


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 293/500 ] loss = 1.00610, acc = 0.80704


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 294/500 ] loss = 0.62808, acc = 0.96248


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 294/500 ] loss = 1.01001, acc = 0.80893


  0%|          | 0/157 [00:00<?, ?it/s]

[ Train | 295/500 ] loss = 0.63332, acc = 0.96059


  0%|          | 0/57 [00:00<?, ?it/s]

[ Valid | 295/500 ] loss = 1.00552, acc = 0.80740
No improvment 50 consecutive epochs, early stopping


# Dataloader for test

In [12]:
# Construct test datasets.
# The argument "loader" tells how torchvision reads the data.
test_set = FoodDataset("/kaggle/input/ml2023spring-hw3/test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)
# Test Time Augmentation
test_set_tta = FoodDataset("/kaggle/input/ml2023spring-hw3/test", tfm=train_tfm)
test_loader_tta = DataLoader(test_set_tta, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Testing and generate prediction CSV

In [13]:
model_best = Classifier().to(device)
# model_best = torchvision.models.resnet50(weights=None)
# model_best.fc = nn.Linear(in_features=2048, out_features=11, bias=True) # 原始为一千分类，改为十一分类
# model_best.to(device)
# model_best = ResNet50().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
# with torch.no_grad():
#     for data,_ in tqdm(test_loader):
#         test_pred = model_best(data.to(device))
#         test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
#         prediction += test_label.squeeze().tolist()
        
        
pred = np.empty((0, 11))
pred_tta = np.empty((0, 11))
with torch.no_grad():
    for data,_ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        pred = np.concatenate((pred, test_pred.cpu().data.numpy()))
        # test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        # prediction += test_label.squeeze().tolist()
    for data,_ in tqdm(test_loader_tta):
        test_pred = model_best(data.to(device))
        pred_tta = np.concatenate((pred_tta, test_pred.cpu().data.numpy()))
    sum_pred = pred * 0.8 + pred_tta * 0.2
    test_label = np.argmax(sum_pred, axis=1)
    prediction = test_label.squeeze().tolist()

  0%|          | 0/47 [00:00<?, ?it/s]

  0%|          | 0/47 [00:00<?, ?it/s]

In [14]:
# create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction
df.to_csv(_exp_name+".csv",index = False)