# HW3 Image Classification
## We strongly recommend that you run with Kaggle for this homework
https://www.kaggle.com/c/ml2022spring-hw3b/code?competitionId=34954&sortBy=dateCreated

# Get Data
Notes: if the links are dead, you can download the data directly from Kaggle and upload it to the workspace, or you can use the Kaggle API to directly download the data into colab.


In [1]:
# ! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip

In [2]:
# ! unzip food11.zip

In [3]:
!nvidia-smi

Fri Mar 11 22:56:03 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 456.71       Driver Version: 456.71       CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208... WDDM  | 00000000:01:00.0  On |                  N/A |
| 28%   36C    P0    53W / 250W |    679MiB /  8192MiB |      5%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|       

## log To Wandb

In [4]:
logToWandb = False
if logToWandb: 
    import wandb

# Training

In [5]:
_exp_name = "Restnet7"

In [6]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

## Hyper-parameters

In [7]:
batch_size = 64
lr_rate = 1e-3
weight_decay = 1e-5
n_layer = 6

# The number of training epochs and patience.
n_epochs = 3000
patience = 35 # If no improvement in 'patience' epochs, early stop

myseed = np.random.randint(1, 1000)  # set a random seed for reproducibility

In [8]:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [9]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You need to add some transforms here.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1, hue=0),
    transforms.RandomPerspective(distortion_scale=0.6, p=0.6),
    transforms.RandomAffine(degrees=(-30, 30), translate=(0, 0.4), scale=(0.8, 1.5)),
    
    transforms.ToTensor(),
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [10]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            label = int(fname.split("\\")[-1].split("_")[0]) #超級大bug!!!
        except:
            label = -1 # test has no label
        return im,label



In [11]:
# class Classifier(nn.Module):
#     def __init__(self):
#         super(Classifier, self).__init__()
#         # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
#         # torch.nn.MaxPool2d(kernel_size, stride, padding)
#         # input 維度 [3, 128, 128]
#         self.cnn = nn.Sequential(
#             nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

#             nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

#             nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
#             nn.BatchNorm2d(256),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

#             nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
#             nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
#         )
#         self.fc = nn.Sequential(
#             nn.Linear(512*4*4, 1024),
#             nn.ReLU(),
#             nn.Linear(1024, 512),
#             nn.ReLU(),
#             nn.Linear(512, 11)
#         )

#     def forward(self, x):
#         out = self.cnn(x)
#         out = out.view(out.size()[0], -1)
#         return self.fc(out)

# class Classifier(nn.Module):
#     def __init__(self):
#         super(Classifier, self).__init__()
#         # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
#         # torch.nn.MaxPool2d(kernel_size, stride, padding)
#         # input 維度 [3, 128, 128]
#         self.cnn = nn.Sequential(
#             nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
#             nn.BatchNorm2d(64),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

#             nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
#             nn.BatchNorm2d(128),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

#             nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
#             nn.BatchNorm2d(256),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

#             nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
#             nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
#             nn.BatchNorm2d(512),
#             nn.ReLU(),
#             nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
#         )
#         self.fc = nn.Sequential(
#             nn.Linear(512*4*4, 1024),
#             nn.ReLU(),
#             nn.Linear(1024, 512),
#             nn.ReLU(),
#             nn.Linear(512, 11)
#         )

#     def forward(self, x):
#         out = self.cnn(x)
#         out = out.view(out.size()[0], -1)
#         return self.fc(out)

# https://github.com/itchencheng/pytorch-residual-networks
class ResBlockA(nn.Module):

    def __init__(self, in_chann, chann, stride):
        super(ResBlockA, self).__init__()

        self.conv1 = nn.Conv2d(in_chann, chann, kernel_size=3, padding=1, stride=stride)
        self.bn1   = nn.BatchNorm2d(chann)
        
        self.conv2 = nn.Conv2d(chann, chann, kernel_size=3, padding=1, stride=1)
        self.bn2   = nn.BatchNorm2d(chann)

    def forward(self, x):
        y = self.conv1(x)
        y = self.bn1(y)
        y = nn.functional.relu(y)
        
        y = self.conv2(y)
        y = self.bn2(y)
        
        if (x.shape == y.shape):
            z = x
        else:
            z = nn.functional.avg_pool2d(x, kernel_size=2, stride=2)            

            x_channel = x.size(1)
            y_channel = y.size(1)
            ch_res = (y_channel - x_channel)//2

            pad = (0, 0, 0, 0, ch_res, ch_res)
            z = nn.functional.pad(z, pad=pad, mode="constant", value=0)

        z = z + y
        z = nn.functional.relu(z)
        return z


class BaseNet(nn.Module):
    
    def __init__(self, Block, n):
        super(BaseNet, self).__init__()
        self.Block = Block
        self.conv0 = nn.Conv2d(3, 16, kernel_size=3, padding=1) # [64, 16, 128, 128]
        self.bn0   = nn.BatchNorm2d(16) # [64, 16, 128, 128]
        self.convs  = self._make_layers(n) # [64, 64, 32, 32]
        self.avgpool = nn.AvgPool2d(kernel_size=8, stride=1) # [64, 64, 25, 25]
        self.fc = nn.Linear(64*25*25, 11)

    def forward(self, x):
        x = self.conv0(x)
        x = self.bn0(x)
        x = nn.functional.relu(x)
        
        x = self.convs(x)
        
        x = self.avgpool(x)

        x = x.view(x.size(0),-1)
        x = self.fc(x)
        
        return x

    def _make_layers(self, n):
        layers = []
        in_chann = 16
        chann = 16
        stride = 1
        for i in range(3):
            for j in range(n):
                if ((i > 0) and (j == 0)):
                    in_chann = chann
                    chann = chann * 2
                    stride = 2

                layers += [self.Block(in_chann, chann, stride)]

                stride = 1
                in_chann = chann

        return nn.Sequential(*layers)


def ResNet(n):
    return BaseNet(ResBlockA, n)

In [12]:
_dataset_dir = "./food11"
# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One ./food11\training sample ./food11\training\0_0.jpg
One ./food11\validation sample ./food11\validation\0_0.jpg


In [13]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"
print(device)

# Initialize a model, and put it on the device specified.
model = ResNet(n_layer).to(device)

if logToWandb:
    wandb.init(
      # Set the project where this run will be logged
      project="ML2022Spring_HW3",
      name= '{} bat={} L={} lr={} wd={} {}'.format(myseed, batch_size, n_layer, lr_rate, weight_decay, _exp_name)
    )
    wandb.watch(model)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=lr_rate, weight_decay=weight_decay) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break
            
    if logToWandb:
        wandb.log({'Acc/train': train_acc, 'epoch':epoch})
        wandb.log({'Loss/train': train_loss, 'epoch':epoch})
        wandb.log({'Acc/valid': valid_acc, 'epoch':epoch})
        wandb.log({'Loss/valid': valid_loss, 'epoch':epoch})
        wandb.log({'early_stop_count': stale})

cuda


[34m[1mwandb[0m: Currently logged in as: [33myuting[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 001/3000 ] loss = 6.60207, acc = 0.17433


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 001/3000 ] loss = 2.17320, acc = 0.20663
[ Valid | 001/3000 ] loss = 2.17320, acc = 0.20663 -> best
Best model found at epoch 0, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 002/3000 ] loss = 2.11762, acc = 0.25298


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 002/3000 ] loss = 2.02453, acc = 0.26847
[ Valid | 002/3000 ] loss = 2.02453, acc = 0.26847 -> best
Best model found at epoch 1, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 003/3000 ] loss = 2.06168, acc = 0.27385


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 003/3000 ] loss = 1.90049, acc = 0.33984
[ Valid | 003/3000 ] loss = 1.90049, acc = 0.33984 -> best
Best model found at epoch 2, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 004/3000 ] loss = 2.00139, acc = 0.29407


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 004/3000 ] loss = 1.97832, acc = 0.32672
[ Valid | 004/3000 ] loss = 1.97832, acc = 0.32672


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 005/3000 ] loss = 1.94970, acc = 0.32119


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 005/3000 ] loss = 2.37824, acc = 0.28379
[ Valid | 005/3000 ] loss = 2.37824, acc = 0.28379


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 006/3000 ] loss = 1.89922, acc = 0.34153


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 006/3000 ] loss = 2.14002, acc = 0.33243
[ Valid | 006/3000 ] loss = 2.14002, acc = 0.33243


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 007/3000 ] loss = 1.87832, acc = 0.35321


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 007/3000 ] loss = 1.85110, acc = 0.36330
[ Valid | 007/3000 ] loss = 1.85110, acc = 0.36330 -> best
Best model found at epoch 6, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 008/3000 ] loss = 1.82821, acc = 0.36683


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 008/3000 ] loss = 1.74898, acc = 0.39726
[ Valid | 008/3000 ] loss = 1.74898, acc = 0.39726 -> best
Best model found at epoch 7, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 009/3000 ] loss = 1.80926, acc = 0.37044


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 009/3000 ] loss = 1.85793, acc = 0.37284
[ Valid | 009/3000 ] loss = 1.85793, acc = 0.37284


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 010/3000 ] loss = 1.77683, acc = 0.37437


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 010/3000 ] loss = 1.74315, acc = 0.42376
[ Valid | 010/3000 ] loss = 1.74315, acc = 0.42376 -> best
Best model found at epoch 9, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 011/3000 ] loss = 1.75343, acc = 0.39714


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 011/3000 ] loss = 1.85826, acc = 0.39824
[ Valid | 011/3000 ] loss = 1.85826, acc = 0.39824


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 012/3000 ] loss = 1.72117, acc = 0.40897


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 012/3000 ] loss = 1.65629, acc = 0.43045
[ Valid | 012/3000 ] loss = 1.65629, acc = 0.43045 -> best
Best model found at epoch 11, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 013/3000 ] loss = 1.69881, acc = 0.42022


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 013/3000 ] loss = 2.62092, acc = 0.30227
[ Valid | 013/3000 ] loss = 2.62092, acc = 0.30227


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 014/3000 ] loss = 1.66039, acc = 0.43482


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 014/3000 ] loss = 1.58703, acc = 0.47474
[ Valid | 014/3000 ] loss = 1.58703, acc = 0.47474 -> best
Best model found at epoch 13, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 015/3000 ] loss = 1.63801, acc = 0.43669


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 015/3000 ] loss = 1.80718, acc = 0.43683
[ Valid | 015/3000 ] loss = 1.80718, acc = 0.43683


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 016/3000 ] loss = 1.60334, acc = 0.44865


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 016/3000 ] loss = 1.80858, acc = 0.43480
[ Valid | 016/3000 ] loss = 1.80858, acc = 0.43480


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 017/3000 ] loss = 1.58576, acc = 0.46226


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 017/3000 ] loss = 2.10937, acc = 0.40978
[ Valid | 017/3000 ] loss = 2.10937, acc = 0.40978


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 018/3000 ] loss = 1.56523, acc = 0.46919


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 018/3000 ] loss = 1.74680, acc = 0.44810
[ Valid | 018/3000 ] loss = 1.74680, acc = 0.44810


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 019/3000 ] loss = 1.54952, acc = 0.46935


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 019/3000 ] loss = 1.66729, acc = 0.44578
[ Valid | 019/3000 ] loss = 1.66729, acc = 0.44578


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 020/3000 ] loss = 1.51771, acc = 0.48442


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 020/3000 ] loss = 1.66959, acc = 0.49161
[ Valid | 020/3000 ] loss = 1.66959, acc = 0.49161 -> best
Best model found at epoch 19, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 021/3000 ] loss = 1.48992, acc = 0.49224


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 021/3000 ] loss = 1.38895, acc = 0.55548
[ Valid | 021/3000 ] loss = 1.38895, acc = 0.55548 -> best
Best model found at epoch 20, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 022/3000 ] loss = 1.47131, acc = 0.49230


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 022/3000 ] loss = 1.43930, acc = 0.53289
[ Valid | 022/3000 ] loss = 1.43930, acc = 0.53289


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 023/3000 ] loss = 1.44037, acc = 0.51315


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 023/3000 ] loss = 1.31619, acc = 0.57573
[ Valid | 023/3000 ] loss = 1.31619, acc = 0.57573 -> best
Best model found at epoch 22, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 024/3000 ] loss = 2.07301, acc = 0.49456


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 024/3000 ] loss = 1.36909, acc = 0.55173
[ Valid | 024/3000 ] loss = 1.36909, acc = 0.55173


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 025/3000 ] loss = 1.39776, acc = 0.52319


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 025/3000 ] loss = 1.33803, acc = 0.57872
[ Valid | 025/3000 ] loss = 1.33803, acc = 0.57872 -> best
Best model found at epoch 24, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 026/3000 ] loss = 1.36806, acc = 0.53659


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 026/3000 ] loss = 1.42688, acc = 0.53850
[ Valid | 026/3000 ] loss = 1.42688, acc = 0.53850


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 027/3000 ] loss = 1.36354, acc = 0.54532


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 027/3000 ] loss = 1.57803, acc = 0.49747
[ Valid | 027/3000 ] loss = 1.57803, acc = 0.49747


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 028/3000 ] loss = 1.34201, acc = 0.54379


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 028/3000 ] loss = 1.28969, acc = 0.57633
[ Valid | 028/3000 ] loss = 1.28969, acc = 0.57633


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 029/3000 ] loss = 1.30848, acc = 0.55700


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 029/3000 ] loss = 1.30168, acc = 0.57937
[ Valid | 029/3000 ] loss = 1.30168, acc = 0.57937 -> best
Best model found at epoch 28, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 030/3000 ] loss = 1.30138, acc = 0.55468


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 030/3000 ] loss = 1.30524, acc = 0.59185
[ Valid | 030/3000 ] loss = 1.30524, acc = 0.59185 -> best
Best model found at epoch 29, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 031/3000 ] loss = 1.27993, acc = 0.56996


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 031/3000 ] loss = 1.25511, acc = 0.57753
[ Valid | 031/3000 ] loss = 1.25511, acc = 0.57753


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 032/3000 ] loss = 1.28291, acc = 0.56794


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 032/3000 ] loss = 1.52914, acc = 0.54264
[ Valid | 032/3000 ] loss = 1.52914, acc = 0.54264


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 033/3000 ] loss = 1.24707, acc = 0.58391


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 033/3000 ] loss = 1.25929, acc = 0.59983
[ Valid | 033/3000 ] loss = 1.25929, acc = 0.59983 -> best
Best model found at epoch 32, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 034/3000 ] loss = 1.21951, acc = 0.58272


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 034/3000 ] loss = 1.69724, acc = 0.53328
[ Valid | 034/3000 ] loss = 1.69724, acc = 0.53328


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 035/3000 ] loss = 1.20763, acc = 0.59544


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 035/3000 ] loss = 1.55350, acc = 0.55047
[ Valid | 035/3000 ] loss = 1.55350, acc = 0.55047


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 036/3000 ] loss = 1.21719, acc = 0.58895


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 036/3000 ] loss = 1.13406, acc = 0.64780
[ Valid | 036/3000 ] loss = 1.13406, acc = 0.64780 -> best
Best model found at epoch 35, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 037/3000 ] loss = 1.21598, acc = 0.59095


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 037/3000 ] loss = 1.16736, acc = 0.61210
[ Valid | 037/3000 ] loss = 1.16736, acc = 0.61210


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 038/3000 ] loss = 1.19099, acc = 0.60143


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 038/3000 ] loss = 1.19381, acc = 0.62599
[ Valid | 038/3000 ] loss = 1.19381, acc = 0.62599


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 039/3000 ] loss = 1.15427, acc = 0.61119


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 039/3000 ] loss = 1.25520, acc = 0.60053
[ Valid | 039/3000 ] loss = 1.25520, acc = 0.60053


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 040/3000 ] loss = 1.14424, acc = 0.61544


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 040/3000 ] loss = 1.26842, acc = 0.62308
[ Valid | 040/3000 ] loss = 1.26842, acc = 0.62308


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 041/3000 ] loss = 1.13953, acc = 0.61851


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 041/3000 ] loss = 1.40077, acc = 0.56414
[ Valid | 041/3000 ] loss = 1.40077, acc = 0.56414


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 042/3000 ] loss = 1.13776, acc = 0.61974


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 042/3000 ] loss = 1.56939, acc = 0.57107
[ Valid | 042/3000 ] loss = 1.56939, acc = 0.57107


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 043/3000 ] loss = 1.13248, acc = 0.62312


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 043/3000 ] loss = 1.11853, acc = 0.63967
[ Valid | 043/3000 ] loss = 1.11853, acc = 0.63967


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 044/3000 ] loss = 1.10367, acc = 0.62528


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 044/3000 ] loss = 1.87356, acc = 0.49365
[ Valid | 044/3000 ] loss = 1.87356, acc = 0.49365


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 045/3000 ] loss = 1.11654, acc = 0.62323


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 045/3000 ] loss = 1.09716, acc = 0.64809
[ Valid | 045/3000 ] loss = 1.09716, acc = 0.64809 -> best
Best model found at epoch 44, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 046/3000 ] loss = 1.08386, acc = 0.63831


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 046/3000 ] loss = 1.41991, acc = 0.60284
[ Valid | 046/3000 ] loss = 1.41991, acc = 0.60284


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 047/3000 ] loss = 1.08581, acc = 0.63885


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 047/3000 ] loss = 1.20564, acc = 0.62512
[ Valid | 047/3000 ] loss = 1.20564, acc = 0.62512


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 048/3000 ] loss = 1.06376, acc = 0.64458


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 048/3000 ] loss = 1.29842, acc = 0.61749
[ Valid | 048/3000 ] loss = 1.29842, acc = 0.61749


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 049/3000 ] loss = 1.06035, acc = 0.64127


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 049/3000 ] loss = 1.23080, acc = 0.64596
[ Valid | 049/3000 ] loss = 1.23080, acc = 0.64596


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 050/3000 ] loss = 1.03729, acc = 0.65724


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 050/3000 ] loss = 1.17168, acc = 0.65444
[ Valid | 050/3000 ] loss = 1.17168, acc = 0.65444 -> best
Best model found at epoch 49, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 051/3000 ] loss = 1.01445, acc = 0.65938


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 051/3000 ] loss = 1.18363, acc = 0.66208
[ Valid | 051/3000 ] loss = 1.18363, acc = 0.66208 -> best
Best model found at epoch 50, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 052/3000 ] loss = 1.01127, acc = 0.66391


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 052/3000 ] loss = 1.11104, acc = 0.65995
[ Valid | 052/3000 ] loss = 1.11104, acc = 0.65995


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 053/3000 ] loss = 1.02733, acc = 0.65669


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 053/3000 ] loss = 1.23670, acc = 0.67162
[ Valid | 053/3000 ] loss = 1.23670, acc = 0.67162 -> best
Best model found at epoch 52, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 054/3000 ] loss = 0.98801, acc = 0.66855


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 054/3000 ] loss = 1.01997, acc = 0.69420
[ Valid | 054/3000 ] loss = 1.01997, acc = 0.69420 -> best
Best model found at epoch 53, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 055/3000 ] loss = 0.99758, acc = 0.66714


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 055/3000 ] loss = 0.98664, acc = 0.70064
[ Valid | 055/3000 ] loss = 0.98664, acc = 0.70064 -> best
Best model found at epoch 54, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 056/3000 ] loss = 0.97045, acc = 0.67583


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 056/3000 ] loss = 2.34449, acc = 0.66391
[ Valid | 056/3000 ] loss = 2.34449, acc = 0.66391


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 057/3000 ] loss = 0.96371, acc = 0.67708


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 057/3000 ] loss = 1.12921, acc = 0.69147
[ Valid | 057/3000 ] loss = 1.12921, acc = 0.69147


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 058/3000 ] loss = 0.96370, acc = 0.67986


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 058/3000 ] loss = 1.05259, acc = 0.68811
[ Valid | 058/3000 ] loss = 1.05259, acc = 0.68811


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 059/3000 ] loss = 0.94586, acc = 0.68589


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 059/3000 ] loss = 1.19356, acc = 0.67241
[ Valid | 059/3000 ] loss = 1.19356, acc = 0.67241


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 060/3000 ] loss = 0.96923, acc = 0.67833


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 060/3000 ] loss = 1.26405, acc = 0.61991
[ Valid | 060/3000 ] loss = 1.26405, acc = 0.61991


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 061/3000 ] loss = 0.95316, acc = 0.68171


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 061/3000 ] loss = 1.32796, acc = 0.61277
[ Valid | 061/3000 ] loss = 1.32796, acc = 0.61277


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 062/3000 ] loss = 0.96160, acc = 0.67270


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 062/3000 ] loss = 1.49309, acc = 0.63948
[ Valid | 062/3000 ] loss = 1.49309, acc = 0.63948


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 063/3000 ] loss = 0.92391, acc = 0.69284


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 063/3000 ] loss = 1.21811, acc = 0.70384
[ Valid | 063/3000 ] loss = 1.21811, acc = 0.70384 -> best
Best model found at epoch 62, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 064/3000 ] loss = 0.90985, acc = 0.69831


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 064/3000 ] loss = 1.15228, acc = 0.65948
[ Valid | 064/3000 ] loss = 1.15228, acc = 0.65948


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 065/3000 ] loss = 0.93005, acc = 0.69347


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 065/3000 ] loss = 1.02549, acc = 0.67975
[ Valid | 065/3000 ] loss = 1.02549, acc = 0.67975


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 066/3000 ] loss = 0.89809, acc = 0.70002


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 066/3000 ] loss = 1.02892, acc = 0.71082
[ Valid | 066/3000 ] loss = 1.02892, acc = 0.71082 -> best
Best model found at epoch 65, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 067/3000 ] loss = 0.90116, acc = 0.69621


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 067/3000 ] loss = 2.02302, acc = 0.67018
[ Valid | 067/3000 ] loss = 2.02302, acc = 0.67018


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 068/3000 ] loss = 0.88943, acc = 0.70583


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 068/3000 ] loss = 1.06315, acc = 0.69697
[ Valid | 068/3000 ] loss = 1.06315, acc = 0.69697


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 069/3000 ] loss = 0.86752, acc = 0.71512


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 069/3000 ] loss = 1.72357, acc = 0.62830
[ Valid | 069/3000 ] loss = 1.72357, acc = 0.62830


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 070/3000 ] loss = 0.87718, acc = 0.70748


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 070/3000 ] loss = 1.17492, acc = 0.68812
[ Valid | 070/3000 ] loss = 1.17492, acc = 0.68812


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 071/3000 ] loss = 0.95271, acc = 0.70974


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 071/3000 ] loss = 1.41209, acc = 0.63176
[ Valid | 071/3000 ] loss = 1.41209, acc = 0.63176


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 072/3000 ] loss = 0.86304, acc = 0.70744


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 072/3000 ] loss = 1.08175, acc = 0.69728
[ Valid | 072/3000 ] loss = 1.08175, acc = 0.69728


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 073/3000 ] loss = 0.86424, acc = 0.71187


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 073/3000 ] loss = 0.94006, acc = 0.72189
[ Valid | 073/3000 ] loss = 0.94006, acc = 0.72189 -> best
Best model found at epoch 72, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 074/3000 ] loss = 0.84773, acc = 0.71530


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 074/3000 ] loss = 0.94170, acc = 0.72160
[ Valid | 074/3000 ] loss = 0.94170, acc = 0.72160


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 075/3000 ] loss = 0.83646, acc = 0.72163


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 075/3000 ] loss = 1.06536, acc = 0.68956
[ Valid | 075/3000 ] loss = 1.06536, acc = 0.68956


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 076/3000 ] loss = 0.84322, acc = 0.71716


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 076/3000 ] loss = 1.13379, acc = 0.67809
[ Valid | 076/3000 ] loss = 1.13379, acc = 0.67809


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 077/3000 ] loss = 0.84415, acc = 0.72185


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 077/3000 ] loss = 1.05214, acc = 0.70838
[ Valid | 077/3000 ] loss = 1.05214, acc = 0.70838


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 078/3000 ] loss = 0.81201, acc = 0.72899


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 078/3000 ] loss = 3.21688, acc = 0.68822
[ Valid | 078/3000 ] loss = 3.21688, acc = 0.68822


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 079/3000 ] loss = 0.84907, acc = 0.71371


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 079/3000 ] loss = 0.97043, acc = 0.70386
[ Valid | 079/3000 ] loss = 0.97043, acc = 0.70386


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 080/3000 ] loss = 0.80922, acc = 0.73141


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 080/3000 ] loss = 0.96739, acc = 0.72253
[ Valid | 080/3000 ] loss = 0.96739, acc = 0.72253 -> best
Best model found at epoch 79, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 081/3000 ] loss = 0.81726, acc = 0.72452


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 081/3000 ] loss = 1.09405, acc = 0.70780
[ Valid | 081/3000 ] loss = 1.09405, acc = 0.70780


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 082/3000 ] loss = 0.81142, acc = 0.73351


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 082/3000 ] loss = 1.10269, acc = 0.67245
[ Valid | 082/3000 ] loss = 1.10269, acc = 0.67245


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 083/3000 ] loss = 0.81367, acc = 0.73062


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 083/3000 ] loss = 1.19493, acc = 0.71590
[ Valid | 083/3000 ] loss = 1.19493, acc = 0.71590


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 084/3000 ] loss = 0.79079, acc = 0.74048


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 084/3000 ] loss = 0.95080, acc = 0.71359
[ Valid | 084/3000 ] loss = 0.95080, acc = 0.71359


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 085/3000 ] loss = 0.79493, acc = 0.73734


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 085/3000 ] loss = 0.93767, acc = 0.73366
[ Valid | 085/3000 ] loss = 0.93767, acc = 0.73366 -> best
Best model found at epoch 84, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 086/3000 ] loss = 0.76835, acc = 0.74276


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 086/3000 ] loss = 0.92467, acc = 0.73442
[ Valid | 086/3000 ] loss = 0.92467, acc = 0.73442 -> best
Best model found at epoch 85, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 087/3000 ] loss = 0.80222, acc = 0.73337


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 087/3000 ] loss = 0.98504, acc = 0.70576
[ Valid | 087/3000 ] loss = 0.98504, acc = 0.70576


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 088/3000 ] loss = 0.77878, acc = 0.73940


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 088/3000 ] loss = 0.93293, acc = 0.71845
[ Valid | 088/3000 ] loss = 0.93293, acc = 0.71845


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 089/3000 ] loss = 0.77168, acc = 0.74417


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 089/3000 ] loss = 0.86848, acc = 0.74127
[ Valid | 089/3000 ] loss = 0.86848, acc = 0.74127 -> best
Best model found at epoch 88, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 090/3000 ] loss = 0.78873, acc = 0.73895


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 090/3000 ] loss = 1.17593, acc = 0.66498
[ Valid | 090/3000 ] loss = 1.17593, acc = 0.66498


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 091/3000 ] loss = 0.75669, acc = 0.74623


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 091/3000 ] loss = 1.01683, acc = 0.72373
[ Valid | 091/3000 ] loss = 1.01683, acc = 0.72373


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 092/3000 ] loss = 0.75084, acc = 0.75450


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 092/3000 ] loss = 1.01124, acc = 0.73393
[ Valid | 092/3000 ] loss = 1.01124, acc = 0.73393


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 093/3000 ] loss = 0.75961, acc = 0.74492


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 093/3000 ] loss = 1.15952, acc = 0.67481
[ Valid | 093/3000 ] loss = 1.15952, acc = 0.67481


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 094/3000 ] loss = 0.75638, acc = 0.74742


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 094/3000 ] loss = 0.94496, acc = 0.72225
[ Valid | 094/3000 ] loss = 0.94496, acc = 0.72225


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 095/3000 ] loss = 0.74007, acc = 0.75425


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 095/3000 ] loss = 1.18632, acc = 0.68761
[ Valid | 095/3000 ] loss = 1.18632, acc = 0.68761


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 096/3000 ] loss = 0.74032, acc = 0.75214


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 096/3000 ] loss = 1.68176, acc = 0.69218
[ Valid | 096/3000 ] loss = 1.68176, acc = 0.69218


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 097/3000 ] loss = 0.72686, acc = 0.75321


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 097/3000 ] loss = 0.89994, acc = 0.74733
[ Valid | 097/3000 ] loss = 0.89994, acc = 0.74733 -> best
Best model found at epoch 96, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 098/3000 ] loss = 0.74069, acc = 0.75286


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 098/3000 ] loss = 1.16117, acc = 0.72630
[ Valid | 098/3000 ] loss = 1.16117, acc = 0.72630


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 099/3000 ] loss = 0.73010, acc = 0.76004


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 099/3000 ] loss = 0.85980, acc = 0.75672
[ Valid | 099/3000 ] loss = 0.85980, acc = 0.75672 -> best
Best model found at epoch 98, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 100/3000 ] loss = 0.72510, acc = 0.75738


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 100/3000 ] loss = 1.00435, acc = 0.75285
[ Valid | 100/3000 ] loss = 1.00435, acc = 0.75285


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 101/3000 ] loss = 0.71790, acc = 0.75899


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 101/3000 ] loss = 1.01780, acc = 0.71706
[ Valid | 101/3000 ] loss = 1.01780, acc = 0.71706


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 102/3000 ] loss = 0.72015, acc = 0.76224


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 102/3000 ] loss = 0.95840, acc = 0.73492
[ Valid | 102/3000 ] loss = 0.95840, acc = 0.73492


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 103/3000 ] loss = 0.69988, acc = 0.76456


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 103/3000 ] loss = 1.02854, acc = 0.72292
[ Valid | 103/3000 ] loss = 1.02854, acc = 0.72292


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 104/3000 ] loss = 0.70998, acc = 0.75750


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 104/3000 ] loss = 1.01630, acc = 0.74330
[ Valid | 104/3000 ] loss = 1.01630, acc = 0.74330


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 105/3000 ] loss = 0.69291, acc = 0.76540


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 105/3000 ] loss = 1.47211, acc = 0.73453
[ Valid | 105/3000 ] loss = 1.47211, acc = 0.73453


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 106/3000 ] loss = 0.70510, acc = 0.76571


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 106/3000 ] loss = 1.21975, acc = 0.69979
[ Valid | 106/3000 ] loss = 1.21975, acc = 0.69979


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 107/3000 ] loss = 0.68931, acc = 0.77018


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 107/3000 ] loss = 0.89664, acc = 0.75640
[ Valid | 107/3000 ] loss = 0.89664, acc = 0.75640


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 108/3000 ] loss = 0.70552, acc = 0.76734


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 108/3000 ] loss = 0.85256, acc = 0.75518
[ Valid | 108/3000 ] loss = 0.85256, acc = 0.75518


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 109/3000 ] loss = 0.70056, acc = 0.76823


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 109/3000 ] loss = 0.92809, acc = 0.74909
[ Valid | 109/3000 ] loss = 0.92809, acc = 0.74909


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 110/3000 ] loss = 0.70944, acc = 0.76829


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 110/3000 ] loss = 0.93204, acc = 0.74070
[ Valid | 110/3000 ] loss = 0.93204, acc = 0.74070


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 111/3000 ] loss = 0.67250, acc = 0.77730


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 111/3000 ] loss = 1.12272, acc = 0.72545
[ Valid | 111/3000 ] loss = 1.12272, acc = 0.72545


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 112/3000 ] loss = 0.68329, acc = 0.77419


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 112/3000 ] loss = 0.88684, acc = 0.75911
[ Valid | 112/3000 ] loss = 0.88684, acc = 0.75911 -> best
Best model found at epoch 111, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 113/3000 ] loss = 0.66569, acc = 0.77736


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 113/3000 ] loss = 1.72069, acc = 0.71930
[ Valid | 113/3000 ] loss = 1.72069, acc = 0.71930


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 114/3000 ] loss = 0.66581, acc = 0.77837


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 114/3000 ] loss = 1.18145, acc = 0.75768
[ Valid | 114/3000 ] loss = 1.18145, acc = 0.75768


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 115/3000 ] loss = 0.68661, acc = 0.77399


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 115/3000 ] loss = 1.90240, acc = 0.74280
[ Valid | 115/3000 ] loss = 1.90240, acc = 0.74280


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 116/3000 ] loss = 0.65866, acc = 0.78288


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 116/3000 ] loss = 1.53266, acc = 0.70338
[ Valid | 116/3000 ] loss = 1.53266, acc = 0.70338


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 117/3000 ] loss = 0.65663, acc = 0.78226


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 117/3000 ] loss = 1.03861, acc = 0.72959
[ Valid | 117/3000 ] loss = 1.03861, acc = 0.72959


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 118/3000 ] loss = 0.67302, acc = 0.78109


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 118/3000 ] loss = 1.46229, acc = 0.73983
[ Valid | 118/3000 ] loss = 1.46229, acc = 0.73983


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 119/3000 ] loss = 0.64107, acc = 0.78909


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 119/3000 ] loss = 2.59992, acc = 0.75758
[ Valid | 119/3000 ] loss = 2.59992, acc = 0.75758


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 120/3000 ] loss = 0.65496, acc = 0.78722


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 120/3000 ] loss = 1.40331, acc = 0.76509
[ Valid | 120/3000 ] loss = 1.40331, acc = 0.76509 -> best
Best model found at epoch 119, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 121/3000 ] loss = 0.66018, acc = 0.78101


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 121/3000 ] loss = 2.11452, acc = 0.73829
[ Valid | 121/3000 ] loss = 2.11452, acc = 0.73829


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 122/3000 ] loss = 0.65345, acc = 0.78683


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 122/3000 ] loss = 0.97027, acc = 0.75602
[ Valid | 122/3000 ] loss = 0.97027, acc = 0.75602


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 123/3000 ] loss = 0.64765, acc = 0.78308


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 123/3000 ] loss = 0.96949, acc = 0.73935
[ Valid | 123/3000 ] loss = 0.96949, acc = 0.73935


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 124/3000 ] loss = 0.64432, acc = 0.78752


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 124/3000 ] loss = 2.01667, acc = 0.68290
[ Valid | 124/3000 ] loss = 2.01667, acc = 0.68290


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 125/3000 ] loss = 0.64031, acc = 0.78389


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 125/3000 ] loss = 1.08637, acc = 0.76374
[ Valid | 125/3000 ] loss = 1.08637, acc = 0.76374


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 126/3000 ] loss = 0.62242, acc = 0.79222


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 126/3000 ] loss = 0.81040, acc = 0.75892
[ Valid | 126/3000 ] loss = 0.81040, acc = 0.75892


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 127/3000 ] loss = 0.65571, acc = 0.78143


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 127/3000 ] loss = 0.92468, acc = 0.74542
[ Valid | 127/3000 ] loss = 0.92468, acc = 0.74542


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 128/3000 ] loss = 0.63710, acc = 0.79115


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 128/3000 ] loss = 0.85891, acc = 0.75542
[ Valid | 128/3000 ] loss = 0.85891, acc = 0.75542


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 129/3000 ] loss = 0.62774, acc = 0.79516


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 129/3000 ] loss = 0.90541, acc = 0.76849
[ Valid | 129/3000 ] loss = 0.90541, acc = 0.76849 -> best
Best model found at epoch 128, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 130/3000 ] loss = 0.61505, acc = 0.79639


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 130/3000 ] loss = 1.40845, acc = 0.71100
[ Valid | 130/3000 ] loss = 1.40845, acc = 0.71100


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 131/3000 ] loss = 0.61228, acc = 0.79579


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 131/3000 ] loss = 1.56809, acc = 0.75535
[ Valid | 131/3000 ] loss = 1.56809, acc = 0.75535


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 132/3000 ] loss = 0.61056, acc = 0.79510


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 132/3000 ] loss = 0.93349, acc = 0.77802
[ Valid | 132/3000 ] loss = 0.93349, acc = 0.77802 -> best
Best model found at epoch 131, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 133/3000 ] loss = 0.60387, acc = 0.79520


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 133/3000 ] loss = 0.91686, acc = 0.73287
[ Valid | 133/3000 ] loss = 0.91686, acc = 0.73287


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 134/3000 ] loss = 0.61515, acc = 0.79484


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 134/3000 ] loss = 0.95738, acc = 0.74388
[ Valid | 134/3000 ] loss = 0.95738, acc = 0.74388


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 135/3000 ] loss = 0.60190, acc = 0.79871


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 135/3000 ] loss = 0.97179, acc = 0.74144
[ Valid | 135/3000 ] loss = 0.97179, acc = 0.74144


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 136/3000 ] loss = 0.59575, acc = 0.79952


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 136/3000 ] loss = 0.99078, acc = 0.71299
[ Valid | 136/3000 ] loss = 0.99078, acc = 0.71299


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 137/3000 ] loss = 0.59839, acc = 0.79573


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 137/3000 ] loss = 1.30026, acc = 0.78043
[ Valid | 137/3000 ] loss = 1.30026, acc = 0.78043 -> best
Best model found at epoch 136, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 138/3000 ] loss = 0.58307, acc = 0.80518


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 138/3000 ] loss = 1.09783, acc = 0.77000
[ Valid | 138/3000 ] loss = 1.09783, acc = 0.77000


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 139/3000 ] loss = 0.59750, acc = 0.79776


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 139/3000 ] loss = 1.26000, acc = 0.73035
[ Valid | 139/3000 ] loss = 1.26000, acc = 0.73035


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 140/3000 ] loss = 0.58808, acc = 0.80421


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 140/3000 ] loss = 0.96069, acc = 0.75659
[ Valid | 140/3000 ] loss = 0.96069, acc = 0.75659


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 141/3000 ] loss = 0.57513, acc = 0.81095


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 141/3000 ] loss = 1.07856, acc = 0.73374
[ Valid | 141/3000 ] loss = 1.07856, acc = 0.73374


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 142/3000 ] loss = 0.58659, acc = 0.80778


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 142/3000 ] loss = 0.90665, acc = 0.74840
[ Valid | 142/3000 ] loss = 0.90665, acc = 0.74840


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 143/3000 ] loss = 0.58053, acc = 0.80829


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 143/3000 ] loss = 0.95660, acc = 0.72620
[ Valid | 143/3000 ] loss = 0.95660, acc = 0.72620


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 144/3000 ] loss = 0.60281, acc = 0.80194


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 144/3000 ] loss = 0.99418, acc = 0.70133
[ Valid | 144/3000 ] loss = 0.99418, acc = 0.70133


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 145/3000 ] loss = 0.56267, acc = 0.81202


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 145/3000 ] loss = 0.88925, acc = 0.73308
[ Valid | 145/3000 ] loss = 0.88925, acc = 0.73308


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 146/3000 ] loss = 0.57582, acc = 0.80659


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 146/3000 ] loss = 0.99073, acc = 0.74679
[ Valid | 146/3000 ] loss = 0.99073, acc = 0.74679


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 147/3000 ] loss = 0.57221, acc = 0.80784


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 147/3000 ] loss = 1.01172, acc = 0.74819
[ Valid | 147/3000 ] loss = 1.01172, acc = 0.74819


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 148/3000 ] loss = 0.57082, acc = 0.80845


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 148/3000 ] loss = 0.92287, acc = 0.76829
[ Valid | 148/3000 ] loss = 0.92287, acc = 0.76829


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 149/3000 ] loss = 0.57270, acc = 0.80681


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 149/3000 ] loss = 1.04041, acc = 0.73984
[ Valid | 149/3000 ] loss = 1.04041, acc = 0.73984


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 150/3000 ] loss = 0.55604, acc = 0.80849


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 150/3000 ] loss = 1.14238, acc = 0.72428
[ Valid | 150/3000 ] loss = 1.14238, acc = 0.72428


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 151/3000 ] loss = 0.56034, acc = 0.81750


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 151/3000 ] loss = 1.07863, acc = 0.75649
[ Valid | 151/3000 ] loss = 1.07863, acc = 0.75649


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 152/3000 ] loss = 0.54381, acc = 0.81712


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 152/3000 ] loss = 0.82372, acc = 0.78073
[ Valid | 152/3000 ] loss = 0.82372, acc = 0.78073 -> best
Best model found at epoch 151, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 153/3000 ] loss = 0.56731, acc = 0.80966


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 153/3000 ] loss = 0.95392, acc = 0.78428
[ Valid | 153/3000 ] loss = 0.95392, acc = 0.78428 -> best
Best model found at epoch 152, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 154/3000 ] loss = 0.54675, acc = 0.82083


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 154/3000 ] loss = 1.02327, acc = 0.77947
[ Valid | 154/3000 ] loss = 1.02327, acc = 0.77947


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 155/3000 ] loss = 0.56015, acc = 0.81696


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 155/3000 ] loss = 0.98030, acc = 0.76422
[ Valid | 155/3000 ] loss = 0.98030, acc = 0.76422


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 156/3000 ] loss = 0.54313, acc = 0.81486


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 156/3000 ] loss = 0.79619, acc = 0.78312
[ Valid | 156/3000 ] loss = 0.79619, acc = 0.78312


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 157/3000 ] loss = 0.54394, acc = 0.81752


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 157/3000 ] loss = 0.89308, acc = 0.75679
[ Valid | 157/3000 ] loss = 0.89308, acc = 0.75679


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 158/3000 ] loss = 0.53722, acc = 0.82200


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 158/3000 ] loss = 0.95237, acc = 0.76780
[ Valid | 158/3000 ] loss = 0.95237, acc = 0.76780


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 159/3000 ] loss = 0.54845, acc = 0.81679


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 159/3000 ] loss = 1.05891, acc = 0.75303
[ Valid | 159/3000 ] loss = 1.05891, acc = 0.75303


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 160/3000 ] loss = 0.53840, acc = 0.82117


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 160/3000 ] loss = 0.96929, acc = 0.77493
[ Valid | 160/3000 ] loss = 0.96929, acc = 0.77493


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 161/3000 ] loss = 0.53728, acc = 0.82254


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 161/3000 ] loss = 1.78484, acc = 0.75690
[ Valid | 161/3000 ] loss = 1.78484, acc = 0.75690


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 162/3000 ] loss = 0.53942, acc = 0.81829


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 162/3000 ] loss = 1.43265, acc = 0.77688
[ Valid | 162/3000 ] loss = 1.43265, acc = 0.77688


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 163/3000 ] loss = 0.52974, acc = 0.82260


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 163/3000 ] loss = 0.81164, acc = 0.79057
[ Valid | 163/3000 ] loss = 0.81164, acc = 0.79057 -> best
Best model found at epoch 162, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 164/3000 ] loss = 0.51817, acc = 0.82694


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 164/3000 ] loss = 0.80100, acc = 0.78177
[ Valid | 164/3000 ] loss = 0.80100, acc = 0.78177


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 165/3000 ] loss = 0.53805, acc = 0.82333


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 165/3000 ] loss = 0.88237, acc = 0.77505
[ Valid | 165/3000 ] loss = 0.88237, acc = 0.77505


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 166/3000 ] loss = 0.53055, acc = 0.82218


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 166/3000 ] loss = 0.86953, acc = 0.77887
[ Valid | 166/3000 ] loss = 0.86953, acc = 0.77887


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 167/3000 ] loss = 0.54253, acc = 0.82052


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 167/3000 ] loss = 0.83852, acc = 0.76712
[ Valid | 167/3000 ] loss = 0.83852, acc = 0.76712


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 168/3000 ] loss = 0.52238, acc = 0.82468


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 168/3000 ] loss = 0.82325, acc = 0.78286
[ Valid | 168/3000 ] loss = 0.82325, acc = 0.78286


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 169/3000 ] loss = 0.52311, acc = 0.82694


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 169/3000 ] loss = 0.77764, acc = 0.78459
[ Valid | 169/3000 ] loss = 0.77764, acc = 0.78459


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 170/3000 ] loss = 0.52012, acc = 0.82966


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 170/3000 ] loss = 1.02827, acc = 0.76878
[ Valid | 170/3000 ] loss = 1.02827, acc = 0.76878


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 171/3000 ] loss = 0.51140, acc = 0.82849


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 171/3000 ] loss = 0.94814, acc = 0.77300
[ Valid | 171/3000 ] loss = 0.94814, acc = 0.77300


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 172/3000 ] loss = 0.50925, acc = 0.82861


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 172/3000 ] loss = 0.95846, acc = 0.77184
[ Valid | 172/3000 ] loss = 0.95846, acc = 0.77184


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 173/3000 ] loss = 0.52649, acc = 0.82417


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 173/3000 ] loss = 0.95525, acc = 0.78486
[ Valid | 173/3000 ] loss = 0.95525, acc = 0.78486


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 174/3000 ] loss = 0.49807, acc = 0.83470


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 174/3000 ] loss = 0.98126, acc = 0.77581
[ Valid | 174/3000 ] loss = 0.98126, acc = 0.77581


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 175/3000 ] loss = 0.50791, acc = 0.82589


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 175/3000 ] loss = 1.09432, acc = 0.78314
[ Valid | 175/3000 ] loss = 1.09432, acc = 0.78314


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 176/3000 ] loss = 0.48881, acc = 0.83464


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 176/3000 ] loss = 0.88297, acc = 0.79453
[ Valid | 176/3000 ] loss = 0.88297, acc = 0.79453 -> best
Best model found at epoch 175, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 177/3000 ] loss = 0.51667, acc = 0.83004


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 177/3000 ] loss = 0.96154, acc = 0.76508
[ Valid | 177/3000 ] loss = 0.96154, acc = 0.76508


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 178/3000 ] loss = 0.50692, acc = 0.83018


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 178/3000 ] loss = 0.94784, acc = 0.75796
[ Valid | 178/3000 ] loss = 0.94784, acc = 0.75796


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 179/3000 ] loss = 0.50815, acc = 0.82962


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 179/3000 ] loss = 0.80408, acc = 0.78631
[ Valid | 179/3000 ] loss = 0.80408, acc = 0.78631


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 180/3000 ] loss = 0.50522, acc = 0.83329


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 180/3000 ] loss = 0.84595, acc = 0.77514
[ Valid | 180/3000 ] loss = 0.84595, acc = 0.77514


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 181/3000 ] loss = 0.49365, acc = 0.83323


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 181/3000 ] loss = 0.91531, acc = 0.76780
[ Valid | 181/3000 ] loss = 0.91531, acc = 0.76780


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 182/3000 ] loss = 0.47533, acc = 0.83827


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 182/3000 ] loss = 0.88665, acc = 0.77456
[ Valid | 182/3000 ] loss = 0.88665, acc = 0.77456


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 183/3000 ] loss = 0.49786, acc = 0.83421


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 183/3000 ] loss = 0.93097, acc = 0.77542
[ Valid | 183/3000 ] loss = 0.93097, acc = 0.77542


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 184/3000 ] loss = 0.47938, acc = 0.84050


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 184/3000 ] loss = 0.87495, acc = 0.77639
[ Valid | 184/3000 ] loss = 0.87495, acc = 0.77639


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 185/3000 ] loss = 0.48619, acc = 0.83772


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 185/3000 ] loss = 1.09310, acc = 0.76296
[ Valid | 185/3000 ] loss = 1.09310, acc = 0.76296


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 186/3000 ] loss = 0.47695, acc = 0.83651


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 186/3000 ] loss = 0.80162, acc = 0.79308
[ Valid | 186/3000 ] loss = 0.80162, acc = 0.79308


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 187/3000 ] loss = 0.48075, acc = 0.84081


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 187/3000 ] loss = 1.23176, acc = 0.78323
[ Valid | 187/3000 ] loss = 1.23176, acc = 0.78323


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 188/3000 ] loss = 0.47326, acc = 0.84210


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 188/3000 ] loss = 1.12165, acc = 0.72746
[ Valid | 188/3000 ] loss = 1.12165, acc = 0.72746


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 189/3000 ] loss = 0.47667, acc = 0.84321


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 189/3000 ] loss = 0.98407, acc = 0.76104
[ Valid | 189/3000 ] loss = 0.98407, acc = 0.76104


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 190/3000 ] loss = 0.48141, acc = 0.83419


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 190/3000 ] loss = 0.85813, acc = 0.77889
[ Valid | 190/3000 ] loss = 0.85813, acc = 0.77889


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 191/3000 ] loss = 0.47172, acc = 0.84028


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 191/3000 ] loss = 1.11621, acc = 0.78478
[ Valid | 191/3000 ] loss = 1.11621, acc = 0.78478


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 192/3000 ] loss = 0.48505, acc = 0.84052


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 192/3000 ] loss = 1.07249, acc = 0.76346
[ Valid | 192/3000 ] loss = 1.07249, acc = 0.76346


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 193/3000 ] loss = 0.46030, acc = 0.84694


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 193/3000 ] loss = 0.86412, acc = 0.78641
[ Valid | 193/3000 ] loss = 0.86412, acc = 0.78641


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 194/3000 ] loss = 0.47365, acc = 0.84006


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 194/3000 ] loss = 1.41796, acc = 0.78816
[ Valid | 194/3000 ] loss = 1.41796, acc = 0.78816


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 195/3000 ] loss = 0.47208, acc = 0.84071


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 195/3000 ] loss = 1.34951, acc = 0.78190
[ Valid | 195/3000 ] loss = 1.34951, acc = 0.78190


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 196/3000 ] loss = 0.47471, acc = 0.83990


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 196/3000 ] loss = 1.42848, acc = 0.77804
[ Valid | 196/3000 ] loss = 1.42848, acc = 0.77804


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 197/3000 ] loss = 0.48635, acc = 0.83581


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 197/3000 ] loss = 1.23882, acc = 0.75372
[ Valid | 197/3000 ] loss = 1.23882, acc = 0.75372


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 198/3000 ] loss = 0.46415, acc = 0.84129


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 198/3000 ] loss = 2.21389, acc = 0.78623
[ Valid | 198/3000 ] loss = 2.21389, acc = 0.78623


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 199/3000 ] loss = 0.45996, acc = 0.85050


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 199/3000 ] loss = 1.11180, acc = 0.75536
[ Valid | 199/3000 ] loss = 1.11180, acc = 0.75536


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 200/3000 ] loss = 0.56055, acc = 0.83575


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 200/3000 ] loss = 2.06395, acc = 0.78535
[ Valid | 200/3000 ] loss = 2.06395, acc = 0.78535


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 201/3000 ] loss = 0.45766, acc = 0.84831


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 201/3000 ] loss = 1.11705, acc = 0.76936
[ Valid | 201/3000 ] loss = 1.11705, acc = 0.76936


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 202/3000 ] loss = 0.46494, acc = 0.84361


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 202/3000 ] loss = 0.85319, acc = 0.79037
[ Valid | 202/3000 ] loss = 0.85319, acc = 0.79037


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 203/3000 ] loss = 0.46055, acc = 0.84706


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 203/3000 ] loss = 0.94043, acc = 0.80367
[ Valid | 203/3000 ] loss = 0.94043, acc = 0.80367 -> best
Best model found at epoch 202, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 204/3000 ] loss = 0.46399, acc = 0.84528


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 204/3000 ] loss = 0.91681, acc = 0.78178
[ Valid | 204/3000 ] loss = 0.91681, acc = 0.78178


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 205/3000 ] loss = 0.44739, acc = 0.85018


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 205/3000 ] loss = 0.99304, acc = 0.79066
[ Valid | 205/3000 ] loss = 0.99304, acc = 0.79066


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 206/3000 ] loss = 0.46093, acc = 0.84538


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 206/3000 ] loss = 0.98567, acc = 0.78178
[ Valid | 206/3000 ] loss = 0.98567, acc = 0.78178


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 207/3000 ] loss = 0.43504, acc = 0.85187


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 207/3000 ] loss = 0.95116, acc = 0.78583
[ Valid | 207/3000 ] loss = 0.95116, acc = 0.78583


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 208/3000 ] loss = 0.45140, acc = 0.84911


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 208/3000 ] loss = 1.13186, acc = 0.74976
[ Valid | 208/3000 ] loss = 1.13186, acc = 0.74976


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 209/3000 ] loss = 0.47145, acc = 0.84274


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 209/3000 ] loss = 0.82760, acc = 0.78323
[ Valid | 209/3000 ] loss = 0.82760, acc = 0.78323


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 210/3000 ] loss = 0.46550, acc = 0.84558


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 210/3000 ] loss = 0.81197, acc = 0.79654
[ Valid | 210/3000 ] loss = 0.81197, acc = 0.79654


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 211/3000 ] loss = 0.45522, acc = 0.84738


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 211/3000 ] loss = 1.60638, acc = 0.79712
[ Valid | 211/3000 ] loss = 1.60638, acc = 0.79712


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 212/3000 ] loss = 0.43765, acc = 0.85341


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 212/3000 ] loss = 0.82379, acc = 0.79491
[ Valid | 212/3000 ] loss = 0.82379, acc = 0.79491


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 213/3000 ] loss = 0.44115, acc = 0.85157


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 213/3000 ] loss = 0.87482, acc = 0.78746
[ Valid | 213/3000 ] loss = 0.87482, acc = 0.78746


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 214/3000 ] loss = 0.45120, acc = 0.84438


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 214/3000 ] loss = 1.75483, acc = 0.78970
[ Valid | 214/3000 ] loss = 1.75483, acc = 0.78970


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 215/3000 ] loss = 0.43931, acc = 0.85224


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 215/3000 ] loss = 0.96091, acc = 0.77851
[ Valid | 215/3000 ] loss = 0.96091, acc = 0.77851


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 216/3000 ] loss = 0.44624, acc = 0.84788


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 216/3000 ] loss = 1.19167, acc = 0.79587
[ Valid | 216/3000 ] loss = 1.19167, acc = 0.79587


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 217/3000 ] loss = 0.44613, acc = 0.84937


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 217/3000 ] loss = 1.41744, acc = 0.74890
[ Valid | 217/3000 ] loss = 1.41744, acc = 0.74890


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 218/3000 ] loss = 0.43935, acc = 0.85187


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 218/3000 ] loss = 0.93744, acc = 0.79328
[ Valid | 218/3000 ] loss = 0.93744, acc = 0.79328


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 219/3000 ] loss = 0.42305, acc = 0.85641


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 219/3000 ] loss = 1.16445, acc = 0.78999
[ Valid | 219/3000 ] loss = 1.16445, acc = 0.78999


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 220/3000 ] loss = 0.42626, acc = 0.85764


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 220/3000 ] loss = 0.83496, acc = 0.79933
[ Valid | 220/3000 ] loss = 0.83496, acc = 0.79933


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 221/3000 ] loss = 0.43235, acc = 0.85966


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 221/3000 ] loss = 1.00808, acc = 0.79374
[ Valid | 221/3000 ] loss = 1.00808, acc = 0.79374


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 222/3000 ] loss = 0.42622, acc = 0.85690


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 222/3000 ] loss = 0.83071, acc = 0.79713
[ Valid | 222/3000 ] loss = 0.83071, acc = 0.79713


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 223/3000 ] loss = 0.43477, acc = 0.85490


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 223/3000 ] loss = 0.84082, acc = 0.79337
[ Valid | 223/3000 ] loss = 0.84082, acc = 0.79337


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 224/3000 ] loss = 0.42366, acc = 0.85704


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 224/3000 ] loss = 0.93381, acc = 0.78902
[ Valid | 224/3000 ] loss = 0.93381, acc = 0.78902


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 225/3000 ] loss = 0.40586, acc = 0.85859


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 225/3000 ] loss = 0.99715, acc = 0.78093
[ Valid | 225/3000 ] loss = 0.99715, acc = 0.78093


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 226/3000 ] loss = 0.40686, acc = 0.86323


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 226/3000 ] loss = 0.92572, acc = 0.77871
[ Valid | 226/3000 ] loss = 0.92572, acc = 0.77871


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 227/3000 ] loss = 0.41843, acc = 0.85970


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 227/3000 ] loss = 0.76220, acc = 0.80390
[ Valid | 227/3000 ] loss = 0.76220, acc = 0.80390 -> best
Best model found at epoch 226, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 228/3000 ] loss = 0.41613, acc = 0.86006


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 228/3000 ] loss = 0.78649, acc = 0.80737
[ Valid | 228/3000 ] loss = 0.78649, acc = 0.80737 -> best
Best model found at epoch 227, saving model


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 229/3000 ] loss = 0.41877, acc = 0.85837


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 229/3000 ] loss = 0.84379, acc = 0.79218
[ Valid | 229/3000 ] loss = 0.84379, acc = 0.79218


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 230/3000 ] loss = 0.40769, acc = 0.86508


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 230/3000 ] loss = 1.22069, acc = 0.77811
[ Valid | 230/3000 ] loss = 1.22069, acc = 0.77811


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 231/3000 ] loss = 0.41567, acc = 0.86115


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 231/3000 ] loss = 1.04823, acc = 0.75506
[ Valid | 231/3000 ] loss = 1.04823, acc = 0.75506


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 232/3000 ] loss = 0.42362, acc = 0.85778


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 232/3000 ] loss = 0.84241, acc = 0.79846
[ Valid | 232/3000 ] loss = 0.84241, acc = 0.79846


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 233/3000 ] loss = 0.41536, acc = 0.85986


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 233/3000 ] loss = 0.95590, acc = 0.76916
[ Valid | 233/3000 ] loss = 0.95590, acc = 0.76916


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 234/3000 ] loss = 0.41567, acc = 0.86278


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 234/3000 ] loss = 0.83517, acc = 0.79180
[ Valid | 234/3000 ] loss = 0.83517, acc = 0.79180


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 235/3000 ] loss = 0.41328, acc = 0.85708


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 235/3000 ] loss = 0.88831, acc = 0.78777
[ Valid | 235/3000 ] loss = 0.88831, acc = 0.78777


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 236/3000 ] loss = 0.40142, acc = 0.86230


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 236/3000 ] loss = 0.81878, acc = 0.79954
[ Valid | 236/3000 ] loss = 0.81878, acc = 0.79954


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 237/3000 ] loss = 0.41680, acc = 0.86069


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 237/3000 ] loss = 0.78289, acc = 0.79954
[ Valid | 237/3000 ] loss = 0.78289, acc = 0.79954


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 238/3000 ] loss = 0.40418, acc = 0.86548


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 238/3000 ] loss = 0.82028, acc = 0.79549
[ Valid | 238/3000 ] loss = 0.82028, acc = 0.79549


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 239/3000 ] loss = 0.40486, acc = 0.86683


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 239/3000 ] loss = 0.84765, acc = 0.79520
[ Valid | 239/3000 ] loss = 0.84765, acc = 0.79520


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 240/3000 ] loss = 0.41177, acc = 0.86222


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 240/3000 ] loss = 0.92456, acc = 0.80079
[ Valid | 240/3000 ] loss = 0.92456, acc = 0.80079


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 241/3000 ] loss = 0.40484, acc = 0.86700


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 241/3000 ] loss = 1.47102, acc = 0.78554
[ Valid | 241/3000 ] loss = 1.47102, acc = 0.78554


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 242/3000 ] loss = 0.41127, acc = 0.86502


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 242/3000 ] loss = 0.97269, acc = 0.79761
[ Valid | 242/3000 ] loss = 0.97269, acc = 0.79761


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 243/3000 ] loss = 0.41200, acc = 0.86373


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 243/3000 ] loss = 0.97966, acc = 0.78431
[ Valid | 243/3000 ] loss = 0.97966, acc = 0.78431


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 244/3000 ] loss = 0.41352, acc = 0.86105


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 244/3000 ] loss = 1.16101, acc = 0.77262
[ Valid | 244/3000 ] loss = 1.16101, acc = 0.77262


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 245/3000 ] loss = 0.40223, acc = 0.86524


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 245/3000 ] loss = 0.90503, acc = 0.78382
[ Valid | 245/3000 ] loss = 0.90503, acc = 0.78382


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 246/3000 ] loss = 0.37942, acc = 0.87409


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 246/3000 ] loss = 0.92397, acc = 0.78209
[ Valid | 246/3000 ] loss = 0.92397, acc = 0.78209


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 247/3000 ] loss = 0.39600, acc = 0.86831


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 247/3000 ] loss = 0.96519, acc = 0.77485
[ Valid | 247/3000 ] loss = 0.96519, acc = 0.77485


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 248/3000 ] loss = 0.39748, acc = 0.86478


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 248/3000 ] loss = 1.54050, acc = 0.77290
[ Valid | 248/3000 ] loss = 1.54050, acc = 0.77290


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 249/3000 ] loss = 0.38958, acc = 0.86931


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 249/3000 ] loss = 1.06760, acc = 0.76829
[ Valid | 249/3000 ] loss = 1.06760, acc = 0.76829


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 250/3000 ] loss = 0.38741, acc = 0.87038


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 250/3000 ] loss = 4.53225, acc = 0.76086
[ Valid | 250/3000 ] loss = 4.53225, acc = 0.76086


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 251/3000 ] loss = 0.39392, acc = 0.87036


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 251/3000 ] loss = 2.47118, acc = 0.79065
[ Valid | 251/3000 ] loss = 2.47118, acc = 0.79065


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 252/3000 ] loss = 0.39099, acc = 0.86895


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 252/3000 ] loss = 0.89825, acc = 0.78265
[ Valid | 252/3000 ] loss = 0.89825, acc = 0.78265


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 253/3000 ] loss = 0.38043, acc = 0.87341


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 253/3000 ] loss = 1.51401, acc = 0.79057
[ Valid | 253/3000 ] loss = 1.51401, acc = 0.79057


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 254/3000 ] loss = 0.38886, acc = 0.87254


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 254/3000 ] loss = 0.99928, acc = 0.79931
[ Valid | 254/3000 ] loss = 0.99928, acc = 0.79931


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 255/3000 ] loss = 0.38549, acc = 0.86847


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 255/3000 ] loss = 1.32756, acc = 0.77039
[ Valid | 255/3000 ] loss = 1.32756, acc = 0.77039


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 256/3000 ] loss = 0.37607, acc = 0.87385


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 256/3000 ] loss = 1.02463, acc = 0.79328
[ Valid | 256/3000 ] loss = 1.02463, acc = 0.79328


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 257/3000 ] loss = 0.37360, acc = 0.87389


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 257/3000 ] loss = 0.95256, acc = 0.77965
[ Valid | 257/3000 ] loss = 0.95256, acc = 0.77965


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 258/3000 ] loss = 0.37456, acc = 0.87802


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 258/3000 ] loss = 1.13470, acc = 0.77898
[ Valid | 258/3000 ] loss = 1.13470, acc = 0.77898


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 259/3000 ] loss = 0.36881, acc = 0.87923


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 259/3000 ] loss = 1.04406, acc = 0.79174
[ Valid | 259/3000 ] loss = 1.04406, acc = 0.79174


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 260/3000 ] loss = 0.39000, acc = 0.87129


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 260/3000 ] loss = 0.90571, acc = 0.78507
[ Valid | 260/3000 ] loss = 0.90571, acc = 0.78507


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 261/3000 ] loss = 0.40059, acc = 0.85994


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 261/3000 ] loss = 0.89969, acc = 0.79017
[ Valid | 261/3000 ] loss = 0.89969, acc = 0.79017


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 262/3000 ] loss = 0.39161, acc = 0.86927


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 262/3000 ] loss = 1.00419, acc = 0.78727
[ Valid | 262/3000 ] loss = 1.00419, acc = 0.78727


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 263/3000 ] loss = 0.37761, acc = 0.87214


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 263/3000 ] loss = 1.21624, acc = 0.77262
[ Valid | 263/3000 ] loss = 1.21624, acc = 0.77262


  0%|          | 0/155 [00:00<?, ?it/s]

[ Train | 264/3000 ] loss = 0.36576, acc = 0.87560


  0%|          | 0/54 [00:00<?, ?it/s]

[ Valid | 264/3000 ] loss = 1.59072, acc = 0.77677
[ Valid | 264/3000 ] loss = 1.59072, acc = 0.77677
No improvment 35 consecutive epochs, early stopping


In [14]:
# if logToWandb: 
#     import json
#     config = {
#         'batch_size': batch_size,
#         'lr_rate': lr_rate,
#         'weight_decay': weight_decay,
#         'n_epochs': n_epochs,
#         'early_stop': patience,
#         'seed': myseed
#     }
#     with open('config.json', 'w') as f:
#         json.dump(config, f)

# #     wandb.save('config.json')

In [15]:
test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One ./food11\test sample ./food11\test\0001.jpg


# Testing and generate prediction CSV

In [16]:
# model_best = Classifier().to(device)
model_best = ResNet(n_layer).to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

In [17]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv(_exp_name+"_submission.csv",index = False)

In [18]:
if logToWandb:
    wandb.finish()

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Acc/train,▁▂▃▃▄▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████
Acc/valid,▁▂▃▃▅▆▅▅▆▅▆▆▇▇▇▇▇▇▇▇█▇███████▇██████████
Loss/train,█▇▆▆▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁
Loss/valid,▆▅▆▅▃▂▃▃█▃▂▂▂▂▂▂▁▁▄▁▂▂▁▁▂▁▂▁▁▂▇▁▁▁▂▁▁▃▁▁
early_stop_count,▁▁▁▁▂▁▂▂▁▂▁▂▁▁▂▁▃▁▁▃▁▂▃▁▃▂▃▂▃▅▆▁▃▅▆▂▄▅▆█
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
Acc/train,0.87214
Acc/valid,0.77262
Loss/train,0.37761
Loss/valid,1.21624
early_stop_count,35.0
epoch,262.0


# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### Your  train_tfm in the report can be different from train_tfm in your training code.


In [19]:
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You need to add some transforms here.
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1, hue=0),
    transforms.RandomPerspective(distortion_scale=0.6, p=0.6),
    transforms.RandomAffine(degrees=(-30, 30), translate=(0, 0.4), scale=(0.8, 1.5)),
    
    transforms.ToTensor(),
])

# Q2. Residual Implementation
![](https://i.imgur.com/GYsq1Ap.png)
## Directly copy the following block and paste it on GradeScope after you finish the code


In [20]:
from torch import nn
class Residual_Network(nn.Module):
    def __init__(self):
        super(Residual_Network, self).__init__()
        
        self.cnn_layer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer2 = nn.Sequential(
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 2, 1),
            nn.BatchNorm2d(128),
        )

        self.cnn_layer4 = nn.Sequential(
            nn.Conv2d(128, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
        )
        self.cnn_layer5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 2, 1),
            nn.BatchNorm2d(256),
        )
        self.cnn_layer6 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(256* 32* 32, 256),
            nn.ReLU(),
            nn.Linear(256, 11)
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        # input (x): [batch_size, 3, 128, 128]
        # output: [batch_size, 11]

        # Extract features by convolutional layers.
        x1 = self.cnn_layer1(x)
        
        x1 = self.relu(x1)
        
        x2 = self.cnn_layer2(x1)
        
        x2 = x1 + x2
        
        x2 = self.relu(x2)
        
        x3 = self.cnn_layer3(x2)
        
        x3 = self.relu(x3)
        
        x4 = self.cnn_layer4(x3)
        
        x4 = x3 + x4
        
        x4 = self.relu(x4)
        
        x5 = self.cnn_layer5(x4)
        
        x5 = self.relu(x5)
        
        x6 = self.cnn_layer6(x5)
        
        x6 = x5 + x6
        
        x6 = self.relu(x6)
        
        # The extracted feature map must be flatten before going to fully-connected layers.
        xout = x6.flatten(1)

        # The features are transformed by fully-connected layers to obtain the final logits.
        xout = self.fc_layer(xout)
        return xout

## ensemble csv

In [25]:
import numpy as np

def load_csv(filename):
    file = np.loadtxt( open ( filename , encoding= 'utf8' ), dtype=np.str , delimiter= ',' )
    return [row for row in file if row[0]!='']

csv1 = np.array(load_csv('Restnet1_submission.csv'))
csv1

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  file = np.loadtxt( open ( filename , encoding= 'utf8' ), dtype=np.str , delimiter= ',' )


array([['Id', 'Category', 'Category', ..., 'Category', 'Category',
        'Category'],
       ['1', '0', '9', ..., '4', '2', '4'],
       ['2', '9', '9', ..., '9', '9', '9'],
       ...,
       ['3345', '0', '0', ..., '0', '0', '0'],
       ['3346', '1', '2', ..., '2', '2', '2'],
       ['3347', '4', '4', ..., '4', '4', '4']], dtype='<U8')

In [38]:
idx = 1
csv2 = [list(csv1[0])]
for row in csv1[1:]:
    vals,counts = np.unique(row, return_counts=True)
    index = np.argmax(counts)
#     print(vals[index])
    csv2.append([idx,vals[index]])
    idx +=1

In [39]:
csv2

[['Id',
  'Category',
  'Category',
  'Category',
  'Category',
  'Category',
  'Category',
  'Category'],
 [1, '4'],
 [2, '9'],
 [3, '0'],
 [4, '2'],
 [5, '3'],
 [6, '9'],
 [7, '4'],
 [8, '9'],
 [9, '5'],
 [10, '9'],
 [11, '9'],
 [12, '0'],
 [13, '3'],
 [14, '2'],
 [15, '4'],
 [16, '3'],
 [17, '9'],
 [18, '10'],
 [19, '3'],
 [20, '4'],
 [21, '9'],
 [22, '10'],
 [23, '10'],
 [24, '5'],
 [25, '2'],
 [26, '2'],
 [27, '10'],
 [28, '2'],
 [29, '3'],
 [30, '4'],
 [31, '2'],
 [32, '5'],
 [33, '9'],
 [34, '5'],
 [35, '5'],
 [36, '3'],
 [37, '10'],
 [38, '9'],
 [39, '10'],
 [40, '0'],
 [41, '5'],
 [42, '3'],
 [43, '5'],
 [44, '5'],
 [45, '0'],
 [46, '0'],
 [47, '6'],
 [48, '4'],
 [49, '5'],
 [50, '5'],
 [51, '2'],
 [52, '0'],
 [53, '3'],
 [54, '10'],
 [55, '0'],
 [56, '3'],
 [57, '2'],
 [58, '5'],
 [59, '3'],
 [60, '3'],
 [61, '5'],
 [62, '0'],
 [63, '4'],
 [64, '9'],
 [65, '8'],
 [66, '2'],
 [67, '7'],
 [68, '2'],
 [69, '3'],
 [70, '9'],
 [71, '5'],
 [72, '3'],
 [73, '2'],
 [74, '4'],
 [75, '

In [42]:
import csv
with open('result.csv', 'w', newline='') as file:
    writer = csv.writer(file, quoting=csv.QUOTE_ALL,delimiter=',')
    
    writer.writerows(csv2)