# HW3 Image Classification
## We strongly recommend that you run with Kaggle for this homework


# Get Data
Notes: if the links are dead, you can download the data directly from Kaggle and upload it to the workspace, or you can use the Kaggle API to directly download the data into colab.


In [1]:
# ! wget https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip

In [2]:
# ! unzip food11.zip

# Training

In [3]:
_exp_name = "sample"

In [4]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
from torchvision import models
from sklearn.model_selection import train_test_split

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [5]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

## **Transforms**
Torchvision provides lots of useful utilities for image preprocessing, data wrapping as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [6]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    # transforms.Resize((128, 128)),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    # transforms.Resize((128, 128)),
    # You may add some transforms here.
    # ToTensor() should be the last one of the transforms.
    transforms.RandomResizedCrop((224,224)),
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


## **Datasets**
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [7]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        try:
            # label = int(fname.split("/")[-1].split("_")[0])
            label = int(os.path.basename(fname).split("_")[0])
        except:
            label = -1 # test has no label
        return im,label



In [8]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

In [9]:
batch_size = 64
_dataset_dir = "./food11"
# Construct datasets.
files = [ os.path.join(_dataset_dir, "training", x) for x in os.listdir(os.path.join(_dataset_dir, "training")) ] + \
        [ os.path.join(_dataset_dir, "validation", x) for x in os.listdir(os.path.join(_dataset_dir, "validation")) ]
train_files, valid_files = train_test_split(files, test_size=0.1, random_state=myseed)
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm, files=train_files)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm, files=valid_files)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One ./food11\training sample ./food11\training\2_238.jpg
One ./food11\validation sample ./food11\training\2_986.jpg


In [10]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 200
patience = 12 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
# model = Classifier().to(device)
model = models.resnet18()
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 11)
model.to(device)
try:
    model.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
    print("Train from previous saved state.")
except:
    print("Train from scratch.")

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-5) 
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-2)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=8, eta_min=1e-6) # Customized learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.618, patience=patience // 3, verbose=True) # Customized learning rate scheduler
# scaler = torch.cuda.amp.GradScaler()

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):
        
        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        # print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        # with torch.cuda.amp.autocast():
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Compute the gradients for parameters.
        loss.backward()
        # scaler.scale(loss).backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()
        # scaler.step(optimizer)
        # scaler.update()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    print(f"Learning rate: {optimizer.param_groups[0]['lr']}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        # with torch.cuda.amp.autocast():
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
    
    scheduler.step(metrics=valid_acc) # Update the learning rate scheduler
    
    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch + 1}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping. So far the best validation acc = {best_acc:.5f}.")
            break

Train from scratch.


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 001/200 ] loss = 2.18384, acc = 0.22813
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 001/200 ] loss = 2.12793, acc = 0.25176
[ Valid | 001/200 ] loss = 2.12793, acc = 0.25176 -> best
Best model found at epoch 1, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 002/200 ] loss = 2.04293, acc = 0.27796
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 002/200 ] loss = 1.85176, acc = 0.36080
[ Valid | 002/200 ] loss = 1.85176, acc = 0.36080 -> best
Best model found at epoch 2, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 003/200 ] loss = 1.97353, acc = 0.31140
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 003/200 ] loss = 1.85770, acc = 0.36077
[ Valid | 003/200 ] loss = 1.85770, acc = 0.36077


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 004/200 ] loss = 1.90882, acc = 0.34265
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 004/200 ] loss = 1.73604, acc = 0.40732
[ Valid | 004/200 ] loss = 1.73604, acc = 0.40732 -> best
Best model found at epoch 4, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 005/200 ] loss = 1.83443, acc = 0.36002
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 005/200 ] loss = 1.61767, acc = 0.43911
[ Valid | 005/200 ] loss = 1.61767, acc = 0.43911 -> best
Best model found at epoch 5, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 006/200 ] loss = 1.81087, acc = 0.37438
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 006/200 ] loss = 1.61140, acc = 0.47396
[ Valid | 006/200 ] loss = 1.61140, acc = 0.47396 -> best
Best model found at epoch 6, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 007/200 ] loss = 1.74343, acc = 0.39870
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 007/200 ] loss = 1.71697, acc = 0.41869
[ Valid | 007/200 ] loss = 1.71697, acc = 0.41869


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 008/200 ] loss = 1.69336, acc = 0.42194
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 008/200 ] loss = 1.52549, acc = 0.48289
[ Valid | 008/200 ] loss = 1.52549, acc = 0.48289 -> best
Best model found at epoch 8, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 009/200 ] loss = 1.65114, acc = 0.43397
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 009/200 ] loss = 1.49703, acc = 0.50744
[ Valid | 009/200 ] loss = 1.49703, acc = 0.50744 -> best
Best model found at epoch 9, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 010/200 ] loss = 1.61621, acc = 0.45253
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 010/200 ] loss = 1.74320, acc = 0.46396
[ Valid | 010/200 ] loss = 1.74320, acc = 0.46396


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 011/200 ] loss = 1.58639, acc = 0.46591
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 011/200 ] loss = 1.49271, acc = 0.49884
[ Valid | 011/200 ] loss = 1.49271, acc = 0.49884


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 012/200 ] loss = 1.53762, acc = 0.47986
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 012/200 ] loss = 1.39454, acc = 0.52402
[ Valid | 012/200 ] loss = 1.39454, acc = 0.52402 -> best
Best model found at epoch 12, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 013/200 ] loss = 1.50310, acc = 0.49131
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 013/200 ] loss = 1.34146, acc = 0.55729
[ Valid | 013/200 ] loss = 1.34146, acc = 0.55729 -> best
Best model found at epoch 13, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 014/200 ] loss = 1.44387, acc = 0.51079
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 014/200 ] loss = 1.14232, acc = 0.62497
[ Valid | 014/200 ] loss = 1.14232, acc = 0.62497 -> best
Best model found at epoch 14, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 015/200 ] loss = 1.44118, acc = 0.51414
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 015/200 ] loss = 1.22544, acc = 0.58247
[ Valid | 015/200 ] loss = 1.22544, acc = 0.58247


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 016/200 ] loss = 1.38755, acc = 0.52882
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 016/200 ] loss = 1.19415, acc = 0.61033
[ Valid | 016/200 ] loss = 1.19415, acc = 0.61033


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 017/200 ] loss = 1.38292, acc = 0.53576
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 017/200 ] loss = 1.16957, acc = 0.60426
[ Valid | 017/200 ] loss = 1.16957, acc = 0.60426


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 018/200 ] loss = 1.32756, acc = 0.54997
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 018/200 ] loss = 1.15012, acc = 0.61967
[ Valid | 018/200 ] loss = 1.15012, acc = 0.61967


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 019/200 ] loss = 1.31279, acc = 0.55648
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 019/200 ] loss = 1.18740, acc = 0.60256
[ Valid | 019/200 ] loss = 1.18740, acc = 0.60256


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 020/200 ] loss = 1.27883, acc = 0.57194
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 020/200 ] loss = 1.11560, acc = 0.62744
[ Valid | 020/200 ] loss = 1.11560, acc = 0.62744 -> best
Best model found at epoch 20, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 021/200 ] loss = 1.26589, acc = 0.57939
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 021/200 ] loss = 1.04267, acc = 0.64943
[ Valid | 021/200 ] loss = 1.04267, acc = 0.64943 -> best
Best model found at epoch 21, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 022/200 ] loss = 1.24170, acc = 0.57765
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 022/200 ] loss = 0.99910, acc = 0.67193
[ Valid | 022/200 ] loss = 0.99910, acc = 0.67193 -> best
Best model found at epoch 22, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 023/200 ] loss = 1.22573, acc = 0.59000
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 023/200 ] loss = 1.08724, acc = 0.63926
[ Valid | 023/200 ] loss = 1.08724, acc = 0.63926


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 024/200 ] loss = 1.17560, acc = 0.60424
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 024/200 ] loss = 1.11369, acc = 0.63890
[ Valid | 024/200 ] loss = 1.11369, acc = 0.63890


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 025/200 ] loss = 1.17987, acc = 0.60253
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 025/200 ] loss = 1.16227, acc = 0.63762
[ Valid | 025/200 ] loss = 1.16227, acc = 0.63762


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 026/200 ] loss = 1.17126, acc = 0.61131
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 026/200 ] loss = 0.95637, acc = 0.67979
[ Valid | 026/200 ] loss = 0.95637, acc = 0.67979 -> best
Best model found at epoch 26, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 027/200 ] loss = 1.14582, acc = 0.61442
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 027/200 ] loss = 0.94771, acc = 0.68429
[ Valid | 027/200 ] loss = 0.94771, acc = 0.68429 -> best
Best model found at epoch 27, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 028/200 ] loss = 1.14561, acc = 0.61759
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 028/200 ] loss = 1.06945, acc = 0.63902
[ Valid | 028/200 ] loss = 1.06945, acc = 0.63902


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 029/200 ] loss = 1.11846, acc = 0.62110
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 029/200 ] loss = 0.99405, acc = 0.67470
[ Valid | 029/200 ] loss = 0.99405, acc = 0.67470


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 030/200 ] loss = 1.10614, acc = 0.63055
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 030/200 ] loss = 1.08350, acc = 0.65164
[ Valid | 030/200 ] loss = 1.08350, acc = 0.65164


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 031/200 ] loss = 1.07426, acc = 0.63964
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 031/200 ] loss = 1.02256, acc = 0.65821
[ Valid | 031/200 ] loss = 1.02256, acc = 0.65821


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 032/200 ] loss = 1.08346, acc = 0.63554
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 032/200 ] loss = 0.88178, acc = 0.70161
[ Valid | 032/200 ] loss = 0.88178, acc = 0.70161 -> best
Best model found at epoch 32, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 033/200 ] loss = 1.04665, acc = 0.64725
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 033/200 ] loss = 0.92978, acc = 0.67949
[ Valid | 033/200 ] loss = 0.92978, acc = 0.67949


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 034/200 ] loss = 1.05563, acc = 0.64551
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 034/200 ] loss = 1.00095, acc = 0.67693
[ Valid | 034/200 ] loss = 1.00095, acc = 0.67693


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 035/200 ] loss = 1.04013, acc = 0.64867
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 035/200 ] loss = 0.83045, acc = 0.73643
[ Valid | 035/200 ] loss = 0.83045, acc = 0.73643 -> best
Best model found at epoch 35, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 036/200 ] loss = 1.02698, acc = 0.65443
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 036/200 ] loss = 0.88592, acc = 0.69756
[ Valid | 036/200 ] loss = 0.88592, acc = 0.69756


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 037/200 ] loss = 1.00759, acc = 0.65879
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 037/200 ] loss = 0.80559, acc = 0.74155
[ Valid | 037/200 ] loss = 0.80559, acc = 0.74155 -> best
Best model found at epoch 37, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 038/200 ] loss = 1.00057, acc = 0.66130
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 038/200 ] loss = 0.99250, acc = 0.67524
[ Valid | 038/200 ] loss = 0.99250, acc = 0.67524


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 039/200 ] loss = 0.98891, acc = 0.67126
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 039/200 ] loss = 0.86007, acc = 0.72253
[ Valid | 039/200 ] loss = 0.86007, acc = 0.72253


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 040/200 ] loss = 0.96378, acc = 0.67932
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 040/200 ] loss = 0.79824, acc = 0.73911
[ Valid | 040/200 ] loss = 0.79824, acc = 0.73911


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 041/200 ] loss = 0.98485, acc = 0.67098
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 041/200 ] loss = 0.81549, acc = 0.72604
[ Valid | 041/200 ] loss = 0.81549, acc = 0.72604


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 042/200 ] loss = 0.97686, acc = 0.67565
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 042/200 ] loss = 0.83761, acc = 0.73369
[ Valid | 042/200 ] loss = 0.83761, acc = 0.73369


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 043/200 ] loss = 0.96256, acc = 0.68152
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 043/200 ] loss = 0.93307, acc = 0.70583
[ Valid | 043/200 ] loss = 0.93307, acc = 0.70583


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 044/200 ] loss = 0.95354, acc = 0.68518
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 044/200 ] loss = 0.76230, acc = 0.74815
[ Valid | 044/200 ] loss = 0.76230, acc = 0.74815 -> best
Best model found at epoch 44, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 045/200 ] loss = 0.95029, acc = 0.68261
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 045/200 ] loss = 0.81259, acc = 0.74815
[ Valid | 045/200 ] loss = 0.81259, acc = 0.74815


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 046/200 ] loss = 0.92918, acc = 0.68863
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 046/200 ] loss = 0.79420, acc = 0.73176
[ Valid | 046/200 ] loss = 0.79420, acc = 0.73176


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 047/200 ] loss = 0.92310, acc = 0.69596
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 047/200 ] loss = 0.76148, acc = 0.75738
[ Valid | 047/200 ] loss = 0.76148, acc = 0.75738 -> best
Best model found at epoch 47, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 048/200 ] loss = 0.92110, acc = 0.69673
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 048/200 ] loss = 0.91049, acc = 0.71720
[ Valid | 048/200 ] loss = 0.91049, acc = 0.71720


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 049/200 ] loss = 0.89974, acc = 0.70376
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 049/200 ] loss = 0.82283, acc = 0.73021
[ Valid | 049/200 ] loss = 0.82283, acc = 0.73021


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 050/200 ] loss = 0.91371, acc = 0.69773
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 050/200 ] loss = 0.76903, acc = 0.73762
[ Valid | 050/200 ] loss = 0.76903, acc = 0.73762


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 051/200 ] loss = 0.90338, acc = 0.70015
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 051/200 ] loss = 0.83149, acc = 0.73241
[ Valid | 051/200 ] loss = 0.83149, acc = 0.73241


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 052/200 ] loss = 0.89957, acc = 0.70232
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 052/200 ] loss = 0.86503, acc = 0.72667
[ Valid | 052/200 ] loss = 0.86503, acc = 0.72667


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 053/200 ] loss = 0.89002, acc = 0.70643
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 053/200 ] loss = 0.84523, acc = 0.72839
[ Valid | 053/200 ] loss = 0.84523, acc = 0.72839


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 054/200 ] loss = 0.87183, acc = 0.70959
Learning rate: 0.0003


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 054/200 ] loss = 0.78505, acc = 0.75324
Epoch 00054: reducing learning rate of group 0 to 1.8540e-04.
[ Valid | 054/200 ] loss = 0.78505, acc = 0.75324


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 055/200 ] loss = 0.80857, acc = 0.73409
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 055/200 ] loss = 0.64778, acc = 0.79818
[ Valid | 055/200 ] loss = 0.64778, acc = 0.79818 -> best
Best model found at epoch 55, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 056/200 ] loss = 0.80757, acc = 0.73166
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 056/200 ] loss = 0.67472, acc = 0.77810
[ Valid | 056/200 ] loss = 0.67472, acc = 0.77810


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 057/200 ] loss = 0.79424, acc = 0.73625
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 057/200 ] loss = 0.71663, acc = 0.76610
[ Valid | 057/200 ] loss = 0.71663, acc = 0.76610


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 058/200 ] loss = 0.77716, acc = 0.74294
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 058/200 ] loss = 0.70319, acc = 0.78182
[ Valid | 058/200 ] loss = 0.70319, acc = 0.78182


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 059/200 ] loss = 0.77903, acc = 0.73984
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 059/200 ] loss = 0.67251, acc = 0.78313
[ Valid | 059/200 ] loss = 0.67251, acc = 0.78313


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 060/200 ] loss = 0.76293, acc = 0.74956
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 060/200 ] loss = 0.64534, acc = 0.78780
[ Valid | 060/200 ] loss = 0.64534, acc = 0.78780


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 061/200 ] loss = 0.75655, acc = 0.74987
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 061/200 ] loss = 0.67407, acc = 0.77557
[ Valid | 061/200 ] loss = 0.67407, acc = 0.77557


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 062/200 ] loss = 0.74630, acc = 0.75054
Learning rate: 0.00018539999999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 062/200 ] loss = 0.69737, acc = 0.78875
Epoch 00062: reducing learning rate of group 0 to 1.1458e-04.
[ Valid | 062/200 ] loss = 0.69737, acc = 0.78875


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 063/200 ] loss = 0.72177, acc = 0.76048
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 063/200 ] loss = 0.61797, acc = 0.80232
[ Valid | 063/200 ] loss = 0.61797, acc = 0.80232 -> best
Best model found at epoch 63, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 064/200 ] loss = 0.71185, acc = 0.76584
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 064/200 ] loss = 0.62731, acc = 0.79182
[ Valid | 064/200 ] loss = 0.62731, acc = 0.79182


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 065/200 ] loss = 0.69827, acc = 0.77011
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 065/200 ] loss = 0.59284, acc = 0.81095
[ Valid | 065/200 ] loss = 0.59284, acc = 0.81095 -> best
Best model found at epoch 65, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 066/200 ] loss = 0.68728, acc = 0.77217
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 066/200 ] loss = 0.65676, acc = 0.78854
[ Valid | 066/200 ] loss = 0.65676, acc = 0.78854


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 067/200 ] loss = 0.68806, acc = 0.77093
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 067/200 ] loss = 0.62707, acc = 0.79458
[ Valid | 067/200 ] loss = 0.62707, acc = 0.79458


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 068/200 ] loss = 0.67401, acc = 0.77637
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 068/200 ] loss = 0.63479, acc = 0.78277
[ Valid | 068/200 ] loss = 0.63479, acc = 0.78277


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 069/200 ] loss = 0.67206, acc = 0.77746
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 069/200 ] loss = 0.64979, acc = 0.78991
[ Valid | 069/200 ] loss = 0.64979, acc = 0.78991


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 070/200 ] loss = 0.68425, acc = 0.77386
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 070/200 ] loss = 0.60790, acc = 0.81518
[ Valid | 070/200 ] loss = 0.60790, acc = 0.81518 -> best
Best model found at epoch 70, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 071/200 ] loss = 0.67119, acc = 0.77595
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 071/200 ] loss = 0.62433, acc = 0.80619
[ Valid | 071/200 ] loss = 0.62433, acc = 0.80619


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 072/200 ] loss = 0.65471, acc = 0.78313
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 072/200 ] loss = 0.60889, acc = 0.80116
[ Valid | 072/200 ] loss = 0.60889, acc = 0.80116


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 073/200 ] loss = 0.66755, acc = 0.77870
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 073/200 ] loss = 0.64096, acc = 0.79405
[ Valid | 073/200 ] loss = 0.64096, acc = 0.79405


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 074/200 ] loss = 0.63520, acc = 0.78548
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 074/200 ] loss = 0.65703, acc = 0.79509
[ Valid | 074/200 ] loss = 0.65703, acc = 0.79509


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 075/200 ] loss = 0.66069, acc = 0.77927
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 075/200 ] loss = 0.59375, acc = 0.82170
[ Valid | 075/200 ] loss = 0.59375, acc = 0.82170 -> best
Best model found at epoch 75, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 076/200 ] loss = 0.62148, acc = 0.79492
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 076/200 ] loss = 0.62915, acc = 0.80414
[ Valid | 076/200 ] loss = 0.62915, acc = 0.80414


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 077/200 ] loss = 0.64022, acc = 0.78472
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 077/200 ] loss = 0.64654, acc = 0.79702
[ Valid | 077/200 ] loss = 0.64654, acc = 0.79702


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 078/200 ] loss = 0.65314, acc = 0.78473
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 078/200 ] loss = 0.60618, acc = 0.80500
[ Valid | 078/200 ] loss = 0.60618, acc = 0.80500


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 079/200 ] loss = 0.65894, acc = 0.78072
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 079/200 ] loss = 0.60069, acc = 0.81339
[ Valid | 079/200 ] loss = 0.60069, acc = 0.81339


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 080/200 ] loss = 0.62812, acc = 0.79583
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 080/200 ] loss = 0.59697, acc = 0.81307
[ Valid | 080/200 ] loss = 0.59697, acc = 0.81307


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 081/200 ] loss = 0.64151, acc = 0.78664
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 081/200 ] loss = 0.62979, acc = 0.80723
[ Valid | 081/200 ] loss = 0.62979, acc = 0.80723


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 082/200 ] loss = 0.63483, acc = 0.79167
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 082/200 ] loss = 0.57113, acc = 0.83158
[ Valid | 082/200 ] loss = 0.57113, acc = 0.83158 -> best
Best model found at epoch 82, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 083/200 ] loss = 0.62330, acc = 0.78699
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 083/200 ] loss = 0.67570, acc = 0.79607
[ Valid | 083/200 ] loss = 0.67570, acc = 0.79607


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 084/200 ] loss = 0.62787, acc = 0.78956
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 084/200 ] loss = 0.59414, acc = 0.81711
[ Valid | 084/200 ] loss = 0.59414, acc = 0.81711


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 085/200 ] loss = 0.62297, acc = 0.79183
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 085/200 ] loss = 0.59996, acc = 0.80958
[ Valid | 085/200 ] loss = 0.59996, acc = 0.80958


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 086/200 ] loss = 0.61123, acc = 0.79852
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 086/200 ] loss = 0.58157, acc = 0.82753
[ Valid | 086/200 ] loss = 0.58157, acc = 0.82753


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 087/200 ] loss = 0.60853, acc = 0.79641
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 087/200 ] loss = 0.57981, acc = 0.81613
[ Valid | 087/200 ] loss = 0.57981, acc = 0.81613


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 088/200 ] loss = 0.61721, acc = 0.79592
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 088/200 ] loss = 0.61671, acc = 0.82190
[ Valid | 088/200 ] loss = 0.61671, acc = 0.82190


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 089/200 ] loss = 0.60273, acc = 0.80437
Learning rate: 0.00011457719999999998


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 089/200 ] loss = 0.65576, acc = 0.81137
Epoch 00089: reducing learning rate of group 0 to 7.0809e-05.
[ Valid | 089/200 ] loss = 0.65576, acc = 0.81137


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 090/200 ] loss = 0.57473, acc = 0.81030
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 090/200 ] loss = 0.55053, acc = 0.83890
[ Valid | 090/200 ] loss = 0.55053, acc = 0.83890 -> best
Best model found at epoch 90, saving model


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 091/200 ] loss = 0.58647, acc = 0.80185
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 091/200 ] loss = 0.55832, acc = 0.82923
[ Valid | 091/200 ] loss = 0.55832, acc = 0.82923


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 092/200 ] loss = 0.55894, acc = 0.81230
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 092/200 ] loss = 0.57041, acc = 0.82857
[ Valid | 092/200 ] loss = 0.57041, acc = 0.82857


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 093/200 ] loss = 0.56508, acc = 0.81816
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 093/200 ] loss = 0.55922, acc = 0.83304
[ Valid | 093/200 ] loss = 0.55922, acc = 0.83304


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 094/200 ] loss = 0.55875, acc = 0.81598
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 094/200 ] loss = 0.54752, acc = 0.82604
[ Valid | 094/200 ] loss = 0.54752, acc = 0.82604


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 095/200 ] loss = 0.55331, acc = 0.82090
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 095/200 ] loss = 0.60764, acc = 0.81988
[ Valid | 095/200 ] loss = 0.60764, acc = 0.81988


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 096/200 ] loss = 0.56567, acc = 0.81607
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 096/200 ] loss = 0.56017, acc = 0.82402
[ Valid | 096/200 ] loss = 0.56017, acc = 0.82402


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 097/200 ] loss = 0.56279, acc = 0.81425
Learning rate: 7.080870959999999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 097/200 ] loss = 0.58159, acc = 0.81179
Epoch 00097: reducing learning rate of group 0 to 4.3760e-05.
[ Valid | 097/200 ] loss = 0.58159, acc = 0.81179


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 098/200 ] loss = 0.53772, acc = 0.82301
Learning rate: 4.375978253279999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 098/200 ] loss = 0.55063, acc = 0.83062
[ Valid | 098/200 ] loss = 0.55063, acc = 0.83062


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 099/200 ] loss = 0.52287, acc = 0.82684
Learning rate: 4.375978253279999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 099/200 ] loss = 0.55855, acc = 0.82911
[ Valid | 099/200 ] loss = 0.55855, acc = 0.82911


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 100/200 ] loss = 0.53601, acc = 0.82652
Learning rate: 4.375978253279999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 100/200 ] loss = 0.55534, acc = 0.83092
[ Valid | 100/200 ] loss = 0.55534, acc = 0.83092


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 101/200 ] loss = 0.52341, acc = 0.82701
Learning rate: 4.375978253279999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 101/200 ] loss = 0.54499, acc = 0.83708
[ Valid | 101/200 ] loss = 0.54499, acc = 0.83708


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 102/200 ] loss = 0.52642, acc = 0.82443
Learning rate: 4.375978253279999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 102/200 ] loss = 0.55731, acc = 0.83473
[ Valid | 102/200 ] loss = 0.55731, acc = 0.83473


  0%|          | 0/187 [00:00<?, ?it/s]

[ Train | 103/200 ] loss = 0.53418, acc = 0.82231
Learning rate: 4.375978253279999e-05


  0%|          | 0/21 [00:00<?, ?it/s]

[ Valid | 103/200 ] loss = 0.56591, acc = 0.83051
[ Valid | 103/200 ] loss = 0.56591, acc = 0.83051
No improvment 12 consecutive epochs, early stopping. So far the best validation acc = 0.83890.


In [10]:
def make_predictions(model, data_loader, device="cuda" if torch.cuda.is_available() else "cpu"):
     
    model.eval()
     
    predictions, ground_truths = None, None
     
    for batch in tqdm(data_loader):
        
        imgs, labels = batch
        with torch.no_grad():
            logits = model(imgs.to(device))
        
        if predictions is None:
            predictions = logits.detach().cpu().numpy()
        else:
            predictions = np.vstack((predictions, logits.detach().cpu().numpy()))
            
        if ground_truths is None:
            ground_truths = labels.detach().cpu().numpy()
        else:
            ground_truths = np.hstack((ground_truths, labels.detach().cpu().numpy()))
    
    return predictions, ground_truths

In [11]:
# Test time augmentation for validation set
n_tta = 15 # Number of augmentations for each image
tta_ratio = 0.9 # (1 - tta_ratio) * raw prediction logits + tta_ratio * average tta prediction logits

raw_valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm, files=valid_files)
tta_valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=train_tfm, files=valid_files)
raw_valid_loader = DataLoader(raw_valid_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
tta_valid_loader = DataLoader(tta_valid_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = models.resnet18()
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 11)
model.to(device)
model.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model.eval()

raw_predictions, ground_truths = make_predictions(model, raw_valid_loader)

tta_predictions = None

for _ in range(n_tta):
    
    tmp_tta_predictions, _ = make_predictions(model, tta_valid_loader)
        
    if tta_predictions is None:
        tta_predictions = tmp_tta_predictions
    else:
        tta_predictions += tmp_tta_predictions

final_predictions = (1-tta_ratio) * raw_predictions + tta_ratio * tta_predictions / n_tta

tta_valid_acc = (final_predictions.argmax(axis=-1) == ground_truths).mean()

print(f"TTA Valid acc = {tta_valid_acc:.5f}")

One ./food11\validation sample ./food11\training\2_986.jpg
One ./food11\validation sample ./food11\training\2_986.jpg


  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

TTA Valid acc = 0.88346


In [12]:
# test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
# test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Testing and generate prediction CSV

In [13]:
# model_best = Classifier().to(device)
# model_best = model
# model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
# model_best.eval()
# prediction = []
# with torch.no_grad():
#     for data,_ in test_loader:
#         test_pred = model_best(data.to(device))
#         test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
#         prediction += test_label.squeeze().tolist()

In [14]:
# Test time augmentation for test set
n_tta = 15 # Number of augmentations for each image
tta_ratio = 0.9 # (1 - tta_ratio) * raw prediction logits + tta_ratio * average tta prediction logits

test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
tta_test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=train_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
tta_test_loader = DataLoader(tta_test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = models.resnet18()
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 11)
model.to(device)
model.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model.eval()

raw_predictions, _ = make_predictions(model, test_loader)

tta_predictions = None

for _ in range(n_tta):
    
    tmp_tta_predictions, _ = make_predictions(model, tta_test_loader)
        
    if tta_predictions is None:
        tta_predictions = tmp_tta_predictions
    else:
        tta_predictions += tmp_tta_predictions

prediction = (1-tta_ratio) * raw_predictions + tta_ratio * tta_predictions / n_tta
prediction = np.argmax(prediction, axis=-1).squeeze().tolist()

One ./food11\test sample ./food11\test\0001.jpg
One ./food11\test sample ./food11\test\0001.jpg


  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

  0%|          | 0/53 [00:00<?, ?it/s]

In [15]:
#create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
df["Category"] = prediction
df.to_csv("submission.csv",index = False)

# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### Your  train_tfm in the report can be different from train_tfm in your training code.


In [14]:
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You need to add some transforms here.
    transforms.ToTensor(),
])

# Q2. Residual Implementation
![](https://i.imgur.com/GYsq1Ap.png)
## Directly copy the following block and paste it on GradeScope after you finish the code


In [15]:
from torch import nn
class Residual_Network(nn.Module):
    def __init__(self):
        super(Residual_Network, self).__init__()
        
        self.cnn_layer1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer2 = nn.Sequential(
            nn.Conv2d(64, 64, 3, 1, 1),
            nn.BatchNorm2d(64),
        )

        self.cnn_layer3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, 2, 1),
            nn.BatchNorm2d(128),
        )

        self.cnn_layer4 = nn.Sequential(
            nn.Conv2d(128, 128, 3, 1, 1),
            nn.BatchNorm2d(128),
        )
        self.cnn_layer5 = nn.Sequential(
            nn.Conv2d(128, 256, 3, 2, 1),
            nn.BatchNorm2d(256),
        )
        self.cnn_layer6 = nn.Sequential(
            nn.Conv2d(256, 256, 3, 1, 1),
            nn.BatchNorm2d(256),
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(256* 32* 32, 256),
            nn.ReLU(),
            nn.Linear(256, 11)
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        # input (x): [batch_size, 3, 128, 128]
        # output: [batch_size, 11]

        # Extract features by convolutional layers.
        x1 = self.cnn_layer1(x)
        
        x1 = self.relu(x1)
        
        x2 = self.cnn_layer2(x1) + x1
        
        x2 = self.relu(x2)
        
        x3 = self.cnn_layer3(x2)
        
        x3 = self.relu(x3)
        
        x4 = self.cnn_layer4(x3) + x3
        
        x4 = self.relu(x4)
        
        x5 = self.cnn_layer5(x4)
        
        x5 = self.relu(x5)
        
        x6 = self.cnn_layer6(x5) + x5
        
        x6 = self.relu(x6)
        
        # The extracted feature map must be flatten before going to fully-connected layers.
        xout = x6.flatten(1)

        # The features are transformed by fully-connected layers to obtain the final logits.
        xout = self.fc_layer(xout)
        return xout