# B09705039_劉惟恩_HW05

## Q1

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
from PIL import Image
import os
import glob
import random

# count amount of all labels and folders
labels_out = ['blazer', 'cardigan', 'coat', 'jacket']
folder = ['train', 'valid', 'test']
all_count = dict()

for j in range(3):
    temp_count = dict()
    for i in range(4):
        basepath = os.path.join("/tmp/dataset-ntu-photo/photos", folder[j], labels_out[i], "*.jpg")
        cand_fn = glob.glob(basepath)
        count = 0
        for afn in cand_fn:
            count += 1
        temp_count[labels_out[i]] = count
    all_count[folder[j]] = temp_count

In [2]:
# sum of all folders
for j in range(3):
    print(folder[j] + " total count:", sum(all_count[folder[j]].values()))

train total count: 1041
valid total count: 105
test total count: 146


In [3]:
# proportion of each label
for j in range(3):
    print(folder[j] + ":")
    total = sum(all_count[folder[j]].values())
    for i in range(4):
        print(labels_out[i] + " count:", all_count[folder[j]][labels_out[i]])
        print(labels_out[i] + " porportion:", round(all_count[folder[j]][labels_out[i]] * 100 / total, 2), "%")
    print()

train:
blazer count: 97
blazer porportion: 9.32 %
cardigan count: 237
cardigan porportion: 22.77 %
coat count: 296
coat porportion: 28.43 %
jacket count: 411
jacket porportion: 39.48 %

valid:
blazer count: 7
blazer porportion: 6.67 %
cardigan count: 36
cardigan porportion: 34.29 %
coat count: 27
coat porportion: 25.71 %
jacket count: 35
jacket porportion: 33.33 %

test:
blazer count: 9
blazer porportion: 6.16 %
cardigan count: 42
cardigan porportion: 28.77 %
coat count: 43
coat porportion: 29.45 %
jacket count: 52
jacket porportion: 35.62 %



Since blazer has the least data we expect that the accuracy of blazer will be lower than all the other labels. Cardigan and coat will have a similar accuracy while coat may be slightly higher due to it has larger training data. Finally, jacket is expected to have the highest accuracy among all the other labels since it has the largest training data.

## Q2

Import and setup

In [4]:
_exp_name = "q2"

In [5]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
import torchvision

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [6]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

Transform picture

In [7]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

Dataset

In [8]:
class Dataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(Dataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path, labels_out[i], j) for i in range(4) for j in os.listdir(os.path.join(path, labels_out[i])) if not j.startswith(".")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        label = -2
        lname = fname.split("/")[-2]
        if lname == labels_out[0]:
            label = 0
        elif lname == labels_out[1]:
            label = 1
        elif lname == labels_out[2]:
            label = 2
        elif lname == labels_out[3]:
            label = 3
        else:
            label = -1
        return im,label

Training model

In [9]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        model = torchvision.models.resnet50(pretrained=True)
        model.fc = nn.Linear(2048, 4)
        self.model = model

    def forward(self, x):
        out = self.model(x)
        out = out.view(out.size()[0], -1)
        return out

Execute

In [10]:
batch_size = 32
_dataset_dir = "/tmp/dataset-ntu-photo/photos"
# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = Dataset(os.path.join(_dataset_dir,"train"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = Dataset(os.path.join(_dataset_dir,"valid"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One /tmp/dataset-ntu-photo/photos/train sample /tmp/dataset-ntu-photo/photos/train/blazer/20170324101342210_500.jpg
One /tmp/dataset-ntu-photo/photos/valid sample /tmp/dataset-ntu-photo/photos/valid/blazer/20170324142214231_500.jpg


In [11]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# The number of training epochs and patience.
n_epochs = 200
patience = 20 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_loss = 99999999

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
#     train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
#         acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
#         train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
#     train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
#     print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
#     valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
#         acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
#         valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
#     valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
#     print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_loss < best_loss:
        with open(f"./{_exp_name}_log.txt","a"):
#             print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
#             print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}")


    # save models
    if valid_loss < best_loss:
        print(f"Best model found at epoch {epoch + 1}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_loss = valid_loss
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

cuda


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 001/200 ] loss = 1.08004


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 001/200 ] loss = 1.74434 -> best
Best model found at epoch 1, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 002/200 ] loss = 0.80230


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 002/200 ] loss = 0.96431 -> best
Best model found at epoch 2, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 003/200 ] loss = 0.65760


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 003/200 ] loss = 1.07475


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 004/200 ] loss = 0.57186


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 004/200 ] loss = 0.85874 -> best
Best model found at epoch 4, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 005/200 ] loss = 0.43903


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 005/200 ] loss = 0.87322


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 006/200 ] loss = 0.46977


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 006/200 ] loss = 0.87439


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 007/200 ] loss = 0.36668


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 007/200 ] loss = 0.79956 -> best
Best model found at epoch 7, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 008/200 ] loss = 0.32146


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 008/200 ] loss = 1.00684


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 009/200 ] loss = 0.31504


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 009/200 ] loss = 1.13533


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 010/200 ] loss = 0.26891


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 010/200 ] loss = 0.68470 -> best
Best model found at epoch 10, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 011/200 ] loss = 0.25442


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 011/200 ] loss = 1.28573


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 012/200 ] loss = 0.22636


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 012/200 ] loss = 0.90145


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 013/200 ] loss = 0.21800


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 013/200 ] loss = 1.11874


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 014/200 ] loss = 0.24703


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 014/200 ] loss = 1.31748


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 015/200 ] loss = 0.19558


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 015/200 ] loss = 0.68723


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 016/200 ] loss = 0.19239


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 016/200 ] loss = 0.88306


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 017/200 ] loss = 0.18968


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 017/200 ] loss = 0.92253


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 018/200 ] loss = 0.17069


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 018/200 ] loss = 0.67037 -> best
Best model found at epoch 18, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 019/200 ] loss = 0.17471


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 019/200 ] loss = 0.91266


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 020/200 ] loss = 0.14773


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 020/200 ] loss = 1.01319


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 021/200 ] loss = 0.16943


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 021/200 ] loss = 1.29900


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 022/200 ] loss = 0.16483


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 022/200 ] loss = 0.86668


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 023/200 ] loss = 0.12906


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 023/200 ] loss = 0.74885


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 024/200 ] loss = 0.14640


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 024/200 ] loss = 0.65150 -> best
Best model found at epoch 24, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 025/200 ] loss = 0.13969


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 025/200 ] loss = 0.80111


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 026/200 ] loss = 0.12842


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 026/200 ] loss = 0.95515


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 027/200 ] loss = 0.13302


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 027/200 ] loss = 1.12922


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 028/200 ] loss = 0.10484


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 028/200 ] loss = 1.09469


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 029/200 ] loss = 0.12099


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 029/200 ] loss = 1.00206


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 030/200 ] loss = 0.11992


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 030/200 ] loss = 0.83665


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 031/200 ] loss = 0.08999


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 031/200 ] loss = 1.16421


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 032/200 ] loss = 0.06902


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 032/200 ] loss = 1.00127


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 033/200 ] loss = 0.06055


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 033/200 ] loss = 0.89436


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 034/200 ] loss = 0.08694


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 034/200 ] loss = 1.11995


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 035/200 ] loss = 0.10464


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 035/200 ] loss = 0.66782


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 036/200 ] loss = 0.12674


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 036/200 ] loss = 0.99951


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 037/200 ] loss = 0.13688


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 037/200 ] loss = 1.37497


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 038/200 ] loss = 0.11341


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 038/200 ] loss = 0.77318


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 039/200 ] loss = 0.10586


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 039/200 ] loss = 0.84657


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 040/200 ] loss = 0.08234


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 040/200 ] loss = 0.80303


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 041/200 ] loss = 0.17789


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 041/200 ] loss = 0.94057


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 042/200 ] loss = 0.14139


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 042/200 ] loss = 0.71460


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 043/200 ] loss = 0.07590


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 043/200 ] loss = 0.57539 -> best
Best model found at epoch 43, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 044/200 ] loss = 0.05167


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 044/200 ] loss = 0.70178


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 045/200 ] loss = 0.09043


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 045/200 ] loss = 1.27288


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 046/200 ] loss = 0.10666


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 046/200 ] loss = 1.08822


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 047/200 ] loss = 0.05571


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 047/200 ] loss = 1.12708


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 048/200 ] loss = 0.08644


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 048/200 ] loss = 1.01969


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 049/200 ] loss = 0.10073


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 049/200 ] loss = 1.12883


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 050/200 ] loss = 0.08819


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 050/200 ] loss = 0.94568


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 051/200 ] loss = 0.08337


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 051/200 ] loss = 1.44284


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 052/200 ] loss = 0.09975


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 052/200 ] loss = 1.20131


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 053/200 ] loss = 0.07271


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 053/200 ] loss = 0.61900


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 054/200 ] loss = 0.09030


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 054/200 ] loss = 0.82712


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 055/200 ] loss = 0.06938


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 055/200 ] loss = 1.02767


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 056/200 ] loss = 0.09564


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 056/200 ] loss = 1.40927


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 057/200 ] loss = 0.05105


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 057/200 ] loss = 1.42932


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 058/200 ] loss = 0.09090


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 058/200 ] loss = 1.12344


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 059/200 ] loss = 0.13603


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 059/200 ] loss = 0.85250


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 060/200 ] loss = 0.11068


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 060/200 ] loss = 0.51695 -> best
Best model found at epoch 60, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 061/200 ] loss = 0.07798


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 061/200 ] loss = 0.72624


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 062/200 ] loss = 0.03430


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 062/200 ] loss = 0.89439


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 063/200 ] loss = 0.02816


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 063/200 ] loss = 1.26942


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 064/200 ] loss = 0.04194


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 064/200 ] loss = 0.94898


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 065/200 ] loss = 0.03105


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 065/200 ] loss = 0.83475


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 066/200 ] loss = 0.05256


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 066/200 ] loss = 1.12169


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 067/200 ] loss = 0.06031


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 067/200 ] loss = 1.30317


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 068/200 ] loss = 0.03777


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 068/200 ] loss = 0.76398


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 069/200 ] loss = 0.07703


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 069/200 ] loss = 0.93488


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 070/200 ] loss = 0.13277


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 070/200 ] loss = 1.40926


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 071/200 ] loss = 0.10003


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 071/200 ] loss = 1.25791


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 072/200 ] loss = 0.14277


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 072/200 ] loss = 0.97155


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 073/200 ] loss = 0.08916


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 073/200 ] loss = 0.89639


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 074/200 ] loss = 0.05843


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 074/200 ] loss = 0.81604


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 075/200 ] loss = 0.03365


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 075/200 ] loss = 0.71875


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 076/200 ] loss = 0.02625


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 076/200 ] loss = 0.77298


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 077/200 ] loss = 0.01778


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 077/200 ] loss = 1.13219


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 078/200 ] loss = 0.02347


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 078/200 ] loss = 1.16789


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 079/200 ] loss = 0.01565


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 079/200 ] loss = 1.23663


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 080/200 ] loss = 0.03376


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 080/200 ] loss = 1.48351


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 081/200 ] loss = 0.03351


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 081/200 ] loss = 1.52200
No improvment 20 consecutive epochs, early stopping


Predict test

In [12]:
# load test data
test_set = Dataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One /tmp/dataset-ntu-photo/photos/test sample /tmp/dataset-ntu-photo/photos/test/blazer/20170324131109229_500.jpg


In [13]:
# predict test data
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

In [14]:
print(prediction)

[0, 2, 0, 0, 0, 3, 0, 0, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 1, 1, 1, 0, 3, 1, 1, 3, 2, 1, 1, 1, 1, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 1, 3, 1, 3, 3, 3, 1, 3, 3, 1, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 0, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 1]


In [15]:
# get true label of test data
true_labels = []
for batch in tqdm(test_loader):

    # A batch consists of image data and corresponding labels.
    imgs, labels = batch
    true_labels += labels.squeeze().tolist()

  0%|          | 0/5 [00:00<?, ?it/s]

In [16]:
print(true_labels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]


In [17]:
# Calculate test scores
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from mlxtend.evaluate import accuracy_score

print(classification_report(true_labels, prediction, target_names=labels_out))
matrix = confusion_matrix(true_labels, prediction)
print("Accuracy:", accuracy_score(true_labels, prediction))
print("Confusion Matrix:\n", matrix)
print("Per-Class Accuracy:")
for i in range(4):
    print(labels_out[i], "accuracy:", accuracy_score(true_labels, prediction, method='binary', pos_label=i))

              precision    recall  f1-score   support

      blazer       0.70      0.78      0.74         9
    cardigan       0.80      0.79      0.80        42
        coat       0.87      0.93      0.90        43
      jacket       0.88      0.83      0.85        52

    accuracy                           0.84       146
   macro avg       0.81      0.83      0.82       146
weighted avg       0.84      0.84      0.84       146

Accuracy: 0.8424657534246576
Confusion Matrix:
 [[ 7  0  1  1]
 [ 2 33  3  4]
 [ 0  2 40  1]
 [ 1  6  2 43]]
Per-Class Accuracy:
blazer accuracy: 0.9657534246575342
cardigan accuracy: 0.8835616438356164
coat accuracy: 0.9383561643835616
jacket accuracy: 0.8972602739726028


After testing SGD and Adam, we prefer to use Adam since it has much better accuracy rate. We guess Adam performs better than SGD because the improvement of SGD slows down in the middle and is much easier to occur early stopping since we stop the model within 20 epochs. Also by doing some reasearch we recommend lr=0.0003, weight_decay=1e-5 to be good hyperparameters.

The accuracy of blazer is the highest which we didn't expect since it has the smallest training dataset. This might happen because the testing data is to small and these data tend to fit the trained model very well. The accuracy of coat is higher than cardigan as we expected since it has larger training data. However, we didn't expect jacket to perform better than coat. This may happen due to jacket occur overfitting because of the large training dataset or because it has a larger testing set and there exist some outliers which is hard to predict.

## Q3

Import and setup

In [43]:
_exp_name = "q3"

In [41]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
import torchvision

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [42]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

Transform picture

In [44]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

Dataset

In [45]:
class Dataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(Dataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path, labels_out[i], j) for i in range(4) for j in os.listdir(os.path.join(path, labels_out[i])) if not j.startswith(".")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        label = -2
        lname = fname.split("/")[-2]
        if lname == labels_out[0]:
            label = 0
        elif lname == labels_out[1]:
            label = 1
        elif lname == labels_out[2]:
            label = 2
        elif lname == labels_out[3]:
            label = 3
        else:
            label = -1
        return im,label

Training model

In [51]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.model = torchvision.models.resnet50(pretrained=True)
        # Freeze the parameters in the resnet50 pretrained model
        for param in self.model.parameters():
            param.requires_grad = False
        # modify the last layer
        self.model.fc = nn.Linear(2048, 4)

    def forward(self, x):
        out = self.model(x)
        out = out.view(out.size()[0], -1)
        return out

Execute

In [47]:
batch_size = 32
_dataset_dir = "/tmp/dataset-ntu-photo/photos"
# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = Dataset(os.path.join(_dataset_dir,"train"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = Dataset(os.path.join(_dataset_dir,"valid"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One /tmp/dataset-ntu-photo/photos/train sample /tmp/dataset-ntu-photo/photos/train/blazer/20170324101342210_500.jpg
One /tmp/dataset-ntu-photo/photos/valid sample /tmp/dataset-ntu-photo/photos/valid/blazer/20170324142214231_500.jpg


In [52]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# The number of training epochs and patience.
n_epochs = 200
patience = 20 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_loss = 99999999

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
#     train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
#         acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
#         train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
#     train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
#     print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
#     valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
#         acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
#         valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
#     valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
#     print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_loss < best_loss:
        with open(f"./{_exp_name}_log.txt","a"):
#             print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
#             print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}")


    # save models
    if valid_loss < best_loss:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_loss = valid_loss
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

cuda


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 001/200 ] loss = 1.30227


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 001/200 ] loss = 1.22694 -> best
Best model found at epoch 0, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 002/200 ] loss = 1.20800


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 002/200 ] loss = 1.16017 -> best
Best model found at epoch 1, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 003/200 ] loss = 1.18000


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 003/200 ] loss = 1.17781


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 004/200 ] loss = 1.12913


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 004/200 ] loss = 1.10178 -> best
Best model found at epoch 3, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 005/200 ] loss = 1.09773


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 005/200 ] loss = 1.14488


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 006/200 ] loss = 1.06309


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 006/200 ] loss = 1.04770 -> best
Best model found at epoch 5, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 007/200 ] loss = 1.07041


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 007/200 ] loss = 1.10080


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 008/200 ] loss = 1.01840


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 008/200 ] loss = 0.97383 -> best
Best model found at epoch 7, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 009/200 ] loss = 1.02539


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 009/200 ] loss = 1.02476


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 010/200 ] loss = 0.99877


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 010/200 ] loss = 1.07133


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 011/200 ] loss = 1.00725


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 011/200 ] loss = 1.03860


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 012/200 ] loss = 1.00250


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 012/200 ] loss = 1.02927


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 013/200 ] loss = 0.97478


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 013/200 ] loss = 1.04474


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 014/200 ] loss = 0.99462


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 014/200 ] loss = 1.08866


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 015/200 ] loss = 0.95401


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 015/200 ] loss = 0.97273 -> best
Best model found at epoch 14, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 016/200 ] loss = 0.95631


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 016/200 ] loss = 0.93567 -> best
Best model found at epoch 15, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 017/200 ] loss = 0.96047


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 017/200 ] loss = 0.99600


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 018/200 ] loss = 0.93843


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 018/200 ] loss = 1.06209


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 019/200 ] loss = 0.94829


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 019/200 ] loss = 1.00214


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 020/200 ] loss = 0.96939


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 020/200 ] loss = 0.97753


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 021/200 ] loss = 0.91639


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 021/200 ] loss = 0.96163


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 022/200 ] loss = 0.93376


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 022/200 ] loss = 0.93636


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 023/200 ] loss = 0.91911


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 023/200 ] loss = 0.97939


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 024/200 ] loss = 0.90918


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 024/200 ] loss = 0.97010


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 025/200 ] loss = 0.90079


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 025/200 ] loss = 1.01727


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 026/200 ] loss = 0.89904


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 026/200 ] loss = 1.13976


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 027/200 ] loss = 0.91029


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 027/200 ] loss = 1.05589


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 028/200 ] loss = 0.89057


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 028/200 ] loss = 0.92555 -> best
Best model found at epoch 27, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 029/200 ] loss = 0.87674


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 029/200 ] loss = 1.05231


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 030/200 ] loss = 0.87604


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 030/200 ] loss = 0.98678


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 031/200 ] loss = 0.88573


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 031/200 ] loss = 1.00901


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 032/200 ] loss = 0.87918


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 032/200 ] loss = 0.93266


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 033/200 ] loss = 0.91643


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 033/200 ] loss = 0.95711


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 034/200 ] loss = 0.89326


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 034/200 ] loss = 0.97034


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 035/200 ] loss = 0.88659


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 035/200 ] loss = 1.04993


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 036/200 ] loss = 0.90199


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 036/200 ] loss = 0.97908


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 037/200 ] loss = 0.89406


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 037/200 ] loss = 0.94313


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 038/200 ] loss = 0.88067


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 038/200 ] loss = 1.15787


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 039/200 ] loss = 0.85936


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 039/200 ] loss = 0.91674 -> best
Best model found at epoch 38, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 040/200 ] loss = 0.88980


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 040/200 ] loss = 0.95402


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 041/200 ] loss = 0.88586


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 041/200 ] loss = 0.89917 -> best
Best model found at epoch 40, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 042/200 ] loss = 0.86894


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 042/200 ] loss = 0.96395


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 043/200 ] loss = 0.87973


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 043/200 ] loss = 0.95321


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 044/200 ] loss = 0.87455


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 044/200 ] loss = 0.99181


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 045/200 ] loss = 0.83692


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 045/200 ] loss = 0.95594


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 046/200 ] loss = 0.87819


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 046/200 ] loss = 0.95311


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 047/200 ] loss = 0.86320


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 047/200 ] loss = 0.99459


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 048/200 ] loss = 0.86804


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 048/200 ] loss = 1.04032


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 049/200 ] loss = 0.82496


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 049/200 ] loss = 0.93741


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 050/200 ] loss = 0.88560


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 050/200 ] loss = 0.96300


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 051/200 ] loss = 0.83254


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 051/200 ] loss = 0.97191


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 052/200 ] loss = 0.87854


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 052/200 ] loss = 0.92238


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 053/200 ] loss = 0.85886


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 053/200 ] loss = 1.01297


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 054/200 ] loss = 0.84067


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 054/200 ] loss = 0.91614


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 055/200 ] loss = 0.84943


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 055/200 ] loss = 0.91688


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 056/200 ] loss = 0.83274


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 056/200 ] loss = 0.95334


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 057/200 ] loss = 0.84142


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 057/200 ] loss = 0.98387


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 058/200 ] loss = 0.81132


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 058/200 ] loss = 0.96561


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 059/200 ] loss = 0.86290


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 059/200 ] loss = 1.06333


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 060/200 ] loss = 0.85956


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 060/200 ] loss = 0.89221 -> best
Best model found at epoch 59, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 061/200 ] loss = 0.83424


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 061/200 ] loss = 1.07409


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 062/200 ] loss = 0.84403


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 062/200 ] loss = 0.98767


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 063/200 ] loss = 0.81433


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 063/200 ] loss = 1.06143


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 064/200 ] loss = 0.81588


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 064/200 ] loss = 0.94455


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 065/200 ] loss = 0.84547


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 065/200 ] loss = 0.89249


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 066/200 ] loss = 0.83576


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 066/200 ] loss = 1.05398


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 067/200 ] loss = 0.81252


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 067/200 ] loss = 1.12070


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 068/200 ] loss = 0.82966


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 068/200 ] loss = 1.00289


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 069/200 ] loss = 0.81881


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 069/200 ] loss = 1.05859


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 070/200 ] loss = 0.80784


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 070/200 ] loss = 0.89238


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 071/200 ] loss = 0.83866


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 071/200 ] loss = 0.90360


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 072/200 ] loss = 0.83672


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 072/200 ] loss = 0.94781


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 073/200 ] loss = 0.83007


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 073/200 ] loss = 0.98958


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 074/200 ] loss = 0.81769


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 074/200 ] loss = 0.88932 -> best
Best model found at epoch 73, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 075/200 ] loss = 0.84197


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 075/200 ] loss = 0.95696


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 076/200 ] loss = 0.79414


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 076/200 ] loss = 0.92999


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 077/200 ] loss = 0.82451


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 077/200 ] loss = 1.01183


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 078/200 ] loss = 0.81681


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 078/200 ] loss = 1.01198


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 079/200 ] loss = 0.82494


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 079/200 ] loss = 0.91876


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 080/200 ] loss = 0.80671


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 080/200 ] loss = 0.89250


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 081/200 ] loss = 0.81628


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 081/200 ] loss = 0.94840


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 082/200 ] loss = 0.82295


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 082/200 ] loss = 0.93604


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 083/200 ] loss = 0.84171


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 083/200 ] loss = 0.91034


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 084/200 ] loss = 0.81621


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 084/200 ] loss = 0.92594


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 085/200 ] loss = 0.81788


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 085/200 ] loss = 1.13851


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 086/200 ] loss = 0.78640


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 086/200 ] loss = 0.96368


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 087/200 ] loss = 0.81697


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 087/200 ] loss = 0.91643


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 088/200 ] loss = 0.76172


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 088/200 ] loss = 0.96218


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 089/200 ] loss = 0.80407


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 089/200 ] loss = 0.99113


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 090/200 ] loss = 0.77283


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 090/200 ] loss = 0.91551


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 091/200 ] loss = 0.79725


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 091/200 ] loss = 0.96742


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 092/200 ] loss = 0.81988


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 092/200 ] loss = 0.98807


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 093/200 ] loss = 0.79840


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 093/200 ] loss = 0.93519


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 094/200 ] loss = 0.81514


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 094/200 ] loss = 1.04947


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 095/200 ] loss = 0.81988


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 095/200 ] loss = 0.98942
No improvment 20 consecutive epochs, early stopping


Predict test

In [53]:
# load test data
test_set = Dataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One /tmp/dataset-ntu-photo/photos/test sample /tmp/dataset-ntu-photo/photos/test/blazer/20170324131109229_500.jpg


In [54]:
# predict test data
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

In [55]:
print(prediction)

[0, 3, 3, 0, 1, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 3, 2, 3, 1, 1, 3, 1, 2, 2, 3, 1, 2, 2, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 3, 3, 3, 1, 2, 0, 2, 1, 2, 1, 2, 2, 1, 3, 1, 2, 0, 2, 2, 2, 3, 2, 2, 3, 3, 3, 2, 2, 3, 2, 2, 2, 3, 3, 1, 2, 2, 3, 1, 2, 2, 1, 2, 3, 1, 2, 2, 1, 3, 3, 3, 1, 3, 1, 3, 1, 3, 3, 1, 3, 3, 3, 3, 2, 3, 2, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 2, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 0, 3, 1, 3, 1]


In [56]:
# get true label of test data
true_labels = []
for batch in tqdm(test_loader):

    # A batch consists of image data and corresponding labels.
    imgs, labels = batch
    true_labels += labels.squeeze().tolist()

  0%|          | 0/5 [00:00<?, ?it/s]

In [57]:
print(true_labels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]


In [58]:
# Calculate test scores
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from mlxtend.evaluate import accuracy_score

print(classification_report(true_labels, prediction, target_names=labels_out))
matrix = confusion_matrix(true_labels, prediction)
print("Accuracy:", accuracy_score(true_labels, prediction))
print("Confusion Matrix:\n", matrix)
print("Per-Class Accuracy:")
for i in range(4):
    print(labels_out[i], "accuracy:", accuracy_score(true_labels, prediction, method='binary', pos_label=i))

              precision    recall  f1-score   support

      blazer       0.57      0.44      0.50         9
    cardigan       0.55      0.62      0.58        42
        coat       0.66      0.53      0.59        43
      jacket       0.58      0.63      0.61        52

    accuracy                           0.59       146
   macro avg       0.59      0.56      0.57       146
weighted avg       0.59      0.59      0.59       146

Accuracy: 0.589041095890411
Confusion Matrix:
 [[ 4  1  0  4]
 [ 0 26  6 10]
 [ 2  8 23 10]
 [ 1 12  6 33]]
Per-Class Accuracy:
blazer accuracy: 0.9452054794520548
cardigan accuracy: 0.7465753424657534
coat accuracy: 0.7808219178082192
jacket accuracy: 0.7054794520547946


By only training the last fully connected layer, it has worse performace compared with Q2 which trains the whole CNN network.

## Q4

In [26]:
_exp_name = "q4"

In [27]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
import torchvision

# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [28]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

Transform picture

In [29]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

Dataset

In [30]:
class Dataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(Dataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path, labels_out[i], j) for i in range(4) for j in os.listdir(os.path.join(path, labels_out[i])) if not j.startswith(".")])
        if files != None:
            self.files = files
        print(f"One {path} sample",self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        #im = self.data[idx]
        label = -2
        lname = fname.split("/")[-2]
        if lname == labels_out[0]:
            label = 0
        elif lname == labels_out[1]:
            label = 1
        elif lname == labels_out[2]:
            label = 2
        elif lname == labels_out[3]:
            label = 3
        else:
            label = -1
        return im,label

Training model

In [31]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        model = torchvision.models.resnet50(pretrained=False)
        model.fc = nn.Linear(2048, 4)
        self.model = model

    def forward(self, x):
        out = self.model(x)
        out = out.view(out.size()[0], -1)
        return out

Execute

In [33]:
batch_size = 32
_dataset_dir = "/tmp/dataset-ntu-photo/photos"
# Construct datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = Dataset(os.path.join(_dataset_dir,"train"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = Dataset(os.path.join(_dataset_dir,"valid"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

One /tmp/dataset-ntu-photo/photos/train sample /tmp/dataset-ntu-photo/photos/train/blazer/20170324101342210_500.jpg
One /tmp/dataset-ntu-photo/photos/valid sample /tmp/dataset-ntu-photo/photos/valid/blazer/20170324142214231_500.jpg


In [34]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# The number of training epochs and patience.
n_epochs = 200
patience = 20 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_loss = 99999999

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
#     train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
#         acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
#         train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
#     train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
#     print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
#     valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
#         acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
#         valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
#     valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
#     print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_loss < best_loss:
        with open(f"./{_exp_name}_log.txt","a"):
#             print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
#             print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}")


    # save models
    if valid_loss < best_loss:
        print(f"Best model found at epoch {epoch + 1}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_loss = valid_loss
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

cuda


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 001/200 ] loss = 1.42943


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 001/200 ] loss = 1.27955 -> best
Best model found at epoch 1, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 002/200 ] loss = 1.34628


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 002/200 ] loss = 1.61030


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 003/200 ] loss = 1.28866


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 003/200 ] loss = 1.22429 -> best
Best model found at epoch 3, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 004/200 ] loss = 1.34728


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 004/200 ] loss = 1.36504


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 005/200 ] loss = 1.37213


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 005/200 ] loss = 1.70372


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 006/200 ] loss = 1.32315


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 006/200 ] loss = 1.49149


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 007/200 ] loss = 1.35752


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 007/200 ] loss = 1.26067


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 008/200 ] loss = 1.29045


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 008/200 ] loss = 1.19424 -> best
Best model found at epoch 8, saving model


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 009/200 ] loss = 1.26870


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 009/200 ] loss = 1.33334


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 010/200 ] loss = 1.24706


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 010/200 ] loss = 1.42942


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 011/200 ] loss = 1.23046


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 011/200 ] loss = 1.20848


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 012/200 ] loss = 1.24977


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 012/200 ] loss = 1.25778


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 013/200 ] loss = 1.19080


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 013/200 ] loss = 1.55633


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 014/200 ] loss = 1.20675


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 014/200 ] loss = 1.20975


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 015/200 ] loss = 1.14791


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 015/200 ] loss = 1.21208


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 016/200 ] loss = 1.18780


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 016/200 ] loss = 1.40389


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 017/200 ] loss = 1.17616


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 017/200 ] loss = 1.28061


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 018/200 ] loss = 1.13901


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 018/200 ] loss = 1.59350


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 019/200 ] loss = 1.17908


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 019/200 ] loss = 1.46975


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 020/200 ] loss = 1.16278


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 020/200 ] loss = 1.77998


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 021/200 ] loss = 1.10066


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 021/200 ] loss = 1.44999


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 022/200 ] loss = 1.17515


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 022/200 ] loss = 1.32642


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 023/200 ] loss = 1.08906


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 023/200 ] loss = 1.22245


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 024/200 ] loss = 1.08750


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 024/200 ] loss = 1.21406


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 025/200 ] loss = 1.10582


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 025/200 ] loss = 1.30476


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 026/200 ] loss = 1.07830


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 026/200 ] loss = 1.37721


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 027/200 ] loss = 1.07788


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 027/200 ] loss = 1.36082


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 028/200 ] loss = 1.05318


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 028/200 ] loss = 1.26773


  0%|          | 0/33 [00:00<?, ?it/s]

[ Train | 029/200 ] loss = 1.05783


  0%|          | 0/4 [00:00<?, ?it/s]

[ Valid | 029/200 ] loss = 1.61803
No improvment 20 consecutive epochs, early stopping


Predict test

In [35]:
# load test data
test_set = Dataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

One /tmp/dataset-ntu-photo/photos/test sample /tmp/dataset-ntu-photo/photos/test/blazer/20170324131109229_500.jpg


In [36]:
# predict test data
model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

In [37]:
print(prediction)

[1, 1, 1, 3, 1, 1, 1, 2, 3, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 2, 1, 1, 3, 3, 1, 1, 3, 3, 2, 1, 1, 1, 1, 1, 3, 2, 1, 3, 3, 3, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 3, 1, 1, 1, 2, 1, 1, 1, 3, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 1, 2, 3, 1, 3, 3, 1, 1, 1, 1, 3, 3, 2, 1, 3, 3, 1, 1, 3, 1, 1, 3, 3, 3, 1, 1, 1, 1]


In [38]:
# get true label of test data
true_labels = []
for batch in tqdm(test_loader):

    # A batch consists of image data and corresponding labels.
    imgs, labels = batch
    true_labels += labels.squeeze().tolist()

  0%|          | 0/5 [00:00<?, ?it/s]

In [39]:
print(true_labels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]


In [40]:
# Calculate test scores
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from mlxtend.evaluate import accuracy_score

print(classification_report(true_labels, prediction, target_names=labels_out))
matrix = confusion_matrix(true_labels, prediction)
print("Accuracy:", accuracy_score(true_labels, prediction))
print("Confusion Matrix:\n", matrix)
print("Per-Class Accuracy:")
for i in range(4):
    print(labels_out[i], "accuracy:", accuracy_score(true_labels, prediction, method='binary', pos_label=i))

              precision    recall  f1-score   support

      blazer       0.00      0.00      0.00         9
    cardigan       0.36      0.76      0.48        42
        coat       0.43      0.07      0.12        43
      jacket       0.45      0.42      0.44        52

    accuracy                           0.39       146
   macro avg       0.31      0.31      0.26       146
weighted avg       0.39      0.39      0.33       146

Accuracy: 0.3904109589041096
Confusion Matrix:
 [[ 0  6  1  2]
 [ 0 32  1  9]
 [ 0 24  3 16]
 [ 0 28  2 22]]
Per-Class Accuracy:
blazer accuracy: 0.9383561643835616
cardigan accuracy: 0.5342465753424658
coat accuracy: 0.6986301369863014
jacket accuracy: 0.6095890410958904


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


The result with pretrained model is much better than the no pretrained one.

## Q5

The accuracy with pretrain model is far better than without pretrain model, and the model that trains all the parameters are better than the one that only trains the last fully connected network. The accuracy of each class keeps dropping from Q2 to Q4, however, the distributions are similar blazer accuracy > coat accuracy > cardigan accuracy and jacket accuracy. The accuracy of all tesing data keeps decreasing 0.8424657534246576 > 0.589041095890411 > 0.3904109589041096. We can conclude that pretraining highly influences the result and tuning all the parameters from the pretrained data is the best way to have high accuracy score.

Reference: https://colab.research.google.com/drive/15hMu9YiYjE_6HY99UXon2vKGk2KwugWu