# HW3 Image Classification

## Get Data

### Download Data

In [166]:
# !curl -L "https://www.dropbox.com/s/6l2vcvxl54b0b6w/food11.zip" -o food11.zip


### Manually Unzip

## Preparation

### Importing

In [167]:
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

from tqdm.auto import tqdm, trange
import random

from torch.utils.tensorboard import SummaryWriter


### Transforms

In [168]:
# Per-channel statistics used to normalise tensors; these are the usual
# ImageNet values, presumably chosen to match the pretrained backbone.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Normalisation is kept as its own pipeline so it can be composed after either
# the training or the evaluation pre-processing.
normalize_tfm = transforms.Compose(
    [transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD)])

# Training: random 224x224 crop plus the AutoAugment ImageNet policy, then
# conversion to a tensor (no normalisation yet).
_train_steps = [
    transforms.RandomResizedCrop(224),
    transforms.AutoAugment(policy=transforms.AutoAugmentPolicy.IMAGENET),
    transforms.ToTensor(),
]
train_tfm_without_norm = transforms.Compose(_train_steps)
train_tfm = transforms.Compose([train_tfm_without_norm, normalize_tfm])

# Evaluation: deterministic resize followed by a central 224x224 crop.
_test_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
]
test_tfm_without_norm = transforms.Compose(_test_steps)
test_tfm = transforms.Compose([test_tfm_without_norm, normalize_tfm])

# Test-time augmentation: a random horizontal flip and a random rotation of up
# to 30 degrees in either direction.
_tta_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(30),
]
tta_tfm = transforms.Compose(_tta_steps)


### Define Dataset

In [169]:
class FoodDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Files are expected to be named ``[label]_[id].jpg``. Files whose name does
    not start with an integer followed by ``_`` (e.g. unlabeled test images)
    get the label ``-1``.

    Args:
        path: Directory that is scanned for ``*.jpg`` files when ``files`` is
            not given.
        tfm: Callable applied to each PIL image before it is returned.
        files: Optional explicit list of image paths. When given (even if
            empty) it is used instead of scanning ``path``.
    """

    def __init__(self, path, tfm=test_tfm, files=None):
        super().__init__()
        self.path = path
        if files is not None:
            # An explicit list always wins; an empty list means an empty
            # dataset rather than a silent fallback to the whole directory.
            self.files = files
        else:
            self.files = sorted(
                os.path.join(path, name)
                for name in os.listdir(path)
                if name.endswith(".jpg"))
        if self.files:
            print(f"One {path} sample", self.files[0])
        self.transform = tfm

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.files)

    def __getitem__(self, index):
        """Return ``(transformed_image, label)`` for the image at ``index``."""
        file_name = self.files[index]
        # Open inside a context manager so the file handle is released right
        # away, and force three channels so grayscale / RGBA / CMYK images do
        # not break the 3-channel normalisation.
        with Image.open(file_name) as raw_image:
            image = self.transform(raw_image.convert("RGB"))

        # name of an image:  .../[label]_[id].jpg
        base_name = os.path.basename(file_name)
        try:
            label = int(base_name.split("_")[0])
        except ValueError:
            # No integer prefix: treat the image as unlabeled.
            label = -1
        return image, label


### Define Model

In [170]:
class Classifier(nn.Module):
    """EfficientNet-B0 backbone with an extra linear head for food classes.

    The pretrained backbone is kept intact, including its original final
    layer; a new ``nn.Linear`` is appended that maps the backbone's output
    logits to ``num_classes`` scores.

    Args:
        dropout_rate: Dropout probability applied to the backbone's logits
            before the new linear head. ``0.0`` (default) adds no dropout
            layer, which keeps the module layout and ``state_dict`` keys
            identical to a model built without this option. A checkpoint must
            be loaded into a model built with the same setting
            (zero vs. non-zero), because the extra layer shifts the index of
            the head inside ``classifier``.
        num_classes: Number of output classes (default 11).

    Raises:
        ValueError: If ``dropout_rate`` is not in ``[0, 1)``.
    """

    def __init__(self, dropout_rate=0.0, num_classes=11):
        super().__init__()
        if not 0.0 <= dropout_rate < 1.0:
            raise ValueError(
                f"dropout_rate must be in [0, 1), got {dropout_rate}")

        # input dim: [3, 224, 224] (the transforms in this notebook crop to 224)
        model = models.efficientnet_b0(
            weights=models.EfficientNet_B0_Weights.DEFAULT)
        # Width of the backbone's existing output layer; the new head consumes it.
        num_features = model.classifier[-1].out_features
        if dropout_rate > 0:
            model.classifier.append(nn.Dropout(p=dropout_rate))
        model.classifier.append(nn.Linear(num_features, num_classes))
        self.cnn = model

    def forward(self, x):
        """Return class logits for a batch of images ``x``."""
        return self.cnn(x)


### Hyperparameters

In [171]:
# Compute device: use CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

# Reproducibility
seed = 3407

# Training schedule: upper bound on epochs, and how many epochs without a
# validation-accuracy improvement are tolerated before stopping.
num_epoch, early_stop = 500, 20

# Optimisation
batch_size = 32
learning_rate = 1e-4
weight_decay = 1e-5
dropout_rate = 0.0

# Test-time augmentation: number of augmented passes per batch, and how many
# votes the un-augmented prediction receives.
tta_iterations, origin_weight = 10, 5

# Paths
dataset_dir = './food11'
model_path = './model.ckpt'


cuda


### Fixing seed

In [172]:
def same_seeds(seed):
    """Seed every random number generator this notebook may draw from.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and, when
    available, all CUDA devices), and configures cuDNN to disable benchmark
    mode and select deterministic algorithms.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Python's own RNG is imported by this notebook and may be used by
    # augmentation code, so it has to be seeded as well.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


# Seed the RNGs once, before the model and the data loaders are created.
same_seeds(seed)


### Model

In [173]:
# Build the network with the configured dropout rate (previously the
# `dropout_rate` hyperparameter was defined but never handed to the model)
# and move it to the selected device.
model = Classifier(dropout_rate=dropout_rate).to(device)


### Optimizer


In [174]:
# Classification loss on the raw logits.
criterion = nn.CrossEntropyLoss()

# AdamW over every model parameter, configured from the hyperparameter cell.
adamw_options = {"lr": learning_rate, "weight_decay": weight_decay}
optimizer = torch.optim.AdamW(model.parameters(), **adamw_options)

# Plateau-based learning-rate scheduler, left at its default settings.
plateau_scheduler_cls = torch.optim.lr_scheduler.ReduceLROnPlateau
scheduler = plateau_scheduler_cls(optimizer)


### Dataset

In [175]:
# Pinned host memory only speeds up host-to-GPU copies; requesting it on a
# CPU-only run just triggers a warning.
pin_memory = str(device).startswith("cuda")

# Training data: augmented and reshuffled every epoch.
train_set = FoodDataset(os.path.join(dataset_dir, "training"), tfm=train_tfm)
train_loader = DataLoader(train_set,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0,
                          pin_memory=pin_memory)

# Validation data: deterministic pre-processing and a fixed order. The metrics
# do not depend on the order, and not shuffling avoids drawing from the global
# RNG during evaluation.
valid_set = FoodDataset(os.path.join(dataset_dir, "validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=0,
                          pin_memory=pin_memory)


One ./food11\training sample ./food11\training\0_0.jpg
One ./food11\validation sample ./food11\validation\0_0.jpg


## Training

In [176]:
# Train with early stopping: after each epoch the model is evaluated on the
# validation set, the checkpoint with the best validation accuracy is saved to
# `model_path`, and training stops once accuracy has not improved for
# `early_stop` consecutive epochs.
best_acc = 0.0
not_improving = 0

pbar = trange(num_epoch)
writer = SummaryWriter()  # TensorBoard writer.
step = 0  # Number of optimizer steps so far; used as the TensorBoard x-axis.

try:
    for epoch in pbar:

        # ---- training ----
        train_correct = 0
        train_loss_sum = 0.0
        model.train()

        for features, labels in tqdm(train_loader, leave=False, desc="Training"):
            features = features.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)

            loss = criterion(outputs, labels)
            loss.backward()
            # Clip the gradient norms for stable training.
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

            optimizer.step()
            step += 1

            # Predicted class = index of the largest logit.
            train_correct += (outputs.argmax(dim=-1) == labels).sum().item()
            # The criterion returns the batch mean; weight it by the batch
            # size so a smaller last batch does not skew the epoch average.
            train_loss_sum += loss.item() * labels.size(0)

        # ---- validation ----
        valid_correct = 0
        valid_loss_sum = 0.0
        model.eval()

        with torch.no_grad():
            for features, labels in tqdm(valid_loader, leave=False, desc='Validation'):
                features = features.to(device)
                labels = labels.to(device)

                outputs = model(features)
                loss = criterion(outputs, labels)

                # Compare on the device; only the scalar count is copied back.
                valid_correct += (outputs.argmax(dim=-1) == labels).sum().item()
                valid_loss_sum += loss.item() * labels.size(0)

        # Per-sample averages over the whole epoch.
        train_acc = train_correct / len(train_set)
        train_loss = train_loss_sum / len(train_set)
        valid_acc = valid_correct / len(valid_set)
        valid_loss = valid_loss_sum / len(valid_set)

        # The plateau scheduler monitors the validation loss.
        scheduler.step(valid_loss)
        current_lr = optimizer.param_groups[0]['lr']

        # save best model on validation
        if valid_acc > best_acc:
            not_improving = 0
            best_acc = valid_acc
            torch.save(model.state_dict(), model_path)
        else:
            not_improving += 1

        # display
        info_str = f"Best Acc: {best_acc:.2%}, Train Loss: {train_loss:.2f}, Train Acc: {train_acc:.2%}, Valid Acc: {valid_acc:.2%}, LR: {current_lr:.2e}" + (
            f", Not Improved for {not_improving} epochs" if not_improving else "")
        pbar.set_postfix_str(info_str)

        writer.add_scalar('Loss/train', train_loss, step)
        writer.add_scalar('Loss/valid', valid_loss, step)

        if not_improving >= early_stop:
            break
finally:
    # Flush pending events and release the log file even when training is
    # interrupted (e.g. by stopping the cell).
    writer.close()


  0%|          | 0/500 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

Training:   0%|          | 0/309 [00:00<?, ?it/s]

Validation:   0%|          | 0/108 [00:00<?, ?it/s]

## Test & Predict

In [181]:
# (Re)load the TensorBoard notebook extension and open the dashboard on the
# event files under ./runs/, served on port 6006.
%reload_ext tensorboard
%tensorboard --logdir=./runs/  --port 6006


Reusing TensorBoard on port 6006 (pid 92204), started 1 day, 2:44:30 ago. (Use '!kill 92204' to kill it.)

In [182]:
# Test images use the deterministic evaluation pre-processing and are read in
# a fixed order so predictions line up with the dataset's file list.
test_set = FoodDataset(os.path.join(dataset_dir, "test"), tfm=test_tfm)
test_loader = DataLoader(test_set,
                         batch_size=batch_size,
                         shuffle=False,
                         num_workers=0,
                         # Pinned memory is only useful for host-to-GPU copies;
                         # on a CPU-only run it just triggers a warning.
                         pin_memory=str(device).startswith("cuda"))


One ./food11\test sample ./food11\test\0001.jpg


In [183]:
# Rebuild the architecture with the same settings used for training and
# restore the best checkpoint. `map_location` lets a checkpoint saved on a GPU
# be loaded on a CPU-only machine.
best_model = Classifier(dropout_rate=dropout_rate).to(device)
best_model.load_state_dict(torch.load(model_path, map_location=device))
best_model.eval()

# Predict by majority vote: the prediction on the original batch counts
# `origin_weight` times, and each of `tta_iterations` augmented passes counts
# once.
prediction = []
with torch.no_grad():
    for image, _ in tqdm(test_loader, desc="Predicting"):
        origin_label = best_model(image.to(device)).argmax(dim=1)
        voting_predictions = [origin_label] * origin_weight

        for _ in range(tta_iterations):
            # The augmentation is applied to the whole batch tensor at once.
            transformed_image = tta_tfm(image).to(device)
            tta_label = best_model(transformed_image).argmax(dim=1)
            voting_predictions.append(tta_label)

        # One row per image, one column per vote; the mode along the vote
        # axis is the winning class.
        stacked_predictions = torch.stack(voting_predictions, dim=1)
        vote_label = torch.mode(stacked_predictions, dim=1).values

        # `vote_label` is already 1-D. Squeezing it would turn a batch of one
        # image into a scalar, and `tolist()` would then return a bare int
        # that cannot be appended with `+=`.
        prediction += vote_label.cpu().tolist()


Predicting:   0%|          | 0/105 [00:00<?, ?it/s]

In [184]:
#create test csv
def pad4(i):
    """Return ``str(i)`` left-padded with zeros to at least four characters."""
    text = str(i)
    return text.rjust(4, "0")


# Assemble the submission table: ids are consecutive, zero-padded 1-based
# indices, one per test image, paired with the predicted class.
submission_ids = list(map(pad4, range(1, len(test_set) + 1)))
df = pd.DataFrame({"Id": submission_ids, "Category": prediction})
df.to_csv("submission.csv", index=False)
