In [1]:
from datetime import datetime

import torch
import torch.nn as nn
import torch.nn.functional as F
from dogs import dogs
import torch.optim as optim
from torch.optim import lr_scheduler
from torchinfo import summary

import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
from scipy import io
# from tqdm.notebook import tqdm
from coatnet import *

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

import time
import os

plt.ion()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


def load_datasets(input_size=224):
    train_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    test_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    train_dataset = dogs(root="./data",
                         train=True,
                         cropped=True,
                         transform=train_transforms,
                         download=False)
    test_dataset = dogs(root="./data",
                        train=False,
                        cropped=True,
                        transform=test_transform,
                        download=False)

    # combine train and test datasets

    classes = train_dataset.classes
    print("Training set stats:")
    train_dataset.stats()
    print("Testing set stats:")
    test_dataset.stats()

    return train_dataset, test_dataset, classes


def fwd_pass(model, loader, loss_function, optimizer, train=False):
    if train:
        model.train()
    else:
        model.eval()

    total_loss = 0
    y_pred, y = [], []
    for X, labels in loader:
        labels = labels.type(torch.LongTensor)
        X, labels = X.to(device), labels.to(device)
        # ===================손실 함수 계산=====================
        outputs = model(X)
        loss = loss_function(outputs, labels)
        total_loss += loss.item()
        # ===================학습====================
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # ===============평가===============
        with torch.no_grad():
            y_hat = outputs.argmax(dim=1)
            y_pred.extend(y_hat.cpu().numpy().tolist())
            y.extend(labels.cpu().numpy().tolist())

    with torch.no_grad():
        total_loss /= len(loader.dataset)
        acc = accuracy_score(y, y_pred) * 100
        f1 = f1_score(y, y_pred, average="weighted") * 100

    return total_loss, acc, f1


logs = dict(trainlosses=list(), testlosses=list(),
            trainaccs=list(), testaccs=list())


def timeSince(since: float) -> str:
    s = time.time() - since
    return f"{int(s / 60):2}m {int(s % 60):02}s"


def train(model, train_loader, test_loader,
          optimizer, loss_function, epoch_num=1, device=device):
    model.to(device)
    torch.manual_seed(14)

    start = time.time()
    print(f"Training Process Starts at {datetime.now().strftime('%H:%M:%S')} ... ")

    epoch = 1

    # loop over the dataset multiple times
    while epoch <= epoch_num:
        # Training
        train_loss, train_acc, train_f1 = fwd_pass(model, train_loader, loss_function, optimizer, train=True)

        # Testing
        with torch.no_grad():
            test_loss, test_acc, test_f1 = fwd_pass(model, test_loader, loss_function, optimizer,
                                                    train=False)

        reset = '\n' if epoch <= 5 or epoch % 5 == 0 else '\r'
        print(f"Epoch[{epoch:2d}]>>>",
              f"Train/Test loss: {train_loss:.7f}/{test_loss:.7f},",
              f"Acc.: {train_acc:.2f}/{test_acc:.2f}",
              f"[[{timeSince(start)}]]", end=reset)

        # ===================log========================
        logs['trainlosses'].append(train_loss)
        logs['trainaccs'].append(train_acc)
        logs['testlosses'].append(test_loss)
        logs['testaccs'].append(test_acc)

        epoch += 1

    print(f'F1 Score: {test_f1}')
    print(f'Finished in {timeSince(start)}')
    return logs


train_data, test_data, classes = load_datasets(224)
dataset_sizes = train_data.__len__()
batch_size = 16

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=0)

num_blocks = [2, 2, 6, 12, 2]  # L
channels = [64, 96, 192, 384, 768]  # D
block_types = ['C', 'C', 'T', 'T']

model = CoAtNet((224, 224), 3, num_blocks, channels, num_classes=len(classes), block_types=block_types)

criterion = nn.CrossEntropyLoss()
learning_rate = 1e-3
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)
# exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

Training set stats:
12877 samples spanning 120 classes (avg 107.308333 per class)
Testing set stats:
9249 samples spanning 120 classes (avg 77.075000 per class)


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [None]:
summary(model, input_size=(batch_size, 3, 224, 224))

In [2]:
model.load_state_dict(torch.load('./savemodel/dogbreed1.pth'))
model.to(device)
model.eval()

CoAtNet(
  (s0): Sequential(
    (0): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU()
    )
    (1): Sequential(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): GELU()
    )
  )
  (s1): Sequential(
    (0): MBConv(
      (pool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (proj): Conv2d(64, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (conv): PreNorm(
        (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (fn): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [4]:
logs = train(model, train_loader=train_loader, test_loader=test_loader,
             optimizer=optimizer, loss_function=criterion,
             epoch_num=3, device=device)

Training Process Starts at 17:43:26 ... 
Epoch[ 1]>>> Train/Test loss: 0.0078367/0.0714398, Acc.: 95.81/97.84 [[10m 53s]]
Epoch[ 2]>>> Train/Test loss: 0.0067343/0.0763198, Acc.: 96.58/97.63 [[21m 47s]]
Epoch[ 3]>>> Train/Test loss: 0.0064167/0.0631362, Acc.: 96.53/98.15 [[32m 38s]]
F1 Score: 98.15673013989462
Finished in 32m 38s


In [3]:
torch.save(model, './savemodel/dogbreed2.pth')

In [None]:
logs = train(model, train_loader=train_loader, test_loader=test_loader,
             optimizer=optimizer, loss_function=criterion,
             epoch_num=10, device=device)

In [None]:
torch.cuda.empty_cache()

In [15]:
class_correct = [0.] * 120
class_total = [0.] * 120
y_test, y_pred = [], []
X_test = []

with torch.no_grad():
    for images, labels in test_loader:
        X_test.extend([_ for _ in images])
        outputs = model(images.to(device))
        _, predicted = torch.topk(outputs, 3)
        predicted = predicted.cpu()
        c = (labels in predicted)
        for i, label in enumerate(labels):
            class_correct[label] += int(c)
            class_total[label] += 1
        y_pred.extend(predicted.numpy())
        y_test.extend(labels.cpu().numpy())

for i in range(120):
    print(f"Acuracy of {classes[i]:5s}: {100 * class_correct[i] / class_total[i]:2.0f}%")

Acuracy of Chihuaha: 100%
Acuracy of Japanese Spaniel: 100%
Acuracy of Maltese Dog: 100%
Acuracy of Pekinese: 98%
Acuracy of Shih-Tzu: 100%
Acuracy of Blenheim Spaniel: 100%
Acuracy of Papillon: 100%
Acuracy of Toy Terrier: 100%
Acuracy of Rhodesian Ridgeback: 100%
Acuracy of Afghan Hound: 100%
Acuracy of Basset Hound: 100%
Acuracy of Beagle: 100%
Acuracy of Bloodhound: 100%
Acuracy of Bluetick: 100%
Acuracy of Black-and-tan Coonhound: 100%
Acuracy of Walker Hound: 100%
Acuracy of English Foxhound: 100%
Acuracy of Redbone: 100%
Acuracy of Borzoi: 100%
Acuracy of Irish Wolfhound: 100%
Acuracy of Italian Greyhound: 100%
Acuracy of Whippet: 100%
Acuracy of Ibizian Hound: 100%
Acuracy of Norwegian Elkhound: 100%
Acuracy of Otterhound: 100%
Acuracy of Saluki: 100%
Acuracy of Scottish Deerhound: 100%
Acuracy of Weimaraner: 100%
Acuracy of Staffordshire Bullterrier: 100%
Acuracy of American Staffordshire Terrier: 100%
Acuracy of Bedlington Terrier: 100%
Acuracy of Border Terrier: 100%
Acuracy

In [3]:
from PIL import Image

transformer = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

img = Image.open('./imgs/10.jpg')
img = transformer(img)
output = model(img.unsqueeze(0).to(device))
print(classes[output.argmax(dim=1)])
# print(output.topk(3))

top3 = torch.topk(output, 3,dim=1)
predict_list = [int(x) for x in top3.indices.squeeze()]
print(predict_list)

English Setter
[61, 65, 66]


In [4]:
for i in predict_list:
    print(f"{classes[i]:5s}")

English Setter
Clumber
English Springer Spaniel
