# [모의 캐글-의료] 흉부 CT 코로나 감염 여부 분류
- 이미지 binary 분류 과제
- 담당: 이녕민M

## Import Libraries

In [1]:
# !apt-get update && apt-get install -y python3-opencv

In [2]:
# !pip install sklearn
# !pip install tqdm
# !pip install torchvision
# !pip install wandb

In [3]:
import os, torch, copy, cv2, sys, random
# from datetime import datetime, timezone, timedelta
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms

## Set Arguments & hyperparameters

In [4]:
# 시드(seed) 설정

RANDOM_SEED = 2022

torch.manual_seed(RANDOM_SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [5]:
# parameters

### 데이터 디렉토리 설정 ###
DATA_DIR= 'data'
NUM_CLS = 2

EPOCHS = 30
BATCH_SIZE = 32
LEARNING_RATE = 0.0005
EARLY_STOPPING_PATIENCE = 10

# vggnet, resnet = 224
# basecnn = 128
INPUT_SHAPE = 224

os.environ["CUDA_VISIBLE_DEVICES"]="0"
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# DEVICE = torch.device('cpu')
DEVICE

device(type='cuda')

In [6]:
torch.__version__

'1.10.2+cu102'

In [7]:
# import random
# import numpy as np
# import os
# import cv2
# import glob
# from PIL import Image
# import PIL.ImageOps    

# #다음 변수를 수정하여 새로 만들 이미지 갯수를 정합니다.
# num_augmented_images = 400

# file_path = 'data/train/'
# file_names = os.listdir(file_path)
# total_origin_image_num = len(file_names)
# augment_cnt = 1

# for i in range(1, num_augmented_images):
#     change_picture_index = random.randrange(1, total_origin_image_num-1)
#     print(change_picture_index)
#     print(file_names[change_picture_index])
#     file_name = file_names[change_picture_index]
    
#     origin_image_path = 'data/train/' + file_name
#     print(origin_image_path)
#     image = Image.open(origin_image_path)
#     random_augment = random.randrange(1,4)
    
#     if(random_augment == 1):
#         #이미지 좌우 반전
#         print("invert")
#         inverted_image = image.transpose(Image.FLIP_LEFT_RIGHT)
#         inverted_image.save(file_path + 'inverted_' + str(augment_cnt) + '.png')
        
#     elif(random_augment == 2):
#         #이미지 기울이기
#         print("rotate")
#         rotated_image = image.rotate(random.randrange(-20, 20))
#         rotated_image.save(file_path + 'rotated_' + str(augment_cnt) + '.png')
        
#     elif(random_augment == 3):
#         #노이즈 추가하기
#         img = cv2.imread(origin_image_path)
#         print("noise")
#         row,col,ch= img.shape
#         mean = 0
#         var = 0.1
#         sigma = var**0.5
#         gauss = np.random.normal(mean,sigma,(row,col,ch))
#         gauss = gauss.reshape(row,col,ch)
#         noisy_array = img + gauss
#         noisy_image = Image.fromarray(np.uint8(noisy_array)).convert('RGB')
#         noisy_image.save(file_path + 'noiseAdded_' + str(augment_cnt) + '.png')
        
#     augment_cnt += 1

## Dataloader

#### Train & Validation Set loader

In [7]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, mode, input_shape):
        self.data_dir = data_dir
        self.mode = mode
        self.input_shape = input_shape
        
        # Loading dataset
        self.db = self.data_loader()
        
        # Dataset split
        if self.mode == 'train':
            self.db = self.db[:int(len(self.db) * 0.9)]
        elif self.mode == 'val':
            self.db = self.db[int(len(self.db) * 0.9):]
            self.db.reset_index(inplace=True)
        else:
            print(f'!!! Invalid split {self.mode}... !!!')
            
        # Transform function
        self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def data_loader(self):
        print('Loading ' + self.mode + ' dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        # (COVID : 1, No : 0)
        db = pd.read_csv(os.path.join(self.data_dir, 'train.csv'))
        
        return db

    def __len__(self):
        return len(self.db)

    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])

        # Loading image
        cvimg = cv2.imread(os.path.join(self.data_dir,'train',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))

        return trans_image, data['COVID']


# Model

## VGGNet 

In [8]:
VGG_types = {
    'VGG11' : [64, 'M', 128, 'M', 256, 256, 'M', 512,512, 'M',512,512,'M'],
    'VGG13' : [64,64, 'M', 128, 128, 'M', 256, 256, 'M', 512,512, 'M', 512,512,'M'],
    'VGG16' : [64,64, 'M', 128, 128, 'M', 256, 256,256, 'M', 512,512,512, 'M',512,512,512,'M'],
    'VGG19' : [64,64, 'M', 128, 128, 'M', 256, 256,256,256, 'M', 512,512,512,512, 'M',512,512,512,512,'M']
}

In [9]:
# define VGGnet class
class VGGnet(nn.Module):
    def __init__(self, model, in_channels=3, num_classes=NUM_CLS, init_weights=True):
        super(VGGnet,self).__init__()
        self.in_channels = in_channels

        # create conv_layers corresponding to VGG type
        self.conv_layers = self.create_conv_laters(VGG_types[model])

        self.fcs = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        
        self.softmax = nn.Softmax(dim=1)

        # weight initialization
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(-1, 512 * 7 * 7)
        x = self.fcs(x)
        x = self.softmax(x)
        return x

    # defint weight initialization function
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
    
    # define a function to create conv layer taken the key of VGG_type dict 
    def create_conv_laters(self, architecture):
        layers = []
        in_channels = self.in_channels # 3

        for x in architecture:
            if type(x) == int: # int means conv layer
                out_channels = x

                layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                     kernel_size=(3,3), stride=(1,1), padding=(1,1)),
                           nn.BatchNorm2d(x),
                           nn.ReLU()]
                in_channels = x
            elif x == 'M':
                layers += [nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))]
        
        return nn.Sequential(*layers)

# define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# creat VGGnet object
model = VGGnet('VGG13', in_channels=3, num_classes=NUM_CLS, init_weights=True).to(device)
print(model)

cuda
VGGnet(
  (conv_layers): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256, kernel_size=(3,

## torchvision.models 

In [8]:
import torchvision.models as models
import torch.nn.functional as F

class custom0(nn.Module):
    def __init__(self, num_classes =2):
        super(custom0, self).__init__()
        self.model = models.densenet201(pretrained=False, num_classes =2)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        x = self.model(x)
        x = self.softmax(x)
        
        return x

In [9]:
model = custom0().to(DEVICE)
model

custom0(
  (model): DenseNet(
    (features): Sequential(
      (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu0): ReLU(inplace=True)
      (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (denseblock1): _DenseBlock(
        (denselayer1): _DenseLayer(
          (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        )
        (denselayer2): _DenseLayer(
          (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, 

## base CNN 

In [12]:
# import torch.nn.functional as F

# class custom_CNN(nn.Module):
#     def __init__(self, num_classes):
#         super(custom_CNN, self).__init__()
#         self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=5)
#         self.pool = nn.MaxPool2d(kernel_size=2)
#         self.conv2 = nn.Conv2d(in_channels=8, out_channels=25, kernel_size=5)
        
#         self.fc1 = nn.Linear(in_features=25*29*29, out_features=128)
#         self.fc2 = nn.Linear(in_features=128, out_features=num_classes)
#         self.softmax = nn.Softmax(dim=1)
        
#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x))) # (32, 3, 128, 128) -> (32, 8, 62, 62)
#         x = self.pool(F.relu(self.conv2(x))) # (32, 8, 62, 62) -> (32, 25, 29, 29)
        
#         x = torch.flatten(x,1)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
        
#         output = self.softmax(x)
        
#         return output

# summary 

In [10]:
!pip install torchsummary
import torchsummary

torchsummary.summary(model, input_size = (3, 224, 224))



AttributeError: 'list' object has no attribute 'size'

## Utils
### EarlyStopper

In [10]:
class LossEarlyStopper():
    """Early stopper
    
    Attributes:
        patience (int): loss가 줄어들지 않아도 학습할 epoch 수
        patience_counter (int): loss 가 줄어들지 않을 때 마다 1씩 증가, 감소 시 0으로 리셋
        min_loss (float): 최소 loss
        stop (bool): True 일 때 학습 중단


    """

    def __init__(self, patience: int)-> None:
        self.patience = patience

        self.patience_counter = 0
        self.min_loss = np.Inf
        self.stop = False
        self.save_model = False

    def check_early_stopping(self, loss: float)-> None:
        """Early stopping 여부 판단"""  

        if self.min_loss == np.Inf:
            self.min_loss = loss
            return None

        elif loss > self.min_loss:
            self.patience_counter += 1
            msg = f"Early stopping counter {self.patience_counter}/{self.patience}"

            if self.patience_counter == self.patience:
                self.stop = True
                
        elif loss <= self.min_loss:
            self.patience_counter = 0
            self.save_model = True
            msg = f"Validation loss decreased {self.min_loss} -> {loss}"
            self.min_loss = loss
        
        print(msg)

### Trainer

In [11]:
class Trainer():
    """ epoch에 대한 학습 및 검증 절차 정의"""
    
    def __init__(self, loss_fn, model, device, metric_fn, optimizer=None, scheduler=None):
        """ 초기화
        """
        self.loss_fn = loss_fn
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.metric_fn = metric_fn

    def train_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 학습 절차"""
        
        self.model.train()
        train_total_loss = 0
        target_lst = []
        pred_lst = []
        prob_lst = []

        for batch_index, (img, label) in enumerate(dataloader):
            img = img.to(self.device)
            label = label.to(self.device).float()

            
            pred = self.model(img)
            
            # pred = torch.softmax(pred)
            # pred = torch.round(pred)
            
            loss = self.loss_fn(pred[:,1], label)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.scheduler.step()
            
            train_total_loss += loss.item()
            prob_lst.extend(pred[:, 1].cpu().tolist())
            target_lst.extend(label.cpu().tolist())
            pred_lst.extend(pred.argmax(dim=1).cpu().tolist())
        self.train_mean_loss = train_total_loss / batch_index
        self.train_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        msg = f'Epoch {epoch_index}, Train loss: {self.train_mean_loss}, Acc: {self.train_score}, F1-Macro: {f1}'
        print(msg)

    def validate_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 검증 절차
        """
        self.model.eval()
        val_total_loss = 0
        target_lst = []
        pred_lst = []
        prob_lst = []

        for batch_index, (img, label) in enumerate(dataloader):
            img = img.to(self.device)
            label = label.to(self.device).float()
            
            pred = self.model(img)
            
            # pred = torch.softmax(pred)
            # pred = torch.round(pred)
            
            loss = self.loss_fn(pred[:,1], label)
            val_total_loss += loss.item()
            prob_lst.extend(pred[:, 1].cpu().tolist())
            target_lst.extend(label.cpu().tolist())
            pred_lst.extend(pred.argmax(dim=1).cpu().tolist())
        self.val_mean_loss = val_total_loss / batch_index
        self.validation_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        msg = f'Epoch {epoch_index}, Val loss: {self.val_mean_loss}, Acc: {self.validation_score}, F1-Macro: {f1}'
        print(msg)

### Metrics

In [12]:
from sklearn.metrics import accuracy_score, f1_score

def get_metric_fn(y_pred, y_answer):
    """ 성능을 반환하는 함수"""
    
    assert len(y_pred) == len(y_answer), 'The size of prediction and answer are not same.'
    accuracy = accuracy_score(y_answer, y_pred)
    f1 = f1_score(y_answer, y_pred, average='macro')
    return accuracy, f1

## Train
### 학습을 위한 객체 선언

#### Load Dataset & Dataloader

In [13]:
# Load dataset & dataloader
train_dataset = CustomDataset(data_dir=DATA_DIR, mode='train', input_shape=INPUT_SHAPE)
# train_dataset2 = CustomDataset(data_dir=DATA_DIR, mode='train', input_shape=INPUT_SHAPE)
validation_dataset = CustomDataset(data_dir=DATA_DIR, mode='val', input_shape=INPUT_SHAPE)

# train_dataset = torch.utils.data.ConcatDataset([train_dataset, train_dataset2])

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True)
print('Train set samples:',len(train_dataset),  'Val set samples:', len(validation_dataset))

#581 - 65

Loading train dataset..
Loading val dataset..
Train set samples: 581 Val set samples: 65


#### Load model and other utils

In [14]:
# Load Model
# model = custom_CNN(NUM_CLS).to(DEVICE)


# # Save Initial Model
# torch.save(model.state_dict(), 'initial.pt')

# Set optimizer, scheduler, loss function, metric function
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
scheduler =  optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e5, max_lr=0.0001, epochs=EPOCHS, steps_per_epoch=len(train_dataloader))
loss_fn = nn.BCELoss()
metric_fn = get_metric_fn

# Set trainer
trainer = Trainer(loss_fn, model, DEVICE, metric_fn, optimizer, scheduler)

# Set earlystopper
early_stopper = LossEarlyStopper(patience=EARLY_STOPPING_PATIENCE)

### epoch 단위 학습 진행

In [15]:
import wandb
wandb.login(relogin = True)

wandb.init(project = 'test2', name = 'vgg13_2')

config = wandb.config
config.learning_rate = LEARNING_RATE

for epoch_index in tqdm(range(EPOCHS)):

    trainer.train_epoch(train_dataloader, epoch_index)
    trainer.validate_epoch(validation_dataloader, epoch_index)

    # early_stopping check
    early_stopper.check_early_stopping(loss=trainer.val_mean_loss)
    
    
    wandb.log({"train_loss": trainer.train_mean_loss,
             "train_score": trainer.train_score,
             "val_loss": trainer.val_mean_loss,
             "val_score": trainer.validation_score})

    if early_stopper.stop:
        print('Early stopped')
        break

    if early_stopper.save_model:
        check_point = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        torch.save(check_point, 'vgg13.pt')

wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize


[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mjw_cho[0m (use `wandb login --relogin` to force relogin)


  0% 0/30 [00:00<?, ?it/s]

Epoch 0, Train loss: 0.8070046073860593, Acc: 0.5283993115318416, F1-Macro: 0.5250208855472014


  3% 1/30 [00:56<27:24, 56.72s/it]

Epoch 0, Val loss: 1.0411602854728699, Acc: 0.5384615384615384, F1-Macro: 0.4025735294117648
Epoch 1, Train loss: 0.688414517376158, Acc: 0.6557659208261618, F1-Macro: 0.6549061534806366


  7% 2/30 [01:51<25:57, 55.63s/it]

Epoch 1, Val loss: 2.0200697779655457, Acc: 0.5076923076923077, F1-Macro: 0.336734693877551
Early stopping counter 1/10
Epoch 2, Train loss: 1.0771978944540024, Acc: 0.6368330464716007, F1-Macro: 0.6364841343031755
Epoch 2, Val loss: 0.7973748501390219, Acc: 0.6, F1-Macro: 0.5706300813008129
Validation loss decreased 1.0411602854728699 -> 0.7973748501390219


 10% 3/30 [02:48<25:17, 56.21s/it]

Epoch 3, Train loss: 0.9758887704875734, Acc: 0.7211703958691911, F1-Macro: 0.7196346868744639
Epoch 3, Val loss: 4.61237907409668, Acc: 0.6, F1-Macro: 0.5706300813008129
Early stopping counter 1/10


 13% 4/30 [03:53<25:52, 59.73s/it]

Epoch 4, Train loss: 0.8357358136110835, Acc: 0.7521514629948365, F1-Macro: 0.750375939849624
Epoch 4, Val loss: 2.0392770018806914, Acc: 0.5846153846153846, F1-Macro: 0.4899738448125545
Early stopping counter 2/10


 17% 5/30 [04:58<25:44, 61.76s/it]

Epoch 5, Train loss: 0.6676838820179304, Acc: 0.7882960413080895, F1-Macro: 0.7881730469965764
Epoch 5, Val loss: 0.7074286046354246, Acc: 0.6615384615384615, F1-Macro: 0.6299171842650104
Validation loss decreased 0.7973748501390219 -> 0.7074286046354246


 20% 6/30 [05:55<23:55, 59.83s/it]

Epoch 6, Train loss: 0.44325655574599904, Acc: 0.846815834767642, F1-Macro: 0.8462254471919468
Epoch 6, Val loss: 5.642210960388184, Acc: 0.5538461538461539, F1-Macro: 0.46886446886446886
Early stopping counter 1/10


 23% 7/30 [07:02<23:53, 62.31s/it]

Epoch 7, Train loss: 0.3923841913541158, Acc: 0.8657487091222031, F1-Macro: 0.8652020274129069
Epoch 7, Val loss: 1.8119505346353435, Acc: 0.5384615384615384, F1-Macro: 0.4237588652482269
Early stopping counter 2/10


 27% 8/30 [08:11<23:40, 64.59s/it]

Epoch 8, Train loss: 0.32051245992382366, Acc: 0.8795180722891566, F1-Macro: 0.8785825173155004
Epoch 8, Val loss: 0.8490597903728485, Acc: 0.7538461538461538, F1-Macro: 0.7435897435897436
Early stopping counter 3/10


 30% 9/30 [09:30<24:06, 68.87s/it]

Epoch 9, Train loss: 0.1953128526608149, Acc: 0.9345955249569707, F1-Macro: 0.9343291928421855
Epoch 9, Val loss: 2.43381068110466, Acc: 0.5846153846153846, F1-Macro: 0.5054945054945055
Early stopping counter 4/10


 33% 10/30 [10:49<24:04, 72.21s/it]

Epoch 10, Train loss: 0.2701575429075294, Acc: 0.9001721170395869, F1-Macro: 0.8995145399460864
Epoch 10, Val loss: 1.591294139623642, Acc: 0.7076923076923077, F1-Macro: 0.6934723256391164
Early stopping counter 5/10


 37% 11/30 [12:13<23:57, 75.65s/it]

Epoch 11, Train loss: 0.12352391684220897, Acc: 0.9621342512908778, F1-Macro: 0.9619447421698226
Epoch 11, Val loss: 0.5226595550775528, Acc: 0.8461538461538461, F1-Macro: 0.8452380952380953
Validation loss decreased 0.7074286046354246 -> 0.5226595550775528


 40% 12/30 [13:53<24:58, 83.24s/it]

Epoch 12, Train loss: 0.08998188867958055, Acc: 0.963855421686747, F1-Macro: 0.963716116753156
Epoch 12, Val loss: 1.3388068675994873, Acc: 0.7076923076923077, F1-Macro: 0.6888384983623079
Early stopping counter 1/10


 43% 13/30 [15:24<24:12, 85.44s/it]

Epoch 13, Train loss: 0.056067893834551796, Acc: 0.9793459552495697, F1-Macro: 0.9792425866380849
Epoch 13, Val loss: 0.46538393399168854, Acc: 0.8307692307692308, F1-Macro: 0.8306088604596067
Validation loss decreased 0.5226595550775528 -> 0.46538393399168854


 47% 14/30 [16:47<22:37, 84.86s/it]

Epoch 14, Train loss: 0.04240312450565398, Acc: 0.9931153184165232, F1-Macro: 0.9930841566480181
Epoch 14, Val loss: 1.7152206599712372, Acc: 0.6923076923076923, F1-Macro: 0.6635610766045549
Early stopping counter 1/10


 50% 15/30 [18:15<21:25, 85.70s/it]

Epoch 15, Train loss: 0.10697030695155263, Acc: 0.9724612736660929, F1-Macro: 0.9723095987036506
Epoch 15, Val loss: 0.4246487021463281, Acc: 0.8, F1-Macro: 0.7969238163902908
Validation loss decreased 0.46538393399168854 -> 0.4246487021463281


 53% 16/30 [19:54<20:54, 89.62s/it]

Epoch 16, Train loss: 0.11456916719261143, Acc: 0.9655765920826161, F1-Macro: 0.965478312537136
Epoch 16, Val loss: 1.1177862582262605, Acc: 0.676923076923077, F1-Macro: 0.6500897205844655
Early stopping counter 1/10


 57% 17/30 [21:28<19:43, 91.07s/it]

Epoch 17, Train loss: 0.03521836450414008, Acc: 0.9879518072289156, F1-Macro: 0.9879000467099255
Epoch 17, Val loss: 0.6128379106521606, Acc: 0.7846153846153846, F1-Macro: 0.7845643939393939
Early stopping counter 2/10


 60% 18/30 [23:00<18:16, 91.35s/it]

Epoch 18, Train loss: 0.06744312738818634, Acc: 0.9879518072289156, F1-Macro: 0.987905372251052
Epoch 18, Val loss: 1.527328222990036, Acc: 0.6923076923076923, F1-Macro: 0.6697154471544715
Early stopping counter 3/10


 63% 19/30 [24:31<16:42, 91.17s/it]

Epoch 19, Train loss: 0.08598091682910712, Acc: 0.9707401032702238, F1-Macro: 0.9705863959475517
Epoch 19, Val loss: 3.908687859773636, Acc: 0.7846153846153846, F1-Macro: 0.7782651072124755
Early stopping counter 4/10


 67% 20/30 [26:09<15:31, 93.19s/it]

Epoch 20, Train loss: 0.03626938823274233, Acc: 0.9896729776247849, F1-Macro: 0.9896106528062563
Epoch 20, Val loss: 0.5594922248274088, Acc: 0.8461538461538461, F1-Macro: 0.8461174242424243
Early stopping counter 5/10


 70% 21/30 [27:46<14:10, 94.49s/it]

Epoch 21, Train loss: 0.015375432461344948, Acc: 0.9948364888123924, F1-Macro: 0.9948165881075937
Epoch 21, Val loss: 0.5838332185121544, Acc: 0.8, F1-Macro: 0.7969238163902908
Early stopping counter 6/10


 73% 22/30 [29:25<12:44, 95.59s/it]

Epoch 22, Train loss: 0.01735159147453184, Acc: 0.9965576592082617, F1-Macro: 0.9965420783240091
Epoch 22, Val loss: 0.7394386231899261, Acc: 0.8, F1-Macro: 0.7951515151515152
Early stopping counter 7/10


 77% 23/30 [30:50<10:46, 92.38s/it]

Epoch 23, Train loss: 0.015162387752853747, Acc: 0.9948364888123924, F1-Macro: 0.9948093639907445
Epoch 23, Val loss: 0.4958965033529239, Acc: 0.8, F1-Macro: 0.7951515151515152
Early stopping counter 8/10


 80% 24/30 [32:10<08:53, 88.96s/it]

Epoch 24, Train loss: 0.00441233294825199, Acc: 1.0, F1-Macro: 1.0
Epoch 24, Val loss: 2.2513789236545563, Acc: 0.8307692307692308, F1-Macro: 0.8301259206462342
Early stopping counter 9/10


 83% 25/30 [33:40<07:25, 89.06s/it]

Epoch 25, Train loss: 0.008823744684276689, Acc: 0.9965576592082617, F1-Macro: 0.9965404311063475


 83% 25/30 [34:55<06:59, 83.83s/it]

Epoch 25, Val loss: 0.43532961608480036, Acc: 0.8461538461538461, F1-Macro: 0.8458254269449715
Early stopping counter 10/10
Early stopped





0,1
train_loss,▆▅█▇▆▅▄▄▃▂▃▂▂▁▁▂▂▁▁▂▁▁▁▁▁▁
train_score,▁▃▃▄▄▅▆▆▆▇▇▇▇███▇█████████
val_loss,▂▃▂▇▃▁█▃▂▄▃▁▂▁▃▁▂▁▂▆▁▁▁▁▃▁
val_score,▂▁▃▃▃▄▂▂▆▃▅█▅█▅▇▅▇▅▇█▇▇▇██

0,1
train_loss,0.00882
train_score,0.99656
val_loss,0.43533
val_score,0.84615


## Inference
### 모델 로드

In [16]:
TRAINED_MODEL_PATH = 'vgg13.pt'

### Load dataset

In [17]:
class TestDataset(Dataset):
    def __init__(self, data_dir, input_shape):
        self.data_dir = data_dir
        self.input_shape = input_shape
        
        # Loading dataset
        self.db = self.data_loader()
        
        # Transform function
        self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                             transforms.ToTensor(),
                                             transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    def data_loader(self):
        print('Loading test dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        db = pd.read_csv(os.path.join(self.data_dir, 'sample_submission.csv'))
        return db
    
    def __len__(self):
        return len(self.db)
    
    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])
        
        # Loading image
        cvimg = cv2.imread(os.path.join(self.data_dir,'test',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))

        return trans_image, data['file_name']

In [18]:
# Load dataset & dataloader
test_dataset = TestDataset(data_dir=DATA_DIR, input_shape=INPUT_SHAPE)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

Loading test dataset..


### 추론 진행

In [19]:
model.load_state_dict(torch.load(TRAINED_MODEL_PATH)['model'])

# Prediction
file_lst = []
pred_lst = []
prob_lst = []
model.eval()
example_images = []
with torch.no_grad():
    for batch_index, (img, file_num) in tqdm(enumerate(test_dataloader)):
        img = img.to(DEVICE)
        pred = model(img)
        print(pred)
        file_lst.extend(list(file_num))
        pred_lst.extend(pred.argmax(dim=1).tolist())
        prob_lst.extend(pred[:, 1].tolist())

1it [00:03,  3.54s/it]

tensor([[9.9988e-01, 1.1562e-04],
        [9.9756e-01, 2.4420e-03],
        [9.9912e-01, 8.7814e-04],
        [6.2213e-03, 9.9378e-01],
        [9.9984e-01, 1.5780e-04],
        [6.8283e-03, 9.9317e-01],
        [2.4449e-01, 7.5551e-01],
        [3.8952e-01, 6.1048e-01],
        [9.9927e-01, 7.3059e-04],
        [1.4347e-13, 1.0000e+00],
        [9.9885e-01, 1.1488e-03],
        [9.8966e-01, 1.0339e-02],
        [9.9928e-01, 7.1750e-04],
        [8.9907e-01, 1.0093e-01],
        [9.8340e-01, 1.6599e-02],
        [9.9885e-01, 1.1507e-03],
        [9.7807e-01, 2.1935e-02],
        [9.9999e-01, 9.1900e-06],
        [9.0277e-01, 9.7225e-02],
        [9.9988e-01, 1.2195e-04],
        [9.9998e-01, 2.3370e-05],
        [9.9897e-01, 1.0349e-03],
        [8.0640e-01, 1.9360e-01],
        [1.0000e+00, 4.1186e-06],
        [4.8113e-02, 9.5189e-01],
        [9.9999e-01, 1.1524e-05],
        [9.9914e-01, 8.6404e-04],
        [4.1235e-03, 9.9588e-01],
        [2.0139e-08, 1.0000e+00],
        [1.505

2it [00:06,  3.28s/it]

tensor([[3.6024e-05, 9.9996e-01],
        [9.9992e-01, 7.5755e-05],
        [9.9816e-01, 1.8358e-03],
        [2.5684e-01, 7.4316e-01],
        [2.0106e-02, 9.7989e-01],
        [2.5387e-01, 7.4613e-01],
        [3.1835e-03, 9.9682e-01],
        [9.8989e-01, 1.0112e-02],
        [6.4650e-05, 9.9994e-01],
        [9.2229e-01, 7.7713e-02],
        [8.5730e-01, 1.4270e-01],
        [3.1387e-05, 9.9997e-01],
        [1.1542e-01, 8.8458e-01],
        [8.1859e-02, 9.1814e-01],
        [7.2450e-01, 2.7550e-01],
        [9.9979e-01, 2.0766e-04],
        [5.6997e-01, 4.3003e-01],
        [9.9944e-01, 5.5741e-04],
        [4.1341e-02, 9.5866e-01],
        [5.4351e-05, 9.9995e-01],
        [7.2159e-01, 2.7841e-01],
        [9.9759e-01, 2.4130e-03],
        [1.4235e-01, 8.5765e-01],
        [1.8895e-02, 9.8110e-01],
        [2.3820e-02, 9.7618e-01],
        [6.0516e-03, 9.9395e-01],
        [9.9995e-01, 5.4446e-05],
        [8.1704e-01, 1.8296e-01],
        [1.8844e-07, 1.0000e+00],
        [9.999

3it [00:09,  3.15s/it]

tensor([[8.5546e-07, 1.0000e+00],
        [5.2433e-11, 1.0000e+00],
        [7.6726e-01, 2.3274e-01],
        [7.7579e-01, 2.2421e-01],
        [2.0585e-02, 9.7941e-01],
        [9.9697e-01, 3.0346e-03],
        [9.9953e-01, 4.6534e-04],
        [9.9982e-01, 1.8050e-04],
        [9.9846e-01, 1.5389e-03],
        [9.9983e-01, 1.7193e-04],
        [2.2068e-05, 9.9998e-01],
        [9.9987e-01, 1.3218e-04],
        [1.0000e+00, 4.5324e-06],
        [1.0000e+00, 2.3926e-08],
        [7.3870e-03, 9.9261e-01],
        [8.2504e-05, 9.9992e-01],
        [4.4222e-03, 9.9558e-01],
        [4.1468e-01, 5.8532e-01],
        [9.8663e-01, 1.3367e-02],
        [5.8518e-05, 9.9994e-01],
        [9.6180e-02, 9.0382e-01],
        [8.7470e-02, 9.1253e-01],
        [9.1133e-01, 8.8671e-02],
        [1.1923e-01, 8.8077e-01],
        [9.9764e-01, 2.3587e-03],
        [3.5938e-01, 6.4062e-01],
        [9.8524e-01, 1.4760e-02],
        [9.9814e-01, 1.8580e-03],
        [2.4515e-06, 1.0000e+00],
        [3.338

4it [00:09,  2.46s/it]

tensor([[1.0520e-04, 9.9989e-01],
        [3.5928e-06, 1.0000e+00],
        [9.4375e-01, 5.6247e-02],
        [4.6576e-04, 9.9953e-01]], device='cuda:0')





### 결과 저장

In [20]:
df = pd.DataFrame({'file_name':file_lst, 'COVID':pred_lst})
# df.sort_values(by=['file_name'], inplace=True)
df.to_csv('0215_vgg13.csv', index=False)

In [21]:
df_pred11 = pd.read_csv('0215_vgg13.csv')
df_pred11.head()

Unnamed: 0,file_name,COVID
0,0.png,0
1,1.png,0
2,2.png,0
3,3.png,1
4,4.png,0


In [36]:
df_pred11['COVID'].value_counts()

0    65
1    35
Name: COVID, dtype: int64

In [22]:
df_pred16 = pd.read_csv('0212_vgg16.csv')
df_pred16.head()

Unnamed: 0,file_name,COVID
0,0.png,0
1,1.png,0
2,2.png,0
3,3.png,1
4,4.png,0


In [38]:
df_pred16['COVID'].value_counts()

0    70
1    30
Name: COVID, dtype: int64

In [23]:
df_pred13 = pd.read_csv('0212_vgg13.csv')
df_pred13.head()

Unnamed: 0,file_name,COVID
0,0.png,0
1,1.png,0
2,2.png,0
3,3.png,1
4,4.png,0


In [40]:
df_pred13['COVID'].value_counts()

0    60
1    40
Name: COVID, dtype: int64

In [24]:
df_pred = pd.read_csv('prediction.csv')
df_pred.head()

Unnamed: 0,file_name,COVID
0,0.png,1
1,1.png,0
2,2.png,0
3,3.png,1
4,4.png,0


In [43]:
df_pred['COVID'].value_counts()

0    56
1    44
Name: COVID, dtype: int64

In [25]:
df_vote = pd.read_csv('prediction.csv')
df_vote['COVID'] = 0
df_vote.head()

Unnamed: 0,file_name,COVID
0,0.png,0
1,1.png,0
2,2.png,0
3,3.png,0
4,4.png,0


In [45]:
df_vote['COVID'].value_counts()

0    100
Name: COVID, dtype: int64

In [26]:
df_vote['COVID'] += df_pred11['COVID']
df_vote['COVID'] += df_pred13['COVID']
df_vote['COVID'] += df_pred16['COVID']
df_vote['COVID'] += df_pred['COVID']
df_vote['COVID'].value_counts()

0    41
4    20
3    18
1    17
2     4
Name: COVID, dtype: int64

In [27]:
df_vote.head()

Unnamed: 0,file_name,COVID
0,0.png,1
1,1.png,0
2,2.png,0
3,3.png,4
4,4.png,0


In [28]:
df_vote['COVID'][(df_vote['COVID'] < 3 )] = 0
df_vote['COVID'][(df_vote['COVID'] > 2 )] = 1

df_vote['COVID'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vote['COVID'][(df_vote['COVID'] < 3 )] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_vote['COVID'][(df_vote['COVID'] > 2 )] = 1


0    62
1    38
Name: COVID, dtype: int64

In [29]:
df_vote.head()

Unnamed: 0,file_name,COVID
0,0.png,0
1,1.png,0
2,2.png,0
3,3.png,1
4,4.png,0


In [30]:
df_vote.to_csv('0215_vote2(vgg*3 + base).csv', index=False)