In [13]:
# 8.3.3 조기종료

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision import transforms, datasets

import matplotlib
import matplotlib.pyplot as plt
import time
import argparse
from tqdm import tqdm
matplotlib.style.use('ggplot')

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [16]:
#데이터를 불러오는 과정에서 원하는 preprocessing 과정

train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
val_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

In [17]:
# 원하는 train / test data 를 불러옴
train_dataset = datasets.ImageFolder(
    root=r'../chap08/data/archive/train',
    transform=train_transform
)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=32, shuffle=True,
)
val_dataset = datasets.ImageFolder(
    root=r'../chap08/data/archive/test',
    transform=val_transform
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset, batch_size=32, shuffle=False,
)

In [18]:
def resnet50(pretrained=True):
    # transfer learning을 위한 resnet을 가져옴
    model = models.resnet50(progress=True, pretrained=pretrained)
    if requires_grad == False:
        for param in model.parameters():
            param.requires_grad = False
    elif requires_grad == True:
        for param in model.parameters():
            param.requires_grad = True
    # 마지막 원하는 포멧과 함께 2 class 문제 해결
    model.fc = nn.Linear(2048, 2)
    return model

In [19]:
class LRScheduler():
    def __init__(
        self, optimizer, patience=5, min_lr=1e-6, factor=0.5
    ):
        # 해당하는 optimizer , patinece , learning rate , factor 등을 기입
        self.optimizer = optimizer
        self.patience = patience
        self.min_lr = min_lr
        self.factor = factor
        # 스케쥴러를 통해 작성
        self.lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( 
                self.optimizer,
                mode='min',
                patience=self.patience,
                factor=self.factor,
                min_lr=self.min_lr,
                verbose=True
            )
    def __call__(self, val_loss):
        self.lr_scheduler.step(val_loss)

In [20]:
class EarlyStopping():
    # 만약에 학습이 더디거나 충분히 학습되면, 그전에 early stopping
    def __init__(self, patience=5, verbose=False, delta=0, path='../chap08/data/checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        self.path = path
        
    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                # 만약에 patience 횟수만큼 진정이 없으면 stop을 위한용도
                self.early_stop = True
                
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [21]:
parser = argparse.ArgumentParser()
#해당하는 행동을 이제 schedular에 argument를 넣어서 해결
parser.add_argument('--lr-scheduler', dest='lr_scheduler', action='store_true')
parser.add_argument('--early-stopping', dest='early_stopping', action='store_true')
parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1") #주피터 노트북에서 실행할때 필요합니다
args = vars(parser.parse_args())

In [26]:
#!pip install ipywidgets 혹은
#anaconda prompt에서 conda install -c conda-forge ipywidgets 실행

# resnet을 가져오고 그에 해당하는 요소들을 print
print(f"Computation device: {device}\n")
model = models.resnet50(pretrained=True).to(device)
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")

Computation device: cpu

25,557,032 total parameters.
25,557,032 training parameters.


In [27]:
lr = 0.001
epochs = 100

# learing rate , epoch 설정 후, 
# loss 와 optimization 함수를 설정.
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

In [28]:
loss_plot_name = 'loss'
acc_plot_name = 'accuracy'
model_name = 'model'

# 이름을 기입하여, 스케쥴러에 넣어줄 예정

In [30]:
#argument가 무엇인지에 따라, name을 달리 설정해줌.
if args['lr_scheduler']:
    print('INFO: Initializing learning rate scheduler')
    lr_scheduler = LRScheduler(optimizer)
    loss_plot_name = 'lrs_loss'
    acc_plot_name = 'lrs_accuracy'
    model_name = 'lrs_model'
if args['early_stopping']:
    print('INFO: Initializing early stopping')
    early_stopping = EarlyStopping()
    loss_plot_name = 'es_loss'
    acc_plot_name = 'es_accuracy'
    model_name = 'es_model'

In [31]:

def training(model, train_dataloader, train_dataset, optimizer, criterion):
    print('Training')
    # 학습을 위한 용도 model.train
    # 그에 해당하는 loss & correct 리스트화 / counter 및 total
    model.train()
    train_running_loss = 0.0
    train_running_correct = 0
    counter = 0
    total = 0
    prog_bar = tqdm(enumerate(train_dataloader), total=int(len(train_dataset)/train_dataloader.batch_size))
    for i, data in prog_bar:
        counter += 1
        # 분석하는 환경에 맞게 데이터 전환
        data, target = data[0].to(device), data[1].to(device)
        total += target.size(0)
        
        # optimizer 초기화
        optimizer.zero_grad()
        
        #학습
        outputs = model(data)
        
        #loss 구하기 및 backwarding
        loss = criterion(outputs, target)
        train_running_loss += loss.item()
        _, preds = torch.max(outputs.data, 1)
        train_running_correct += (preds == target).sum().item()
        loss.backward()
        optimizer.step()
        
    train_loss = train_running_loss / counter
    train_accuracy = 100. * train_running_correct / total
    return train_loss, train_accuracy

In [32]:
def validate(model, test_dataloader, val_dataset, criterion):
    print('Validating')
    # 검증을 위한 용도 model.eval
    # 그에 해당하는 loss & correct 리스트화 / counter 및 total
    model.eval()
    val_running_loss = 0.0
    val_running_correct = 0
    counter = 0
    total = 0
    prog_bar = tqdm(enumerate(test_dataloader), total=int(len(val_dataset)/test_dataloader.batch_size))
    with torch.no_grad():
        for i, data in prog_bar:
            counter += 1
            # 분석하는 환경에 맞게 데이터 전환
            data, target = data[0].to(device), data[1].to(device)
            total += target.size(0)
            
            #학습 및 loss 구하기
            outputs = model(data)
            loss = criterion(outputs, target)
            
            val_running_loss += loss.item()
            _, preds = torch.max(outputs.data, 1)
            val_running_correct += (preds == target).sum().item()
        
        val_loss = val_running_loss / counter
        val_accuracy = 100. * val_running_correct / total
        return val_loss, val_accuracy

In [33]:
# 전체 loss & accuracy를 보여줌.
train_loss, train_accuracy = [], []
val_loss, val_accuracy = [], []
start = time.time()

for epoch in range(epochs):
    print(f"Epoch {epoch+1} of {epochs}")
    
    # 각 epoch 마다, train -> validation을 반복
    train_epoch_loss, train_epoch_accuracy = training(
        model, train_dataloader, train_dataset, optimizer, criterion
    )
    val_epoch_loss, val_epoch_accuracy = validate(
        model, val_dataloader, val_dataset, criterion
    )
    
    # 그에 해당하는 loss / accuracy도 추가
    train_loss.append(train_epoch_loss)
    train_accuracy.append(train_epoch_accuracy)
    val_loss.append(val_epoch_loss)
    val_accuracy.append(val_epoch_accuracy)
    
    # argument가 어떤 것인지에 따라서, 다르게 판단해줌.
    if args['lr_scheduler']:
        lr_scheduler(val_epoch_loss)
    if args['early_stopping']:
        early_stopping(val_epoch_loss, model)
        if early_stopping.early_stop:
            break
    print(f"Train Loss: {train_epoch_loss:.4f}, Train Acc: {train_epoch_accuracy:.2f}")
    print(f'Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_accuracy:.2f}')
end = time.time()
print(f"Training time: {(end-start)/60:.3f} minutes")

Epoch 1 of 100
Training


16it [07:32, 28.26s/it]                                                         


Validating


16it [02:00,  7.51s/it]                                                         


Train Loss: 2.2386, Train Acc: 60.64
Val Loss: 106.3847, Val Acc: 0.20
Epoch 2 of 100
Training


16it [08:14, 30.93s/it]                                                         


Validating


16it [02:09,  8.08s/it]                                                         


Train Loss: 0.6305, Train Acc: 66.47
Val Loss: 1.6918, Val Acc: 54.00
Epoch 3 of 100
Training


16it [08:11, 30.74s/it]                                                         


Validating


16it [02:18,  8.68s/it]                                                         


Train Loss: 0.5101, Train Acc: 76.31
Val Loss: 0.6113, Val Acc: 71.60
Epoch 4 of 100
Training


16it [07:58, 29.91s/it]                                                         


Validating


16it [02:05,  7.85s/it]                                                         


Train Loss: 0.4951, Train Acc: 76.10
Val Loss: 0.6003, Val Acc: 68.80
Epoch 5 of 100
Training


16it [07:06, 26.66s/it]                                                         


Validating


  0%|                                                    | 0/15 [00:04<?, ?it/s]


KeyboardInterrupt: 

In [None]:
#위에 분석을 통해 얻은 list화된 loss & accuracy 등을 plotting 함.

plt.figure(figsize=(10, 7))
plt.plot(train_accuracy, color='green', label='train accuracy')
plt.plot(val_accuracy, color='blue', label='validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.savefig(f"../chap08/img/{acc_plot_name}.png")
plt.show()

plt.figure(figsize=(10, 7))
plt.plot(train_loss, color='orange', label='train loss')
plt.plot(val_loss, color='red', label='validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.savefig(f"../chap08/img/{loss_plot_name}.png")
plt.show()
    
print('Saving model...')
torch.save(model.state_dict(), f"../chap08/img/{model_name}.pth")
print('TRAINING COMPLETE')