In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets

# Argumentation과 Dropout
학습능력 향상을 위해 데이터 증진과 노드 드롭 아웃을 공부해보자!!
---
데이터는 많으면 많을수록 학습량이 많아져서 좋다...! Argumantation으로 기존 데이터를 변형해 데이터를 늘려보자!!      
노드 드롭아웃은 각각의 노드들을 골고루 학습시켜 모든 노드의 파라미터들을 적절하게 맞춰준다.   
책에 나온 말을 인용하자면 마치 대답을 잘하는 학생을 제외한 다른 학생에게 대답을 요구하는 상황...!!

In [2]:
#GPU, CPU를 맞춰주기 위한 코드
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda" if USE_CUDA else "cpu")

In [3]:
#하이퍼 파라미터 설정
EPOCHS = 50
BATCH_SIZE = 64

In [4]:
#데이터 불러오기
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./.data',
                    train = True,
                    download = True,
                    transform = transforms.Compose([
                        transforms.RandomHorizontalFlip(),#데이터 증진을 위해 랜덤하게 좌우 반전을 시킨다.
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307),(0.3081)) #정규화 시켜주기
                    ])),
    batch_size = BATCH_SIZE, shuffle = True)

#테스트 데이터셋은 정규화만 시켜준다
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./.data',
                    train = False,
                    download = True,
                    transform = transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307),(0.3081))
                    ])),
    batch_size = BATCH_SIZE, shuffle = True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./.data\MNIST\raw\train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./.data\MNIST\raw\train-images-idx3-ubyte.gz to ./.data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./.data\MNIST\raw\train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./.data\MNIST\raw\train-labels-idx1-ubyte.gz to ./.data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./.data\MNIST\raw\t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./.data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./.data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./.data\MNIST\raw\t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./.data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./.data\MNIST\raw



In [5]:
#모델 선언하기
class Net(nn.Module):
    def __init__(self, dropout_p): #드롭아웃을 받아올 것이다.
        super(Net, self).__init__() #nn.Module 속성으로 초기화
        self.fc1 = nn.Linear(784,256) # 입력값 맞춰서 레이어 만들고
        self.fc2 = nn.Linear(256,128)
        self.fc3 = nn.Linear(128,10) #출력값 맞춰서 레이어 만든다.
        self.dropout_p = dropout_p #드롭아웃도 설정해준다.
    def forward(self,x):
        x = x.view(-1,784) # 이미지를 랭크1로 펴준다.
        x = F.relu(self.fc1(x)) # 엑티베이션은 relu를 사용한다.
        
        x = F.dropout(x, training=self.training, #신경망 출력 x, 학습인지를 알려주는 self.training
                      p = self.dropout_p) #드롭아웃 확률을 설정한다
        x = F.relu(self.fc2(x)) 
        x = F.dropout(x,training = self.training,
                      p = self.dropout_p)
        x = self.fc3(x)
        return x

In [6]:
model = Net(dropout_p=0.2).to(DEVICE) #모델을 생성한다.-드롭아웃을 같이 설정한다.
optimizer = optim.SGD(model.parameters(),lr=0.01) #역전파를 위한 옵티마이저와 학습률도 설정한다.

In [7]:
def train(model, train_loader, optimizer): #모델 학습을 위한 데피니션
    model.train() 
    for batch_idx, (data, target) in enumerate(train_loader):
        data,target = data.to(DEVICE), target.to(DEVICE)
        optimizer.zero_grad() #반복마다 새로운 기울기를 설정
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

In [8]:
def evaluate(model, test_loader): # 모델 성능 확인을 위한 데피니션
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad(): #테스트는 기울기가 필요없음
        for data,target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            test_loss += F.cross_entropy(output,target, reduction = 'sum').item()
            
            pred = output.max(1,keepdim=True)[1]
            correct +=pred.eq(target.view_as(pred)).sum().item()
    
    #오차와 정확도를 위한 equation
    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct/len(test_loader.dataset)
    return test_loss, test_accuracy

In [9]:
#모델 성능 테스트...!!
for epoch in range(1,EPOCHS+1):
    train(model,train_loader,optimizer)
    test_loss, test_accuracy = evaluate(model, test_loader)
    
    print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(epoch,test_loss, test_accuracy))

[1] Test Loss: 0.5464, Accuracy: 82.21%
[2] Test Loss: 0.4268, Accuracy: 86.41%
[3] Test Loss: 0.3513, Accuracy: 89.19%
[4] Test Loss: 0.2925, Accuracy: 91.32%
[5] Test Loss: 0.2532, Accuracy: 92.27%
[6] Test Loss: 0.2242, Accuracy: 93.12%
[7] Test Loss: 0.2010, Accuracy: 93.83%
[8] Test Loss: 0.1892, Accuracy: 94.30%
[9] Test Loss: 0.1730, Accuracy: 94.76%
[10] Test Loss: 0.1628, Accuracy: 94.87%
[11] Test Loss: 0.1569, Accuracy: 95.08%
[12] Test Loss: 0.1500, Accuracy: 95.29%
[13] Test Loss: 0.1429, Accuracy: 95.54%
[14] Test Loss: 0.1362, Accuracy: 95.72%
[15] Test Loss: 0.1321, Accuracy: 95.78%
[16] Test Loss: 0.1289, Accuracy: 95.96%
[17] Test Loss: 0.1252, Accuracy: 96.15%
[18] Test Loss: 0.1236, Accuracy: 96.04%
[19] Test Loss: 0.1217, Accuracy: 96.06%
[20] Test Loss: 0.1162, Accuracy: 96.22%
[21] Test Loss: 0.1133, Accuracy: 96.34%
[22] Test Loss: 0.1109, Accuracy: 96.36%
[23] Test Loss: 0.1110, Accuracy: 96.39%
[24] Test Loss: 0.1066, Accuracy: 96.55%
[25] Test Loss: 0.1054, A