# Dropout

DNN에서의 Regularization 방법 중 하나인 Dropout을 사용해보자!

***Dropout을 사용할 때에는 학습에만 적용되도록 `model.train()`, `model.eval()`을 사용해주어야 함***

In [1]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [2]:
batch_size = 100
learning_rate = 0.001
training_epochs = 15

In [3]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms

mnist_path = "../datasets/MNIST_data/"

mnist_train = dsets.MNIST(root=mnist_path, train=True, transform=transforms.ToTensor(),
                          download=True)
mnist_test = dsets.MNIST(root=mnist_path, train=False, transform=transforms.ToTensor(),
                         download=True)

In [4]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [5]:
from torch import nn

class MNISTClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(784, 512, bias=True)
        self.linear2 = nn.Linear(512, 512, bias=True)
        self.linear3 = nn.Linear(512, 512, bias=True)
        self.linear4 = nn.Linear(512, 512, bias=True)
        self.linear5 = nn.Linear(512, 10, bias=True)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.3)
        
        nn.init.kaiming_normal_(self.linear1.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.linear2.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.linear3.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.linear4.weight, nonlinearity='relu')
        nn.init.kaiming_normal_(self.linear5.weight, nonlinearity='relu')
        
    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.linear2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.linear3(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.linear4(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.linear5(x)
        
        return x
    
    
model = MNISTClassifier().to(device)

In [6]:
from torch import optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), learning_rate)

In [7]:
num_batch = len(data_loader)
# training에는 dropout 켜기
model.train()
for epoch in range(training_epochs):
    avg_loss = 0
    
    for X, Y in data_loader:
        X = X.view(-1, 784).to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        loss = criterion(hypothesis, Y)
        loss.backward()
        optimizer.step()
        
        avg_loss += loss / num_batch
        
    print("Epoch: {:04d}, Loss: {:.9f}".format(epoch+1, avg_loss))

print("Learning Finished")        

Epoch: 0001, Loss: 0.342470258
Epoch: 0002, Loss: 0.155405015
Epoch: 0003, Loss: 0.120883971
Epoch: 0004, Loss: 0.100696743
Epoch: 0005, Loss: 0.089069344
Epoch: 0006, Loss: 0.079865687
Epoch: 0007, Loss: 0.069851093
Epoch: 0008, Loss: 0.067985781
Epoch: 0009, Loss: 0.063389972
Epoch: 0010, Loss: 0.056394421
Epoch: 0011, Loss: 0.055560544
Epoch: 0012, Loss: 0.052434921
Epoch: 0013, Loss: 0.052020602
Epoch: 0014, Loss: 0.046363417
Epoch: 0015, Loss: 0.046445902
Learning Finished


In [8]:
import random

with torch.no_grad():
    # test에는 dropout 끄기
    model.eval()
    
    X_test = mnist_test.data.view(-1, 784).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    predictions = model(X_test)
    correct_predictions = torch.argmax(predictions, dim=1) == Y_test
    accuracy = correct_predictions.float().mean()
    print("Accuracy: ", accuracy.item())
    
    # test data의 sample 한개 예측
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.data[r:r + 1].view(-1, 28 * 28).float().to(device)
    Y_single_data = mnist_test.targets[r:r + 1].to(device)

    print('Label: ', Y_single_data.item())
    single_prediction = model(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy:  0.9779999852180481
Label:  9
Prediction:  9
