In [2]:
# dropout manually
import torch
from torch import nn
from d2l import torch as d2l

# basic parameters
batch_size = 256            # samples per minibatch
inputs = 28 * 28            # 784 input features per flattened sample
hiddens1 = hiddens2 = 256   # both hidden layers share the same width
outputs = 10                # number of output classes

# load dataset: d2l hands back one iterator for training and one for testing
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

In [1]:
# model
# Drop probabilities applied after the first and second hidden layer.
dropout1 = 0.2
dropout2 = 0.5

def dropout(X, p):
    """Apply inverted dropout to ``X``.

    Every element of ``X`` is zeroed independently with probability ``p``;
    the surviving elements are divided by ``1 - p`` so that the expected
    value of the output equals ``X``.

    Args:
        X: input tensor.
        p: drop probability, must satisfy ``0 <= p <= 1``.

    Returns:
        ``X`` itself when ``p == 0``, an all-zero tensor shaped like ``X``
        when ``p == 1``, otherwise the masked and rescaled tensor.

    Raises:
        AssertionError: if ``p`` lies outside ``[0, 1]``.
    """
    assert 0 <= p <= 1
    if p == 1:
        # Everything is dropped; also avoids the division by zero below.
        return torch.zeros_like(X)
    if p == 0:
        return X
    # Sample the mask on the same device as X. A bare torch.rand(X.shape)
    # always lands on the CPU and makes the multiplication fail as soon as
    # X lives on an accelerator.
    mask = (torch.rand(X.shape, device=X.device) > p).float()
    return mask * X / (1.0 - p)

class Net(nn.Module):
    """Multilayer perceptron with two hidden layers and manual dropout.

    Dropout is applied after the ReLU of each hidden layer using the
    module-level ``dropout`` function with the module-level probabilities
    ``dropout1`` and ``dropout2``.

    Dropout is active only when BOTH of these hold:
      * the module is in training mode (``self.training``, toggled by the
        standard ``net.train()`` / ``net.eval()``), and
      * the ``is_training`` flag is truthy (constructor argument or
        ``train_test_convert``).
    This keeps the original flag-based API working while making the
    standard ``net.eval()`` switch dropout off as well.

    Args:
        n_inputs: number of features per sample after flattening.
        n_hiddens1: width of the first hidden layer.
        n_hiddens2: width of the second hidden layer.
        n_outputs: number of output units.
        is_training: initial value of the dropout flag.
    """

    def __init__(self, n_inputs, n_hiddens1, n_hiddens2, n_outputs, is_training=True) -> None:
        super().__init__()
        self.n_inputs = n_inputs
        self.layer1 = nn.Linear(n_inputs, n_hiddens1)
        self.layer2 = nn.Linear(n_hiddens1, n_hiddens2)
        self.layer3 = nn.Linear(n_hiddens2, n_outputs)
        self.is_training = is_training
        self.activation = nn.ReLU()

    # 1. override the .forward() function of nn.Module (base class)
    # 2. net(X) goes through nn.Module.__call__, which invokes .forward()
    #    internally, so net.forward() never has to be called explicitly
    def forward(self, X):
        """Compute the output-layer activations for a batch ``X``.

        ``X`` is reshaped to ``(-1, n_inputs)`` before the first layer.
        """
        use_dropout = self.training and bool(self.is_training)
        # input layer: flatten every sample to n_inputs features
        flat = X.reshape((-1, self.n_inputs))
        # hidden layer 1
        H1 = self.activation(self.layer1(flat))
        if use_dropout:
            H1 = dropout(H1, dropout1)
        # hidden layer 2
        H2 = self.activation(self.layer2(H1))
        if use_dropout:
            H2 = dropout(H2, dropout2)
        # output layer (no activation, no dropout)
        return self.layer3(H2)

    def train_test_convert(self, is_training):
        """Switch between training (dropout on) and testing (dropout off).

        Updates the ``is_training`` flag and keeps the standard module mode
        (``self.training``) in sync with it.
        """
        self.is_training = is_training
        self.train(bool(is_training))

# model instance built from the sizes configured above
net = Net(
    n_inputs=inputs,
    n_hiddens1=hiddens1,
    n_hiddens2=hiddens2,
    n_outputs=outputs,
)

# loss: cross entropy between the network outputs and the class labels
loss = nn.CrossEntropyLoss()

# optimizer: plain SGD over every parameter of the model
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.5)

# accuracy
def accuracy(y_hat, y):
    """Return the fraction of predictions in ``y_hat`` that equal ``y``.

    When ``y_hat`` has more than one dimension and more than one column it
    is treated as per-class scores and reduced with ``argmax`` along axis 1;
    otherwise its values are compared with ``y`` directly.
    """
    has_class_scores = y_hat.dim() > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(axis=1) if has_class_scores else y_hat
    # Cast to the label dtype before comparing, then count the matches.
    hits = (predicted.type(y.dtype) == y).type(y.dtype)
    return float(hits.sum() / len(y))

# train
n_epochs = 10
def train(net, minibatch_data, epochs, loss, optimizer, is_training=True):
    """Run ``epochs`` passes over ``minibatch_data``, training or evaluating.

    Args:
        net: callable model; ``net(X)`` returns the predictions for a batch.
        minibatch_data: re-iterable source of ``(X, y)`` batches.
        epochs: number of passes over ``minibatch_data``.
        loss: callable ``loss(y_hat, y)`` returning a scalar tensor.
        optimizer: optimizer that updates the parameters of ``net``
            (unused when ``is_training`` is false).
        is_training: when true, the loss is computed and a parameter update
            is performed for every batch; when false, only the accuracy is
            measured and no gradients are tracked.

    Returns:
        ``(loss_epoch, accuracy_epoch)`` when ``is_training`` is true, else
        ``accuracy_epoch``. Each list holds one per-epoch mean over batches.

    Raises:
        ZeroDivisionError: if ``minibatch_data`` yields no batches in an
            epoch.
    """
    accuracy_epoch = []
    loss_epoch = []
    for _ in range(epochs):
        accuracy_batch = []
        loss_batch = []
        for X, y in minibatch_data:
            if is_training:
                y_hat = net(X)
                l = loss(y_hat, y)
                # Keep only the value for reporting; storing the raw loss
                # would carry autograd history along for every batch.
                loss_batch.append(l.detach())
                # optimization
                optimizer.zero_grad()
                l.backward()
                optimizer.step()
            else:
                # Pure evaluation: no gradients are needed.
                with torch.no_grad():
                    y_hat = net(X)
            accuracy_batch.append(accuracy(y_hat, y))
        accuracy_epoch.append(sum(accuracy_batch) / len(accuracy_batch))
        if is_training:
            loss_epoch.append(sum(loss_batch) / len(loss_batch))
    if is_training:
        return (loss_epoch, accuracy_epoch)
    return accuracy_epoch
    

# Train with dropout enabled and report the loss / accuracy averaged over
# all epochs.
dropout_loss, dropout_accuracy = train(net, train_iter, n_epochs, loss, optimizer)
print('loss: %.3f' % (sum(dropout_loss) / n_epochs))
print('accuracy: %.3f' % (sum(dropout_accuracy) / n_epochs * 100), '%')

# Evaluate with dropout disabled. No parameters are updated in this mode,
# so one pass over the test set is enough; repeating it n_epochs times
# would only redo the same forward passes.
net.train_test_convert(False)
test_dropout_accuracy = train(net, test_iter, 1, loss, optimizer, is_training=False)
print('test accuracy: %.3f' % (sum(test_dropout_accuracy) / len(test_dropout_accuracy) * 100), '%')


NameError: name 'nn' is not defined

In [7]:
# dropout with pytorch
# every nn.Dropout layer is placed right after the activation of a hidden layer
net = nn.Sequential(
    nn.Flatten(),
    # hidden layer 1
    nn.Linear(in_features=inputs, out_features=hiddens1),
    nn.ReLU(),
    nn.Dropout(p=dropout1),
    # hidden layer 2
    nn.Linear(in_features=hiddens1, out_features=hiddens2),
    nn.ReLU(),
    nn.Dropout(p=dropout2),
    # output layer
    nn.Linear(in_features=hiddens2, out_features=outputs),
)