In [1]:
import torch
from torch import optim
from torch.autograd import Variable
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import numpy as np

In [2]:
train_dataset = dsets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = dsets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

In [3]:
torch.manual_seed(0)
torch.cuda.manual_seed(0)
np.random.seed(0)

In [4]:
batch_size = 100
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [5]:
class MultiLayerNN(torch.nn.Module):
    def __init__(self, input_dim, hidden_units_dim, output_dim):
        super(MultiLayerNN, self).__init__()
        self.model = torch.nn.Sequential(
            torch.nn.Linear(input_dim, hidden_units_dim),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(hidden_units_dim),
            torch.nn.Linear(hidden_units_dim, hidden_units_dim),
            torch.nn.ReLU(),
            torch.nn.BatchNorm1d(hidden_units_dim),
            torch.nn.Linear(hidden_units_dim, output_dim),
            torch.nn.ReLU()
        )
        for m in self.model:
            if isinstance(m, torch.nn.Linear):
                torch.nn.init.kaiming_normal_(m.weight)
                torch.nn.init.constant_(m.bias, 0)
        
    def forward(self, x):
        return self.model(x)

In [6]:
batch_size = 100
epochs = 200
input_dim = 784
hidden_dim = 1000
output_dim = 10
lr_rate = 0.05
len_train_dataset = len(train_loader.dataset)

In [7]:
model = MultiLayerNN(input_dim, hidden_dim, output_dim).cuda()
criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(model.parameters())

In [8]:
def get_accuracy(model):
    model.eval()
    
    correct = 0.
    total = 0.

    for images, labels in test_loader:
        images = Variable(images.view(-1, 28*28)).cuda()
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct+= (predicted.cpu() == labels).sum()

    accuracy = 100 * correct.float() / total
    
    model.train()
    
    return accuracy

In [9]:
results = []
for epoch in range(epochs):
    avg_loss = 0.
    
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.view(-1, 28 * 28)).cuda()
        labels = Variable(labels).cuda()
        
        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        avg_loss += loss / (len_train_dataset / batch_size)
        
    results.append(avg_loss)
    print(epoch, avg_loss.data)
    
    if epoch % 10 == 0:
        print("accuracy : ", get_accuracy(model))

0 tensor(0.1994, device='cuda:0')
accuracy :  tensor(96.1100)
1 tensor(0.0722, device='cuda:0')
2 tensor(0.0454, device='cuda:0')
3 tensor(0.0334, device='cuda:0')
4 tensor(0.0319, device='cuda:0')
5 tensor(0.0242, device='cuda:0')
6 tensor(0.0211, device='cuda:0')
7 tensor(0.0214, device='cuda:0')
8 tensor(0.0187, device='cuda:0')
9 tensor(0.0164, device='cuda:0')
10 tensor(0.0172, device='cuda:0')
accuracy :  tensor(97.4200)
11 tensor(0.0149, device='cuda:0')
12 tensor(0.0113, device='cuda:0')
13 tensor(0.0121, device='cuda:0')
14 tensor(0.0127, device='cuda:0')
15 tensor(0.0096, device='cuda:0')
16 tensor(0.0109, device='cuda:0')
17 tensor(0.0110, device='cuda:0')
18 tensor(0.0104, device='cuda:0')
19 tensor(0.0063, device='cuda:0')
20 tensor(0.0088, device='cuda:0')
accuracy :  tensor(97.7100)
21 tensor(0.0099, device='cuda:0')
22 tensor(0.0076, device='cuda:0')
23 tensor(0.0063, device='cuda:0')
24 tensor(0.0046, device='cuda:0')
25 tensor(0.0067, device='cuda:0')
26 tensor(0.0090

In [10]:
import matplotlib.pyplot as plt

plt.plot(results)

[<matplotlib.lines.Line2D at 0x2a34ad12e10>]

In [12]:
print("accuracy : ", get_accuracy(model))

accuracy :  tensor(98.3300)


In [15]:
sum([p.numel() for p in model.parameters() if p.requires_grad])

1800010