In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms

device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [2]:
batch_size = 100
num_epochs = 15
learning_rate = 0.001

In [3]:
mnist_train = dsets.MNIST(root="../datasets/MNIST_data",
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)

mnist_test = dsets.MNIST(root="../datasets/MNIST_data",
                         train=False,
                         transform=transforms.ToTensor(),
                         download=True)

train_loader = torch.utils.data.DataLoader(mnist_train,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           drop_last=True)

In [4]:
from torch import nn

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        )
        
        self.linear1 = nn.Linear(128*4*4, 625, bias=True)
        self.linear2 = nn.Linear(625, 10, bias=True)
        nn.init.xavier_normal_(self.linear1.weight)
        nn.init.xavier_normal_(self.linear2.weight)
        
        self.fc = nn.Sequential(
            self.linear1,
            nn.ReLU(),
            nn.Dropout(p=0.5),
            self.linear2
        )
                
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.shape[0], -1)
        x = self.fc(x)
        return x
    
    
model = CNN().to(device)

In [5]:
from torch import optim

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
num_batches = len(train_loader)

# train mode
model.train()
for epoch in range(num_epochs):
    avg_loss = 0
    
    for X, Y in train_loader:
        X = X.to(device)
        Y = Y.to(device)
        optimizer.zero_grad()
        
        h = model(X)
        loss = criterion(h, Y)
        loss.backward()
        optimizer.step()
        
        avg_loss += loss / num_batches
        
    print("[Epoch {:3d}] Loss : {:.6f}".format(epoch+1, avg_loss))

[Epoch   1] Loss : 0.192012
[Epoch   2] Loss : 0.052426
[Epoch   3] Loss : 0.037774
[Epoch   4] Loss : 0.029248
[Epoch   5] Loss : 0.025782
[Epoch   6] Loss : 0.019136
[Epoch   7] Loss : 0.016595
[Epoch   8] Loss : 0.014823
[Epoch   9] Loss : 0.014007
[Epoch  10] Loss : 0.011347
[Epoch  11] Loss : 0.010047
[Epoch  12] Loss : 0.010250
[Epoch  13] Loss : 0.007092
[Epoch  14] Loss : 0.007534
[Epoch  15] Loss : 0.008307


In [7]:
with torch.no_grad():
    # evaluation mode
    model.eval()
    X_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.targets.to(device)
    
    preds = model(X_test)
    correct_preds = torch.argmax(preds, dim=1) == Y_test
    accuracy = correct_preds.float().mean()
    
    print("Accuracy : {:.6f}".format(accuracy))

Accuracy : 0.989500
