In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable as V
import torchvision.transforms as transforms
import torchvision.datasets as dsets

In [2]:
# Training split of MNIST, stored under ./data (downloaded if not already there).
# ToTensor converts each image to a float tensor.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Test split, read from the same root. No download flag is passed here, so the
# files are expected to already be present under ./data.
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [3]:
# Number of samples per mini-batch, shared by both loaders.
batch_size = 100

# Training batches are reshuffled on every pass over the data.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Test batches are served in a fixed order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [27]:
class CNN(nn.Module):
    """Two-stage convolutional classifier producing 10 raw class scores.

    Each stage is conv(5x5, stride 1, no padding) -> ReLU -> 2x2 max-pool.
    For a 1x28x28 input the spatial size goes 28 -> 24 -> 12 -> 8 -> 4, which
    is what the 32*4*4 input width of the final linear layer corresponds to.
    """

    def __init__(self):
        super(CNN, self).__init__()

        # Stage 1: 1 input channel -> 16 feature maps.
        self.cnn1 = nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=0)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 16 -> 32 feature maps.
        self.cnn2 = nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # Classifier head: flattened 32x4x4 features -> 10 scores.
        self.fc1 = nn.Linear(32 * 4 * 4, 10)

    def forward(self, x):
        """Run a batch through both conv stages and the linear head.

        Returns the output of ``fc1`` directly (no softmax is applied here).
        """
        stages = (
            (self.cnn1, self.relu1, self.maxpool1),
            (self.cnn2, self.relu2, self.maxpool2),
        )
        features = x
        for conv, activation, pool in stages:
            features = pool(activation(conv(features)))

        # Collapse everything except the batch dimension before the linear layer.
        flat = features.view(features.size(0), -1)
        return self.fc1(flat)

In [28]:
# Build the network and place its parameters on the GPU when one is available.
model = CNN()
if torch.cuda.is_available():
    model = model.cuda()

In [29]:
# Loss for the training loop. It is applied to the raw scores returned by the
# model together with the integer class labels.
criterion = nn.CrossEntropyLoss()

In [30]:
# Step size for plain SGD (no momentum or weight decay is configured).
lr = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)

In [31]:
# Train for a fixed number of epochs and report test accuracy periodically.
it = 0             # global count of optimizer steps across all epochs
epochs = 5
eval_every = 500   # evaluate on the test set every this many steps

# Send batches to wherever the model's parameters currently live, so this cell
# works whether or not the model was moved to the GPU.
device = next(model.parameters()).device

for e in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        it += 1

        if it % eval_every == 0:
            correct = 0
            total = 0

            # Evaluation needs no gradients: no_grad() avoids building an
            # autograd graph for every test batch, and eval() puts the
            # modules in inference mode.
            model.eval()
            with torch.no_grad():
                # Distinct names so the training batch is not shadowed.
                for test_images, test_labels in test_loader:
                    test_outputs = model(test_images.to(device))
                    _, predicted = torch.max(test_outputs, 1)
                    total += test_labels.size(0)
                    # .item() keeps the running count a plain Python int.
                    correct += (predicted.cpu() == test_labels).sum().item()
            model.train()

            # Float percentage; tensor arithmetic here would truncate to an
            # integer on some PyTorch versions and print a tensor on others.
            accuracy = 100.0 * correct / total if total else float('nan')

            # `loss` is the loss of the most recent training batch only.
            print('iter: {}, loss: {}, accuracy: {}'.format(it, loss.item(), accuracy))

iter: 500, loss: 0.5792158246040344, accuracy: 88
iter: 1000, loss: 0.24649590253829956, accuracy: 93
iter: 1500, loss: 0.23712953925132751, accuracy: 94
iter: 2000, loss: 0.17424370348453522, accuracy: 95
iter: 2500, loss: 0.09146829694509506, accuracy: 95
iter: 3000, loss: 0.1660258173942566, accuracy: 96
