In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.autograd import Variable as V
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
train_dataset = dsets.MNIST(root = './data', train = True, transform = transforms.ToTensor(), download = True)

test_dataset = dsets.MNIST(root='./data', train = False, transform = transforms.ToTensor())

In [4]:
batch_size = 100
train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = batch_size, shuffle = False)

In [5]:
class FeedForwardNeuralNet(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(FeedForwardNeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_dim,hidden_dim)
        self.sigmoid = nn.Sigmoid()
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.sigmoid(out)
        out = self.fc2(out)
        
        return out

In [7]:
input_dim = 28*28
output_dim = 10
hidden_dim = 100

model = FeedForwardNeuralNet(input_dim, hidden_dim, output_dim)

if torch.cuda.is_available():
    model.cuda()

In [8]:
criterion = nn.CrossEntropyLoss()

In [9]:
lr = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr =lr)

In [12]:
it=0
epochs=5
for e in range(epochs):
    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = V(images.view(-1, 28*28).cuda())
            labels = V(labels.cuda())
        else:
            images = V(images.view(-1, 28*28))
            labels = V(labels)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        it+=1
        if it%500 == 0:
            correct = 0
            total = 0
            
            for images, labels in test_loader:
                if torch.cuda.is_available():
                    images =V(images.view(-1, 28*28).cuda())
                else:
                    images =V(images.view(-1, 28*28))
                    
                outputs = model(images)
                
                _, predicted = torch.max(outputs.data, 1)
                
                total+= labels.size(0)
                
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum()
                else:
                    correct += (predicted == labels).sum()
                    
            accuracy = 100 * correct/total
                
            print('iter: {} Loss: {} Accuracy: {}'.format(it, loss.data, accuracy))

iter: 500 Loss: 0.4747294485569 Accuracy: 90
iter: 1000 Loss: 0.35513266921043396 Accuracy: 91
iter: 1500 Loss: 0.541461169719696 Accuracy: 90
iter: 2000 Loss: 1.5592749118804932 Accuracy: 91
iter: 2500 Loss: 0.47602665424346924 Accuracy: 91
iter: 3000 Loss: 0.9050096869468689 Accuracy: 90
