## Feedforward Neural Network Model Structure

In [17]:
import torch
import torch.nn as nn

class Net(nn.Module):
    """Three-layer fully-connected classifier that returns log-probabilities.

    Layout: ``input_size -> 256 -> 64 -> num_classes``, with a ReLU after each
    of the two hidden layers and a ``LogSoftmax`` over dimension 1 on the output.

    Args:
        input_size: Number of features in each flattened input sample
            (``28*28`` for the MNIST usage in this notebook).
        num_classes: Number of output classes; sets the width of the last layer.
    """

    def __init__(self, input_size, num_classes):
        super(Net, self).__init__()                   # Initialise the nn.Module machinery
        self.fc1 = nn.Linear(input_size, 256)         # 1st fully-connected layer: input_size -> 256 hidden units
        self.relu1 = nn.ReLU()                        # Non-linearity: max(0, x)
        self.fc2 = nn.Linear(256, 64)                 # 2nd fully-connected layer: 256 -> 64 hidden units
        self.relu2 = nn.ReLU()                        # Non-linearity: max(0, x)
        # 3rd fully-connected layer: 64 -> num_classes. The width comes from the
        # constructor argument instead of a hard-coded 10, so the argument is honoured.
        self.fc3 = nn.Linear(64, num_classes)
        self.softmax1 = nn.LogSoftmax(dim=1)          # Log-probabilities across the class dimension

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: 2-D tensor whose second dimension equals ``input_size``.

        Returns:
            Tensor with ``num_classes`` columns holding per-class log-probabilities.
        """
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        out = self.softmax1(out)
        return out

In [22]:
learning_rate = 0.0001   # Step size handed to Adam below
net = Net(28*28, 10)     # 784 flattened pixels in, 10 classes out
print(net)
# Net.forward already ends in LogSoftmax, so the matching loss is NLLLoss.
# CrossEntropyLoss expects raw logits and would apply log-softmax a second time.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

Net(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (relu2): ReLU()
  (fc3): Linear(in_features=64, out_features=10, bias=True)
  (softmax1): LogSoftmax(dim=1)
)


## Download MNIST and Train the FNN Model

In [23]:
import torch
import torch.nn as nn
from torch.autograd import Variable

from torchvision import datasets, transforms


def download_mnist():
    """Build the MNIST train and test datasets with tensor conversion and normalisation.

    Both splits use ``./data/`` as their root directory and are created with
    ``download=True``. Each image goes through ``ToTensor`` followed by
    ``Normalize`` with mean 0.1307 and std 0.3081.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple of ``datasets.MNIST`` objects.
    """
    to_tensor = transforms.ToTensor()
    normalise = transforms.Normalize(mean=(0.1307,), std=(0.3081,))
    preprocessing = transforms.Compose([to_tensor, normalise])

    # Arguments common to both splits; only `train` differs between them.
    shared_kwargs = dict(root="./data/", transform=preprocessing, download=True)
    train_split = datasets.MNIST(train=True, **shared_kwargs)
    test_split = datasets.MNIST(train=False, **shared_kwargs)

    return train_split, test_split


def one_hot(y, num_classes=10):
    """Turn a 1-D tensor of integer class indices into a float one-hot matrix.

    Args:
        y: 1-D integer tensor of class indices, each in ``[0, num_classes)``.
        num_classes: Number of columns in the result. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        Float tensor of shape ``(y.shape[0], num_classes)`` with a single 1 in
        each row at the column given by the corresponding entry of ``y``.
    """
    n_samples = y.shape[0]
    encoded = torch.zeros((n_samples, num_classes))
    # Pair every row index with its class index and set that cell to 1.
    encoded[torch.arange(n_samples, dtype=torch.long), y] = 1
    return encoded

def batch(dataset, numpy=True):
    """Collapse a whole dataset into a single ``(data, label)`` pair wrapped in a list.

    Args:
        dataset: Iterable of samples; element 0 of each sample is a tensor and
            element 1 is its integer label.
        numpy: When truthy, return NumPy arrays; otherwise return torch tensors.

    Returns:
        A one-element list ``[(data, label)]`` where ``data`` stacks every
        sample tensor along a new first dimension and ``label`` holds the
        labels as 64-bit integers.
    """
    # Single pass over the dataset, keeping only the first two fields of each sample.
    pairs = [(sample[0], sample[1]) for sample in dataset]
    stacked = torch.stack([features for features, _ in pairs])
    targets = torch.LongTensor([target for _, target in pairs])
    if not numpy:
        return [(stacked, targets)]
    return [(stacked.numpy(), targets.numpy())]
def mini_batch(dataset, batch_size=128):
    """Wrap ``dataset`` in a shuffling ``DataLoader``.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per mini-batch (default 128).

    Returns:
        A ``torch.utils.data.DataLoader`` over ``dataset`` with ``shuffle=True``.
    """
    loader = torch.utils.data.DataLoader(
        dataset=dataset,
        shuffle=True,
        batch_size=batch_size,
    )
    return loader


def torch_run(epoch_number=10):
    """Train the module-level ``net`` on MNIST and report test accuracy after every epoch.

    Relies on three names defined at module level elsewhere in this notebook:
    ``net`` (the model), ``criterion`` (the loss) and ``optimizer``.

    Args:
        epoch_number: Number of full passes over the training set (default 10).

    Prints one line per epoch showing the loss of the last training mini-batch
    of that epoch (not an epoch average) and the accuracy on the full test set.
    """
    train_dataset, test_dataset = download_mnist()

    # The test set does not change between epochs, so materialise and flatten it
    # once here rather than re-iterating the whole dataset inside the loop.
    test_x, test_y = batch(test_dataset, numpy=False)[0]
    test_x = test_x.view(-1, 28*28)

    for epoch in range(epoch_number):
        for x, y in mini_batch(train_dataset):
            x = x.view(-1, 28*28)            # Flatten each 28x28 image into a 784-vector
            optimizer.zero_grad()            # Clear gradients left over from the previous step
            outputs = net(x)                 # Forward pass: per-class scores for each image
            loss = criterion(outputs, y)     # Loss between the predictions and the integer labels
            loss.backward()                  # Backward pass: compute gradients of the loss
            optimizer.step()                 # Update the parameters using those gradients

        # Evaluation needs no gradients; no_grad avoids building an autograd
        # graph over the entire test set on every epoch.
        with torch.no_grad():
            accuracy = net(test_x).argmax(dim=1).eq(test_y).float().mean().item()
        print(f'Epoch [{epoch+1:d}/{epoch_number:d}], Loss: {loss.item():.4f}, Accuracy: {accuracy:.4f}')
# Entry point: start training only when this code runs as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    torch_run()

Epoch [1/10], Loss: 0.3682, Accuracy: 0.9114
Epoch [2/10], Loss: 0.3364, Accuracy: 0.9269
Epoch [3/10], Loss: 0.3293, Accuracy: 0.9402
Epoch [4/10], Loss: 0.1457, Accuracy: 0.9465
Epoch [5/10], Loss: 0.0934, Accuracy: 0.9527
Epoch [6/10], Loss: 0.0889, Accuracy: 0.9581
Epoch [7/10], Loss: 0.2392, Accuracy: 0.9620
Epoch [8/10], Loss: 0.1756, Accuracy: 0.9640
Epoch [9/10], Loss: 0.0959, Accuracy: 0.9677
Epoch [10/10], Loss: 0.0955, Accuracy: 0.9692
