# Import torch & device info


In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn.init
import torch.nn as nn

In [2]:
# Select the GPU when PyTorch reports CUDA as available, otherwise use the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'avaiable device : {device}')

# Fix the random seed for reproducibility.
SEED = 777
torch.manual_seed(SEED)
if device == 'cuda':
    # Apply the same seed to the CUDA generators as well.
    torch.cuda.manual_seed_all(SEED)

avaiable device : cuda


 # Prepare data & data loader


In [3]:
# MNIST train and test splits, stored under MNIST_data/ (download=True is
# passed, so the files are fetched when needed). Each sample goes through
# transforms.ToTensor() when it is read.
mnist_train, mnist_test = (
    torchvision.datasets.MNIST(root='MNIST_data/',
                               train=is_train_split,
                               transform=transforms.ToTensor(),
                               download=True)
    for is_train_split in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9912422/9912422 [00:03<00:00, 2995799.05it/s]


Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28881/28881 [00:00<00:00, 141094.01it/s]


Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1648877/1648877 [00:01<00:00, 1468372.05it/s]


Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4542/4542 [00:00<00:00, 1174508.56it/s]


Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw



In [4]:
# dataset loaders
batch_size = 100

# drop_last=True discards a final batch that is smaller than batch_size, so
# every training batch has the same number of samples.
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           drop_last=True)

# Accuracy does not depend on sample order, so the test set is read
# sequentially; this keeps evaluation independent of the RNG state.
test_loader = torch.utils.data.DataLoader(dataset=mnist_test,
                                          batch_size=batch_size,
                                          shuffle=False)

# Define my Neural Network


Check https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#conv2d

In [5]:
# CNN model: two conv/ReLU/max-pool stages followed by one linear classifier
class FasterDumber(nn.Module):
    """Small CNN that maps single-channel 28x28 images to 10 class scores.

    The network is two (Conv2d -> ReLU -> MaxPool2d) stages followed by a
    fully connected layer whose weight is Xavier-uniform initialised.
    """

    def __init__(self):
        super().__init__()
        # Tensor layout everywhere: (batch_size, C, H, W)

        # Stage 1, input (batch_size, 1, 28, 28)
        #   conv1 -> (batch_size, 32, 28, 28)   3x3 kernel, padding 1 keeps H and W
        #   relu1 -> (batch_size, 32, 28, 28)
        #   pool1 -> (batch_size, 32, 14, 14)   2x2 window, stride 2 halves H and W
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32,
                               kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2, input (batch_size, 32, 14, 14)
        #   conv2 -> (batch_size, 64, 14, 14)
        #   relu2 -> (batch_size, 64, 14, 14)
        #   pool2 -> (batch_size, 64,  7,  7)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64,
                               kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier: 7*7*64 flattened features -> 10 outputs
        self.fc = nn.Linear(in_features=7 * 7 * 64, out_features=10, bias=True)
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Return the (batch_size, 10) class scores for a batch of images.

        Args:
            x: tensor of shape (batch_size, 1, 28, 28).
        """
        stage1 = self.pool1(self.relu1(self.conv1(x)))       # (batch_size, 32, 14, 14)
        stage2 = self.pool2(self.relu2(self.conv2(stage1)))  # (batch_size, 64, 7, 7)

        # Flatten everything except the batch dimension: (batch_size, 7*7*64)
        flat = stage2.view(stage2.size(0), -1)
        return self.fc(flat)                                 # (batch_size, 10)

## Using nn.Sequential()

# Set training protocols

In [6]:
# Build the CNN, then move it to the selected device
model = FasterDumber()
model = model.to(device)

In [10]:
# hyperparameters
learning_rate = 0.001
training_epochs = 100

# loss function and optimizer
# CrossEntropyLoss works on raw scores; softmax is computed internally.
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.to(device)
# AdamW (chosen in place of SGD) over all model parameters
optimizer = torch.optim.AdamW(params=model.parameters(), lr=learning_rate)

# Training myNN

In [None]:
# train myNN
total_batch = len(train_loader)
print('Learning started.')

# Enter training mode explicitly: the validation cell calls model.eval(), so
# re-running this cell afterwards would otherwise train in evaluation mode.
model.train()

for epoch in range(training_epochs):
    running_loss = 0.0  # mean loss over this epoch's batches
    for data, labels in train_loader:
        # Images are already (1x28x28), so no reshape is needed;
        # labels are class indices, not one-hot vectors.

        # Move the batch to the same device as the model.
        data = data.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()              # clear gradients from the previous step
        outputs = model(data)              # forward pass
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()                    # backpropagation
        optimizer.step()                   # weight update

        # Each batch contributes 1/total_batch, so the sum is the epoch mean.
        running_loss += loss.item() / total_batch

    print('[Epoch: {:>4}] loss = {:>.9}'.format(epoch + 1, running_loss))

print('Learning Finished.')


Learning started.
[Epoch:    1] loss = 0.2257503
[Epoch:    2] loss = 0.0632388154
[Epoch:    3] loss = 0.0465501602
[Epoch:    4] loss = 0.0377348344
[Epoch:    5] loss = 0.0317652104
[Epoch:    6] loss = 0.0264377126
[Epoch:    7] loss = 0.0223373118
[Epoch:    8] loss = 0.0188700217
[Epoch:    9] loss = 0.0167222377
[Epoch:   10] loss = 0.0138172164
[Epoch:   11] loss = 0.0107341183
[Epoch:   12] loss = 0.0103400059
[Epoch:   13] loss = 0.00923958748
[Epoch:   14] loss = 0.00724088352
[Epoch:   15] loss = 0.00709193482
[Epoch:   16] loss = 0.00709302625
[Epoch:   17] loss = 0.00501579948
[Epoch:   18] loss = 0.00427677528
[Epoch:   19] loss = 0.00515349944
[Epoch:   20] loss = 0.0037447117
[Epoch:   21] loss = 0.00341161404
[Epoch:   22] loss = 0.00437436724
[Epoch:   23] loss = 0.00280236387
[Epoch:   24] loss = 0.00258682111
[Epoch:   25] loss = 0.00340472394
[Epoch:   26] loss = 0.0015854699
[Epoch:   27] loss = 0.00405851698
[Epoch:   28] loss = 0.00323520521
[Epoch:   29] loss 

# Validate myNN

Refer to the torch.max() documentation: https://pytorch.org/docs/stable/generated/torch.max.html

In [None]:
correct = 0
total = 0

# Switch the model to evaluation mode before testing.
model.eval()

# Gradients are not needed for evaluation, so disable autograd tracking.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # torch.max along dim 1 returns (values, indices); the index of the
        # largest score in each row is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of FasterDumber on the test set: %.4f %% ' % (100 * correct/total))

Accuracy of myNN on the test set: 87.3200 % 


# Save the model

In [None]:
# Write the model's state_dict to model.ckpt
torch.save(obj=model.state_dict(), f='model.ckpt')

In [None]:
# List the working directory (presumably to confirm model.ckpt was written)
!ls

MNIST_data  model.ckpt	sample_data
