# 11. Convolutional Neural Networks (CNNs)
## 11-1. Image Classification with LeNet-5

In [1]:
# Define relevant variables for the ML task
batch_size = 64
num_classes = 10
learning_rate = 0.001
num_epochs = 10

In [2]:
import torchvision

train_dataset = torchvision.datasets.MNIST(root='.', train=True, download=True)
print('Min Pixel Value: {} \nMax Pixel Value: {}'.format(train_dataset.data.min(), train_dataset.data.max()))
print('Mean Pixel Value {} \nPixel Values Std: {}'.format(train_dataset.data.float().mean(), train_dataset.data.float().std()))
print('Scaled Mean Pixel Value {} \nScaled Pixel Values Std: {}'.format(train_dataset.data.float().mean() / 255, train_dataset.data.float().std() / 255))

Min Pixel Value: 0 
Max Pixel Value: 255
Mean Pixel Value 33.31842041015625 
Pixel Values Std: 78.56748962402344
Scaled Mean Pixel Value 0.13066047430038452 
Scaled Pixel Values Std: 0.30810779333114624


In [3]:
test_dataset = torchvision.datasets.MNIST(root='.', train=False, download=True)
print('Min Pixel Value: {} \nMax Pixel Value: {}'.format(test_dataset.data.min(), test_dataset.data.max()))
print('Mean Pixel Value {} \nPixel Values Std: {}'.format(test_dataset.data.float().mean(), test_dataset.data.float().std()))
print('Scaled Mean Pixel Value {} \nScaled Pixel Values Std: {}'.format(test_dataset.data.float().mean() / 255, test_dataset.data.float().std() / 255))

Min Pixel Value: 0 
Max Pixel Value: 255
Mean Pixel Value 33.791221618652344 
Pixel Values Std: 79.17247009277344
Scaled Mean Pixel Value 0.1325145959854126 
Scaled Pixel Values Std: 0.3104802668094635


In [5]:
import torch

#Loading the dataset and preprocessing
train_dataset = torchvision.datasets.MNIST(root = './data',
                                           train = True,
                                           transform = torchvision.transforms.Compose([
                                                  torchvision.transforms.Resize((32,32)),
                                                  torchvision.transforms.ToTensor(),
                                                  torchvision.transforms.Normalize(mean = (0.1307,), std = (0.3081,))]),
                                           download = True)


test_dataset = torchvision.datasets.MNIST(root = './data',
                                          train = False,
                                          transform = torchvision.transforms.Compose([
                                                  torchvision.transforms.Resize((32,32)),
                                                  torchvision.transforms.ToTensor(),
                                                  torchvision.transforms.Normalize(mean = (0.1325,), std = (0.3105,))]),
                                          download=True)

train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
                                           batch_size = batch_size,
                                           shuffle = True)

In [6]:
train_dataset.data.shape

torch.Size([60000, 28, 28])

In [7]:
train_dataset.targets

tensor([5, 0, 4,  ..., 5, 6, 8])

In [8]:
# number of classes
K = len(set(train_dataset.targets.numpy()))
print("number of classes:", K)

number of classes: 10


In [10]:
import torch.nn as nn

# Define the model
class LeNet5(nn.Module):
    def __init__(self, K):
        super(LeNet5, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )
        # http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
        # "No zero padding, non-unit strides"
        # https://pytorch.org/docs/stable/nn.html
        self.dense_layers = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(400, 120),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, K)
        )
  
    def forward(self, X):
        out = self.conv_layers(X)
        out = out.view(out.size(0), -1)
        out = self.dense_layers(out)
        return out

model = LeNet5(num_classes).to(device)
model

LeNet5(
  (conv_layers): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense_layers): Sequential(
    (0): Dropout(p=0.2, inplace=False)
    (1): Linear(in_features=400, out_features=120, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=120, out_features=84, bias=True)
    (5): ReLU()
    (6): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [11]:
# Create a loss function for multi-class classification
loss_fn = nn.CrossEntropyLoss()

# Create an optimizer for multi-class classification
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

total_step = len(train_loader)
total_step

938

In [14]:
# Loop through data
for epoch in range(num_epochs):
  # Training
  for i, (images, labels) in enumerate(train_loader):
      images = images.to(device)
      labels = labels.to(device)

      # Forward pass
      outputs = model(images)
      loss = loss_fn(outputs, labels)

      # Backward & optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

      if (i + 1) % 400 == 0:
          print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1/10], Step [400/938], Loss: 0.0209
Epoch [1/10], Step [800/938], Loss: 0.0154
Epoch [2/10], Step [400/938], Loss: 0.0931
Epoch [2/10], Step [800/938], Loss: 0.0954
Epoch [3/10], Step [400/938], Loss: 0.0467
Epoch [3/10], Step [800/938], Loss: 0.1767
Epoch [4/10], Step [400/938], Loss: 0.0360
Epoch [4/10], Step [800/938], Loss: 0.0144
Epoch [5/10], Step [400/938], Loss: 0.1090
Epoch [5/10], Step [800/938], Loss: 0.0107
Epoch [6/10], Step [400/938], Loss: 0.0227
Epoch [6/10], Step [800/938], Loss: 0.0312
Epoch [7/10], Step [400/938], Loss: 0.0247
Epoch [7/10], Step [800/938], Loss: 0.0248
Epoch [8/10], Step [400/938], Loss: 0.0003
Epoch [8/10], Step [800/938], Loss: 0.0679
Epoch [9/10], Step [400/938], Loss: 0.0147
Epoch [9/10], Step [800/938], Loss: 0.0694
Epoch [10/10], Step [400/938], Loss: 0.0181
Epoch [10/10], Step [800/938], Loss: 0.0727


In [15]:
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 98.56 %
