In [79]:
# Library import
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision

import gc

In [80]:
# Constants definition
batch_size = 200
epochs = 200
num_classes = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [81]:
# Creating dataloaders
# ToTensor() - Converts a Image (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0]
data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomAffine(degrees=90, translate=(0.1, 0.1), scale=(1.1, 1.1)),
    torchvision.transforms.RandomHorizontalFlip(p=0.5),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_set = torchvision.datasets.CIFAR10('.data/', train=True, download=True, transform=data_transforms)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

test_set = torchvision.datasets.CIFAR10('.data/', train=False, download=True, transform=test_transforms)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [82]:
# Statistics printing
x_batch, y_batch = iter(train_loader).next()
print("Training set: {} samples - Max value: {} - Min value: {}".format(len(train_loader.dataset),
                                                                        x_batch.max(), x_batch.min()))
x_batch, y_batch = iter(test_loader).next()
print("Test set: {} samples - Max value: {} - Min value: {}".format(len(test_loader.dataset),
                                                                    x_batch.max(), x_batch.min()))
print("Example batch shape: {}".format(x_batch.shape))

Training set: 50000 samples - Max value: 1.0 - Min value: -1.0
Test set: 10000 samples - Max value: 1.0 - Min value: -1.0
Example batch shape: torch.Size([200, 3, 32, 32])


In [83]:
# There are no GaussianNoise Layer in Pytorch
# https://discuss.pytorch.org/t/writing-a-simple-gaussian-noise-layer-in-pytorch/4694/4
class GaussianNoise(nn.Module):
    """Gaussian noise regularizer.
    Args:
        sigma (float, optional): relative standard deviation used to generate the
            noise. Relative means that it will be multiplied by the magnitude of
            the value your are adding the noise to. This means that sigma can be
            the same regardless of the scale of the vector.
        is_relative_detach (bool, optional): whether to detach the variable before
            computing the scale of the noise. If `False` then the scale of the noise
            won't be seen as a constant but something to optimize: this will bias the
            network to generate vectors with smaller values.
    """

    def __init__(self, sigma=0.1, is_relative_detach=True):
        super().__init__()
        self.sigma = sigma
        self.is_relative_detach = is_relative_detach
        self.noise = torch.tensor(0).to(device).float()

    def forward(self, x):
        if self.training and self.sigma != 0:
            scale = self.sigma * x.detach() if self.is_relative_detach else self.sigma * x
            sampled_noise = self.noise.repeat(*x.size()).normal_() * scale
            x = x + sampled_noise
        return x
    
class ResNetBlock(nn.Module):
    """Residual network block.
    Args:
        channels (int): number of channels.
    """

    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)
        '''self.conv3 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn3 = nn.BatchNorm2d(channels)'''

    def forward(self, x):
        '''out = F.elu(self.bn1(self.conv1(x)))
        out = F.elu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out = F.elu(x + out)  # resnet connection plus activation'''
        out = F.elu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = F.elu(x + out)  # resnet connection plus activation
        return out
    
class ReducingBlock(nn.Module):
    """Dimension reducer block.
    Args:
        inp_channels (int): number of input channels.
        out_channels (int): number of output channels.
    """

    def __init__(self, inp_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(inp_channels, out_channels, kernel_size=1, stride=2, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        out = self.bn(self.conv(x))
        return out

# Creating our Neural Network - ResNet18
class CustomResNet(nn.Module):
    def __init__(self, n_reductions=4, n_convs=4):
        super(CustomResNet, self).__init__()
        # Initial convolution before resnet blocks
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        
        self.blocks = nn.ModuleList()
        
        for chan in [64*(2**i) for i in range(n_reductions-1)]:
            for conv in range(n_convs):
                self.blocks.append(ResNetBlock(chan))
            self.blocks.append(ReducingBlock(chan, chan*2))
            self.blocks.append(nn.Dropout2d(p=0.1))
        for conv in range(n_convs):
            self.blocks.append(ResNetBlock(64*(2**(n_reductions-1))))

        ########## 1x1@512
        # Final pooling
        self.average_pooling = nn.AvgPool2d(4)

        ########## 512@num_classes
        # To connect to the number of classes
        self.linear = nn.Linear(64*(2**(n_reductions-1)), num_classes)

    def forward(self, x):
        #### 32x32@3 -> 32x32@64
        # 0. Initial convolution ==> 
        initial_conv = F.elu(self.bn1(self.conv1(x)))
        
        res_out = initial_conv
        for b in self.blocks:
            res_out = b(res_out)

        #### 4x4@512 -> 1x1@512
        pool_out = self.average_pooling(res_out)

        #### 512 -> num_classes
        fc_out = self.linear(pool_out.view(pool_out.size(0), -1))

        return fc_out

In [84]:
# Instantiating the network and printing its architecture
gc.collect()
torch.cuda.empty_cache()
net = CustomResNet()
net = net.to(device)
print(net)

CustomResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (blocks): ModuleList(
    (0): ResNetBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): ResNetBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [85]:
# Training hyperparameters
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-6)
optimizer = optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-4, momentum=0.9)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

In [86]:
epoch = 0

In [87]:
# Start training
print("\n---- Start Training ----")
best_accuracy = -1
for i in range(epochs):

    # TRAIN THE NETWORK
    train_loss, train_correct = 0, 0
    net.train()
    for inputs, targets in train_loader:
        # data is a list of [inputs, labels]
        inputs, targets = inputs.to(device), targets.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        _, pred = outputs.max(1)  # get the index of the max log-probability
        train_correct += pred.eq(targets).sum().item()

        # print statistics
        train_loss += loss.item()

    train_loss /= len(train_loader.dataset)

    # TEST NETWORK
    net.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            test_loss += criterion(outputs, targets)
            _, pred = outputs.max(1)  # get the index of the max log-probability
            correct += pred.eq(targets).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    
    # Get current learning rate via the optimizer
    for param_group in optimizer.param_groups:
        current_lr = param_group['lr']
    
    print("[Epoch {}] LR: {:.4f} - Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
            epoch + 1, current_lr, train_loss, test_loss, 100. * train_correct / len(train_loader.dataset), test_accuracy
        ))

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        
    scheduler.step(test_loss)
    
    epoch += 1

print("Finished Training")
print("Best Test accuracy: {:.2f}%".format(best_accuracy))


---- Start Training ----
[Epoch 1] LR: 0.1000 - Train Loss: 0.013802 - Test Loss: 0.009636 - Train Accuracy: 20.77% - Test Accuracy: 28.46%
[Epoch 2] LR: 0.1000 - Train Loss: 0.009595 - Test Loss: 0.009275 - Train Accuracy: 29.00% - Test Accuracy: 30.58%
[Epoch 3] LR: 0.1000 - Train Loss: 0.009080 - Test Loss: 0.008608 - Train Accuracy: 33.65% - Test Accuracy: 35.90%
[Epoch 4] LR: 0.1000 - Train Loss: 0.008755 - Test Loss: 0.008382 - Train Accuracy: 35.84% - Test Accuracy: 38.82%
[Epoch 5] LR: 0.1000 - Train Loss: 0.008476 - Test Loss: 0.008117 - Train Accuracy: 38.52% - Test Accuracy: 40.88%
[Epoch 6] LR: 0.1000 - Train Loss: 0.008271 - Test Loss: 0.007973 - Train Accuracy: 39.84% - Test Accuracy: 42.10%
[Epoch 7] LR: 0.1000 - Train Loss: 0.008014 - Test Loss: 0.007674 - Train Accuracy: 42.02% - Test Accuracy: 44.92%
[Epoch 8] LR: 0.1000 - Train Loss: 0.007784 - Test Loss: 0.007958 - Train Accuracy: 43.40% - Test Accuracy: 44.02%
[Epoch 9] LR: 0.1000 - Train Loss: 0.007520 - Test Los

[Epoch 72] LR: 0.0100 - Train Loss: 0.002321 - Test Loss: 0.002409 - Train Accuracy: 83.69% - Test Accuracy: 83.83%
[Epoch 73] LR: 0.0100 - Train Loss: 0.002302 - Test Loss: 0.002357 - Train Accuracy: 83.74% - Test Accuracy: 83.97%
[Epoch 74] LR: 0.0100 - Train Loss: 0.002273 - Test Loss: 0.002380 - Train Accuracy: 84.05% - Test Accuracy: 83.92%
[Epoch 75] LR: 0.0100 - Train Loss: 0.002278 - Test Loss: 0.002356 - Train Accuracy: 84.24% - Test Accuracy: 84.14%
[Epoch 76] LR: 0.0100 - Train Loss: 0.002257 - Test Loss: 0.002391 - Train Accuracy: 84.20% - Test Accuracy: 83.65%
[Epoch 77] LR: 0.0100 - Train Loss: 0.002243 - Test Loss: 0.002418 - Train Accuracy: 84.43% - Test Accuracy: 83.73%
[Epoch 78] LR: 0.0100 - Train Loss: 0.002232 - Test Loss: 0.002323 - Train Accuracy: 84.42% - Test Accuracy: 84.05%
[Epoch 79] LR: 0.0100 - Train Loss: 0.002215 - Test Loss: 0.002303 - Train Accuracy: 84.50% - Test Accuracy: 84.39%
[Epoch 80] LR: 0.0100 - Train Loss: 0.002243 - Test Loss: 0.002305 - Tra

[Epoch 143] LR: 0.0001 - Train Loss: 0.001626 - Test Loss: 0.002042 - Train Accuracy: 88.71% - Test Accuracy: 86.74%
[Epoch 144] LR: 0.0001 - Train Loss: 0.001625 - Test Loss: 0.002048 - Train Accuracy: 88.66% - Test Accuracy: 86.70%
[Epoch 145] LR: 0.0001 - Train Loss: 0.001628 - Test Loss: 0.002047 - Train Accuracy: 88.63% - Test Accuracy: 86.79%
[Epoch 146] LR: 0.0001 - Train Loss: 0.001620 - Test Loss: 0.002047 - Train Accuracy: 88.61% - Test Accuracy: 86.63%
[Epoch 147] LR: 0.0001 - Train Loss: 0.001611 - Test Loss: 0.002047 - Train Accuracy: 88.67% - Test Accuracy: 86.70%
[Epoch 148] LR: 0.0000 - Train Loss: 0.001612 - Test Loss: 0.002047 - Train Accuracy: 88.63% - Test Accuracy: 86.81%
[Epoch 149] LR: 0.0000 - Train Loss: 0.001628 - Test Loss: 0.002039 - Train Accuracy: 88.67% - Test Accuracy: 86.79%
[Epoch 150] LR: 0.0000 - Train Loss: 0.001625 - Test Loss: 0.002058 - Train Accuracy: 88.53% - Test Accuracy: 86.68%
[Epoch 151] LR: 0.0000 - Train Loss: 0.001639 - Test Loss: 0.002

In [88]:
torch.save(net.state_dict(), "./models/cifar_conv_param_state.pt")
torch.save(optimizer.state_dict(), "./models/cifar_conv_optim_state.pt")