In [1]:
# Library import
import gc
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision

In [2]:
# Global configuration shared by the cells below
batch_size, epochs, num_classes = 128, 300, 10  # mini-batch size, training epochs, output classes

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [3]:
# Creating dataloaders
# ToTensor() - Converts an image (H x W x C) in the range [0, 255] to a torch.FloatTensor
# of shape (C x H x W) in the range [0.0, 1.0].
# Normalize(mean=0.5, std=0.5) then maps every channel from [0.0, 1.0] to [-1.0, 1.0].
data_root = '.data/'

# Training pipeline: random 32x32 crop (after 4 pixels of padding) and random
# horizontal flip as data augmentation, followed by tensor conversion + normalization.
data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(32, padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Evaluation pipeline: no augmentation, only tensor conversion + normalization.
test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_set = torchvision.datasets.CIFAR10(data_root, train=True, download=True, transform=data_transforms)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

test_set = torchvision.datasets.CIFAR10(data_root, train=False, download=True, transform=test_transforms)
# Evaluation metrics do not depend on the sample order, so the test set is read
# sequentially; this also keeps any inspected test batch reproducible.
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Statistics printing
# Fetch a single batch with the built-in next(): DataLoader iterators follow the
# standard iterator protocol, and the Python 2 style `.next()` method is not
# available on recent PyTorch releases.
x_batch, y_batch = next(iter(train_loader))
print("Training set: {} samples - Max value: {} - Min value: {}".format(
    len(train_loader.dataset), x_batch.max(), x_batch.min()))

x_batch, y_batch = next(iter(test_loader))
print("Test set: {} samples - Max value: {} - Min value: {}".format(
    len(test_loader.dataset), x_batch.max(), x_batch.min()))

# Shape of the last fetched (test) batch: (batch, channels, height, width)
print("Example batch shape: {}".format(x_batch.shape))

Training set: 50000 samples - Max value: 1.0 - Min value: -1.0
Test set: 10000 samples - Max value: 1.0 - Min value: -1.0
Example batch shape: torch.Size([128, 3, 32, 32])


In [8]:
# Creating our Neural Network - ResNet34
class ResNet34(nn.Module):
    """ResNet-34 style image classifier made of two-convolution residual blocks.

    Layout: a 3x3 stem convolution (stride 1, 64 channels, no max-pooling)
    followed by four stages of [3, 4, 6, 3] residual blocks with
    [64, 128, 256, 512] channels, global average pooling and one linear layer.
    Every stage except the first opens with a ``ConvBlock`` that halves the
    spatial size and doubles the number of channels.

    Args:
        n_classes (int, optional): number of output logits. When omitted, the
            notebook-level ``num_classes`` constant is used.
    """

    class IdentityBlock(nn.Module):
        """Residual block that keeps both the channel count and the spatial size.

        Computes ``activation(x + BN(conv(activation(BN(conv(x))))))`` with two
        3x3 convolutions.

        Args:
            channels (int): number of input (and output) channels.
            activation (callable): activation function.
        """

        def __init__(self, channels, activation):
            super().__init__()

            self.activation = activation

            self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(channels)
            self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(channels)

        def forward(self, x):
            out = self.activation(self.bn1(self.conv1(x)))
            out = self.bn2(self.conv2(out))
            # Identity shortcut: the input is added back before the final activation
            return self.activation(x + out)

    class ConvBlock(nn.Module):
        """Residual block that doubles the channels and halves the spatial size.

        The main path is a stride-2 3x3 convolution followed by a stride-1 3x3
        convolution; the shortcut is a stride-2 1x1 convolution (plus batch
        norm) so that both paths have matching shapes when they are summed.

        Args:
            channels (int): number of OUTPUT channels; the block expects
                ``channels // 2`` input channels.
            activation (callable): activation function.
        """

        def __init__(self, channels, activation):
            super().__init__()

            self.activation = activation

            # Projection shortcut
            self.conv_s = nn.Conv2d(channels // 2, channels, kernel_size=1, stride=2, bias=False)
            self.bn_s = nn.BatchNorm2d(channels)

            # Main path
            self.conv1 = nn.Conv2d(channels // 2, channels, kernel_size=3, stride=2, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(channels)
            self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(channels)

        def forward(self, x):
            short = self.bn_s(self.conv_s(x))

            out = self.activation(self.bn1(self.conv1(x)))
            out = self.bn2(self.conv2(out))
            # Projected shortcut is added back before the final activation
            return self.activation(short + out)

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            # Backward-compatible default: fall back to the notebook-level constant
            n_classes = num_classes

        # Initial convolution before the residual blocks
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        self.blocks = nn.ModuleList()

        # (number of blocks, channels) for each of the four stages
        stages = list(zip([3, 4, 6, 3], [64 * (2 ** i) for i in range(4)]))

        # First stage: the stem already outputs 64 channels, so identity blocks only
        first_count, first_channels = stages[0]
        for _ in range(first_count):
            self.blocks.append(self.IdentityBlock(first_channels, F.relu))

        # Remaining stages: one down-sampling block followed by identity blocks
        for count, channels in stages[1:]:
            self.blocks.append(self.ConvBlock(channels, F.relu))
            for _ in range(count - 1):
                self.blocks.append(self.IdentityBlock(channels, F.relu))

        # Global average pooling: any spatial size -> 1x1@512
        self.average_pooling = nn.AdaptiveAvgPool2d(1)

        # 512 -> n_classes
        self.out_layer = nn.Linear(512, n_classes)

    def forward(self, x):
        """Return the class logits, shape (batch, n_classes), for a batch of 3-channel images."""
        # Stem: for 32x32 inputs, 32x32@3 -> 32x32@64
        res_out = F.relu(self.bn1(self.conv1(x)))

        # Residual stages: for 32x32 inputs, 32x32@64 -> 4x4@512
        for block in self.blocks:
            res_out = block(res_out)

        # Global pooling: HxW@512 -> 1x1@512
        pool_out = self.average_pooling(res_out)

        # Flatten to (batch, 512) and project to the class logits
        return self.out_layer(pool_out.view(pool_out.size(0), -1))

In [9]:
# Creating our Neural Network - ResNet50
class ResNet50(nn.Module):
    """ResNet-50 style image classifier made of bottleneck residual blocks.

    Layout: a 7x7 stride-2 stem convolution (64 channels) and a 3x3 stride-2
    max-pooling, followed by four stages of [3, 4, 6, 3] bottleneck blocks with
    bottleneck widths [64, 128, 256, 512] (i.e. [256, 512, 1024, 2048] output
    channels), global average pooling and one linear layer. Every stage opens
    with a ``ConvBlock`` (projection shortcut); the first stage keeps the
    spatial size, the others halve it.

    Args:
        n_classes (int, optional): number of output logits. When omitted, the
            notebook-level ``num_classes`` constant is used.
    """

    class IdentityBlock(nn.Module):
        """Bottleneck residual block with an identity shortcut.

        The block consumes and produces ``channels * 4`` feature maps; the
        1x1 -> 3x3 -> 1x1 convolutions in between work at width ``channels``.

        Args:
            channels (int): bottleneck width (input/output have ``channels * 4`` channels).
            activation (callable): activation function.
        """

        def __init__(self, channels, activation):
            super().__init__()

            self.activation = activation
            expanded = channels * 4

            # The input is already expanded, so the first 1x1 conv reduces it
            self.conv1 = nn.Conv2d(expanded, channels, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(channels)
            self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(channels)
            self.conv3 = nn.Conv2d(channels, expanded, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn3 = nn.BatchNorm2d(expanded)

        def forward(self, x):
            out = self.activation(self.bn1(self.conv1(x)))
            out = self.activation(self.bn2(self.conv2(out)))
            out = self.bn3(self.conv3(out))
            # Identity shortcut: x and out both have channels * 4 feature maps
            return self.activation(x + out)

    class ConvBlock(nn.Module):
        """Bottleneck residual block with a projection (1x1 convolution) shortcut.

        Used at the start of a stage, where the number of channels and
        (optionally) the spatial size change. The stride is applied by the
        first 1x1 convolution of the main path and by the shortcut convolution,
        so both paths produce tensors of the same shape.

        Args:
            channels (int): bottleneck width; the block outputs ``channels * 4`` channels.
            activation (callable): activation function.
            s (int): stride of the block. Defaults to 2.
            in_channels (int, optional): number of input channels. Defaults to
                ``channels * 2``, the output width of a preceding stage whose
                bottleneck width is ``channels // 2``.
        """

        def __init__(self, channels, activation, s=2, in_channels=None):
            super().__init__()

            self.activation = activation
            if in_channels is None:
                in_channels = channels * 2
            expanded = channels * 4

            # Projection shortcut: match the channels and spatial size of the main path
            self.conv_s = nn.Conv2d(in_channels, expanded, kernel_size=1, stride=s, bias=False)
            self.bn_s = nn.BatchNorm2d(expanded)

            # Main path; 1x1 convolutions need no padding
            self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=1, stride=s, padding=0, bias=False)
            self.bn1 = nn.BatchNorm2d(channels)
            self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(channels)
            self.conv3 = nn.Conv2d(channels, expanded, kernel_size=1, stride=1, padding=0, bias=False)
            self.bn3 = nn.BatchNorm2d(expanded)

        def forward(self, x):
            short = self.bn_s(self.conv_s(x))

            out = self.activation(self.bn1(self.conv1(x)))
            out = self.activation(self.bn2(self.conv2(out)))
            out = self.bn3(self.conv3(out))
            # Projected shortcut is added back before the final activation
            return self.activation(short + out)

    def __init__(self, n_classes=None):
        super().__init__()
        if n_classes is None:
            # Backward-compatible default: fall back to the notebook-level constant
            n_classes = num_classes

        # Initial convolution and pooling before the residual blocks
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.init_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.blocks = nn.ModuleList()

        # (number of blocks, bottleneck width) for each of the four stages
        stages = zip([3, 4, 6, 3], [64 * (2 ** i) for i in range(4)])
        in_channels = 64  # channels produced by the stem
        for stage_idx, (count, width) in enumerate(stages):
            # The stem already down-samples, so the first stage keeps the resolution
            stride = 1 if stage_idx == 0 else 2
            self.blocks.append(self.ConvBlock(width, F.relu, stride, in_channels))
            for _ in range(count - 1):
                self.blocks.append(self.IdentityBlock(width, F.relu))
            in_channels = width * 4

        # Global average pooling: any spatial size -> 1x1@2048
        self.average_pooling = nn.AdaptiveAvgPool2d(1)

        # 2048 -> n_classes
        self.linear = nn.Linear(in_channels, n_classes)

    def forward(self, x):
        """Return the class logits, shape (batch, n_classes), for a batch of 3-channel images."""
        # Stem: for 32x32 inputs, 32x32@3 -> 16x16@64 -> 8x8@64
        res_out = F.relu(self.bn1(self.conv1(x)))
        res_out = self.init_pool(res_out)

        # Residual stages: for 32x32 inputs, 8x8@64 -> 1x1@2048
        for block in self.blocks:
            res_out = block(res_out)

        # Global pooling: HxW@2048 -> 1x1@2048
        pool_out = self.average_pooling(res_out)

        # Flatten to (batch, 2048) and project to the class logits
        return self.linear(pool_out.view(pool_out.size(0), -1))

In [10]:
# Build the model on the selected device and display its layers
gc.collect()              # collect unreachable Python objects before allocating the model
torch.cuda.empty_cache()  # release cached, currently unused GPU memory
net = ResNet34().to(device)
print(net)

ResNet34(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (blocks): ModuleList(
    (0): IdentityBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): IdentityBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_sta

In [None]:
# Loss function, optimizer and learning-rate schedule used by the training loop
criterion = nn.CrossEntropyLoss()

# Plain SGD over every parameter of the network
optimizer = optim.SGD(
    net.parameters(),
    lr=0.1,
    weight_decay=1e-6,
)

# Reduces the learning rate when the metric passed to scheduler.step() stops improving
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    patience=5,
)

In [None]:
# Running epoch counter: the training cell prints `epoch + 1` in its log line
# and increments this variable at the end of every epoch.
epoch = 0

In [None]:
# Start training
# Runs `epochs` passes over the training set; after each pass the network is
# evaluated on the test set, one summary line is printed and the scheduler is
# stepped with the test loss.
print("\n---- Start Training ----")
best_accuracy = -1
n_train = len(train_loader.dataset)
n_test = len(test_loader.dataset)
for _ in range(epochs):

    # TRAIN THE NETWORK
    train_loss, train_correct = 0.0, 0
    net.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        pred = outputs.argmax(dim=1)  # index of the largest logit = predicted class
        train_correct += pred.eq(targets).sum().item()

        # criterion returns the MEAN loss of the batch: weight it by the batch
        # size so that dividing by the dataset size below yields the true
        # per-sample average (also correct for a smaller last batch).
        train_loss += loss.item() * targets.size(0)

    train_loss /= n_train
    train_accuracy = 100. * train_correct / n_train

    # TEST NETWORK
    net.eval()
    test_loss, correct = 0.0, 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            # .item() keeps the accumulator a Python float instead of a device tensor
            test_loss += criterion(outputs, targets).item() * targets.size(0)
            pred = outputs.argmax(dim=1)
            correct += pred.eq(targets).sum().item()

    test_loss /= n_test
    test_accuracy = 100. * correct / n_test

    # Current learning rate as stored in the optimizer (last parameter group)
    current_lr = optimizer.param_groups[-1]['lr']

    print("[Epoch {}] LR: {:.4f} - Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
            epoch + 1, current_lr, train_loss, test_loss, train_accuracy, test_accuracy
        ))

    best_accuracy = max(best_accuracy, test_accuracy)

    # The plateau scheduler monitors the test loss
    scheduler.step(test_loss)

    epoch += 1

print("Finished Training")
print("Best Test accuracy: {:.2f}%".format(best_accuracy))

In [None]:
# Persist the network weights and the optimizer state.
# torch.save does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs("./models", exist_ok=True)
torch.save(net.state_dict(), "./models/cifar_conv_param_state.pt")
torch.save(optimizer.state_dict(), "./models/cifar_conv_optim_state.pt")