In [1]:
# Library import
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision

import gc

In [2]:
# Constants definition
batch_size = 128
epochs = 300
num_classes = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
# Creating dataloaders
data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.RandomCrop(32, padding=4),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomAffine(degrees=5, translate=(0.010, 0.005), scale=(0.995, 1.005)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

test_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_set = torchvision.datasets.CIFAR10('.data/', train=True, download=True, transform=data_transforms)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)

test_set = torchvision.datasets.CIFAR10('.data/', train=False, download=True, transform=test_transforms)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Statistics printing
x_batch, y_batch = iter(train_loader).next()
print("Training set: {} samples - Max value: {} - Min value: {}".format(len(train_loader.dataset),
                                                                        x_batch.max(), x_batch.min()))
x_batch, y_batch = iter(test_loader).next()
print("Test set: {} samples - Max value: {} - Min value: {}".format(len(test_loader.dataset),
                                                                    x_batch.max(), x_batch.min()))
print("Example batch shape: {}".format(x_batch.shape))

Training set: 50000 samples - Max value: 1.0 - Min value: -1.0
Test set: 10000 samples - Max value: 1.0 - Min value: -1.0
Example batch shape: torch.Size([128, 3, 32, 32])


In [5]:
# Creating our Neural Network - PreActResNet18
class PreActResNet18(nn.Module):
    class IdentityBlock(nn.Module):
        """Residual network block.
        Args:
            channels (int): number of channels.
        """

        def __init__(self, channels):
            super().__init__()

            self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(channels)
            self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(channels)

        def forward(self, x):
            out = F.relu(self.bn1(x))
            out = self.conv1(out)
            out = self.conv2(F.relu(self.bn2(out)))
            out = x + out  # resnet connection

            return out

    class ConvBlock(nn.Module):
        """Residual network block.
        Args:
            channels (int): number of channels.
        """

        def __init__(self, channels):
            super().__init__()

            self.conv_s = nn.Conv2d(channels//2, channels, kernel_size=1, stride=2, bias=False)

            self.conv1 = nn.Conv2d(channels//2, channels, kernel_size=3, stride=2, padding=1, bias=False)
            self.bn1 = nn.BatchNorm2d(channels//2)
            self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=False)
            self.bn2 = nn.BatchNorm2d(channels)

        def forward(self, x):
            out = F.relu(self.bn1(x))
            short = self.conv_s(out)
            out = self.conv1(out)
            out = self.conv2(F.relu(self.bn2(out)))
            out = short + out  # resnet connection

            return out
    
    def __init__(self):
        super(PreActResNet18, self).__init__()
        # Initial convolution before resnet blocks
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        
        self.blocks = nn.ModuleList()
        
        convs = list(zip([3, 4, 6, 3],[64*(2**i) for i in range(4)]))
        for _ in range(convs[0][0]):
            self.blocks.append(self.IdentityBlock(convs[0][1]))
        for i, chs in convs[1:]:
            self.blocks.append(self.ConvBlock(chs))
            for _ in range(i-1):
                self.blocks.append(self.IdentityBlock(chs))

        ########## 1x1@512
        # Final pooling
        self.average_pooling = nn.AvgPool2d(4)

        ########## 512@num_classes
        # To connect to the number of classes
        self.out_layer = nn.Linear(512, num_classes)

    def forward(self, x):
        #### 32x32@3 -> 32x32@64
        # 0. Initial convolution ==> 
        initial_conv = self.conv1(x)
        
        res_out = initial_conv
        for b in self.blocks:
            res_out = b(res_out)

        #### 4x4@512 -> 1x1@512
        #print(res_out.shape)
        pool_out = self.average_pooling(res_out)
        #print(pool_out.shape)
        
        #### 512 -> num_classes
        fc_out = self.out_layer(pool_out.view(pool_out.size(0), -1))

        return fc_out

In [6]:
# Instantiating the network and printing its architecture
gc.collect()
torch.cuda.empty_cache()
net = PreActResNet18()
net = net.to(device)
print(net)

PreActResNet18(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (blocks): ModuleList(
    (0): IdentityBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): IdentityBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (2): IdentityBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3

In [7]:
# Training hyperparameters
criterion = nn.CrossEntropyLoss()
#optimizer = optim.Adam(net.parameters(), lr=0.1, weight_decay=1e-6)
optimizer = optim.SGD(net.parameters(), lr=0.1, weight_decay=1e-6)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

In [8]:
epoch = 0

In [9]:
# Start training
print("\n---- Start Training ----")
best_accuracy = -1
for i in range(epochs):

    # TRAIN THE NETWORK
    train_loss, train_correct = 0, 0
    net.train()
    for inputs, targets in train_loader:
        # data is a list of [inputs, labels]
        inputs, targets = inputs.to(device), targets.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        _, pred = outputs.max(1)  # get the index of the max log-probability
        train_correct += pred.eq(targets).sum().item()

        # print statistics
        train_loss += loss.item()

    train_loss /= len(train_loader.dataset)

    # TEST NETWORK
    net.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            test_loss += criterion(outputs, targets)
            _, pred = outputs.max(1)  # get the index of the max log-probability
            correct += pred.eq(targets).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = 100. * correct / len(test_loader.dataset)
    
    # Get current learning rate via the optimizer
    for param_group in optimizer.param_groups:
        current_lr = param_group['lr']
    
    print("[Epoch {}] LR: {:.4f} - Train Loss: {:.6f} - Test Loss: {:.6f} - Train Accuracy: {:.2f}% - Test Accuracy: {:.2f}%".format(
            epoch + 1, current_lr, train_loss, test_loss, 100. * train_correct / len(train_loader.dataset), test_accuracy
        ))

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        
    scheduler.step(test_loss)
    
    epoch += 1

print("Finished Training")
print("Best Test accuracy: {:.2f}%".format(best_accuracy))


---- Start Training ----
[Epoch 1] LR: 0.1000 - Train Loss: 0.015390 - Test Loss: 0.014825 - Train Accuracy: 30.64% - Test Accuracy: 34.46%
[Epoch 2] LR: 0.1000 - Train Loss: 0.011535 - Test Loss: 0.011483 - Train Accuracy: 46.24% - Test Accuracy: 47.55%
[Epoch 3] LR: 0.1000 - Train Loss: 0.009588 - Test Loss: 0.009729 - Train Accuracy: 55.97% - Test Accuracy: 56.54%
[Epoch 4] LR: 0.1000 - Train Loss: 0.008003 - Test Loss: 0.008344 - Train Accuracy: 63.65% - Test Accuracy: 64.20%
[Epoch 5] LR: 0.1000 - Train Loss: 0.006832 - Test Loss: 0.008688 - Train Accuracy: 69.27% - Test Accuracy: 66.58%
[Epoch 6] LR: 0.1000 - Train Loss: 0.005844 - Test Loss: 0.007865 - Train Accuracy: 74.11% - Test Accuracy: 67.91%
[Epoch 7] LR: 0.1000 - Train Loss: 0.005190 - Test Loss: 0.007386 - Train Accuracy: 76.93% - Test Accuracy: 70.37%
[Epoch 8] LR: 0.1000 - Train Loss: 0.004613 - Test Loss: 0.005978 - Train Accuracy: 79.54% - Test Accuracy: 74.52%
[Epoch 9] LR: 0.1000 - Train Loss: 0.004246 - Test Los

KeyboardInterrupt: 

In [10]:
torch.save(net.state_dict(), "./models/cifar_preact_resnet_param_state.pt")
torch.save(optimizer.state_dict(), "./models/cifar_preact_resnet_optim_state.pt")