# Application 6 - Residual Neural Network

In [13]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [14]:
# Device selection: use CUDA when PyTorch reports it as available, else the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Hyper-parameters shared by the cells below.
num_epochs, batch_size, learning_rate = 25, 100, 0.001

# Training-time preprocessing: pad by 4, random horizontal flip,
# random 32x32 crop, then conversion to a tensor.
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
])

# CIFAR-10 splits rooted at ./data. Only the training split asks for a download;
# the test split is converted to tensors without augmentation.
train_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=transforms.ToTensor())

# Batch iterators: the training data is reshuffled, the test data is read in order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False)

Files already downloaded and verified


In [15]:
# define a 3x3 convolution helper and a Residual Block that consists of two 3x3 conv layers
def Conv3x3(in_channels, out_channels, stride = 1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the requested stride."""
    conv_options = dict(kernel_size = 3, stride = stride, padding = 1, bias = False)
    return nn.Conv2d(in_channels, out_channels, **conv_options)

class ResidualBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut connection.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: number of channels produced by both convolutions.
        stride: stride of the first convolution (the second always uses stride 1).
        downsample: optional module applied to the block input to build the
            shortcut branch; when ``None`` the input itself is added back.
    """

    def __init__(self, in_channels, out_channels, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.conv1      = Conv3x3(in_channels, out_channels, stride)
        self.bn1        = nn.BatchNorm2d(out_channels)
        self.relu       = nn.ReLU(inplace = True)
        self.conv2      = Conv3x3(out_channels, out_channels)
        self.bn2        = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        out             = self.conv1(x)
        out             = self.bn1(out)
        out             = self.relu(out)
        out             = self.conv2(out)
        out             = self.bn2(out)
        # Compare with None explicitly: the truthiness of a module container is
        # decided by its __len__, so a plain `if self.downsample:` would silently
        # skip a shortcut module that reports length zero.
        if self.downsample is not None:
            residual    = self.downsample(x)
        else:
            residual    = x
        out             += residual
        out             = self.relu(out)
        return out

In [16]:
# define Residual Network
class ResNet(nn.Module):
    """Three-stage residual network with a 16-channel 3x3 stem.

    The stages produce 16, 32 and 64 channels; the second and third stage use
    stride 2 in their first block. A global average pool and a linear layer
    map the final 64-channel feature map to ``num_classes`` scores.

    Args:
        block: class used for every residual block; it is called as
            ``block(in_channels, out_channels, stride, downsample)``.
        layers: sequence of three ints, the number of blocks in each stage.
        num_classes: size of the output vector.
    """

    def __init__(self, block, layers, num_classes = 10):
        super(ResNet, self).__init__()
        # Running channel count consumed and updated by make_layer. The
        # attribute name (including its spelling) is kept unchanged so that
        # existing references to it still resolve.
        self.in_channeles   = 16
        self.conv           = Conv3x3(3, 16)
        self.bn             = nn.BatchNorm2d(16)
        self.relu           = nn.ReLU(inplace = True)
        self.layer1         = self.make_layer(block, 16, layers[0])
        self.layer2         = self.make_layer(block, 32, layers[1], 2)
        self.layer3         = self.make_layer(block, 64, layers[2], 2)
        # Global average pooling down to 1x1. On the 8x8 maps produced from
        # 32x32 inputs this averages the same 64 values as AvgPool2d(8), but it
        # also keeps the flattened size at 64 for other input resolutions.
        self.avg_pool       = nn.AdaptiveAvgPool2d(1)
        self.fc             = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride = 1):
        """Build one stage of ``blocks`` residual blocks as an ``nn.Sequential``.

        Only the first block receives ``stride``; it also receives a
        conv + batch-norm shortcut when the stride is not 1 or the channel
        count changes. ``self.in_channeles`` is updated to ``out_channels``.
        """
        downsample = None
        if (stride != 1) or (self.in_channeles != out_channels):
            downsample = nn.Sequential(Conv3x3(self.in_channeles, out_channels, stride = stride),
                                       nn.BatchNorm2d(out_channels))

        layers = [block(self.in_channeles, out_channels, stride, downsample)]
        self.in_channeles = out_channels
        layers.extend(block(out_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run stem, the three stages, pooling and the classifier on ``x``."""
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        # flatten everything except the batch dimension before the linear layer
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [17]:
# create an object of ResNet class and move it to the selected device; the
# training and testing loops send every batch to `device`, so the model's
# parameters have to be placed there as well (otherwise a CUDA run fails with
# a device mismatch in the first forward pass)
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

In [18]:
# training the Residual Neural Network
model.train()
decay = 0
steps_per_epoch = len(train_loader)

for epoch in range(num_epochs):
    epoch_number = epoch + 1

    # whenever the 1-based epoch number is a multiple of 20, halve the learning
    # rate once more (relative to the initial value) before that epoch's batches
    if epoch_number % 20 == 0:
        decay += 1
        optimizer.param_groups[0]['lr'] = learning_rate * (0.5 ** decay)
        print("New learning rate: {}".format(optimizer.param_groups[0]['lr']))

    for step, (images, labels) in enumerate(train_loader, start = 1):
        images, labels = images.to(device), labels.to(device)

        # clear old gradients, then forward pass, backward pass and parameter update
        optimizer.zero_grad()
        outputs = model(images)
        loss    = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # report the current batch loss every 100 steps
        if step % 100 == 0:
            print("Epoch: [{}/{}], Step: [{}/{}], Loss: {:.4f}"
            .format(epoch_number, num_epochs, step, steps_per_epoch, loss.item()))

Epoch: [1/25], Step: [100/500], Loss: 1.9848
Epoch: [1/25], Step: [200/500], Loss: 1.4157
Epoch: [1/25], Step: [300/500], Loss: 1.4667
Epoch: [1/25], Step: [400/500], Loss: 1.1841
Epoch: [1/25], Step: [500/500], Loss: 1.1260
Epoch: [2/25], Step: [100/500], Loss: 1.0006
Epoch: [2/25], Step: [200/500], Loss: 1.0053
Epoch: [2/25], Step: [300/500], Loss: 0.9724
Epoch: [2/25], Step: [400/500], Loss: 1.0166
Epoch: [2/25], Step: [500/500], Loss: 0.9832
Epoch: [3/25], Step: [100/500], Loss: 0.9426
Epoch: [3/25], Step: [200/500], Loss: 0.7434
Epoch: [3/25], Step: [300/500], Loss: 0.9765
Epoch: [3/25], Step: [400/500], Loss: 0.6937
Epoch: [3/25], Step: [500/500], Loss: 0.8949
Epoch: [4/25], Step: [100/500], Loss: 0.9076
Epoch: [4/25], Step: [200/500], Loss: 0.8967
Epoch: [4/25], Step: [300/500], Loss: 0.6058
Epoch: [4/25], Step: [400/500], Loss: 0.6604
Epoch: [4/25], Step: [500/500], Loss: 0.7146
Epoch: [5/25], Step: [100/500], Loss: 0.6452
Epoch: [5/25], Step: [200/500], Loss: 0.5550
Epoch: [5/

In [19]:
# testing the model
model.eval()
correct, total = 0, 0

# gradients are not needed while scoring the test set
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # index of the largest score along dim 1 is the predicted class
        predicted = outputs.max(1)[1]
        total   += labels.size(0)
        correct += (predicted == labels).sum().item()

print("Accuracy: {:.2f}".format(100 * correct / total))

Accuracy: 86.04
