<a href="https://colab.research.google.com/github/xingchenzhao/study_deep_learning/blob/master/ResNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install d2lzh  # installing d2l
!pip install -U --pre mxnet-cu101mkl  # updating mxnet to at least v1.6

In [0]:
import d2lzh as d2l
from mxnet import gluon, init, nd
from mxnet.gluon import nn

In [0]:
class Residual(nn.Block):
  """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

  Args:
    num_channels: Number of output channels of every convolution in the block.
    use_1x1conv: If True, the input is passed through a 1x1 convolution on the
      skip path so that it can be added to the output of the main path.
    strides: Stride of the first 3x3 convolution and of the 1x1 skip
      convolution. ``None`` (the default) keeps the previous behaviour:
      2 when ``use_1x1conv`` is True, otherwise 1.
    **kwargs: Forwarded to ``nn.Block``.

  Note:
    Without the 1x1 convolution the input is added to the main-path output
    unchanged, so its shape has to be compatible with that output (normally:
    the same number of channels and ``strides`` equal to 1).
  """

  def __init__(self, num_channels, use_1x1conv=False, strides=None, **kwargs):
    super(Residual, self).__init__(**kwargs)
    if strides is None:
      # Backward-compatible default: down-sample only when a projection exists.
      strides = 2 if use_1x1conv else 1
    # Only the first convolution is strided; the second keeps the spatial size.
    self.conv1 = nn.Conv2D(num_channels, kernel_size=3, padding=1,
                           strides=strides)
    self.conv2 = nn.Conv2D(num_channels, kernel_size=3, padding=1)
    if use_1x1conv:
      # Projection on the skip path, strided like conv1 so both paths agree.
      self.conv3 = nn.Conv2D(num_channels, kernel_size=1, strides=strides)
    else:
      self.conv3 = None
    self.bn1 = nn.BatchNorm()
    self.bn2 = nn.BatchNorm()

  def forward(self, X):
    """Return relu(F(X) + shortcut(X)), where F is conv-bn-relu-conv-bn."""
    Y = nd.relu(self.bn1(self.conv1(X)))
    Y = self.bn2(self.conv2(Y))
    # Test for presence explicitly instead of relying on a Block's truthiness.
    if self.conv3 is not None:
      X = self.conv3(X)
    return nd.relu(Y + X)


Let's check the input and output shapes.

In [0]:
# Default block (use_1x1conv=False): stride 1 and no projection on the skip
# path, so the output shape is expected to equal the input shape.
blk = Residual(3)
blk.initialize()
# Random input of shape (4, 3, 6, 6); the 3 matches num_channels above so the
# skip addition is valid.
X = nd.random.uniform(shape = (4,3,6,6))
blk(X).shape

(4, 3, 6, 6)

With `use_1x1conv=True` the block uses a stride of 2, so the number of channels changes to `num_channels` (here 3 -> 6) while the height and width are halved (6 -> 3).

In [0]:
# With use_1x1conv=True the block strides by 2 and projects the skip path to
# 6 channels; X is the (4, 3, 6, 6) input created in the previous cell.
blk = Residual(6, use_1x1conv=True)
blk.initialize()
blk(X).shape

(4, 6, 3, 3)

ResNet Model

In [0]:
# Stem of the network: 7x7 stride-2 convolution, batch norm, ReLU,
# then a 3x3 stride-2 max pooling layer.
net = nn.Sequential()
net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3))
net.add(nn.BatchNorm())
net.add(nn.Activation('relu'))
net.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))

In [0]:
def resnet_blk(num_channels, num_residuals, first_block=False):
  """Build one ResNet stage as a Sequential of `num_residuals` Residual blocks.

  Args:
    num_channels: Channel count passed to every Residual block in the stage.
    num_residuals: How many Residual blocks to stack.
    first_block: If True, no block in the stage uses the 1x1 convolution;
      otherwise only the first block is created with use_1x1conv=True.

  Returns:
    An nn.Sequential containing the stacked Residual blocks.
  """
  stage = nn.Sequential()
  for idx in range(num_residuals):
    # Only the leading block of a non-first stage gets the 1x1 projection.
    needs_projection = (idx == 0) and (not first_block)
    stage.add(Residual(num_channels, use_1x1conv=needs_projection))
  return stage

In [0]:
# ResNet-34 body: four stages with 3, 4, 6 and 3 residual blocks.
# The first stage keeps 64 channels and is built with first_block=True.
net.add(resnet_blk(64, 3, first_block=True))
net.add(resnet_blk(128, 4))
net.add(resnet_blk(256, 6))
net.add(resnet_blk(512, 3))

In [0]:
# Classification head: global average pooling followed by a 10-unit dense layer.
net.add(nn.GlobalAvgPool2D())
net.add(nn.Dense(10))

Check the output shape of each top-level layer of the network.

In [0]:
# Push a random (1, 1, 224, 224) input through the network one top-level
# layer at a time and print the shape after each layer.
X = nd.random.uniform(shape=(1, 1, 224, 224))
net.initialize()
for layer in net:
  # X is rebound to the layer output so the next layer receives it as input.
  X = layer(X)
  print(layer.name, 'output shape\t', X.shape)

conv5 output shape	 (1, 64, 112, 112)
batchnorm4 output shape	 (1, 64, 112, 112)
relu0 output shape	 (1, 64, 112, 112)
pool0 output shape	 (1, 64, 56, 56)
sequential1 output shape	 (1, 64, 56, 56)
sequential2 output shape	 (1, 128, 28, 28)
sequential3 output shape	 (1, 256, 14, 14)
sequential4 output shape	 (1, 512, 7, 7)
pool1 output shape	 (1, 512, 1, 1)
dense0 output shape	 (1, 10)


Training the ResNet on Fashion-MNIST

In [0]:
# Hyper-parameters and target device for the training run.
lr = 0.05
num_epochs = 15
batch_size = 256
ctx = d2l.try_gpu()

# Re-initialise the parameters on the chosen device with Xavier init;
# force_reinit is set because the network was already initialised earlier.
net.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)

# Plain SGD over all network parameters.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate':lr})

# Fashion-MNIST iterators from the d2l helper, called with resize=96.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

d2l.train_ch5(net, train_iter, test_iter, batch_size, trainer, ctx, num_epochs)

training on gpu(0)
epoch 1, loss 0.7401, train acc 0.792, test acc 0.865, time 48.0 sec
epoch 2, loss 0.2835, train acc 0.895, test acc 0.903, time 45.5 sec
epoch 3, loss 0.2206, train acc 0.918, test acc 0.913, time 45.5 sec
epoch 4, loss 0.1787, train acc 0.934, test acc 0.918, time 45.6 sec
epoch 5, loss 0.1451, train acc 0.946, test acc 0.922, time 45.5 sec
epoch 6, loss 0.1154, train acc 0.958, test acc 0.912, time 45.5 sec
epoch 7, loss 0.0914, train acc 0.967, test acc 0.898, time 45.4 sec
epoch 8, loss 0.0718, train acc 0.973, test acc 0.921, time 45.5 sec
epoch 9, loss 0.0602, train acc 0.978, test acc 0.917, time 45.6 sec
epoch 10, loss 0.0453, train acc 0.983, test acc 0.901, time 45.5 sec
epoch 11, loss 0.0337, train acc 0.988, test acc 0.919, time 45.4 sec
epoch 12, loss 0.0258, train acc 0.991, test acc 0.905, time 45.4 sec
epoch 13, loss 0.0202, train acc 0.993, test acc 0.920, time 45.5 sec
epoch 14, loss 0.0224, train acc 0.992, test acc 0.901, time 45.5 sec
epoch 15, 