In [1]:
from mxnet import ndarray as nd
from mxnet.gluon import nn
from mxnet import autograd

In [2]:
# define residual block
class Residual(nn.Block):
    """Basic residual block: conv3x3-BN-ReLU, conv3x3-BN, add shortcut, ReLU.

    Parameters
    ----------
    channels : int
        Number of output channels of every convolution in the block.
    same_shape : bool, default True
        When True the first convolution uses stride 1 and the input is added
        back unchanged. When False the first convolution uses stride 2 and the
        shortcut goes through a 1x1, stride-2 convolution (``conv3``) so that
        it can be added to the main branch.
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        self.same_shape = same_shape
        self.stride = 1 if same_shape else 2

        # Main branch; only the first convolution may change the resolution.
        self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1,
                               strides=self.stride)
        self.bn1 = nn.BatchNorm()
        self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm()

        # Projection for the shortcut, needed only when the shape changes.
        if not same_shape:
            self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                   strides=self.stride)

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        hidden = nd.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(hidden))
        # The shortcut is evaluated after the main branch, as before.
        shortcut = x if self.same_shape else self.conv3(x)
        return nd.relu(residual + shortcut)
    

    

In [3]:
# Smoke test: a down-sampling Residual block applied to a random batch.
block = Residual(channels=8, same_shape=False)
block.initialize()
x = nd.random.uniform(shape=(4, 3, 6, 6))
y = block(x)
y.shape

(4, 8, 3, 3)

In [27]:

class ResidualProposed(nn.Block):
    """Pre-activation residual block: BN-ReLU-conv3x3 twice, then add shortcut.

    Batch normalisation and ReLU are applied *before* each convolution, and
    nothing is applied after the addition, so the shortcut path carries the
    input through unchanged (or through a single 1x1 projection).

    Parameters
    ----------
    channels : int
        Number of output channels of every convolution in the block.
    same_shape : bool, default True
        When True the first convolution uses stride 1 and the input is added
        back unchanged. When False the first convolution uses stride 2 and the
        shortcut goes through a 1x1, stride-2 convolution (``conv3``).
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self,channels,same_shape=True,**kwargs):
        super(ResidualProposed,self).__init__(**kwargs)
        self.same_shape = same_shape
        self.stride = 1 if self.same_shape else 2
        self.bn1 = nn.BatchNorm()
        self.conv1 = nn.Conv2D(channels,kernel_size=3,padding=1,strides=self.stride)

        self.bn2 = nn.BatchNorm()
        self.conv2 = nn.Conv2D(channels, kernel_size=3,padding=1)
        if not self.same_shape:
            self.conv3 = nn.Conv2D(channels,kernel_size=1,strides=self.stride)

    def forward(self,x):
        """Return ``main_branch(x) + shortcut(x)`` with no trailing activation."""
        out = self.conv1(nd.relu(self.bn1(x)))
        out = self.conv2(nd.relu(self.bn2(out)))
        if not self.same_shape:
            x = self.conv3(x)
        # A pre-activation unit must not apply ReLU after the addition:
        # the activations already precede each convolution, and a trailing
        # ReLU would clip the shortcut signal and break the identity mapping.
        return out + x
            

In [29]:
# Smoke test for the pre-activation block. Swap in
# ResidualProposed(128, False) to exercise the down-sampling path instead.
block = ResidualProposed(64)
block.initialize()
x = nd.random.uniform(shape=(4,64,6,6))
# Run the forward pass here so the printed shape belongs to this block
# rather than to a `y` left over from an earlier cell.
y = block(x)
print(y.shape)

(4, 64, 6, 6)


In [4]:
class Residual3(nn.Block):
    """Bottleneck residual block: conv1x1-BN-ReLU, conv3x3-BN-ReLU, conv1x1-BN.

    The shortcut always goes through a 1x1 convolution so that its channel
    count matches ``out_channels`` before the addition; the sum is passed
    through ReLU.

    Parameters
    ----------
    in_channels : int
        Output channel count of the first two convolutions (the bottleneck
        width). Note that, despite the name, it is not the channel count of
        the incoming tensor.
    out_channels : int
        Output channel count of the last convolution and of the shortcut
        projection.
    same_shape : bool, default True
        When True all convolutions use stride 1. When False the first
        convolution and the shortcut projection use stride 2.
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self, in_channels, out_channels, same_shape=True, **kwargs):
        super(Residual3, self).__init__(**kwargs)
        self.same_shape = same_shape
        self.stride = 1 if self.same_shape else 2

        # In the paper every block outputs at full size; when the channel
        # count changes, the spatial size has to be halved.
        self.conv1 = nn.Conv2D(in_channels, kernel_size=1,
                               strides=self.stride)
        self.bn1 = nn.BatchNorm()

        self.conv2 = nn.Conv2D(in_channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm()

        self.conv3 = nn.Conv2D(out_channels, kernel_size=1, strides=1)
        self.bn3 = nn.BatchNorm()

        # When the size is halved, the original input x must be projected to
        # the new output channel count and halved in size as well; otherwise
        # only the channel count is projected.
        if self.same_shape:
            self.conv4 = nn.Conv2D(out_channels, kernel_size=1)
        else:
            self.conv4_1 = nn.Conv2D(out_channels, kernel_size=1,
                                     strides=self.stride)

    def forward(self, x):
        """Return ``relu(bottleneck(x) + projection(x))``."""
        squeezed = nd.relu(self.bn1(self.conv1(x)))
        mixed = nd.relu(self.bn2(self.conv2(squeezed)))
        expanded = self.bn3(self.conv3(mixed))
        # Pick whichever shortcut projection was created in __init__.
        project = self.conv4 if self.same_shape else self.conv4_1
        return nd.relu(expanded + project(x))

In [5]:
# Smoke test: a down-sampling bottleneck block on a random batch.
# Use Residual3(64, 256) to try the shape-preserving variant instead.
block = Residual3(in_channels=128, out_channels=512, same_shape=False)
block.initialize()
x = nd.random.uniform(shape=(4, 64, 6, 6))
y = block(x)
print(y.shape)


(4, 512, 3, 3)


In [6]:
class Resnet50(nn.Block):
    """ResNet-50-style classifier assembled from ``Residual3`` bottlenecks.

    The network is a chain of six sub-networks stored in ``self.b``:

    1. 7x7, stride-2 convolution with 64 channels;
    2. 3x3, stride-2 max pooling followed by three ``Residual3(64, 256)``;
    3. one down-sampling plus three plain ``Residual3(128, 512)``;
    4. one down-sampling plus five plain ``Residual3(256, 1024)``;
    5. one down-sampling plus two plain ``Residual3(512, 2048)``;
    6. 3x3 average pooling followed by ``Dense(num_classes)``.

    Parameters
    ----------
    num_classes : int
        Number of units of the final dense layer.
    verbose : bool, default False
        When True, ``forward`` prints the output shape of each sub-network.
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(Resnet50, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            stem = nn.Sequential()
            stem.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3))

            stage2 = nn.Sequential()
            stage2.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            for _ in range(3):
                stage2.add(Residual3(64, 256))

            # (bottleneck width, output channels, plain blocks that follow
            # the leading down-sampling block)
            stage_specs = [(128, 512, 3), (256, 1024, 5), (512, 2048, 2)]
            later_stages = []
            for mid_channels, out_channels, num_plain in stage_specs:
                stage = nn.Sequential()
                stage.add(Residual3(mid_channels, out_channels, False))
                for _ in range(num_plain):
                    stage.add(Residual3(mid_channels, out_channels))
                later_stages.append(stage)

            head = nn.Sequential()
            head.add(nn.AvgPool2D(pool_size=3))
            head.add(nn.Dense(num_classes))

            # chain net
            self.b = nn.Sequential()
            for part in [stem, stage2] + later_stages + [head]:
                self.b.add(part)

    def forward(self, x):
        """Run ``x`` through every sub-network in order and return the result."""
        out = x
        for idx, stage in enumerate(self.b, 1):
            out = stage(out)
            if self.verbose:
                print('block %d shape: %s' % (idx, out.shape))
        return out
            

In [7]:
# Shape walk-through of the ResNet-50 variant on a 224x224 RGB batch.
net50 = Resnet50(num_classes=10, verbose=True)
net50.initialize()

x = nd.random.uniform(shape=(4, 3, 224, 224))
y = net50(x)
y.shape


block 1 shape: (4, 64, 112, 112)
block 2 shape: (4, 256, 56, 56)
block 3 shape: (4, 512, 28, 28)
block 4 shape: (4, 1024, 14, 14)
block 5 shape: (4, 2048, 7, 7)
block 6 shape: (4, 10)


(4, 10)

In [8]:
class Resnet(nn.Block):
    """ResNet-18-style classifier assembled from ``Residual`` blocks.

    The network is a chain of six sub-networks stored in ``self.net``:
    a 7x7 stride-2 convolution, a max-pooling stage with two 64-channel
    blocks, three stages (128, 256, 512 channels) that each start with a
    down-sampling block followed by one plain block, and a head made of
    3x3 average pooling plus ``Dense(num_classes)``.

    Parameters
    ----------
    num_classes : int
        Number of units of the final dense layer.
    verbose : bool, default False
        When True, ``forward`` prints the output shape of each sub-network.
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self, num_classes, verbose=False, **kwargs):
        super(Resnet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            stem = nn.Sequential()
            stem.add(nn.Conv2D(64, kernel_size=7, strides=2))

            stage2 = nn.Sequential()
            stage2.add(nn.MaxPool2D(pool_size=3, strides=2))
            stage2.add(Residual(64))
            stage2.add(Residual(64))

            # Each later stage halves the resolution once, then keeps it.
            later_stages = []
            for width in (128, 256, 512):
                stage = nn.Sequential()
                stage.add(Residual(width, same_shape=False))
                stage.add(Residual(width))
                later_stages.append(stage)

            head = nn.Sequential()
            head.add(nn.AvgPool2D(pool_size=3))
            head.add(nn.Dense(num_classes))

            self.net = nn.Sequential()
            for part in [stem, stage2] + later_stages + [head]:
                self.net.add(part)

    def forward(self, x):
        """Run ``x`` through every sub-network in order and return the result."""
        out = x
        for idx, stage in enumerate(self.net, 1):
            out = stage(out)
            if self.verbose:
                print('block %d shape : %s' % (idx, out.shape))
        return out
        

In [9]:
class Resnet34(nn.Block):
    """ResNet-34-style classifier assembled from ``Residual`` blocks.

    The network is a chain of six sub-networks stored in ``self.net``:
    a 7x7 stride-2 convolution; a max-pooling stage with three 64-channel
    blocks; stages of 4, 6 and 3 blocks with 128, 256 and 512 channels, each
    starting with a down-sampling block; and a head made of 3x3 average
    pooling plus ``Dense(num_classes)``.

    Parameters
    ----------
    num_classes : int
        Number of units of the final dense layer.
    verbose : bool, default False
        When True, ``forward`` prints the output shape of each sub-network.
    **kwargs
        Forwarded to ``nn.Block``.
    """

    def __init__(self,num_classes,verbose=False,**kwargs):
        super(Resnet34,self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            # Stem convolution.
            b1 = nn.Sequential()
            b1.add(nn.Conv2D(64,kernel_size=7,strides=2))

            # 64-channel stage; resolution is reduced by the max pooling.
            b2 = nn.Sequential()
            b2.add(nn.MaxPool2D(pool_size=3,strides=2),
                   Residual(64),
                   Residual(64),
                   Residual(64)
                  )

            # 128-channel stage; the first block down-samples.
            b3 = nn.Sequential()
            b3.add(Residual(128,same_shape=False),
                   Residual(128),
                   Residual(128),
                   Residual(128)
                  )

            # 256-channel stage; the first block down-samples.
            b4 = nn.Sequential()
            b4.add(Residual(256,False),
                   Residual(256),
                   Residual(256),
                   Residual(256),
                   Residual(256),
                   Residual(256),
                  )

            # 512-channel stage; the first block down-samples.
            b5 = nn.Sequential()
            b5.add(Residual(512,False),
                   Residual(512),
                   Residual(512),
                  )

            # Classification head.
            b6 = nn.Sequential()
            b6.add(nn.AvgPool2D(pool_size=3),
                  nn.Dense(num_classes)
                  )

            self.net = nn.Sequential()
            self.net.add(b1,b2,b3,b4,b5,b6)


    def forward(self,x):
        """Run ``x`` through every sub-network in order and return the result."""
        out = x
        for i,b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                # ``shape`` is a tuple attribute, not a method; calling it
                # would raise TypeError as soon as verbose output is enabled.
                print('block %d shape: %s'%(i+1,out.shape))
        return out

In [10]:
# Shape walk-through of the ResNet-50 variant on a 96x96 RGB batch.
net = Resnet50(num_classes=10, verbose=True)
net.initialize()

x = nd.random.uniform(shape=(4, 3, 96, 96))
y = net(x)

block 1 shape: (4, 64, 48, 48)
block 2 shape: (4, 256, 24, 24)
block 3 shape: (4, 512, 12, 12)
block 4 shape: (4, 1024, 6, 6)
block 5 shape: (4, 2048, 3, 3)
block 6 shape: (4, 10)


In [11]:
# Gather the parameters of whatever network `net` is currently bound to.
param = net.collect_params()
# Uncomment one of the lines below to inspect a single parameter or the
# whole collection. NOTE(review): the array saved under this cell presumably
# came from running the first line - confirm before relying on it.
# param.get('dense0_weight').data()
# param


[[-0.02610619 -0.03885062  0.05612955 ..., -0.04622412  0.06981292
  -0.02007455]
 [ 0.05836449  0.05372658 -0.01032274 ...,  0.06461724  0.01257394
   0.0698004 ]
 [ 0.00400377  0.04627431 -0.01657203 ..., -0.06152606  0.00436205
  -0.01263054]
 ..., 
 [-0.00737525 -0.02245046  0.06219975 ...,  0.0640495  -0.05439839
   0.00171988]
 [ 0.00825685 -0.02088102 -0.01504967 ..., -0.04874242  0.02617098
   0.05599461]
 [ 0.04695705 -0.01793427 -0.06092513 ...,  0.05259696 -0.06517988
   0.00960138]]
<NDArray 10x2048 @cpu(0)>

In [12]:
# Make the notebook's directory importable, then load the helper module.
# The path must be extended before `import utils1` runs.
import sys
sys.path.append('./')
import utils1
from mxnet import init
from mxnet import gluon
# Data iterators from the helper module, called with batch_size=64, resize=96.
train_data,test_data = utils1.load_data_fashion_mnist(batch_size=64,resize=96)
# NOTE(review): presumably returns a GPU context when one is available and
# falls back to CPU otherwise - confirm in utils1.
ctx = utils1.try_gpu()

# Resnet with a 10-unit output layer, Xavier-initialised on `ctx`.
net = Resnet(10)
net.initialize(ctx=ctx,init = init.Xavier())
loss = gluon.loss.SoftmaxCrossEntropyLoss()
# Plain SGD with learning rate 0.05 over all parameters of `net`.
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.05})

# Training is left disabled in this cell.
# utils1.train(train_data,test_data,net,loss,trainer,ctx,num_epochs=1)


In [13]:
# Make the notebook's directory importable, then load the helper module.
# The path must be extended before `import utils1` runs.
import sys
sys.path.append('./')
import utils1
from mxnet import gluon
from mxnet import init

# Data iterators from the helper module, called with batch_size=16, resize=96.
train_data, test_data = utils1.load_data_fashion_mnist(
    batch_size=16, resize=96)

# NOTE(review): presumably returns a GPU context when one is available and
# falls back to CPU otherwise - confirm in utils1.
ctx = utils1.try_gpu()
# Three models with a 10-unit output layer, each Xavier-initialised on `ctx`.
# This rebinds `net`, replacing any network an earlier cell stored there.
net = Resnet(10)
net.initialize(ctx=ctx, init=init.Xavier())

net1 = Resnet34(10)
net1.initialize(ctx=ctx,init=init.Xavier())

net2 = Resnet50(10)
net2.initialize(ctx=ctx,init=init.Xavier())

# One shared loss; one SGD trainer (learning rate 0.01) per model so each
# optimiser only updates its own network's parameters.
loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),
                        'sgd', {'learning_rate': 0.01})
trainer34 = gluon.Trainer(net1.collect_params(),
                        'sgd', {'learning_rate': 0.01})

trainer50 = gluon.Trainer(net2.collect_params(),
                        'sgd', {'learning_rate': 0.01})


In [14]:
# Number of fields in the first dataset item.
# NOTE(review): presumably a (data, label) pair - confirm against utils1.
len(train_data.dataset[0])
# train_data.dataset[0]

2

In [15]:
# utils1.train(train_data, test_data, net, loss,trainer, ctx, num_epochs=2)
#Epoch 0. Loss: 0.439, Train acc 0.84, Test acc 0.88, Time 101.8 sec
#Epoch 1. Loss: 0.280, Train acc 0.90, Test acc 0.81, Time 102.0 sec

In [16]:
# utils1.train(train_data, test_data, net1, loss,trainer34, ctx, num_epochs=2)
#Epoch 0. Loss: 0.478, Train acc 0.83, Test acc 0.89, Time 182.2 sec

In [17]:
# Train the Resnet50 instance (net2) for two epochs with its own trainer.
utils1.train(train_data, test_data, net2, loss, trainer50, ctx, num_epochs=2)
# Note: the final Dense layer does not use an activation function.
# Result of an earlier run:
# Epoch 0. Loss: 0.522, Train acc 0.81, Test acc 0.87, Time 599.5 sec


Start training on  gpu(0)
Epoch 0. Loss: 0.530, Train acc 0.81, Test acc 0.86, Time 599.3 sec
Epoch 1. Loss: 0.331, Train acc 0.88, Test acc 0.86, Time 596.3 sec
