In [1]:
from mxnet import nd
from mxnet.gluon import nn
from mxnet import init

In [2]:
def conv_block(channels):
    net = nn.Sequential()
    with net.name_scope():
        net.add(
            nn.BatchNorm(),
            nn.Activation(activation='relu'),
            nn.Conv2D(channels,kernel_size=3, padding=1)
        )
    return net


In [3]:
def conv_block1(channels):
    net = nn.Sequential()
    with net.name_scope():
        net.add(
            nn.BatchNorm(),
            nn.Activation(activation='relu'),
            nn.Conv2D(channels,kernel_size=1),
            nn.BatchNorm(),
            nn.Activation(activation='relu'),
            nn.Conv2D(channels,kernel_size=3, padding=1)
        )
    return net

In [4]:
net = conv_block1(10)
net.initialize()
x = nd.random.uniform(shape=(4,3,6,6))
y = net(x)
y.shape
# net.collect_params()

(4, 10, 6, 6)

In [5]:
class DenseBlock(nn.Block):
    def __init__(self,layers,channels,**kwargs):
        super(DenseBlock,self).__init__(**kwargs)
        self.net = nn.Sequential()
        for layer in range(layers):
            self.net.add(conv_block(channels))
        
    def forward(self,x):
        for b in self.net: 
            out = b(x)
            x = nd.concat(x,out,dim=1)
        return x
        
            

In [6]:
net1 = DenseBlock(2,10)
net1.initialize()
x = nd.random.uniform(shape = (4,3,6,6))
y = net1(x)
print(y.shape)
# net1.collect_params()

(4, 23, 6, 6)


In [7]:
def transformBlock(channels):
    net = nn.Sequential()
    net.add(
        nn.BatchNorm(),
        nn.Activation('relu'),
        nn.Conv2D(channels,kernel_size=1),
        nn.AvgPool2D(pool_size=2,strides=2)
    )
    return net
    

In [8]:
x.shape

(4, 3, 6, 6)

In [9]:
net3 = transformBlock(10)
net3.initialize()
net3(x).shape

(4, 10, 3, 3)

In [10]:
# net3.collect_params()|

In [11]:

nets =[]
class DenseNet(nn.Block):
    def __init__(self,growth_gate,channels,num_classes,layers,**kwargs):
        super(DenseNet,self).__init__(**kwargs)
        self.layers = layers
        self.channels = channels
        with self.name_scope():
            net1 = nn.Sequential()
            net1.add(
                nn.Conv2D(self.channels,kernel_size=7,strides=2,padding=3),
                nn.MaxPool2D(pool_size=3,strides=2,padding=1)
            )
            for i,layer in enumerate(self.layers):
                net = nn.Sequential()
                net.add(DenseBlock(layer,growth_gate))
                self.channels += layer*growth_gate
#                 print('channel: %d'%self.channels)
                if i != len(self.layers)-1:
                    net.add(transformBlock(self.channels//2))
                nets.append(net)
            net2 = nn.Sequential()
            net2.add(
                nn.BatchNorm(),
                nn.Activation('relu'),
                nn.AvgPool2D(pool_size=7),
                nn.Dense(num_classes,activation='softrelu')
            )
            self.net = nn.Sequential()
            self.net.add(net1,nets[0],nets[1],nets[2],nets[3],net2)
#             print(nets[0])
            
    def forward(self,x):
        out = x
        for i,b in enumerate(self.net):
            out = b(out)
#             print('block %d shape :%s'%(i+1,out.shape))
        return out

                    
                

In [12]:
# layers = [6,12,24,16] #densenet121
# growth_gate = 32
# net = DenseNet(growth_gate=32,channels=64,num_classes=10,layers=layers)
# net.initialize()
# x = nd.random.uniform(shape = (4,3,224,224))
# y = net(x)
# y.shape

In [13]:
from mxnet import init
import utils1
from mxnet import gluon

train_data,test_data = utils1.load_data_fashion_mnist(batch_size=4,resize=32)
ctx = utils1.try_gpu()
print(ctx)
net = DenseNet(growth_gate=32,channels=64,num_classes=10,layers=[6,12,24,16])
net.initialize(ctx=ctx,init=init.MSRAPrelu())

loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.1})




gpu(0)


In [14]:
# utils1.train(train_data,test_data,net,loss,trainer,ctx,num_epochs=1)

In [19]:
init_channels = 64
growth_rate = 32
block_layers = [6, 12, 24, 16]
num_classes = 10

def dense_net():
    net = nn.Sequential()
    # add name_scope on the outermost Sequential
    with net.name_scope():
        # first block
        net.add(
            nn.Conv2D(init_channels, kernel_size=7,
                      strides=2, padding=3),
            nn.BatchNorm(),
            nn.Activation('relu'),
            nn.MaxPool2D(pool_size=3, strides=2, padding=1)
        )
        # dense blocks
        channels = init_channels
        for i, layers in enumerate(block_layers):
            net.add(DenseBlock(layers, growth_rate))
            channels += layers * growth_rate
            if i != len(block_layers)-1:
                net.add(transformBlock(channels//2))
        # last block
        net.add(
            nn.BatchNorm(),
            nn.Activation('relu'),
            nn.AvgPool2D(pool_size=1),
            nn.Flatten(),
            nn.Dense(num_classes)
        )
    return net

In [20]:
import utils1
from mxnet import gluon
from mxnet import init

train_data, test_data = utils1.load_data_fashion_mnist(
    batch_size=16, resize=32)

ctx = utils1.try_gpu()
net = dense_net()
net.initialize(ctx=ctx, init=init.Xavier())

loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.Trainer(net.collect_params(),
                        'sgd', {'learning_rate': 0.1})
utils1.train(train_data, test_data, net, loss,
            trainer, ctx, num_epochs=1)

Start training on  gpu(0)
Epoch 0. Loss: 0.654, Train acc 0.79, Test acc 0.86, Time 334.7 sec
