<a href="https://colab.research.google.com/github/yananma/5_programs_per_day/blob/master/1105.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 4.1 模型构造

In [0]:
!pip install mxnet d2lzh

In [0]:
from mxnet import nd
from mxnet.gluon import nn

class MLP(nn.Block):
    """Multilayer perceptron: a 256-unit ReLU hidden layer and a 10-unit output layer."""

    def __init__(self, **kwargs):
        # Keyword arguments are forwarded unchanged to nn.Block.
        super().__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')  # hidden layer
        self.output = nn.Dense(10)  # output layer

    def forward(self, x):
        """Feed ``x`` through the hidden layer and then the output layer."""
        hidden_out = self.hidden(x)
        return self.output(hidden_out)

In [16]:
# Build a random (2, 20) input, instantiate the MLP, initialize its
# parameters with the default initializer, and run one forward pass.
X = nd.random.uniform(shape=(2, 20))
net = MLP()
net.initialize()
net(X)  # the recorded result is a 2x10 NDArray


[[-0.0822941   0.07456148  0.06385002 -0.04937973  0.0698424   0.02713358
  -0.07743802 -0.00322946  0.02611737 -0.04490523]
 [-0.06431118  0.07135497  0.09599175 -0.06451458  0.09757216  0.04676059
  -0.04679815  0.02725542  0.04404476 -0.04410914]]
<NDArray 2x10 @cpu(0)>

In [0]:
class MySequential(nn.Block):
    """Minimal sequential container: children are applied one after another."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def add(self, block):
        """Register ``block`` as a child of this container, keyed by ``block.name``."""
        self._children[block.name] = block

    def forward(self, x):
        """Feed ``x`` through every child, in the order ``self._children.values()`` yields them."""
        result = x
        for layer in self._children.values():
            result = layer(result)
        return result

In [18]:
# Assemble the same two layers as MLP (Dense(256, relu) -> Dense(10)),
# this time by adding them one at a time to a MySequential container.
net = MySequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
net(X)  # X is the (2, 20) input created in an earlier cell


[[-0.01043473  0.01791799  0.02722769  0.01189875 -0.03495108  0.07704126
   0.00182015 -0.00797976 -0.043986   -0.02848949]
 [-0.02556653  0.01028888  0.02033464  0.01762148 -0.0338972   0.03672597
   0.03781967 -0.03908284 -0.05747136  0.01365921]]
<NDArray 2x10 @cpu(0)>

In [0]:
class FancyMLP(nn.Block):
    """Block whose forward pass mixes layers, raw nd operations and Python control flow."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # A (20, 20) random matrix registered as a constant via params.get_constant.
        constant_value = nd.random.uniform(shape=(20, 20))
        self.rand_weight = self.params.get_constant('rand_weight', constant_value)
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        """Return the sum of all elements of the transformed input as an NDArray."""
        x = self.dense(x)
        # Multiply by the constant matrix, add 1, and apply ReLU using plain nd functions.
        projected = nd.dot(x, self.rand_weight.data())
        x = nd.relu(projected + 1)
        # The same Dense layer is applied a second time, so its parameters are reused.
        x = self.dense(x)
        # Halve in place until the norm is at most 1.
        while x.norm().asscalar() > 1:
            x /= 2
        # NOTE(review): this adds 10 to every element rather than rescaling by 10;
        # confirm that `+=` (and not `*=`) is what was intended.
        if x.norm().asscalar() < 0.8:
            x += 10
        return x.sum()

In [21]:
# Run the (2, 20) input X from an earlier cell through FancyMLP; forward()
# ends with x.sum(), so the recorded result is a one-element NDArray.
net = FancyMLP()
net.initialize()
net(X)


[403.08377]
<NDArray 1 @cpu(0)>

In [0]:
class NestMLP(nn.Block):
    """Block that nests an nn.Sequential (Dense 64 -> Dense 32) ahead of a Dense(16) layer."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.net = nn.Sequential()
        self.net.add(
            nn.Dense(64, activation='relu'),
            nn.Dense(32, activation='relu'),
        )
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        """Apply the nested Sequential first, then the final Dense layer."""
        features = self.net(x)
        return self.dense(features)

In [24]:
# Custom blocks and built-in layers can be chained in one nn.Sequential:
# NestMLP -> Dense(20) -> FancyMLP.
net = nn.Sequential()
net.add(NestMLP(), nn.Dense(20), FancyMLP())

net.initialize()
net(X)  # FancyMLP is last, so the recorded result is again a one-element NDArray


[3.7801871]
<NDArray 1 @cpu(0)>

## 4.2 模型参数的访问、初始化和共享

In [0]:
from mxnet import init, nd 
from mxnet.gluon import nn 

# Network used throughout this section: Dense(256, relu) -> Dense(10),
# initialized with the default initializer.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()

# Run one forward pass on a (2, 20) batch before inspecting parameters; the
# deferred-initialization cells later in this notebook show the initializer
# only being invoked on the first forward pass.
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

In [26]:
# Show the parameters held by the first layer (index 0) of the Sequential.
net[0].params

dense19_ (
  Parameter dense19_weight (shape=(256, 20), dtype=float32)
  Parameter dense19_bias (shape=(256,), dtype=float32)
)

In [27]:
# Inspect the type of the container returned by .params.
type(net[0].params)

mxnet.gluon.parameter.ParameterDict

In [28]:
# Look the weight up by its full key in the layer's parameter dictionary.
# The key is built from the layer's own prefix: auto-generated names such as
# "dense19_" depend on how many Dense layers the kernel has already created,
# so a hard-coded key raises KeyError after Restart & Run All.
net[0].params[net[0].prefix + 'weight']

Parameter dense19_weight (shape=(256, 20), dtype=float32)

In [29]:
# The layer's .weight attribute displays the same Parameter as the keyed lookup.
net[0].weight

Parameter dense19_weight (shape=(256, 20), dtype=float32)

In [30]:
# .data() returns the parameter's current values as an NDArray.
net[0].weight.data()


[[-0.06763797  0.02341988  0.02750754 ... -0.0163656   0.0271539
   0.04293371]
 [-0.057276   -0.00545905 -0.03811367 ...  0.017037    0.00038091
  -0.01689015]
 [ 0.05912871  0.00589813  0.00579332 ... -0.03548513  0.06329998
   0.00878964]
 ...
 [-0.04247909 -0.05434325  0.04364657 ... -0.00520646  0.04954348
   0.0615086 ]
 [ 0.01628537 -0.00393314  0.06814129 ...  0.06664989 -0.06824005
   0.02902017]
 [ 0.06353932 -0.03678216  0.03699008 ...  0.03383283 -0.04630034
   0.06421676]]
<NDArray 256x20 @cpu(0)>

In [31]:
# .grad() returns the gradient NDArray, with the same 256x20 shape as the
# weight; it is all zeros in the recorded output (no backward pass is run in
# the cells shown in this notebook).
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [32]:
# Bias values of the second layer (index 1); all zeros in the recorded output.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [33]:
# List every parameter of the network (weights and biases of both layers).
net.collect_params()

sequential4_ (
  Parameter dense19_weight (shape=(256, 20), dtype=float32)
  Parameter dense19_bias (shape=(256,), dtype=float32)
  Parameter dense20_weight (shape=(10, 256), dtype=float32)
  Parameter dense20_bias (shape=(10,), dtype=float32)
)

In [34]:
# Passing a name pattern filters the listing; with '.*weight' only the two
# weight parameters appear in the recorded output.
net.collect_params('.*weight')

sequential4_ (
  Parameter dense19_weight (shape=(256, 20), dtype=float32)
  Parameter dense20_weight (shape=(10, 256), dtype=float32)
)

In [35]:
# Re-initialize the network with init.Normal(sigma=0.01). force_reinit=True is
# passed because the parameters have already been initialized earlier.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]  # first row of the first layer's weight


[ 0.00632535  0.02281385 -0.00790374  0.00079064 -0.00359422  0.00275971
  0.01268833  0.01220814 -0.01035277 -0.00453738 -0.00711486  0.00247771
  0.01414129 -0.00594405  0.00053162 -0.00421787 -0.01071319 -0.00609468
  0.00426226 -0.00555236]
<NDArray 20 @cpu(0)>

In [36]:
# Re-initialize with init.Constant(1); the displayed weight row becomes all ones.
net.initialize(init.Constant(1), force_reinit=True)
net[0].weight.data()[0]  # first row of the first layer's weight


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 20 @cpu(0)>

In [37]:
# initialize() can also be called on a single Parameter: here only the first
# layer's weight is re-initialized, using init.Xavier().
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
net[0].weight.data()[0]  # first row of the first layer's weight


[-0.08361219  0.08051363 -0.09847713  0.07271375  0.12466413  0.06529617
 -0.06072348 -0.00504498 -0.01383175 -0.09022775 -0.00178175 -0.08682375
  0.08202833  0.01045902  0.10150935 -0.01833758 -0.10643165  0.14006686
 -0.02155472 -0.05873382]
<NDArray 20 @cpu(0)>

In [0]:
class MyInit(init.Initializer):
    """Custom initializer: uniform random values with the small-magnitude ones zeroed."""

    def _init_weight(self, name, data):
        """Fill ``data`` in place and print which parameter is being initialized.

        Values are drawn with nd.random.uniform(low=-10, high=10); every entry
        whose absolute value is below 5 is then multiplied by 0.
        """
        shape = data.shape
        print('Init', name, shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=shape)
        # The comparison yields a mask; multiplying by it zeroes the masked-out entries.
        keep_mask = data.abs() >= 5
        data *= keep_mask

In [39]:
# Re-initialize with the custom initializer. In the recorded output its print
# statement fires once per weight parameter (the biases are not listed).
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]  # first row of the first layer's weight

Init dense19_weight (256, 20)
Init dense20_weight (10, 256)



[ 6.1872997  0.         0.         0.         9.086678  -0.
 -0.         0.         7.9508553  0.         5.3993435 -0.
 -0.         0.         0.        -0.        -0.        -8.298664
  7.487999  -5.336525 ]
<NDArray 20 @cpu(0)>

In [40]:
# Overwrite a parameter's values directly with set_data: here the first
# layer's weight is replaced by its current values plus 1.
net[0].weight.set_data(net[0].weight.data() + 1)
net[0].weight.data()[0]  # first row of the first layer's weight


[ 7.1872997  1.         1.         1.        10.086678   1.
  1.         1.         8.950855   1.         6.3993435  1.
  1.         1.         1.         1.         1.        -7.298664
  8.487999  -4.336525 ]
<NDArray 20 @cpu(0)>

In [42]:
# Parameter sharing: the third layer is constructed with params=shared.params,
# i.e. it is handed the parameter dictionary of the second layer (`shared`).
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'), 
    shared, 
    nn.Dense(8, activation='relu', params=shared.params), 
    nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 10))
net(X)

# Compare the first weight row of layers 1 and 2 elementwise; the recorded
# output is all ones, i.e. the values are equal.
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>

## 4.3 模型参数的延后初始化

In [0]:
from mxnet import init, nd 
from mxnet.gluon import nn 

class MyInit(init.Initializer):
    """Initializer that only reports when it is invoked.

    This redefinition shadows the MyInit class defined earlier in the notebook.
    """

    def _init_weight(self, name, data):
        """Print the parameter name and shape; nothing is written to ``data``."""
        print('Init', name, data.shape)

In [0]:
# Build a network without specifying input sizes and call initialize().
# No "Init ..." line is recorded for this cell, i.e. MyInit._init_weight has
# not been invoked yet at this point.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'), 
    nn.Dense(10))

net.initialize(init=MyInit())


In [46]:
# The first forward pass supplies a (2, 20) input; only now does MyInit print,
# with weight shapes (256, 20) and (10, 256) in the recorded output.
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

Init dense27_weight (256, 20)
Init dense28_weight (10, 256)


In [0]:
# A second forward pass records no "Init ..." output: the initializer is not
# invoked again.
Y = net(X)

In [48]:
# With the shapes already known, force_reinit=True makes MyInit run
# immediately (the "Init ..." lines are recorded directly under this cell).
net.initialize(init=MyInit(), force_reinit=True)

Init dense27_weight (256, 20)
Init dense28_weight (10, 256)


In [49]:
# When in_units is given for every layer, the recorded output shows MyInit
# running as part of initialize() itself, without any forward pass.
net = nn.Sequential()
net.add(nn.Dense(256, in_units=20, activation='relu'))
net.add(nn.Dense(10, in_units=256))

net.initialize(init=MyInit())

Init dense29_weight (256, 20)
Init dense30_weight (10, 256)
