In [2]:
from mxnet import init, nd # the init package provides various model-initialization methods
from mxnet.gluon import nn

In [4]:
# Two-layer perceptron: a 256-unit ReLU hidden layer followed by a
# 10-unit output layer.
net = nn.Sequential()
net.add(
    nn.Dense(256, activation='relu'),
    nn.Dense(10),
)
net.initialize()  # use the default initialization method

# Random input batch of shape (2, 20).
x = nd.random.uniform(shape=(2, 20))
y = net(x)  # forward computation

## 1.访问模型参数

In [5]:
# Show the first layer's parameter dictionary together with its type.
# NOTE: the book's output shows dense0_weight, whereas this run shows
# dense2_weight. Presumably the numeric suffix counts Dense layers created
# earlier in the same kernel session -- confirm against the Gluon naming docs.
net[0].params, type(net[0].params)

(dense2_ (
   Parameter dense2_weight (shape=(256, 20), dtype=float32)
   Parameter dense2_bias (shape=(256,), dtype=float32)
 ), mxnet.gluon.parameter.ParameterDict)

In [7]:
# Fetch the first layer's weight two equivalent ways: by its full name in the
# parameter dictionary, and through the layer's `weight` attribute.
# The key is built from the dictionary's own prefix instead of the hard-coded
# 'dense2_weight': the numeric suffix in that name varies between kernel
# sessions (the book shows dense0_weight), so a hard-coded key raises KeyError
# after a fresh "Restart & Run All".
net[0].params[net[0].params.prefix + 'weight'], net[0].weight

(Parameter dense2_weight (shape=(256, 20), dtype=float32),
 Parameter dense2_weight (shape=(256, 20), dtype=float32))

In [8]:
# Display the current values of the first layer's weight parameter.
net[0].weight.data()


[[ 0.06700657 -0.00369488  0.0418822  ... -0.05517294 -0.01194733
  -0.00369594]
 [-0.03296221 -0.04391347  0.03839272 ...  0.05636378  0.02545484
  -0.007007  ]
 [-0.0196689   0.01582889 -0.00881553 ...  0.01509629 -0.01908049
  -0.02449339]
 ...
 [ 0.00010955  0.0439323  -0.04911506 ...  0.06975312  0.0449558
  -0.03283203]
 [ 0.04106557  0.05671307 -0.00066976 ...  0.06387014 -0.01292654
   0.00974177]
 [ 0.00297424 -0.0281784  -0.06881659 ... -0.04047417  0.00457048
   0.05696651]]
<NDArray 256x20 @cpu(0)>

In [9]:
# Display the gradient stored for the first layer's weight parameter.
# No backward pass is run in the cells above, which is presumably why the
# recorded output is all zeros.
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

### 其它层的参数，例如输出层的偏差值

In [10]:
# Display the bias values of the second (output) layer.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [11]:
# Gather the parameters of every layer in `net` into one dictionary.
net.collect_params()

sequential1_ (
  Parameter dense2_weight (shape=(256, 20), dtype=float32)
  Parameter dense2_bias (shape=(256,), dtype=float32)
  Parameter dense3_weight (shape=(10, 256), dtype=float32)
  Parameter dense3_bias (shape=(10,), dtype=float32)
)

In [12]:
# Select parameters by a regular expression on their names; with this pattern
# the recorded output lists only the weight parameters.
net.collect_params('.*weight')

sequential1_ (
  Parameter dense2_weight (shape=(256, 20), dtype=float32)
  Parameter dense3_weight (shape=(10, 256), dtype=float32)
)

## 2.初始化模型参数

In [14]:
# Re-initializing a model that has already been initialized requires
# force_reinit to be True.
net.initialize(init = init.Normal(sigma = 0.01), force_reinit = True)
# Display the first row of the first layer's weights.
net[0].weight.data()[0]


[ 0.01074176  0.00066428  0.00848699 -0.0080038  -0.00168822  0.00936328
  0.00357444  0.00779328 -0.01010307 -0.00391573  0.01316619 -0.00432926
  0.0071536   0.00925416 -0.00904951 -0.00074684  0.0082254  -0.01878511
  0.00885884  0.01911872]
<NDArray 20 @cpu(0)>

In [16]:
# Initialize the weight parameters with a constant value.
net.initialize(init=init.Constant(1), force_reinit=True)
# Display the first row of the first layer's weights.
net[0].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 20 @cpu(0)>

In [19]:
# Initialize only one specific parameter: the first layer's weight is
# re-initialized with Xavier, leaving the other parameters untouched.
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
# Display the first row of the first layer's weights.
net[0].weight.data()[0]


[-0.07911687  0.11172259  0.1324969  -0.00227997  0.13015518  0.06356551
  0.08823001 -0.00408132  0.03846699  0.06149748  0.1103715  -0.00054842
 -0.06103499  0.10160227  0.10289782 -0.09013356  0.03475994  0.08059946
 -0.14353862  0.13985124]
<NDArray 20 @cpu(0)>

## 3.自定义初始化方法

In [22]:
class MyInit(init.Initializer):
    """Custom initializer: uniform samples in [-10, 10], with small values zeroed.

    Each weight is drawn uniformly from [-10, 10]; entries whose absolute
    value is below 5 are then set to zero.
    """

    def _init_weight(self, name, data):
        """Fill ``data`` in place and log the parameter being initialized.

        Args:
            name: Name of the parameter, printed for tracing.
            data: Array to overwrite in place.
        """
        target_shape = data.shape
        print('Init', name, target_shape)
        # Write the samples into the existing array rather than rebinding it.
        data[:] = nd.random.uniform(low=-10, high=10, shape=target_shape)
        # The comparison yields a 0/1 mask; multiplying zeroes entries with |w| < 5.
        keep_mask = data.abs() >= 5
        data *= keep_mask

In [23]:
# Re-initialize the whole network with the custom initializer. In the recorded
# output the 'Init ...' trace lines appear only for the two weight parameters.
net.initialize(MyInit(), force_reinit = True)
# Display the first row of the first layer's weights.
net[0].weight.data()[0]

Init dense2_weight (256, 20)
Init dense3_weight (10, 256)



[-7.3502474 -0.        -8.931457   9.057413   0.        -9.721032
 -9.771451   0.         5.4116154 -0.        -7.061067   7.6572
 -8.409558  -0.        -8.207939   0.         0.         6.208481
 -5.0926557 -0.       ]
<NDArray 20 @cpu(0)>

In [24]:
# Overwrite a model parameter directly with the Parameter class's set_data
# function: here every entry of the first layer's weight is increased by 1.
net[0].weight.set_data(net[0].weight.data() + 1)
# Display the first row of the first layer's weights.
net[0].weight.data()[0]


[-6.3502474  1.        -7.9314566 10.057413   1.        -8.721032
 -8.771451   1.         6.4116154  1.        -6.061067   8.6572
 -7.4095583  1.        -7.207939   1.         1.         7.208481
 -4.0926557  1.       ]
<NDArray 20 @cpu(0)>

## 4.共享模型参数

In [25]:
# Build a network whose second and third layers use the same parameters.
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'))
net.add(shared)
# The third layer is constructed with `params=shared.params`, so it reuses
# the parameters of `shared` instead of creating its own.
net.add(nn.Dense(8, activation='relu', params=shared.params))
net.add(nn.Dense(10))

net.initialize()

x = nd.random.uniform(shape=(2, 20))
net(x)

# Element-wise comparison of the first weight row of layers 1 and 2.
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>