In [1]:
from mxnet import init, nd
from mxnet.gluon import nn

In [2]:
# Two-layer perceptron: a 256-unit hidden layer with ReLU activation followed
# by a 10-unit output layer. Both layers are registered in a single add() call.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dense(10))

# Set up the parameters using the default initializer (no arguments given).
net.initialize()

In [3]:
# Random input batch of shape (2, 20), drawn with nd.random.uniform.
X = nd.random.uniform(shape=(2, 20))
# Run the batch through the network; the result displayed in the next cell is 2x10.
Y = net(X)

In [4]:
# Bare expression so the notebook displays the network output computed above.
Y


[[ 0.09543003  0.04614331 -0.00286653 -0.07790348 -0.0513024   0.02942039
   0.08696644 -0.01907929 -0.04122178  0.05088577]
 [ 0.07692869  0.03099705  0.00856576 -0.044672   -0.06926841  0.09132433
   0.06786595 -0.06187843 -0.03436673  0.04234697]]
<NDArray 2x10 @cpu(0)>

In [7]:
# Two ways to reach the first layer's weight Parameter: by name through the
# layer's parameter dictionary, or through the `weight` attribute. The output
# below shows both expressions yield the same Parameter description.
# NOTE(review): the 'dense0_' prefix looks auto-generated; re-creating the
# network in the same kernel would presumably change it and break this
# lookup - confirm before relying on the literal key.
net[0].params['dense0_weight'], net[0].weight

(Parameter dense0_weight (shape=(256, 20), dtype=float32),
 Parameter dense0_weight (shape=(256, 20), dtype=float32))

In [8]:
# data() returns the parameter's current values as an NDArray
# (256x20 for the first layer's weight, as shown in the output).
net[0].weight.data()


[[ 0.06700657 -0.00369488  0.0418822  ... -0.05517294 -0.01194733
  -0.00369594]
 [-0.03296221 -0.04391347  0.03839272 ...  0.05636378  0.02545484
  -0.007007  ]
 [-0.0196689   0.01582889 -0.00881553 ...  0.01509629 -0.01908049
  -0.02449339]
 ...
 [ 0.00010955  0.0439323  -0.04911506 ...  0.06975312  0.0449558
  -0.03283203]
 [ 0.04106557  0.05671307 -0.00066976 ...  0.06387014 -0.01292654
   0.00974177]
 [ 0.00297424 -0.0281784  -0.06881659 ... -0.04047417  0.00457048
   0.05696651]]
<NDArray 256x20 @cpu(0)>

In [9]:
# grad() returns the gradient array associated with the same parameter.
# It is all zeros in the output below - presumably because no backward pass
# has been run in the cells shown so far.
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [10]:
# Current values of the second layer's bias: a length-10 vector,
# all zeros in the output below.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [11]:
# List every parameter of the network at once; the output shows the weight
# and bias of both Dense layers together with their shapes and dtypes.
net.collect_params()

sequential0_ (
  Parameter dense0_weight (shape=(256, 20), dtype=float32)
  Parameter dense0_bias (shape=(256,), dtype=float32)
  Parameter dense1_weight (shape=(10, 256), dtype=float32)
  Parameter dense1_bias (shape=(10,), dtype=float32)
)

In [13]:
class MyInit(init.Initializer):
    """Custom initializer producing sparse, large-magnitude weights.

    Each entry is sampled uniformly from [-10, 10) and then set to zero
    unless its absolute value is at least 5.
    """

    def _init_weight(self, name, data):
        """Fill ``data`` in place and log which parameter is being initialized.

        Args:
            name: Name of the parameter being initialized (only printed).
            data: NDArray to overwrite in place with the sampled values.
        """
        print('Init', name, data.shape)
        # Overwrite the buffer in place with fresh uniform samples.
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        # 1 where |value| >= 5, 0 elsewhere; multiplying zeroes the small entries.
        keep_mask = data.abs() >= 5
        data *= keep_mask

In [15]:
# Re-initialize the already-initialized network with the custom initializer;
# force_reinit=True is passed so the existing parameter values are replaced.
net.initialize(MyInit(), force_reinit=True)
# Display the first row of the first layer's weights to inspect the new values.
net[0].weight.data()[0]

Init dense0_weight (256, 20)
Init dense1_weight (10, 256)



[ 0.        -0.         9.97694    6.8228035 -7.011034   8.092955
  7.36252   -9.248813  -6.750141   0.         0.        -6.6630497
 -7.5236006  5.5810204  6.960165   7.298666   6.1463795 -0.
  0.        -7.200548 ]
<NDArray 20 @cpu(0)>

In [18]:
# Build a network in which two hidden layers use the same parameters.
net = nn.Sequential()
# Keep a handle on this layer so its parameters can be handed to another one.
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'))
net.add(shared)
# This layer receives `shared`'s parameter dictionary instead of its own.
net.add(nn.Dense(8, activation='relu', params=shared.params))
net.add(nn.Dense(10))
net.initialize()

# Forward a random (2, 20) batch through the network and display the result.
X = nd.random.uniform(shape=(2, 20))
net(X)


[[-4.53198118e-05  9.40150494e-05  4.95607565e-05 -4.31137378e-05
  -3.72556970e-05 -1.30234374e-04  3.91284048e-05  1.00477147e-04
  -4.16371404e-05 -4.73197324e-05]
 [-3.15296529e-05  1.05568506e-04  5.34846076e-05 -8.30196150e-05
   5.01628165e-07 -6.74004841e-05  5.96888603e-05  4.26892657e-05
  -2.75808707e-05 -2.37117492e-05]]
<NDArray 2x10 @cpu(0)>

In [19]:
# Element-wise comparison of the first weight row of layers 1 and 2, which
# were built from the same parameter dictionary; a result of all ones means
# every entry is equal.
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>