<a href="https://colab.research.google.com/github/yananma/5_programs_per_day/blob/master/0428.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 4.1 构造模型

In [0]:
# !pip install mxnet d2lzh 

In [0]:
from mxnet import nd 
from mxnet.gluon import nn 

class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)

    def forward(self, x):
        return self.output(self.hidden(x))

In [0]:
X = nd.random.uniform(shape=(2, 20))
net = MLP()
net.initialize()
net(X)


[[ 0.09543004  0.04614332 -0.00286654 -0.07790349 -0.05130243  0.02942037
   0.08696642 -0.0190793  -0.04122177  0.05088576]
 [ 0.0769287   0.03099705  0.00856576 -0.04467199 -0.06926839  0.09132434
   0.06786595 -0.06187842 -0.03436673  0.04234694]]
<NDArray 2x10 @cpu(0)>

In [0]:
class MySequential(nn.Block):
    def __init__(self, **kwargs):
        super(MySequential, self).__init__(**kwargs)

    def add(self, block):
        self._children[block.name] = block

    def forward(self, x):
        for block in self._children.values():
            x = block(x)
        return x 

In [0]:
net = MySequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()
net(X)


[[ 0.00362228  0.00633332  0.03201144 -0.01369375  0.10336449 -0.03508018
  -0.00032164 -0.01676023  0.06978628  0.01303309]
 [ 0.03871715  0.02608213  0.03544959 -0.02521311  0.11005433 -0.0143066
  -0.03052466 -0.03852827  0.06321152  0.0038594 ]]
<NDArray 2x10 @cpu(0)>

In [0]:
class FancyMLP(nn.Block):
    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        self.rand_weight = self.params.get_constant('rand_weight', nd.random.uniform(shape=(20, 20)))
        self.dense = nn.Dense(20, activation='relu')

    def forward(self, x):
        x = self.dense(x)
        x = nd.relu(nd.dot(x, self.rand_weight.data()) + 1)
        x = self.dense(x)
        while x.norm().asscalar() > 1:
            x /= 2 
        if x.norm().asscalar() < 0.8:
            x *= 10 
        return x.sum()

In [0]:
net = FancyMLP()
net.initialize()
net(X)


[18.571953]
<NDArray 1 @cpu(0)>

In [0]:
class NestMLP(nn.Block):
    def __init__(self, **kwargs):
        super(NestMLP, self).__init__(**kwargs) 
        self.net = nn.Sequential()
        self.net.add(nn.Dense(64, activation='relu'), 
              nn.Dense(32, activation='relu'))
        self.dense = nn.Dense(16, activation='relu')

    def forward(self, x):
        return self.dense(self.net(x))

net = nn.Sequential()
net.add(NestMLP(), nn.Dense(20), FancyMLP())

net.initialize()
net(X)


[24.86621]
<NDArray 1 @cpu(0)>

## 4.2 模型参数的访问、初始化和共享

In [0]:
from mxnet import init, nd 
from mxnet.gluon import nn

net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 20))
Y = net(X)

In [0]:
net[0]

Dense(20 -> 256, Activation(relu))

In [0]:
net[0].params, type(net[0].params)

(dense11_ (
   Parameter dense11_weight (shape=(256, 20), dtype=float32)
   Parameter dense11_bias (shape=(256,), dtype=float32)
 ), mxnet.gluon.parameter.ParameterDict)

In [0]:
net[0].weight

Parameter dense11_weight (shape=(256, 20), dtype=float32)

In [0]:
net[0].weight.data()


[[-0.06046963  0.00624272 -0.03472826 ... -0.01759475  0.0686483
  -0.06360765]
 [-0.01273243 -0.02659053 -0.04718638 ...  0.02570021  0.02275064
  -0.0166979 ]
 [-0.03555115  0.01875034  0.02322027 ...  0.06564643  0.04601197
  -0.01915742]
 ...
 [ 0.03173313  0.01789995  0.02519771 ... -0.06176154 -0.03986754
  -0.04898471]
 [ 0.00564718  0.04665586 -0.00028374 ...  0.05332779  0.02100175
  -0.06427249]
 [ 0.0438781   0.05357236  0.02753124 ...  0.04084889 -0.01963295
   0.05668835]]
<NDArray 256x20 @cpu(0)>

In [0]:
net[0].weight.data()


[[-0.06046963  0.00624272 -0.03472826 ... -0.01759475  0.0686483
  -0.06360765]
 [-0.01273243 -0.02659053 -0.04718638 ...  0.02570021  0.02275064
  -0.0166979 ]
 [-0.03555115  0.01875034  0.02322027 ...  0.06564643  0.04601197
  -0.01915742]
 ...
 [ 0.03173313  0.01789995  0.02519771 ... -0.06176154 -0.03986754
  -0.04898471]
 [ 0.00564718  0.04665586 -0.00028374 ...  0.05332779  0.02100175
  -0.06427249]
 [ 0.0438781   0.05357236  0.02753124 ...  0.04084889 -0.01963295
   0.05668835]]
<NDArray 256x20 @cpu(0)>

In [0]:
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [0]:
net.collect_params()

sequential2_ (
  Parameter dense11_weight (shape=(256, 20), dtype=float32)
  Parameter dense11_bias (shape=(256,), dtype=float32)
  Parameter dense12_weight (shape=(10, 256), dtype=float32)
  Parameter dense12_bias (shape=(10,), dtype=float32)
)

In [0]:
net.collect_params('.*weight')

sequential2_ (
  Parameter dense11_weight (shape=(256, 20), dtype=float32)
  Parameter dense12_weight (shape=(10, 256), dtype=float32)
)

In [0]:
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]


[-0.00803235  0.01792648  0.00174623  0.01004736 -0.00177172  0.01704121
 -0.00315196 -0.0084464   0.00439474  0.00382651 -0.0071534  -0.01518173
 -0.00180067  0.01541854  0.00415876 -0.0093543   0.00476378 -0.00346679
  0.00468796  0.01868755]
<NDArray 20 @cpu(0)>

In [0]:
net.initialize(init=init.Constant(1), force_reinit=True)
net[0].weight.data()


[[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]
<NDArray 256x20 @cpu(0)>

In [0]:
net[0].weight.data().shape

(256, 20)

In [0]:
net[0].weight.initialize(init=init.Xavier(), force_reinit=True)
net[0].weight.data()[0]


[-0.10389185  0.07822403 -0.1289716  -0.1410463  -0.07610903 -0.10696874
 -0.01996909 -0.07058676  0.00648634  0.10942626  0.08052795 -0.09453681
  0.13527533 -0.01967503 -0.11284603 -0.05156991 -0.11588816  0.02459455
  0.02644953  0.12870744]
<NDArray 20 @cpu(0)>

In [0]:
class MyInit(init.Initializer):
    def _init_weight(self, name, data):
        print('Init', name, data.shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=data.shape)
        data *= data.abs() >= 5 

net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]

Init dense11_weight (256, 20)
Init dense12_weight (10, 256)



[ 7.142498  -9.206991  -7.6536884 -0.        -0.        -0.
 -0.        -5.294743  -0.        -8.822595   0.        -7.5231113
 -0.         7.135105   0.        -0.         0.        -0.
 -0.        -9.560527 ]
<NDArray 20 @cpu(0)>

In [0]:
net[0].weight.set_data(net[0].weight.data() + 1)
net[0].weight.data()[0]


[ 8.142498  -8.206991  -6.6536884  1.         1.         1.
  1.        -4.294743   1.        -7.8225946  1.        -6.5231113
  1.         8.135105   1.         1.         1.         1.
  1.        -8.560527 ]
<NDArray 20 @cpu(0)>

In [0]:
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
net.add(nn.Dense(8, activation='relu'), 
    shared, 
    nn.Dense(8, activation='relu', params=shared.params), 
    nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 20))
net(X)


[[ 0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00  0.0000000e+00  0.0000000e+00
   0.0000000e+00  0.0000000e+00]
 [ 2.2168067e-06  2.6662779e-06 -3.6558160e-06 -1.7297840e-07
  -2.8038012e-06  3.0073204e-06  2.0541652e-06  1.5204251e-06
  -4.2007518e-06  3.4896932e-06]]
<NDArray 2x10 @cpu(0)>

In [0]:
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>

## 4.3 模型参数的延后初始化

In [0]:
from mxnet import init, nd 
from mxnet.gluon import nn 

class MyInit(init.Initializer):
    def _init_weight(self, name, data):
        print('Init', name, data.shape)

net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'), 
    nn.Dense(10))

net.initialize(init=MyInit())

In [0]:
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

Init dense19_weight (256, 20)
Init dense20_weight (10, 256)


In [0]:
Y = net(X)

In [0]:
net.initialize(init=MyInit(), force_reinit=True)

Init dense19_weight (256, 20)
Init dense20_weight (10, 256)


In [0]:
net = nn.Sequential()
net.add(nn.Dense(256, in_units=20, activation='relu'))
net.add(nn.Dense(10, in_units=256))

net.initialize(init=MyInit())

Init dense21_weight (256, 20)
Init dense22_weight (10, 256)


## 自定义层

In [0]:
from mxnet import gluon, nd 
from mxnet.gluon import nn 

class CenteredLayer(nn.Block):
    def __init__(self, **kwargs):
        super(CenteredLayer, self).__init__(**kwargs)
        
    def forward(self, x):
        return x - x.mean()

In [0]:
layer = CenteredLayer()
layer(nd.array([1, 2, 3, 4, 5]))


[-2. -1.  0.  1.  2.]
<NDArray 5 @cpu(0)>

In [0]:
net = nn.Sequential()
net.add(nn.Dense(128), 
    CenteredLayer())

In [0]:
net.initialize()
y = net(nd.random.uniform(shape=(4, 8)))
y.mean().asscalar()

-3.6379788e-11

In [0]:
params = gluon.ParameterDict()
params.get('param2', shape=(2, 3))
params 

(
  Parameter param2 (shape=(2, 3), dtype=<class 'numpy.float32'>)
)

In [0]:
class MyDense(nn.Block):
    def __init__(self, units, in_units, **kwargs):
        super(MyDense, self).__init__(**kwargs)
        self.weight = self.params.get('weight', shape=(in_units, units))
        self.bias = self.params.get('bias', shape=(units, ))

    def forward(self, x):
        linear = nd.dot(x, self.weight.data()) + self.bias.data()
        return nd.relu(linear)

In [0]:
dense = MyDense(units=3, in_units=5)
dense.params

mydense0_ (
  Parameter mydense0_weight (shape=(5, 3), dtype=<class 'numpy.float32'>)
  Parameter mydense0_bias (shape=(3,), dtype=<class 'numpy.float32'>)
)

In [0]:
dense.initialize()
dense(nd.random.uniform(shape=(2, 5)))


[[0.04578072 0.05299256 0.02392543]
 [0.05540341 0.05370332 0.01740249]]
<NDArray 2x3 @cpu(0)>

In [0]:
net = nn.Sequential()
net.add(MyDense(8, in_units=64), 
    MyDense(1, in_units=8))
net.initialize()
net(nd.random.uniform(shape=(2, 64)))


[[0.04486297]
 [0.05108206]]
<NDArray 2x1 @cpu(0)>

## 4.5 读取和存储

In [0]:
from mxnet import nd 
from mxnet.gluon import nn 

x = nd.ones(3)
nd.save('x', x)

In [0]:
x2 = nd.load('x')
x2

[
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>]

In [0]:
y = nd.zeros(4)
nd.save('xy', [x, y])
x2, y2 = nd.load('xy')
(x2, y2)

(
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>, 
 [0. 0. 0. 0.]
 <NDArray 4 @cpu(0)>)

In [0]:
mydict = {'x': x, 'y': y}
nd.save('mydict', mydict)
mydict2 = nd.load('mydict')
mydict2

{'x': 
 [1. 1. 1.]
 <NDArray 3 @cpu(0)>, 'y': 
 [0. 0. 0. 0.]
 <NDArray 4 @cpu(0)>}

In [0]:
class MLP(nn.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Dense(256, activation='relu')
        self.output = nn.Dense(10)

    def forward(self, x):
        return self.output(self.hidden(x))

In [0]:
net = MLP()
net.initialize()
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

In [0]:
filename = 'mlp.params'
net.save_parameters(filename)

In [0]:
net2 = MLP()
net2.load_parameters(filename)

In [0]:
Y2 = net2(X)
Y2 == Y


[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]
<NDArray 2x10 @cpu(0)>

## 4.6 GPU 计算

In [1]:
!nvidia-smi

Tue Dec  3 19:10:24 2019       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
!cat /usr/local/cuda/version.txt

CUDA Version 10.0.130


In [3]:
!pip install mxnet-cu100



In [4]:
import mxnet as mx 
from mxnet import nd 
from mxnet.gluon import nn 

mx.cpu(), mx.gpu(), mx.gpu(1)

(cpu(0), gpu(0), gpu(1))

In [5]:
x = nd.array([1, 2, 3])
x


[1. 2. 3.]
<NDArray 3 @cpu(0)>

In [6]:
x.context

cpu(0)

In [7]:
a = nd.array([1, 2, 3], ctx=mx.gpu())
a


[1. 2. 3.]
<NDArray 3 @gpu(0)>

In [9]:
B = nd.random.uniform(shape=(2, 3), ctx=mx.gpu(1))
B

MXNetError: ignored

In [11]:
y = x.copyto(mx.gpu())
y


[1. 2. 3.]
<NDArray 3 @gpu(0)>

In [15]:
z = x.as_in_context(mx.gpu())
z


[1. 2. 3.]
<NDArray 3 @gpu(0)>

In [12]:
y.as_in_context(mx.gpu()) is y 

True

In [13]:
y.copyto(mx.gpu()) is y 

False

In [16]:
(z + 2).exp() * y


[ 20.085537 109.1963   445.2395  ]
<NDArray 3 @gpu(0)>

In [0]:
net = nn.Sequential()
net.add(nn.Dense(1))
net.initialize(ctx=mx.gpu())

In [18]:
net(y)


[[0.0068339 ]
 [0.01366779]
 [0.02050169]]
<NDArray 3x1 @gpu(0)>

In [19]:
net[0].weight.data()


[[0.0068339]]
<NDArray 1x1 @gpu(0)>