In [1]:
import mxnet as mx

  from ._conv import register_converters as _register_converters
  import OpenSSL.SSL


In [5]:
from mxnet import nd, autograd, gluon
import numpy as np

# 1. `autograd` 自动求导

In [6]:
# Build a 3 x 4 integer matrix holding 0..11 and display it.
x = np.arange(12).reshape(3, 4)
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [41]:
# Wrap x in an NDArray and attach a gradient buffer so autograd can write into w.grad.
w = nd.array(x)
w.attach_grad()
# Operations executed inside record() are tracked for differentiation.
with autograd.record():
    y1 = w * 3 + 2 * w ** 2
    y = y1 + nd.exp(w)

# y = 3*w + 2*w^2 + exp(w) elementwise; backward() fills w.grad with dy/dw.
y.backward()

In [43]:
# dy/dw = 3 + 4*w + exp(w), evaluated elementwise at w.
w.grad


[[4.0000000e+00 9.7182817e+00 1.8389057e+01 3.5085537e+01]
 [7.3598145e+01 1.7141316e+02 4.3042880e+02 1.1276332e+03]
 [3.0159580e+03 8.1420840e+03 2.2069465e+04 5.9921141e+04]]
<NDArray 3x4 @cpu(0)>

# 2. `nd.norm`：将数组拉直为向量，再求取 $L_2$ 范数

In [37]:
# Four 3s and four 4s: sqrt(4 * 9 + 4 * 16) = 10.
a = nd.array([[3, 4], [3, 4], [3, 4], [3, 4]])
# Called without an axis, the norm is taken over all entries and comes back as a 1-element NDArray.
nd.norm(a)


[10.]
<NDArray 1 @cpu(0)>

# 3. 数据的分块读取

In [140]:
# Synthetic data set: 100 samples shaped 32 x 32 x 3, filled with consecutive integers.
x = np.arange(100 * 32 * 32 * 3).reshape(100, 32, 32, 3)
# One label per sample: the mean over that sample of sin(x) plus standard normal noise.
y = (np.sin(x) + np.random.normal(size=x.shape)).reshape(100, -1).mean(axis=1)

In [141]:
# Feature tensor: 100 samples, each 32 x 32 x 3.
x.shape

(100, 32, 32, 3)

In [142]:
# One scalar label per sample.
y.shape

(100,)

In [147]:
import random

def data_iter(x, y, batch_size):
    """Yield shuffled mini-batches of (features, labels) as NDArrays.

    Parameters
    ----------
    x : array-like of samples, indexed along its first axis.
    y : array-like of labels aligned with ``x``.
    batch_size : number of samples per batch; the final batch may be smaller.

    Yields
    ------
    A ``(features, labels)`` pair gathered with ``nd.take`` for each batch.
    """
    features, labels = nd.array(x), nd.array(y)
    order = list(range(len(features)))
    random.shuffle(order)
    for start in range(0, len(order), batch_size):
        # List slicing clamps at the end, so the last batch is simply shorter.
        batch_idx = nd.array(order[start:start + batch_size])
        yield nd.take(features, batch_idx), nd.take(labels, batch_idx)

In [152]:
# Create the batch generator and pull the first mini-batch of 10 samples.
z = data_iter(x, y, 10)
a, b = next(z)
a.shape

(10, 32, 32, 3)

In [153]:
def data_iter(x, y, batch_size):
    """Yield shuffled mini-batches by indexing ``x`` and ``y`` directly.

    Unlike a version that converts its inputs first, ``x`` and ``y`` are used
    as given and are indexed with an NDArray of batch indices.

    Parameters
    ----------
    x : container of samples supporting ``len`` and indexing by an index array.
    y : container of labels aligned with ``x``.
    batch_size : number of samples per batch; the final batch may be smaller.

    Yields
    ------
    A ``(x[batch], y[batch])`` pair for each batch.
    """
    order = list(range(len(x)))
    random.shuffle(order)
    for start in range(0, len(order), batch_size):
        # List slicing clamps at the end, so the last batch is simply shorter.
        batch_idx = nd.array(order[start:start + batch_size])
        yield x[batch_idx], y[batch_idx]

In [154]:
# Same check as before, now against the direct-indexing data_iter defined just above.
z = data_iter(x, y, 10)
a, b = next(z)
a.shape

(10, 32, 32, 3)

### 示例

In [201]:
# Forward pass of a linear model with a squared-error loss, recorded for autograd.
# NOTE(review): x, w, b and y are whatever the kernel currently holds — confirm
# their shapes (and that w has attach_grad() applied) before running this cell.
with autograd.record():
    y_ = nd.dot(x, w) + b
    loss = (y - y_) ** 2
# Back-propagate after leaving the recording scope, as the other cells in this
# notebook do; only the forward computation needs to be recorded.
loss.backward()

In [202]:
def SGD(lr, params):
    """Apply one plain gradient-descent step to every parameter, in place.

    Parameters
    ----------
    lr : learning rate that scales each gradient.
    params : iterable of arrays that expose their gradient as ``.grad``.

    Returns
    -------
    None. Each parameter's contents are overwritten with ``param - lr * param.grad``.
    """
    for weight in params:
        step = lr * weight.grad
        # Slice assignment writes into the existing array instead of rebinding the name.
        weight[:] -= step

In [203]:
# Differentiate b = sin(a) with respect to a.
a = nd.array([1, 2, 4, 5])
a.attach_grad()
with autograd.record():
    b = nd.sin(a)
b.backward()
# d sin(a) / da = cos(a), evaluated elementwise.
a.grad


[ 0.5403023  -0.41614684 -0.6536436   0.2836622 ]
<NDArray 4 @cpu(0)>

In [204]:
# Apply three in-place updates and check that `a` is still the same Python object.
for i in range(3):
    before = id(a)
    # a.grad is not recomputed inside the loop, so every step subtracts the same 0.1 * a.grad.
    a[:] -= 0.1 * a.grad
    print(a)
    print(id(a) == before)


[0.94596976 2.0416148  4.0653644  4.971634  ]
<NDArray 4 @cpu(0)>
True

[0.8919395 2.0832295 4.1307287 4.943268 ]
<NDArray 4 @cpu(0)>
True

[0.8379093 2.1248443 4.196093  4.9149017]
<NDArray 4 @cpu(0)>
True


由此可知，`a[:] -= ...` 是原地更新：每次更新后 `a` 仍是同一个对象（`id` 不变），结果直接写回原数组，因此额外的内存开销很小。

In [216]:
# Rebuild a and its gradient cos(a), then take three SGD steps with lr = 1.
a = nd.array([1, 2, 4, 5])
a.attach_grad()
with autograd.record():
    b = nd.sin(a)
b.backward()

# backward() ran only once above, so each SGD call reuses the same a.grad.
for i in range(3):
    print(a)
    SGD(1, [a])


[1. 2. 4. 5.]
<NDArray 4 @cpu(0)>

[0.45969772 2.4161468  4.6536436  4.7163377 ]
<NDArray 4 @cpu(0)>

[-0.08060455  2.8322935   5.307287    4.4326754 ]
<NDArray 4 @cpu(0)>
