In [32]:
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
from mxnet.gluon import nn, rnn

![](https://ws1.sinaimg.cn/large/006tKfTcly1fmt9xz889xj30kb07nglo.jpg)

`class mxnet.gluon.rnn.RNN(hidden_size, num_layers=1, activation='relu', layout='TNC', dropout=0, bidirectional=False, i2h_weight_initializer=None, h2h_weight_initializer=None, i2h_bias_initializer='zeros', h2h_bias_initializer='zeros', input_size=0, **kwargs)`

Parameters:	

* hidden_size (int) – 隐含层也就是h的size
* num_layers (int, default 1) – 使用几层RNN
* activation ({'relu' or 'tanh'}, default 'relu') – 选取使用的激活函数
* layout (str, default 'TNC') – The format of input and output tensors. T, N and C stand for sequence length, batch size, and feature dimensions respectively.
* dropout (float, default 0) – 使用dropout的比例
* bidirectional (bool, default False) – 是否使用双向RNN.
* i2h_weight_initializer (str or Initializer) – Initializer for the input weights matrix, used for the linear transformation of the inputs.
* h2h_weight_initializer (str or Initializer) – Initializer for the recurrent weights matrix, used for the linear transformation of the recurrent state.
* i2h_bias_initializer (str or Initializer) – Initializer for the bias vector.
* h2h_bias_initializer (str or Initializer) – Initializer for the bias vector.
* input_size (int, default 0) – $x_{t}$的特征维度，也就是词向量的长度
* prefix (str or None) – Prefix of this Block.
* params (ParameterDict or None) – Shared Parameters for this Block.

初始化后的对象：

* Inputs: data, states
* Outputs: out, out_states


In [40]:
# Only rnn.RNN is used here, since it is the class normally used in practice
# (rather than RNNCell). Each input step has 100 features and the hidden
# state h has 200 units.
rnn_seq = rnn.RNN(hidden_size=200, input_size=100)
rnn_seq.initialize(mx.init.Xavier())

In [41]:
# Inspect the input-to-hidden weight parameter(s) held by the layer.
rnn_seq.i2h_weight

[Parameter rnn4_l0_i2h_weight (shape=(200, 100), dtype=<class 'numpy.float32'>)]

In [42]:
# Show the weight Parameters of the single-layer RNN: first the
# input-to-hidden weights, then the hidden-to-hidden (recurrent) weights.
# The numpy import that used to sit in this cell lives with the other
# imports in the first cell; nothing in this cell uses numpy.
print('输入到隐层的权值: ', rnn_seq.i2h_weight)
print('上一个时刻状态到下一个时刻的权值: ', rnn_seq.h2h_weight)

输入到隐层的权值:  [Parameter rnn4_l0_i2h_weight (shape=(200, 100), dtype=<class 'numpy.float32'>)]
上一个时刻状态到下一个时刻的权值:  [Parameter rnn4_l0_h2h_weight (shape=(200, 200), dtype=<class 'numpy.float32'>)]


<b>注意: </b> mxnet的RNN的输出是直接输出$h_{t}$而不是再经过一个神经网络得到$o_{t}$

In [43]:
# Build a random input sequence in the RNN input format
# (num_step, batch_size, word), i.e. (sequence length, batch size,
# word-vector size): 6 steps, a batch of 5, and 100 features per step.
x = nd.random.normal(loc=0, scale=1, shape=(6, 5, 100))

In [48]:
# Random initial hidden state: 1 layer, batch size 5, hidden size 200.
h0 = nd.random.uniform(low=0, high=1, shape=(1, 5, 200))

In [49]:
# Run the whole input sequence through the layer, starting from h0.
output, state = rnn_seq(x, h0)

In [60]:
# `state` must be indexed with [0] before taking its shape; per the Gluon RNN
# docs the layer returns its states as a list, so the tensor is its first entry.
print('output: ', output.shape, '\nstate: ', state[0].shape)

output:  (6, 5, 200) 
state:  (1, 5, 200)


output其实就是每一个时间步的隐层输出$h_{t}$（这里共6个时间步），state则是最后一个时间步的隐层状态

<b>当RNN取多层（num_layers > 1）时：</b>

In [76]:
# A 3-layer RNN: 100 input features, 200 hidden units per layer.
rnn_seq = rnn.RNN(hidden_size=200, input_size=100, num_layers=3)
rnn_seq.initialize(mx.init.Xavier())

In [79]:
# Print the weight Parameters of the stacked RNN: input-to-hidden weights
# first, then the hidden-to-hidden (recurrent) weights.
print('输入到隐层的权值: ', rnn_seq.i2h_weight)
print('上一个时刻状态到下一个时刻的权值: ', rnn_seq.h2h_weight, '\n')

输入到隐层的权值:  [Parameter rnn6_l0_i2h_weight (shape=(200, 100), dtype=<class 'numpy.float32'>), Parameter rnn6_l1_i2h_weight (shape=(200, 200), dtype=<class 'numpy.float32'>), Parameter rnn6_l2_i2h_weight (shape=(200, 200), dtype=<class 'numpy.float32'>)]
上一个时刻状态到下一个时刻的权值:  [Parameter rnn6_l0_h2h_weight (shape=(200, 200), dtype=<class 'numpy.float32'>), Parameter rnn6_l1_h2h_weight (shape=(200, 200), dtype=<class 'numpy.float32'>), Parameter rnn6_l2_h2h_weight (shape=(200, 200), dtype=<class 'numpy.float32'>)] 



In [85]:
# A single sequence (batch size 1) of 6 steps with 100 features per step.
x = nd.random.normal(loc=0, scale=1, shape=(6, 1, 100))
# Initial hidden state for the 3-layer network: 3 layers, batch size 1,
# hidden size 200.
h0 = nd.random.uniform(low=0, high=1, shape=(3, 1, 200))

# Forward pass over the whole sequence.
output, state = rnn_seq(x, h0)

# state[0] is the state tensor itself (the states come back wrapped in a list).
print('output: ', output.shape, '\nstate: ', state[0].shape)

output:  (6, 1, 200) 
state:  (3, 1, 200)


这里的output是最后一层RNN在每一个时间步的输出，state是每一层RNN在最后一个时间步的隐层状态h