In [1]:
%matplotlib inline
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


接下来定义超参数

In [2]:
# Hyperparameters shared by the data pipeline and the graph definition below.
num_steps = 10        # number of time steps the RNN is unrolled over (width of each batch)
batch_size = 200      # number of sequences processed in parallel (rows of each batch)
num_classes = 2       # inputs and targets are binary symbols (0 or 1)
state_size = 16       # dimensionality of the RNN hidden state
learning_rate = 0.1   # learning rate handed to the optimizer

接下来生成数据

In [3]:
def gen_data(size=1000000):
    """Generate a random binary input sequence and a dependent target sequence.

    X is drawn uniformly from {0, 1}. Each target Y[i] is 1 with probability
    ``threshold`` and 0 otherwise, where the threshold starts at 0.5 and is

    * raised by 0.5  if X[i-3] == 1
    * lowered by 0.25 if X[i-8] == 1

    Positions that do not have 3 (resp. 8) earlier elements simply skip the
    corresponding rule. The previous implementation indexed ``X[i-3]`` /
    ``X[i-8]`` directly, so for the first few positions the negative index
    wrapped around and the target depended on the *end* of the sequence.

    Args:
        size: Length of the sequences to generate.

    Returns:
        Tuple ``(X, Y)`` of 1-D integer arrays, each of length ``size``.
    """
    X = np.array(np.random.choice(2, size=(size,)))

    # Base probability of emitting a 1 at every position.
    thresholds = np.full(size, 0.5)
    # Shifted slices pair position i with X[i-3] / X[i-8] and leave the first
    # 3 / 8 positions untouched, which avoids negative-index wrap-around.
    thresholds[3:] += 0.5 * (X[:-3] == 1)
    thresholds[8:] -= 0.25 * (X[:-8] == 1)

    # One uniform draw per position; Y is 0 when the draw exceeds the threshold.
    draws = np.random.rand(size)
    Y = (draws <= thresholds).astype(int)
    return X, Y


def gen_batch(raw_data, batch_size, num_step):
    """Yield successive (x, y) batches cut from a pair of raw sequences.

    The sequences are split into ``batch_size`` contiguous partitions that
    become the rows of a matrix; the matrix is then walked left to right in
    windows of ``num_step`` columns. With the notebook defaults (1,000,000
    samples, batch_size=200, num_step=10) each row holds 5000 samples and one
    pass yields 500 batches.

    Trailing samples that do not fill a whole partition or a whole window are
    dropped.

    Args:
        raw_data: Tuple ``(raw_x, raw_y)`` of equally long 1-D sequences.
        batch_size: Number of rows in each batch.
        num_step: Number of time steps (columns) in each batch.

    Yields:
        Tuples ``(x, y)`` of int32 arrays with shape ``(batch_size, num_step)``.
    """
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # Each row holds one contiguous partition of the raw sequence.
    batch_partition_length = data_length // batch_size
    used = batch_size * batch_partition_length
    data_x = np.array(raw_x[:used], dtype=np.int32).reshape(batch_size, batch_partition_length)
    data_y = np.array(raw_y[:used], dtype=np.int32).reshape(batch_size, batch_partition_length)

    # Use the num_step argument here; the previous code read the global
    # `num_steps` instead, silently ignoring what the caller passed in.
    epoch_size = batch_partition_length // num_step
    for i in range(epoch_size):
        window = slice(i * num_step, (i + 1) * num_step)
        # Generator: execution pauses here until the consumer asks for the next batch.
        yield (data_x[:, window], data_y[:, window])
def gen_epochs(n, num_steps):
    """Yield ``n`` batch generators, one per epoch.

    Every epoch draws a fresh dataset from ``gen_data()`` and wraps it in
    ``gen_batch`` using the module-level ``batch_size`` and the given
    ``num_steps``.
    """
    for _ in range(n):
        epoch_data = gen_data()
        yield gen_batch(epoch_data, batch_size, num_steps)

接下来定义网络结构

In [4]:
# Define the placeholders first: integer inputs and targets for one batch,
# both of shape [batch_size, num_steps].
x = tf.placeholder(tf.int32, [batch_size, num_steps], name='x')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='y')
# Initial hidden state: all zeros, one state_size-vector per row of the batch.
init_state = tf.zeros([batch_size, state_size], name='init_state')
'''RNN输入'''
# RNN inputs: one-hot encode x over num_classes, then unstack along axis 1
# (the num_steps axis) to get a Python list with one tensor per time step.
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)

这里 rnn_cell 里的 W 和 b 在所有时间步之间共享同一组参数。这也是为什么要先在 variable_scope 中单独创建 W、b，再在 rnn_cell 内通过 reuse=True 取回同一组变量。

In [5]:
'''接下来定义RNN_cell'''
# Create the RNN cell's parameters once, inside the 'rnn_cell' variable scope,
# so that rnn_cell() can fetch the same variables again with reuse=True.
with tf.variable_scope('rnn_cell'):
    # A single matrix applied to the concatenated [input, state] vector,
    # i.e. effectively the input and recurrent weights stacked: [W, U].
    W = tf.get_variable('W', [num_classes + state_size, state_size])
    # Bias of the cell, initialized to zero.
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))

# The RNN update rule is written out by hand here; TensorFlow derives the gradients automatically.
def rnn_cell(rnn_input, state):
    """Compute one RNN step: tanh([rnn_input, state] @ W + b).

    The weights are looked up in the 'rnn_cell' variable scope with
    reuse=True, so every call uses the same W and b.

    Args:
        rnn_input: Input tensor for the current time step.
        state: Hidden-state tensor from the previous time step.

    Returns:
        The new hidden-state tensor.
    """
    with tf.variable_scope('rnn_cell', reuse=True):
        weights = tf.get_variable('W', [num_classes + state_size, state_size])
        bias = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
    # Concatenate input and previous state along the feature axis.
    joined = tf.concat([rnn_input, state], axis=1)
    pre_activation = tf.matmul(joined, weights) + bias
    return tf.tanh(pre_activation)

    

需要注意的是这里的init_state十分重要

In [6]:
'''这里开始做循环的操作'''
# Unroll the RNN over the per-step inputs: start from init_state, feed each
# step's input together with the previous state through rnn_cell, and keep
# every intermediate state as that step's output.
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
    state = rnn_cell(rnn_input, state)
    rnn_outputs.append(state)

In [7]:
with tf.variable_scope('project'):
    # Output projection: maps each state_size-dimensional RNN output to
    # num_classes logits.
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

# One logits tensor per time step, all sharing the same projection W and b.
logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
# Then apply a softmax to each step's logits to get class probabilities.
pred = [tf.nn.softmax(logit) for logit in logits]


In [8]:
# Split the targets along axis 1 into num_steps per-step label tensors so they
# line up with the per-step logits.
y_as_list = tf.unstack(y, num=num_steps, axis=1)
# Sparse softmax cross-entropy for each time step (labels are integer class ids).
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,logits=logit) for logit, label in zip(logits, y_as_list)]
# Average the loss over all time steps and batch entries.
total_loss = tf.reduce_mean(losses)
# Adagrad update step that minimizes the averaged loss.
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
