In [1]:
%matplotlib inline
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [2]:
# Hyper-parameters shared by the data pipeline and the graph built in later cells.
num_steps = 10        # time steps per training window; second dimension of the x / y placeholders
batch_size = 200      # sequences per mini-batch; first dimension of the placeholders and initial states
num_classes = 2       # depth of the one-hot input encoding and width of the softmax output
state_size = 16       # width of the LSTM output and cell-state vectors
learning_rate = 0.1   # step size handed to the Adagrad optimizer

In [3]:
def gen_data(size=1000000):
    """Generate a random binary sequence X and a label sequence Y that depends on it.

    Each Y[i] is 1 with probability ``threshold`` and 0 otherwise, where the
    threshold starts at 0.5 and is adjusted by earlier inputs:

    * +0.5  if X[i - 3] == 1
    * -0.25 if X[i - 8] == 1

    Positions that do not have 3 (or 8) earlier elements simply skip the
    corresponding adjustment.  The previous implementation indexed ``X[i - 3]``
    and ``X[i - 8]`` unconditionally, so the first labels were silently driven
    by the *end* of the sequence (negative indexing) and any ``0 < size < 8``
    raised ``IndexError``.

    Args:
        size: number of elements to generate.

    Returns:
        Tuple ``(X, Y)`` of 1-D integer arrays of length ``size`` holding 0/1 values.
    """
    X = np.random.choice(2, size=(size,))

    # Per-position probability that Y is 1, built with slice arithmetic instead
    # of a Python-level loop over every element.
    threshold = np.full(size, 0.5)
    # X[:-3] lines up with threshold[3:], i.e. "the input three steps back".
    threshold[3:] += np.where(X[:-3] == 1, 0.5, 0.0)
    # X[:-8] lines up with threshold[8:], i.e. "the input eight steps back".
    threshold[8:] -= np.where(X[:-8] == 1, 0.25, 0.0)

    # One uniform draw per position; a draw above the threshold yields 0.
    Y = (np.random.rand(size) <= threshold).astype(int)
    return X, Y


def gen_batch(raw_data, batch_size, num_step):
    """Cut one long (x, y) sequence pair into consecutive mini-batches.

    The sequences are laid out as ``batch_size`` rows, each row holding one
    contiguous slice of ``len(raw_x) // batch_size`` elements; trailing
    elements that do not fill a row are dropped.  The rows are then walked
    left to right in windows of ``num_step`` columns, again dropping any
    incomplete final window.

    The window width now comes from the ``num_step`` argument.  Previously the
    body read the module-level ``num_steps`` instead, so the argument was
    silently ignored.

    Args:
        raw_data: pair ``(raw_x, raw_y)`` of equally long 1-D sequences.
        batch_size: number of rows (parallel sequences) in every batch.
        num_step: number of consecutive time steps in every batch.

    Yields:
        Tuples ``(x, y)`` of int32 arrays with shape ``(batch_size, num_step)``.
    """
    raw_x, raw_y = raw_data
    data_length = len(raw_x)

    # Length of the contiguous slice assigned to each row.
    batch_partition_length = data_length // batch_size
    used = batch_size * batch_partition_length

    # Row i holds raw[i * batch_partition_length:(i + 1) * batch_partition_length].
    data_x = np.asarray(raw_x[:used]).reshape(batch_size, batch_partition_length).astype(np.int32)
    data_y = np.asarray(raw_y[:used]).reshape(batch_size, batch_partition_length).astype(np.int32)

    # Number of full num_step-wide windows available in every row.
    epoch_size = batch_partition_length // num_step
    for i in range(epoch_size):
        window = slice(i * num_step, (i + 1) * num_step)
        # Generator: execution pauses here until the consumer asks for the next batch.
        yield (data_x[:, window], data_y[:, window])
def gen_epochs(n, num_steps):
    """Yield ``n`` batch generators, one per epoch.

    Every epoch draws a brand-new dataset from ``gen_data()`` and wraps it in
    ``gen_batch`` using the module-level ``batch_size`` and the given
    ``num_steps``.
    """
    for _epoch in range(n):
        epoch_data = gen_data()
        yield gen_batch(epoch_data, batch_size, num_steps)

In [4]:

# Graph inputs: integer input ids and integer targets, one row per sequence.
x = tf.placeholder(tf.int32, [batch_size, num_steps], name="x")
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='y')

# All-zero starting values for the LSTM output and cell state.
init_state = tf.zeros([batch_size, state_size])
init_c = tf.zeros([batch_size, state_size])

# RNN inputs: one-hot encode the ids, then split along the time axis so there
# is one tensor per time step.
x_one_hot = tf.one_hot(x, num_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)

print('----> numclass', num_classes, state_size)


def _lstm_param_set():
    """Create one (input weights, recurrent weights, bias) triple for the LSTM.

    NOTE(review): the two numeric arguments of tf.truncated_normal are its
    mean and stddev, so these weights are centred on -0.1, not drawn from
    [-0.1, 0.1].  Confirm that this is the intended initialisation.
    """
    input_weights = tf.Variable(
        tf.truncated_normal([num_classes, state_size], mean=-0.1, stddev=0.1))
    recurrent_weights = tf.Variable(
        tf.truncated_normal([state_size, state_size], mean=-0.1, stddev=0.1))
    bias = tf.Variable(tf.zeros([1, state_size]))
    return input_weights, recurrent_weights, bias


# LSTM cell parameters, created in the order: input gate, forget gate,
# memory-cell candidate, output gate.
ix, im, ib = _lstm_param_set()   # input gate
fx, fm, fb = _lstm_param_set()   # forget gate
cx, cm, cb = _lstm_param_set()   # memory-cell candidate
ox, om, ob = _lstm_param_set()   # output gate


----> numclass 2 16


In [5]:
def lstm_cell(rnn_input, h, c):
    """Compute a single LSTM time step.

    Uses the module-level parameters ``ix/im/ib`` (input gate), ``fx/fm/fb``
    (forget gate), ``cx/cm/cb`` (candidate update) and ``ox/om/ob`` (output
    gate).

    Args:
        rnn_input: input tensor for this time step; it is multiplied by the
            ``[num_classes, state_size]`` input weight matrices.
        h: output of the previous time step.
        c: cell state of the previous time step.

    Returns:
        Tuple ``(new_h, new_c)``: the output of this step and the updated
        cell state.
    """
    input_gate = tf.sigmoid(tf.matmul(rnn_input, ix) + tf.matmul(h, im) + ib)
    forget_gate = tf.sigmoid(tf.matmul(rnn_input, fx) + tf.matmul(h, fm) + fb)
    # The candidate is driven by the previous output h, exactly like the three
    # gates.  It used to multiply cm by the cell state c, which bypasses the
    # output gate and does not match the LSTM recurrence.
    update = tf.matmul(rnn_input, cx) + tf.matmul(h, cm) + cb
    # Keep part of the old cell state and add the gated candidate.
    state = forget_gate * c + input_gate * tf.tanh(update)
    output_gate = tf.sigmoid(tf.matmul(rnn_input, ox) + tf.matmul(h, om) + ob)
    return output_gate * tf.tanh(state), state


In [6]:
# Unroll the LSTM across the time axis, collecting the output of every step.
state, c = init_state, init_c
rnn_outputs = []
for rnn_input in rnn_inputs:
    # Both the output and the cell state are fed back into the next step.
    state, c = lstm_cell(rnn_input, state, c)
    rnn_outputs.append(state)
final_state = rnn_outputs[-1]  # output of the last time step
print('----> fianl state', final_state.get_shape())

# Predictions, loss and optimisation.
# One softmax layer is shared by all time steps.
with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
logits = [tf.matmul(step_output, W) + b for step_output in rnn_outputs]
predictions = [tf.nn.softmax(step_logits) for step_logits in logits]

# Split the targets per time step so they pair up with the per-step logits.
y_as_list = tf.unstack(y, num=num_steps, axis=1)
losses = [
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=step_labels, logits=step_logits)
    for step_logits, step_labels in zip(logits, y_as_list)
]
# Average over every time step and every sequence in the batch.
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)

----> fianl state (200, 16)
