# Simple RNN layer playground

Keras' simple RNN cell is given by

$$
\begin{align*}
  a^{\langle t \rangle} &= g(W_{a a} a^{\langle t - 1 \rangle} + W_{a x} x^{\langle t \rangle} + b_a) \\
  y^{\langle t \rangle} &= a^{\langle t \rangle}
\end{align*}
$$


See https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-recurrent-neural-networks for figures.

![](https://stanford.edu/~shervine/images/architecture-rnn.png)

## Load packages and set constants

In [1]:
import collections
import tensorflow as tf
import numpy as np
tf.enable_eager_execution()

In [2]:
print("TensorFlow version:", tf.__version__)

TensorFlow version: 1.12.0


In [3]:
# Tensor dimensions used throughout the notebook.
BATCH_SIZE = 2   # number of examples in a batch (first axis of rnn_inputs and of the state)
SEQ_LEN = 3      # number of time steps per example (second axis of rnn_inputs)
NUM_INPUTS = 4   # number of input elements per time step (last axis of rnn_inputs)
NUM_UNITS = 5    # number of units in the RNN cell (size of the state and of each step output)

## Generate inputs $x^{\langle t \rangle}$

Let the input tensor to the RNN layer have the shape (`BATCH_SIZE`, `SEQ_LEN`, `NUM_INPUTS`).

In [4]:
# Random inputs x<t> for every example and time step (batch-major layout).
rnn_inputs = tf.random.uniform(shape=(BATCH_SIZE, SEQ_LEN, NUM_INPUTS))
rnn_inputs

<tf.Tensor: id=7, shape=(2, 3, 4), dtype=float32, numpy=
array([[[0.8850105 , 0.16727424, 0.9909543 , 0.6814976 ],
        [0.33877563, 0.532905  , 0.52076006, 0.62972796],
        [0.6602318 , 0.01045537, 0.2976935 , 0.87475884]],

       [[0.2355243 , 0.5748211 , 0.4694022 , 0.9221631 ],
        [0.87576556, 0.37243688, 0.1085155 , 0.52194774],
        [0.05801499, 0.5170574 , 0.9994899 , 0.6916456 ]]], dtype=float32)>

## Generate initial state  $a^{\langle 0 \rangle}$

The shape of the initial state should be (`BATCH_SIZE`, `NUM_UNITS`).

In [5]:
def get_initial_state(batch_size=BATCH_SIZE, num_units=NUM_UNITS, dtype=tf.float32):
    """Draw a random initial RNN state a<0>.

    Args:
        batch_size: number of examples in the batch (rows of the state).
        num_units: number of units in the RNN cell (columns of the state).
        dtype: dtype of the returned tensor.

    Returns:
        A tensor of shape (batch_size, num_units) sampled with
        ``tf.random.uniform``.
    """
    state_shape = (batch_size, num_units)
    return tf.random.uniform(shape=state_shape, dtype=dtype)

In [6]:
rnn_initial_state = get_initial_state()
rnn_initial_state

<tf.Tensor: id=15, shape=(2, 5), dtype=float32, numpy=
array([[0.35296965, 0.4760883 , 0.218315  , 0.18823195, 0.00805771],
       [0.9478549 , 0.09216118, 0.97593653, 0.4762491 , 0.43910003]],
      dtype=float32)>

## Prepare Keras Simple RNN Cell

In [7]:
rnn_cell = tf.keras.layers.SimpleRNNCell(NUM_UNITS)

## Direct implementation using the RNN cell

In [8]:
# Unroll the cell by hand: feed one time step at a time and thread the state through.
print("RNN cell:", rnn_cell.name)
step_outputs = []
rnn_state = rnn_initial_state
for t in range(SEQ_LEN):
    # rnn_inputs[:, t, :] is the (BATCH_SIZE, NUM_INPUTS) slice for step t; the cell
    # returns the step output and a one-element list holding the new state.
    step_output, [rnn_state] = rnn_cell(inputs=rnn_inputs[:, t, :], states=[rnn_state])
    step_outputs.append(step_output)
# Stack along axis 1 to obtain the batch-major (BATCH_SIZE, SEQ_LEN, NUM_UNITS) tensor
# directly, instead of building a time-major tensor and transposing it.
rnn_output = tf.stack(step_outputs, axis=1)
rnn_output

RNN cell: simple_rnn_cell


<tf.Tensor: id=101, shape=(2, 3, 5), dtype=float32, numpy=
array([[[-0.36660486, -0.4447528 ,  0.87347555, -0.7347421 ,
         -0.01985181],
        [ 0.5818863 ,  0.22447369,  0.48186636, -0.04692241,
          0.39645603],
        [-0.03439999, -0.51233447,  0.89835167, -0.00620399,
          0.30584958]],

       [[-0.48060995, -0.3623284 ,  0.9320335 ,  0.5762862 ,
          0.87290055],
        [-0.46589628,  0.6837234 ,  0.8777357 ,  0.63304055,
          0.5171348 ],
        [-0.5784061 ,  0.69833964,  0.8481758 , -0.5574479 ,
          0.19966465]]], dtype=float32)>

## Equivalent implementation with `nn.dynamic_rnn()`

In [9]:
# Let TensorFlow unroll the same cell over the time axis, starting from the same state.
print("RNN cell:", rnn_cell.name)
rnn_output2, rnn_state2 = tf.nn.dynamic_rnn(
    cell=rnn_cell,
    inputs=rnn_inputs,
    initial_state=rnn_initial_state,
)
rnn_output2

RNN cell: simple_rnn_cell


<tf.Tensor: id=207, shape=(2, 3, 5), dtype=float32, numpy=
array([[[-0.36660486, -0.4447528 ,  0.87347555, -0.7347421 ,
         -0.01985181],
        [ 0.5818863 ,  0.22447369,  0.48186636, -0.04692241,
          0.39645603],
        [-0.03439999, -0.51233447,  0.89835167, -0.00620399,
          0.30584958]],

       [[-0.48060995, -0.3623284 ,  0.9320335 ,  0.5762862 ,
          0.87290055],
        [-0.46589628,  0.6837234 ,  0.8777357 ,  0.63304055,
          0.5171348 ],
        [-0.5784061 ,  0.69833964,  0.8481758 , -0.5574479 ,
          0.19966465]]], dtype=float32)>

## Compare for-loop results with `nn.dynamic_rnn()` outputs

In [10]:
# Per-step outputs of the manual loop vs. dynamic_rnn, compared element by element.
np.array_equal(rnn_output.numpy(), rnn_output2.numpy())

True

In [11]:
# Final state of the manual loop vs. the final state returned by dynamic_rnn.
np.array_equal(rnn_state.numpy(), rnn_state2.numpy())

True

In [12]:
# For the simple RNN the output at each step is the state itself, so the
# last-step output must match the final state.
np.array_equal(rnn_output[:, -1, :].numpy(), rnn_state2.numpy())

True

## Reproduction in numpy

In [13]:
import numpy as np

In [14]:
# Export everything needed for one RNN step as plain numpy arrays.

# Data: inputs and the initial state
rnn_inputs_np = rnn_inputs.numpy()
rnn_initial_state_np = rnn_initial_state.numpy()

# Cell parameters
kernel_np = rnn_cell.kernel.numpy()                      # multiplies the step input
recurrent_kernel_np = rnn_cell.recurrent_kernel.numpy()  # multiplies the previous state
bias_np = rnn_cell.bias.numpy()

In [15]:
rnn_inputs_np.shape

(2, 3, 4)

In [16]:
kernel_np.shape

(4, 5)

In [17]:
# One step of the recurrence in plain numpy:
#   a<1> = tanh(x<1> . kernel + bias + a<0> . recurrent_kernel)
input_term_np = np.dot(rnn_inputs_np[:, 0, :], kernel_np)
recurrent_term_np = np.dot(rnn_initial_state_np, recurrent_kernel_np)
first_seq_out_np = np.tanh(input_term_np + bias_np + recurrent_term_np)
first_seq_out_np

array([[-0.36660492, -0.44475284,  0.87347555, -0.73474205, -0.01985181],
       [-0.48060992, -0.3623284 ,  0.93203354,  0.57628614,  0.8729006 ]],
      dtype=float32)

## Comparing with Keras SimpleRNNCell

In [18]:
first_seq_out, first_seq_state = rnn_cell(inputs=rnn_inputs[:, 0, :], states=[rnn_initial_state])

In [19]:
first_seq_out

<tf.Tensor: id=238, shape=(2, 5), dtype=float32, numpy=
array([[-0.36660486, -0.4447528 ,  0.87347555, -0.7347421 , -0.01985181],
       [-0.48060995, -0.3623284 ,  0.9320335 ,  0.5762862 ,  0.87290055]],
      dtype=float32)>

In [20]:
np.allclose(first_seq_out_np, first_seq_out.numpy())

True