![](https://pic2.zhimg.com/80/v2-b0175ebd3419f9a11a3d0d8b00e28675_1440w.jpg)

Natural Language Processing usually uses LSTM and GRU.

LSTM and GRU are improved variants of the basic RNN.

U, W and V are shared across all time steps
- U is related to $x_t$ (the input at the current time step)
- W is related to $s_{t-1}$
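- How about V? V maps the hidden state $s_t$ to the output at that time step (it is the weight matrix of the `Dense` layer in the model below)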

In [1]:
import numpy as np
from tensorflow import keras

In [2]:
# Random batch laid out as (samples, time steps, features):
# 16 sequences, each with 32 time steps of 28 features.
inputs = np.random.standard_normal((16, 32, 28)).astype(np.float32)
inputs.shape

(16, 32, 28)

In [3]:
# Linear SimpleRNN followed by a Dense read-out, declared in one go.
model = keras.Sequential([
    # Recurrent layer: holds the slopes U and W plus one intercept, b1.
    keras.layers.SimpleRNN(
        64,
        return_sequences=True,  # emit the hidden state of every time step (needed to stack another RNN)
        return_state=False,
        activation='linear',    # identity activation, so the recurrence is easy to replay by hand
    ),
    # A second recurrent layer, e.g. keras.layers.SimpleRNN(128), could be stacked here.
    # Read-out layer: V is the slope of this Dense layer, b2 its intercept.
    keras.layers.Dense(2),
])

In [4]:
# Forward pass through the RNN and the Dense layer.
out = model(inputs)
out.shape
# 16: samples, 32: time steps (the RNN returns the whole sequence),
# 2: raw Dense outputs per step (no softmax is applied, so they are not probabilities)

TensorShape([16, 32, 2])

In [5]:
# Inspect the raw output values as a NumPy array.
out.numpy()

array([[[  0.26192415,  -0.06166075],
        [ -2.3828194 ,   0.58029526],
        [  3.9920027 ,   0.61705804],
        ...,
        [ -2.166027  ,   2.0818596 ],
        [ -2.4550283 ,   8.124401  ],
        [-17.985926  ,   0.7521892 ]],

       [[ -0.04286465,   0.1337983 ],
        [ -0.4933125 ,   1.2668729 ],
        [ -0.24178153,   0.18593736],
        ...,
        [ -8.786729  ,  -3.5929904 ],
        [  7.6140804 ,   8.789068  ],
        [  4.155034  ,  -2.7936597 ]],

       [[  0.36555687,  -0.48133707],
        [  0.661725  ,  -1.440023  ],
        [  0.34263408,   0.22247672],
        ...,
        [  4.8951707 ,   3.2433033 ],
        [ -1.0145526 ,   2.2592432 ],
        [ 19.576881  ,  -1.2679727 ]],

       ...,

       [[  0.8057046 ,  -1.3026489 ],
        [  2.1861756 ,  -1.9934711 ],
        [ -3.3431735 ,  -1.5255702 ],
        ...,
        [ 11.158299  ,   4.75774   ],
        [  5.721303  ,   4.5129924 ],
        [  6.760111  ,  -5.5375805 ]],

       [[ -0.18

### Manual Calculation

In [6]:
# Collect the model parameters so the forward pass can be replayed by hand.
v = model.variables # flat list of everything the model tracks (kept for reference)

# Read the parameters from each layer's `weights` rather than by position in
# `model.variables`: depending on the Keras version, the flat list may also hold
# non-weight state (e.g. random-seed state), which would shift the indices.
rnn_layer = model.layers[0]
dense_layer = model.layers[-1]

# slopes
U = rnn_layer.weights[0]    # input -> hidden kernel
W = rnn_layer.weights[1]    # hidden -> hidden (recurrent) kernel
V = dense_layer.weights[0]  # hidden -> output kernel

# intercepts, defaults: 0
b1 = rnn_layer.weights[2]
b2 = dense_layer.weights[1]

In [7]:
# Replay the linear SimpleRNN by hand: s_t = x_t @ U + s_{t-1} @ W + b1,
# with no recurrent term at the first step.

# The weights do not change inside the loop, so convert them to NumPy once.
U_np, W_np, b1_np = U.numpy(), W.numpy(), b1.numpy()

hidden_states = []
for t in range(inputs.shape[1]): # iterate over the time steps (axis 1 of `inputs`)
    pre_activation = inputs[:, t].dot(U_np)
    if hidden_states:
        # every step after the first also receives the previous hidden state
        pre_activation = pre_activation + hidden_states[-1].dot(W_np)
    hidden_states.append(pre_activation + b1_np)

# Stack along axis 1 so the layout is (samples, time steps, units), like the Keras output.
result = np.stack(hidden_states, axis=1)
result.shape

(16, 32, 64)

In [8]:
# Apply the Dense layer by hand (V, b2) to the hidden state of the last time step.
# `result` is laid out as (samples, time steps, units) here, so [:, -1] picks that step.
result[:,-1].dot(V.numpy()) + b2.numpy()

array([[-17.985926  ,   0.7521892 ],
       [  4.155034  ,  -2.7936597 ],
       [ 19.576881  ,  -1.2679727 ],
       [  5.401359  ,   5.6418033 ],
       [  7.3007574 ,  -0.17100573],
       [  4.0460234 ,   5.7486515 ],
       [ -3.4118857 ,  -2.836771  ],
       [ 13.079189  , -12.672295  ],
       [  3.6396937 ,   7.1409874 ],
       [ -5.2455664 ,   1.0911598 ],
       [  3.5954154 ,  -4.099648  ],
       [  0.76271486,   5.7674685 ],
       [  2.4566226 ,   0.03880143],
       [  6.760111  ,  -5.5375805 ],
       [ -2.9169083 ,   1.6022457 ],
       [ -3.7288969 ,  -1.2840161 ]], dtype=float32)

In [9]:
# Same hand-written recurrence, but keeping the time axis first this time.
result = []
for i in range(32): # one pass per time step
    state = inputs[:, i].dot(U.numpy())
    if i > 0:
        # add the contribution of the previous hidden state
        state = state + result[i - 1].dot(W.numpy())
    result.append(state + b1.numpy())
result = np.asarray(result) # shape: (time steps, samples, units)
result.shape

(32, 16, 64)

In [10]:
# The output at the last time step: `result` is time-major here,
# so result[-1] is the hidden state of the final step for every sample.
result[-1].dot(V.numpy()) + b2.numpy()

array([[-17.985926  ,   0.7521892 ],
       [  4.155034  ,  -2.7936597 ],
       [ 19.576881  ,  -1.2679727 ],
       [  5.401359  ,   5.6418033 ],
       [  7.3007574 ,  -0.17100573],
       [  4.0460234 ,   5.7486515 ],
       [ -3.4118857 ,  -2.836771  ],
       [ 13.079189  , -12.672295  ],
       [  3.6396937 ,   7.1409874 ],
       [ -5.2455664 ,   1.0911598 ],
       [  3.5954154 ,  -4.099648  ],
       [  0.76271486,   5.7674685 ],
       [  2.4566226 ,   0.03880143],
       [  6.760111  ,  -5.5375805 ],
       [ -2.9169083 ,   1.6022457 ],
       [ -3.7288969 ,  -1.2840161 ]], dtype=float32)