In [1]:
import numpy as np

from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Activation,LSTM
from tensorflow.keras import optimizers
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [2]:
# parameters for data load (all three are passed to reuters.load_data below)
# vocabulary size limit handed to load_data as `num_words`
num_words = 30000
# sequence-length limit handed to load_data as `maxlen`
# (the padded arrays printed further down end up 49 columns wide)
maxlen = 50
# share of the samples reserved for the test split (599 of 1994 in the shapes below)
test_split = 0.3

In [3]:
# Load the Reuters dataset as (features, labels) pairs for the train and
# test splits, using the parameters defined in the configuration cell.
(X_train, y_train), (X_test, y_test) = reuters.load_data(
    num_words = num_words,
    maxlen = maxlen,
    test_split = test_split,
)

In [4]:
# Peek at the first five raw samples: each X entry is a variable-length list
# of integers, each y entry a single integer class label.
X_train[:5] , y_train[:5]

(array([list([1, 245, 273, 207, 156, 53, 74, 160, 26, 14, 46, 296, 26, 39, 74, 2979, 3554, 14, 46, 4689, 4329, 86, 61, 3499, 4795, 14, 61, 451, 4329, 17, 12]),
        list([1, 486, 341, 785, 26, 14, 482, 26, 255, 606, 252, 83, 146, 91, 102, 17, 12]),
        list([1, 245, 273, 397, 124, 53, 191, 26, 14, 83, 26, 39, 32, 32, 11, 14, 19, 12, 11, 88, 279, 59, 11, 14, 472, 59, 11, 17, 12]),
        list([1, 53, 745, 26, 14, 722, 26, 39, 7442, 18, 14, 6353, 18, 88, 258, 44, 11, 14, 119, 70, 11, 17, 12]),
        list([1, 486, 341, 119, 26, 14, 119, 26, 7, 255, 346, 606, 252, 83, 146, 91, 272, 17, 12])],
       dtype=object),
 array([3, 3, 3, 3, 3], dtype=int64))

In [5]:
# Pad the sequences with zeros so that every sample has the same length.
# padding = 'post' => the zeros are appended to the END of each sequence.
#
# Both splits are padded to ONE shared length. Letting pad_sequences infer the
# width separately per split only produces matching shapes when the longest
# train and test sequences happen to be equally long; otherwise X_test would
# not fit a model built for X_train's width.
seq_len = max(max(len(seq) for seq in X_train), max(len(seq) for seq in X_test))
X_train = pad_sequences(X_train, padding = 'post', maxlen = seq_len)
X_test = pad_sequences(X_test, padding = 'post', maxlen = seq_len)

In [6]:
# First five samples after padding: trailing zeros fill each row to a common width.
X_train[:5]

array([[   1,  245,  273,  207,  156,   53,   74,  160,   26,   14,   46,
         296,   26,   39,   74, 2979, 3554,   14,   46, 4689, 4329,   86,
          61, 3499, 4795,   14,   61,  451, 4329,   17,   12,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0],
       [   1,  486,  341,  785,   26,   14,  482,   26,  255,  606,  252,
          83,  146,   91,  102,   17,   12,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0],
       [   1,  245,  273,  397,  124,   53,  191,   26,   14,   83,   26,
          39,   32,   32,   11,   14,   19,   12,   11,   88,  279,   59,
          11,   14,  472,   59,   11,   17,   12,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,   

In [7]:

# Append a trailing feature axis of size 1:
# (samples, timesteps) -> (samples, timesteps, 1), matching the (49, 1)
# input_shape declared by the models further down.
X_train = np.array(X_train).reshape(X_train.shape[:2] + (1,))
X_test = np.array(X_test).reshape(X_test.shape[:2] + (1,))

In [8]:
# One-hot encode the train and test labels in a single call, so both splits
# end up with the same number of class columns.
y_data = to_categorical(np.concatenate([y_train, y_test]))

In [9]:
# First five one-hot encoded label rows.
y_data[:5]

array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 

In [10]:
# Split the one-hot labels back into their train and test parts.
# The boundary comes from the number of training samples rather than a
# hard-coded 1395, so it stays correct if num_words / maxlen / test_split
# are changed in the configuration cell.
n_train = len(X_train)
y_train = y_data[:n_train]
y_test = y_data[n_train:]

In [11]:
# Sanity check: print the shape of every array that feeds the models.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(1395, 49, 1)
(599, 49, 1)
(1395, 46)
(599, 46)


### 1. Deep RNN

* RNNs can be made deep by stacking multiple recurrent layers on top of each other.

![img.png](https://www.researchgate.net/profile/Jianjing_Zhang3/publication/326038469/figure/fig2/AS:670058405691403@1536765781751/Diagram-of-a-deep-LSTM-network.png)

In [36]:
def deep_lstm(input_shape = (49, 1), num_classes = 46):
    """Build and compile a classifier made of four stacked LSTM layers.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample as (timesteps, features). The default (49, 1)
        matches the padded sequences prepared earlier in this notebook.
    num_classes : int
        Number of units in the softmax output layer. The default 46 matches
        the width of the one-hot labels in this notebook.

    Returns
    -------
    A compiled Sequential model (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
    """
    model = Sequential()
    # Every LSTM except the last returns the whole sequence, so the next
    # recurrent layer receives one input vector per timestep.
    model.add(LSTM(20, input_shape = input_shape, return_sequences = True))
    model.add(LSTM(20, return_sequences = True))
    model.add(LSTM(20, return_sequences = True))
    # The last LSTM returns only its final output, which feeds the classifier head.
    model.add(LSTM(20, return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    adam = optimizers.Adam(learning_rate = 0.001)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [40]:
# Wrap the deep LSTM builder in a scikit-learn style estimator and train it
# on the training split.
model = KerasClassifier(
    build_fn = deep_lstm,
    epochs = 10,
    batch_size = 32,
    verbose = 1,
)
model.fit(X_train, y_train)

Train on 1395 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1716fece548>

In [41]:
# Predicted class indices for the test split, plus the true class indices
# recovered from the one-hot test labels.
y_pred = model.predict(X_test)
y_test_ = y_test.argmax(axis = 1)



In [48]:
# Predict class indices for the test split and show the first two.
# NOTE(review): this repeats the prediction already made in the previous cell.
y_pred = model.predict(X_test)
y_pred[:2]



array([3, 3])

In [49]:
# Turn the one-hot test labels back into integer class indices and show them.
y_test_ = y_test.argmax(axis = 1)
y_test_

array([ 3,  4,  3,  3,  3,  3, 42,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        4,  3,  6,  3,  3,  3,  3,  3, 19,  4, 19,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  3,  3,  3,  4,  3,
        3,  3, 13,  3,  3,  3, 16, 20,  4,  1,  4,  3,  4,  3,  3,  4, 19,
        1,  3, 10,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  3,  3, 17,  3,
        3,  3, 24,  3,  3,  5,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3, 24,  3,  3,  4,  3,  3,  3,  3,  1,  3,  6,  3,
        4,  3,  3,  3,  3,  3,  3,  3,  3,  1,  4,  3,  1,  3,  1,  3,  8,
        4,  3,  3,  3,  4,  3,  4,  3,  3, 24,  4,  4,  3,  3,  3,  3,  3,
        3,  3,  3,  8,  3,  3, 19,  3,  3,  4,  3,  3,  3,  3,  4,  3,  3,
        2,  3,  4,  3,  3,  3,  3,  3, 23,  3,  3,  3,  3,  3, 40,  3,  4,
        3, 20,  3,  3, 16,  3,  3,  3,  3,  3,  4,  3,  3,  3,  1,  3,  3,
       18,  3,  3,  3,  3

In [50]:
# accuracy_score takes (y_true, y_pred): pass the true labels first.
# Plain accuracy is symmetric in its arguments, so the printed value is unchanged.
print(accuracy_score(y_test_, y_pred))

0.7479131886477463


### 2. Bidirectional RNN

* Bidirectional RNNs read the sequence in both directions, so they capture not only the influence of earlier elements on later ones but also the reverse

* In effect, they can be thought of as two separate RNNs, one reading the sequence forward and one reading it backward, whose outputs are merged

![img.png](https://www.i2tutorials.com/wp-content/media/2019/05/Deep-Dive-into-Bidirectional-LSTM-i2tutorials.jpg)

In [53]:
from tensorflow.keras.layers import Bidirectional

In [54]:
def bidirectional_lstm(input_shape = (49, 1), num_classes = 46):
    """Build and compile a classifier with a single bidirectional LSTM layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample as (timesteps, features). The default (49, 1)
        matches the padded sequences prepared earlier in this notebook.
    num_classes : int
        Number of units in the softmax output layer. The default 46 matches
        the width of the one-hot labels in this notebook.

    Returns
    -------
    A compiled Sequential model (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
    """
    model = Sequential()
    # The Bidirectional wrapper runs the 20-unit LSTM over the sequence in
    # both directions; only the final outputs are kept (return_sequences = False).
    model.add(Bidirectional(LSTM(20, return_sequences = False), input_shape = input_shape))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    adam = optimizers.Adam(learning_rate = 0.001)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [56]:
# Train the single-layer bidirectional LSTM through the scikit-learn wrapper.
model = KerasClassifier(
    build_fn = bidirectional_lstm,
    epochs = 10,
    batch_size = 32,
    verbose = 1,
)
model.fit(X_train, y_train)

Train on 1395 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1717ee808c8>

In [57]:
# Predicted class index for every test sample, displayed in full.
y_pred = model.predict(X_test)
y_pred



array([ 3,  4,  3,  3,  3,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        4,  4,  4,  3,  3,  3,  3,  3,  3,  4,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  3,  3,  3,  4,  3,
        3,  3,  4,  3,  3,  3,  3,  3,  4,  3,  4,  3,  4,  3,  3,  4, 19,
        4,  3,  4,  3,  4,  3,  3,  3,  4,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  4,  3,  3,  3,  3,  3,  4,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  4,  3,  4,  3,  3,  3,  3,  4,  3,  4,  4,
        4,  3,  4,  3,  3,  3,  3,  3,  3,  3,  4,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  4,  3,  4,  3,  3,  3,  4,  4,  3,  3,  3,  4,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  3,  3,
        4,  3,  3,  3,  3,  3,  3,  3,  4,  3,  3,  3,  3,  3,  4,  3,  3,
        3, 19,  4,  3,  4,  3,  3,  3,  3,  3,  4,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  4

In [59]:
# Integer class indices of the true test labels (argmax over the one-hot axis).
y_test_ = y_test.argmax(axis = 1)
y_test_

array([ 3,  4,  3,  3,  3,  3, 42,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        4,  3,  6,  3,  3,  3,  3,  3, 19,  4, 19,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  3,  3,  3,  4,  3,
        3,  3, 13,  3,  3,  3, 16, 20,  4,  1,  4,  3,  4,  3,  3,  4, 19,
        1,  3, 10,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  3,  3, 17,  3,
        3,  3, 24,  3,  3,  5,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
        3,  3,  3,  3,  3, 24,  3,  3,  4,  3,  3,  3,  3,  1,  3,  6,  3,
        4,  3,  3,  3,  3,  3,  3,  3,  3,  1,  4,  3,  1,  3,  1,  3,  8,
        4,  3,  3,  3,  4,  3,  4,  3,  3, 24,  4,  4,  3,  3,  3,  3,  3,
        3,  3,  3,  8,  3,  3, 19,  3,  3,  4,  3,  3,  3,  3,  4,  3,  3,
        2,  3,  4,  3,  3,  3,  3,  3, 23,  3,  3,  3,  3,  3, 40,  3,  4,
        3, 20,  3,  3, 16,  3,  3,  3,  3,  3,  4,  3,  3,  3,  1,  3,  3,
       18,  3,  3,  3,  3

In [60]:
# Follow scikit-learn's (y_true, y_pred) argument order. The score is the
# same either way because accuracy only counts matching positions.
print(accuracy_score(y_test_, y_pred))

0.8030050083472454


### 3. Deep Bidirectional RNN

* Bidirectional RNNs can be stacked

![img.png](https://www.researchgate.net/profile/Ting_Zhang90/publication/324433547/figure/fig20/AS:614055798644770@1523413719562/A-deep-bidirectional-LSTM-network-with-two-hidden-levels.png)

In [68]:
def deep_bidirectional_lstm(input_shape = (49, 1), num_classes = 46):
    """Build and compile a classifier made of four stacked bidirectional LSTM layers.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample as (timesteps, features). The default (49, 1)
        matches the padded sequences prepared earlier in this notebook.
    num_classes : int
        Number of units in the softmax output layer. The default 46 matches
        the width of the one-hot labels in this notebook.

    Returns
    -------
    A compiled Sequential model (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
    """
    model = Sequential()
    # Each wrapped LSTM has 10 units per direction. All layers but the last
    # return the full sequence so the next bidirectional layer gets one
    # input vector per timestep.
    model.add(Bidirectional(LSTM(10, return_sequences = True), input_shape = input_shape))
    model.add(Bidirectional(LSTM(10, return_sequences = True)))
    model.add(Bidirectional(LSTM(10, return_sequences = True)))
    # The last layer keeps only its final outputs for the classifier head.
    model.add(Bidirectional(LSTM(10, return_sequences = False)))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    adam = optimizers.Adam(learning_rate = 0.001)
    model.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

    return model

In [82]:
# Train the stacked bidirectional LSTM through the scikit-learn wrapper.
model = KerasClassifier(
    build_fn = deep_bidirectional_lstm,
    epochs = 10,
    batch_size = 32,
    verbose = 1,
)
model.fit(X_train, y_train)

Train on 1395 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x17229f83cc8>

In [83]:
# Class-index predictions for the test split and the corresponding true
# class indices taken from the one-hot labels.
y_pred = model.predict(X_test)
y_test_ = y_test.argmax(axis = 1)



In [84]:
# True labels go first, predictions second, as accuracy_score's signature
# (y_true, y_pred) expects. The resulting accuracy value is unaffected.
print(accuracy_score(y_test_, y_pred))

0.7946577629382304
