In [1]:
import numpy as np
import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
batch_size = 128
embedding_dimension = 64
num_classes = 2
hidden_layer_size = 32
times_steps = 6
element_size = 1

In [3]:
digit_to_word_map = {
    1: "One", 2: "Two", 3: "Three", 4: "Four", 5: "Five",
    6: "Six", 7: "Seven", 8: "Eight", 9: "Nine"
}
digit_to_word_map[0] ="PAD"

In [4]:
digit_to_word_map

{0: 'PAD',
 1: 'One',
 2: 'Two',
 3: 'Three',
 4: 'Four',
 5: 'Five',
 6: 'Six',
 7: 'Seven',
 8: 'Eight',
 9: 'Nine'}

In [5]:
even_sentences = []
odd_sentences = []
seqlens = []

In [6]:
for i in range(10000):
    rand_seq_len = np.random.choice(range(3, 7))
    seqlens.append(rand_seq_len)
    rand_odd_ints = np.random.choice(range(1, 10, 2), rand_seq_len)
    rand_even_ints = np.random.choice(range(2, 10, 2), rand_seq_len)
    
    # Padding
    if rand_seq_len < 6:
        rand_odd_ints = np.append(rand_odd_ints,
                                 [0] * (6 - rand_seq_len))
        rand_even_ints = np.append(rand_even_ints,
                                  [0] * (6 - rand_seq_len))
    
    even_sentences.append(" ".join([digit_to_word_map[r]
                                   for r in rand_odd_ints]))
    odd_sentences.append(" ".join([digit_to_word_map[r] 
                                  for r in rand_even_ints]))

In [7]:
data = even_sentences + odd_sentences
seqlens *= 2

In [8]:
even_sentences[0:6]

['Seven Nine Five Three Seven One',
 'One One One Nine PAD PAD',
 'Five Three Three Three One One',
 'Five Seven Five Seven One One',
 'Seven Three One One PAD PAD',
 'Nine Three One Three Nine PAD']

In [9]:
odd_sentences[0:6]

['Two Six Eight Eight Eight Eight',
 'Two Six Four Two PAD PAD',
 'Two Two Four Six Four Eight',
 'Six Eight Four Two Eight Eight',
 'Six Six Six Eight PAD PAD',
 'Four Six Six Two Eight PAD']

In [10]:
seqlens[0:6]

[6, 4, 6, 6, 4, 5]

### Map from words to indices

In [11]:
word2index_map = {}
index = 0
for sent in data:
    for word in sent.lower().split():
        if word not in word2index_map:
            word2index_map[word] = index
            index += 1

### Inverse map

In [12]:
index2word_map = { index: word for word, index in word2index_map.items()}
vocabulary_size = len(index2word_map)

### create the labels and split the data into train and test sets

In [13]:
labels = [1] * 10000 + [0] * 10000
for i in range(len(labels)):
    label = labels[i]
    one_hot_encoding = [0] * 2
    one_hot_encoding[label] = 1
    labels[i] = one_hot_encoding

In [14]:
data_indices = list(range(len(data)))
np.random.shuffle(data_indices)
data = np.array(data)[data_indices]

In [15]:
labels = np.array(labels)[data_indices]
seqlens = np.array(seqlens)[data_indices]
train_x = data[:10000]
train_y = labels[:10000]
train_seqlens = seqlens[:10000]

In [16]:
test_x = data[10000:]
test_y = labels[10000:]
test_seqlens = seqlens[10000:]

In [17]:
def get_sentence_batch(batch_size, data_x,
                      data_y, data_seqlens):
    instance_indices = list(range(len(data_x)))
    np.random.shuffle(instance_indices)
    batch = instance_indices[:batch_size]
    x = [[word2index_map[word] for word in data_x[i].lower().split()]
        for i in batch]
    y = [data_y[i] for i in batch]
    seqlens = [data_seqlens[i] for i in batch]
    seqlens = [data_seqlens[i] for i in batch]
    return x, y, seqlens

In [18]:
_inputs = tf.placeholder(tf.int32, shape=[batch_size, times_steps])
_labels = tf.placeholder(tf.float32, shape=[batch_size, num_classes])

### seqlens for dynamic calculation

In [19]:
_seqlens = tf.placeholder(tf.int32, shape=[batch_size])

In [20]:
with tf.name_scope("embeddings"):
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size,
                          embedding_dimension],
                         -1.0, 1.0), name='embedding'
    )
    embed = tf.nn.embedding_lookup(embeddings, _inputs)

### LSTM and Using Sequence Length

In [21]:
with tf.variable_scope("lstm"):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size,
                                            forget_bias=1.0)
    outputs, states = tf.nn.dynamic_rnn(lstm_cell, embed,
                                       sequence_length = _seqlens,
                                       dtype=tf.float32)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [22]:
weights = {
    'linear_layer': tf.Variable(tf.truncated_normal([hidden_layer_size,
                                                    num_classes],
                                                   mean=0, stddev=.01))
}
biases = {
    'linear_layer': tf.Variable(tf.truncated_normal([num_classes],
                                                   mean=0, stddev=.01))
}

### Extract the last relevant output and use in a linear layer

In [23]:
final_output = tf.matmul(states[1],
                        weights['linear_layer']) + biases['linear_layer']
softmax = tf.nn.softmax_cross_entropy_with_logits(logits = final_output,
                                                 labels = _labels)
cross_entropy = tf.reduce_mean(softmax)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



### Training Embeddings and the LSTM Classifier

In [24]:
train_step = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cross_entropy)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [25]:
correct_pridiction = tf.equal(tf.argmax(_labels, 1),
                             tf.argmax(final_output, 1))

In [26]:
accuracy = (tf.reduce_mean(tf.cast(correct_pridiction, tf.float32)))*100

In [27]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for step in range(1001):
        x_batch, y_batch, seqlen_batch = get_sentence_batch(batch_size,
                                                           train_x,
                                                           train_y,
                                                          train_seqlens)
        sess.run(train_step, feed_dict={_inputs:x_batch,
                                        _labels:y_batch,
                                        _seqlens:seqlen_batch})
        
        if step % 100 == 0:
            acc = sess.run(accuracy, feed_dict={
                _inputs:x_batch,
                _labels:y_batch,
                _seqlens:seqlen_batch})
            print("Accuracy at %d: %.5f" % (step, acc))
        
    for test_batch in range(5):
        x_test, y_test,seqlen_test = get_sentence_batch(batch_size,
                                                        test_x,test_y,
                                                        test_seqlens)
        batch_pred,batch_acc = sess.run([tf.argmax(final_output,1),
                                        accuracy],
                                        feed_dict={_inputs:x_test,
                                        _labels:y_test,
                                        _seqlens:seqlen_test})
        print("Test batch accuracy %d: %.5f" % (test_batch, batch_acc))
        
        output_example = sess.run([outputs],
                                  feed_dict={_inputs:x_test,
                                             _labels:y_test,
                                            _seqlens:seqlen_test})
        states_example = sess.run([states[1]],
                                  feed_dict={_inputs:x_test,
                                _labels:y_test,
                                _seqlens:seqlen_test})

Accuracy at 0: 34.37500
Accuracy at 100: 100.00000
Accuracy at 200: 100.00000
Accuracy at 300: 100.00000
Accuracy at 400: 100.00000
Accuracy at 500: 100.00000
Accuracy at 600: 100.00000
Accuracy at 700: 100.00000
Accuracy at 800: 100.00000
Accuracy at 900: 100.00000
Accuracy at 1000: 100.00000
Test batch accuracy 0: 100.00000
Test batch accuracy 1: 100.00000
Test batch accuracy 2: 100.00000
Test batch accuracy 3: 100.00000
Test batch accuracy 4: 100.00000


In [28]:
seqlen_test[1]

3

In [29]:
output_example[0][1].shape

(6, 32)

In [30]:
output_example[0][1][:6, 0:3]

array([[ 0.53421783,  0.5949961 , -0.44566885],
       [ 0.77338314,  0.7697201 , -0.72978646],
       [ 0.8353319 ,  0.82752424, -0.82909214],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ]], dtype=float32)

In [31]:
states_example[0][1][0:3]

array([ 0.8353319 ,  0.82752424, -0.82909214], dtype=float32)

In [32]:
num_LSTM_layers = 2
with tf.variable_scope("lstm"):
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(hidden_layer_size,
                                            forget_bias=1.0)
    cell = tf.contrib.rnn.MultiRNNCell(cells=[lstm_cell]*num_LSTM_layers,
                                      state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(cell, embed,
                                       sequence_length = _seqlens,
                                       dtype=tf.float32)

Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.


ValueError: in converted code:
    relative to /home/jupyter/.local/lib/python3.6/site-packages/tensorflow/python:

    ops/rnn_cell_impl.py:1719 call
        cur_inp, new_state = cell(cur_inp, cur_state)
    ops/rnn_cell_impl.py:767 call
        array_ops.concat([inputs, h], 1), self._kernel)
    util/dispatch.py:180 wrapper
        return target(*args, **kwargs)
    ops/math_ops.py:2647 matmul
        a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
    ops/gen_math_ops.py:5925 mat_mul
        name=name)
    framework/op_def_library.py:788 _apply_op_helper
        op_def=op_def)
    util/deprecation.py:507 new_func
        return func(*args, **kwargs)
    framework/ops.py:3616 create_op
        op_def=op_def)
    framework/ops.py:2027 __init__
        control_input_ops)
    framework/ops.py:1867 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimensions must be equal, but are 64 and 96 for 'lstm_1/rnn/while/rnn/multi_rnn_cell/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [128,64], [96,128].
