### Links

2. Source
    * https://www.tensorflow.org/tutorials/text/text_classification_rnn

In [1]:
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.keras import layers as L
from tensorflow import keras

In [2]:
import matplotlib.pyplot as plt

def plot_graphs(history, metric):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_'+metric], '')
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend([metric, 'val_'+metric])
    plt.show()

In [40]:
dataset, info = tfds.load('imdb_reviews/subwords8k',
                          with_info=True,
                          as_supervised=True)

train_dataset, test_dataset = dataset['train'], dataset['test']



In [4]:
encoder = info.features['text'].encoder

In [5]:
print('Vocabulary size: {}'.format(encoder.vocab_size))

Vocabulary size: 8185


In [6]:
sample_string = 'Hello TensorFlow.'

encoded_string = encoder.encode(sample_string)
print('Encoded string is {}'.format(encoded_string))

original_string = encoder.decode(encoded_string)
print('The original string: "{}"'.format(original_string))

Encoded string is [4025, 222, 6307, 2327, 4043, 2120, 7975]
The original string: "Hello TensorFlow."


In [7]:
encoder.encode("Hello")

[4025, 8040]

In [8]:
encoder.encode("o ")

[222]

In [9]:
encoder.decode([4025])

'Hell'

In [10]:
for index in encoded_string:
    print('{} ----> "{}"'.format(index, encoder.decode([index])))

4025 ----> "Hell"
222 ----> "o "
6307 ----> "Ten"
2327 ----> "sor"
4043 ----> "Fl"
2120 ----> "ow"
7975 ----> "."


In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 3),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(4)),
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [20]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                optimizer=tf.keras.optimizers.Adam(1e-4),
                metrics=['accuracy'])

In [47]:
for train_example, train_label in train_dataset.take(100):
    print(train_example.shape, train_label.shape)
    break
    print('Encoded text:', train_example[:10].numpy())
    print('Label:', train_label.numpy())

(64, 1323) (64,)


In [43]:
for x in train_dataset.batch(32):
    y = x
    break

InvalidArgumentError: Cannot batch tensors with different shapes in component 0. First element had shape [64,1377] and element 1 had shape [64,1347].

In [45]:
train_dataset.take(1)

<TakeDataset shapes: ((None, None), (None,)), types: (tf.int64, tf.int64)>

In [41]:
BUFFER_SIZE = 10000
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(BATCH_SIZE, padded_shapes=([None],[]))

test_dataset = test_dataset.padded_batch(BATCH_SIZE, padded_shapes=([None],[]))

In [21]:
history = model.fit(train_dataset,
                    epochs=2,
                    validation_data=test_dataset, 
                    validation_steps=30)

Epoch 1/2
    277/Unknown - 146s 527ms/step - loss: 0.6931 - accuracy: 0.5044

KeyboardInterrupt: 

In [None]:
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))

In [None]:
def pad_to_size(vec, size):
    zeros = [0] * (size - len(vec))
    vec.extend(zeros)
    return vec

In [None]:
def sample_predict(sample_pred_text, pad):
    encoded_sample_pred_text = encoder.encode(sample_pred_text)

    if pad:
        encoded_sample_pred_text = pad_to_size(encoded_sample_pred_text, 64)
    
    encoded_sample_pred_text = tf.cast(encoded_sample_pred_text, tf.float32)
    predictions = model.predict(tf.expand_dims(encoded_sample_pred_text, 0))

    return (predictions)

In [None]:
# predict on a sample text without padding.

sample_pred_text = ('The movie was cool. The animation and the graphics '
                    'were out of this world. I would recommend this movie.')
predictions = sample_predict(sample_pred_text, pad=False)
print(predictions)

In [None]:
sample_pred_text = ('The movie was cool. The animation and the graphics '
                    'were out of this world. I would recommend this movie.')
predictions = sample_predict(sample_pred_text, pad=True)
print(predictions)

In [None]:
plot_graphs(history, 'accuracy')

In [None]:
plot_graphs(history, 'loss')

In [None]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64,  return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1)
])