In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras

# Record the TensorFlow version, the interpreter version, and the version of
# every major library so the run can be reproduced later.
print(tf.__version__, sys.version_info, sep='\n')
for lib in (mpl, np, pd, sklearn, tf, keras):
    print(lib.__name__, lib.__version__)


2.3.1
sys.version_info(major=3, minor=8, micro=3, releaselevel='final', serial=0)
matplotlib 3.2.2
numpy 1.18.5
pandas 1.0.5
sklearn 0.23.1
tensorflow 2.3.1
tensorflow.keras 2.4.0


In [2]:
import tensorflow_datasets as tfds

# Load the 'imdb_reviews/subwords8k' dataset as (input, label) pairs together
# with its metadata object. This needs the `tensorflow_datasets` package; the
# recorded run of this cell failed with ModuleNotFoundError because it was not
# installed in the kernel's environment.
dataset, info = tfds.load(
    name='imdb_reviews/subwords8k',
    with_info=True,
    as_supervised=True,
)

# Pull the two splits out of the returned mapping.
train_dataset = dataset['train']
test_dataset = dataset['test']

ModuleNotFoundError: No module named 'tensorflow_datasets'

In [None]:
# The text feature's metadata carries the encoder used to tokenize the reviews.
text_feature = info.features['text']
tokenizer = text_feature.encoder
print('vocabulary size: {}'.format(tokenizer.vocab_size))

In [None]:
sample_string = 'Tensorflow is cool.'

# Encode the sample into tokens, then decode those tokens back into text.
tokenized_string = tokenizer.encode(sample_string)
original_string = tokenizer.decode(tokenized_string)

print('Tokenized string is {}'.format(tokenized_string))
print('Original string is {}'.format(original_string))

# The encode/decode round trip must reproduce the input exactly.
assert original_string == sample_string

In [None]:
# Show which piece of text each individual token decodes to.
for token_id in tokenized_string:
    piece = tokenizer.decode([token_id])
    print('{} --> {}'.format(token_id, piece))

In [None]:
buffer_size = 10000
batch_size = 64

# TF 2.x datasets have no `output_shapes` attribute (accessing it raises
# AttributeError), so read the per-component shapes through the compat helper.
print(tf.compat.v1.data.get_output_shapes(train_dataset))
print(tf.compat.v1.data.get_output_shapes(test_dataset))

# Shuffle the training examples, then batch both splits. With no explicit
# padded_shapes, padded_batch pads every unknown dimension to the longest
# element in each batch, which is what passing the element shapes achieved.
# NOTE: this cell rebinds train_dataset/test_dataset; re-running it without
# re-running the loading cell would batch the already-batched datasets again.
train_dataset = train_dataset.shuffle(buffer_size)
train_dataset = train_dataset.padded_batch(batch_size)
test_dataset = test_dataset.padded_batch(batch_size)

In [None]:
vocab_size = tokenizer.vocab_size
embedding_dim = 16
# No batch size is set in this cell: the datasets passed to fit() are batched
# in the tf.data pipeline. An unused `batch_size = 512` here only shadowed the
# value used for batching and misstated the real training batch size.

# Token ids -> 16-d embeddings -> bidirectional LSTM that returns only its
# final output (return_sequences=False) -> dense ReLU layer -> single sigmoid
# unit.
bi_rnn_model = keras.models.Sequential([
    keras.layers.Embedding(vocab_size, embedding_dim),
    keras.layers.Bidirectional(
        keras.layers.LSTM(
            units = 32, return_sequences = False)),
    keras.layers.Dense(32, activation = 'relu'),
    keras.layers.Dense(1, activation='sigmoid'),
])

bi_rnn_model.summary()
# A sigmoid output paired with binary cross-entropy; accuracy is tracked as
# the reported metric.
bi_rnn_model.compile(optimizer = 'adam',
                     loss = 'binary_crossentropy',
                     metrics = ['accuracy'])


In [None]:
# Train for 10 epochs on the training dataset, passing the test dataset as
# validation data; the returned History object feeds the plots below.
fit_options = dict(epochs=10, validation_data=test_dataset)
history = bi_rnn_model.fit(train_dataset, **fit_options)

In [None]:
def plot_learning_curves(history, label, epochs, min_value, max_value):
    """Plot a training metric and its validation counterpart on one figure.

    Args:
        history: object whose ``history`` attribute maps metric names to
            sequences of values (e.g. the value returned by ``model.fit``).
        label: metric name to plot; ``'val_' + label`` is plotted alongside it.
        epochs: upper bound of the x axis.
        min_value: lower bound of the y axis.
        max_value: upper bound of the y axis.
    """
    val_label = 'val_' + label
    curves = pd.DataFrame({
        label: history.history[label],
        val_label: history.history[val_label],
    })
    curves.plot(figsize=(8, 5))
    plt.grid(True)
    # Fix the visible window to [0, epochs] x [min_value, max_value].
    plt.axis([0, epochs, min_value, max_value])
    plt.show()
    
# One figure per tracked metric, each over 10 epochs with the y axis fixed to [0, 1].
for metric in ('accuracy', 'loss'):
    plot_learning_curves(history, metric, 10, 0, 1)

In [None]:
# Final evaluation on the test dataset; the call's return value is the cell's output.
bi_rnn_model.evaluate(x=test_dataset)