In [1]:
import tensorflow as tf
import numpy as np
import tensorflow_datasets as tfds
from tensorflow.keras.models import Model

In [2]:
# Turn off tensorflow_datasets progress bars so dataset loading does not flood the cell output.
tfds.disable_progress_bar()

In [3]:
# Load the IMDB reviews dataset; as_supervised=True makes every element a
# 2-tuple instead of a feature dictionary.
dataset = tfds.load(
    name='imdb_reviews',
    as_supervised=True,
)

In [4]:
# Pull the two named splits out of the loaded dataset mapping.
train_dataset = dataset['train']
test_dataset = dataset['test']

In [5]:
BUFFER_SIZE = 1000  # number of examples held in the shuffle buffer
BATCH_SIZE = 64     # examples per batch

# Build both pipelines from the raw splits in `dataset` instead of from the
# previous values of train_dataset / test_dataset. Rebinding a name from itself
# makes the cell non-idempotent: running it a second time would shuffle and
# batch an already batched dataset (batches of batches).
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
# The test split is batched and prefetched but not shuffled.
test_dataset = (
    dataset['test']
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

In [7]:
class SentModel(tf.keras.models.Model):
    """Binary classifier built as a single Sequential stack.

    Layer order: encoder -> masked Embedding -> Bidirectional LSTM ->
    Dense (ReLU) -> Dense(1, sigmoid).

    Args:
        encoder: Layer placed first in the stack. It must expose
            ``vocabulary_size()``; that value is read once, at construction
            time, to size the embedding table, so the encoder has to be
            adapted *before* it is passed in.
        embedding_dim: Output width of the embedding layer. Defaults to 64.
        lstm_units: Units of the LSTM wrapped by ``Bidirectional``.
            Defaults to 64.
        dense_units: Units of the hidden ReLU dense layer. Defaults to 64.
    """

    def __init__(self, encoder, embedding_dim=64, lstm_units=64, dense_units=64):
        super().__init__()
        self.model = tf.keras.models.Sequential([
            encoder,
            # mask_zero=True marks index 0 as padding so downstream layers can skip it.
            tf.keras.layers.Embedding(
                input_dim=encoder.vocabulary_size(),
                output_dim=embedding_dim,
                mask_zero=True,
            ),
            tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(lstm_units)),
            tf.keras.layers.Dense(dense_units, activation='relu'),
            # Single sigmoid unit: the model outputs a probability, not a logit.
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])

    def call(self, x):
        """Forward ``x`` through the wrapped Sequential stack.

        Args:
            x: Inputs in whatever form the encoder layer accepts.

        Returns:
            The output of the final sigmoid layer.
        """
        return self.model(x)

In [8]:
# The encoder must be adapted (its vocabulary built) before it becomes part of
# the network architecture.
VOCAB_SIZE = 1000
encoder = tf.keras.layers.TextVectorization(max_tokens=VOCAB_SIZE)

# Drop the labels so that only the review text is fed to adapt().
train_text = train_dataset.map(lambda text, _label: text)
encoder.adapt(train_text)

In [9]:
# Instantiate the classifier around the adapted encoder.
model = SentModel(encoder=encoder)

In [10]:
# The final layer of the model applies a sigmoid, so the loss is computed on
# probabilities (from_logits=False is the default, spelled out here).
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)

In [11]:
# Configure training: Adam optimizer, the loss defined above, accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss=loss,
    metrics=['accuracy'],
)

In [12]:
# Train for 5 epochs. validation_steps=30 limits each validation pass to
# 30 batches of test_dataset instead of the whole split.
history = model.fit(
    train_dataset,
    epochs=5,
    validation_data=test_dataset,
    validation_steps=30,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
