In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

In [2]:
# Load the IMDB reviews dataset in its 'subwords8k' configuration.
# as_supervised=True makes each element a (text, label) pair and
# with_info=True additionally returns the dataset metadata object.
dataset, info = tfds.load(
    name='imdb_reviews/subwords8k',
    as_supervised=True,
    with_info=True,
)

In [3]:
# Pull the two predefined splits out of the loaded dataset mapping.
train_dataset = dataset['train']
test_dataset = dataset['test']

In [4]:
# Text encoder attached to the dataset's 'text' feature. It is used further
# down for its vocab_size (embedding layer) and its encode() method
# (turning sample sentences into token ids for prediction).
encoder = info.features['text'].encoder

In [5]:
# Input-pipeline settings shared by the training and test datasets.
BATCH_SIZE = 64       # number of examples per padded batch
BUFFER_SIZE = 10000   # buffer size passed to Dataset.shuffle()

In [6]:
# Shapes handed to padded_batch() for each (text, label) element:
#   [None] -> 1-D token sequence, padded along its only (variable) dimension
#   ()     -> scalar label, no padding
padded_shapes = ([None], ())

In [7]:
# Build the training pipeline: shuffle, then group into padded batches.
# The pipeline is derived from dataset['train'] rather than from
# train_dataset itself so that re-running this cell is idempotent; the
# self-referential form would shuffle/batch an already-batched dataset
# on a second execution.
train_dataset = (
    dataset['train']
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
)

In [8]:
# Build the evaluation pipeline with the same shuffle + padded-batch steps.
# Derived from dataset['test'] (not from test_dataset itself) so the cell
# can be re-run without batching an already-batched dataset.
# NOTE(review): shuffling an evaluation split is unusual; together with the
# validation_steps limit used in fit() it presumably makes the validated
# subset differ between epochs -- confirm this is intended.
test_dataset = (
    dataset['test']
    .shuffle(BUFFER_SIZE)
    .padded_batch(BATCH_SIZE, padded_shapes=padded_shapes)
)

In [9]:
# Binary sentiment classifier assembled layer by layer:
# token embedding -> one bidirectional LSTM -> dense ReLU -> sigmoid output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
# Single sigmoid unit: the output is a value in [0, 1].
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [10]:
# Configure training: binary cross-entropy on the sigmoid output,
# Adam with a 1e-4 learning rate, accuracy reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

In [11]:
# Train for 5 epochs; validation after each epoch is limited to 30 batches
# of the test pipeline.
history = model.fit(
    train_dataset,
    epochs=5,
    validation_data=test_dataset,
    validation_steps=30,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
def pad_to_size(vec, size):
    """Return a new list holding `vec` right-padded with zeros to `size` items.

    Args:
        vec: Sequence of values (e.g. a list of token ids). It is not
            modified; earlier versions extended the caller's list in place.
        size: Desired minimum length of the result.

    Returns:
        A new list. If `vec` already has `size` or more items, the result is
        an unpadded copy of it (nothing is truncated).
    """
    # A non-positive repeat count yields an empty list, so inputs longer
    # than `size` simply come back unpadded.
    padding = [0] * (size - len(vec))
    return list(vec) + padding

In [13]:
def sample_predict(sentence, pad, model, pad_size=64):
    """Run `model` on a single raw-text sentence and return its prediction.

    The sentence is tokenised with the notebook-level `encoder`, so that
    variable must be defined before this function is called.

    Args:
        sentence: Text to score.
        pad: If true, zero-pad the encoded sentence to `pad_size` tokens
            via `pad_to_size` before prediction.
        model: Object exposing `predict()`; called with a batch of one.
        pad_size: Target length used when `pad` is true. Defaults to 64,
            the value that was previously hard-coded.

    Returns:
        Whatever `model.predict` returns for the one-element batch.
    """
    token_ids = encoder.encode(sentence)
    if pad:
        token_ids = pad_to_size(token_ids, pad_size)
    # Kept as float32 exactly as before; the cast also turns the Python
    # list into a tensor.
    token_ids = tf.cast(token_ids, tf.float32)
    # Add a leading axis so the single sample is presented as a batch of one.
    return model.predict(tf.expand_dims(token_ids, 0))

In [14]:
# Score a positive-sounding review with the first model; the prediction is
# scaled by 100 so it reads as a percentage.
sample_text = "This movie was awesome. The acting was incredible. Highly recommend"
predictions = sample_predict(sample_text, pad=True, model=model) * 100
# Interpolate the value into the message. Previously it was passed as a
# second print() argument, which left a literal '{}' in the output.
print('probability this is a positive review {}'.format(predictions))

probability this is a positive review {} [[80.845436]]


In [15]:
# Score a lukewarm review with the first model; the prediction is scaled
# by 100 so it reads as a percentage.
# NOTE(review): "medicore" looks like a typo for "mediocre"; left unchanged
# because altering the text changes the encoded input and the prediction.
sample_text = "This movie was so so. The acting was medicore. Kind recommend"
predictions = sample_predict(sample_text, pad=True, model=model) * 100
# Interpolate the value into the message. Previously it was passed as a
# second print() argument, which left a literal '{}' in the output.
print('probability this is a positive review {}'.format(predictions))

probability this is a positive review {} [[47.837944]]


In [16]:
# Deeper variant of the classifier: two stacked bidirectional LSTMs plus
# dropout before the sigmoid output.
model1 = tf.keras.Sequential()
model1.add(tf.keras.layers.Embedding(encoder.vocab_size, 64))
# return_sequences=True so the second recurrent layer receives the full
# per-timestep output rather than only the final state.
model1.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
model1.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model1.add(tf.keras.layers.Dense(64, activation='relu'))
model1.add(tf.keras.layers.Dropout(0.5))
model1.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [17]:
# Same training configuration as the first model: binary cross-entropy,
# Adam with a 1e-4 learning rate, accuracy as the reported metric.
model1.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

In [18]:
# Train the stacked model for 5 epochs, validating on 30 test batches per
# epoch. Note that this rebinds `history`, replacing the first model's
# training history.
history = model1.fit(
    train_dataset,
    epochs=5,
    validation_data=test_dataset,
    validation_steps=30,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
# Score the positive-sounding review with the stacked model (model1); the
# prediction is scaled by 100 so it reads as a percentage.
sample_text = "This movie was awesome. The acting was incredible. Highly recommend"
predictions = sample_predict(sample_text, pad=True, model=model1) * 100
# Interpolate the value into the message. Previously it was passed as a
# second print() argument, which left a literal '{}' in the output.
print('probability this is a positive review {}'.format(predictions))

probability this is a positive review {} [[24.913906]]


In [20]:
# Score the lukewarm review with the stacked model (model1); the prediction
# is scaled by 100 so it reads as a percentage.
# NOTE(review): "medicore" looks like a typo for "mediocre"; left unchanged
# because altering the text changes the encoded input and the prediction.
sample_text = "This movie was so so. The acting was medicore. Kind recommend"
predictions = sample_predict(sample_text, pad=True, model=model1) * 100
# Interpolate the value into the message. Previously it was passed as a
# second print() argument, which left a literal '{}' in the output.
print('probability this is a positive review {}'.format(predictions))

probability this is a positive review {} [[10.4430485]]
