In [12]:
import keras
from keras.datasets import imdb
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, SpatialDropout1D
from keras.layers import SimpleRNN # new!
from keras.callbacks import ModelCheckpoint
import os
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
%matplotlib inline

In [13]:
# Directory that receives the per-epoch checkpoint files:
output_dir = 'model_output/rnn'

# Training schedule:
epochs = 16  # deliberately a large number of epochs
batch_size = 128

# Vector-space embedding and sequence preprocessing:
n_dim = 64
n_unique_words = 10000
max_review_length = 100  # kept short because gradients vanish over long sequences
pad_type = 'pre'    # pad at the start of each review
trunc_type = 'pre'  # truncate from the start of each review
drop_embed = 0.2

# Recurrent layer:
n_rnn = 256
drop_rnn = 0.2

# Dense layer settings (unused: the hidden dense layer is disabled in this notebook):
# n_dense = 256
# dropout = 0.2

In [14]:
# Load the IMDB reviews, restricted to the n_unique_words most frequent tokens.
# The most frequent words are no longer skipped (n_words_to_skip was removed).
(x_train, y_train), (x_valid, y_valid) = imdb.load_data(
    num_words=n_unique_words,
)

In [15]:
# Pad/truncate both splits to a fixed length using identical settings,
# filling with index 0.
x_train, x_valid = (
    pad_sequences(
        reviews,
        maxlen=max_review_length,
        padding=pad_type,
        truncating=trunc_type,
        value=0,
    )
    for reviews in (x_train, x_valid)
)

### RNN

In [16]:
# Sentiment classifier: embedding -> simple recurrent layer -> sigmoid output.
model = Sequential()

# Declare the input shape with an explicit Input layer. The Embedding
# `input_length` argument is deprecated in Keras 3 (it is ignored with a
# warning), so the shape is stated here instead.
model.add(keras.Input(shape=(max_review_length,), dtype='int32'))
model.add(Embedding(n_unique_words, n_dim))
model.add(SpatialDropout1D(drop_embed))

model.add(SimpleRNN(n_rnn, dropout=drop_rnn))
# No hidden dense layer on top of the recurrent layer: unlike in convnets,
# a top dense layer is not typically used in NLP models.
model.add(Dense(1, activation='sigmoid'))  # single unit: binary sentiment probability


In [21]:
# Build the model explicitly for inputs of shape (batch, max_review_length),
# then print the layer-by-layer summary.
model.build((None, max_review_length))
model.summary()

In [22]:
# Binary classification: cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [20]:
# Make sure the checkpoint directory exists before the callback needs it.
# exist_ok=True avoids the check-then-create race and makes the cell safe to re-run.
os.makedirs(output_dir, exist_ok=True)

# Save the model after every epoch; {epoch:02d} is filled in by Keras
# with the zero-padded epoch number.
modelcheckpoint = ModelCheckpoint(
    filepath=os.path.join(output_dir, "weights.{epoch:02d}.keras")
)

In [23]:
# Train the model, evaluating on the validation split and writing a
# checkpoint after each epoch.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_valid, y_valid),
    callbacks=[modelcheckpoint],
)

Epoch 1/16
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 362ms/step - accuracy: 0.5242 - loss: 0.6930 - val_accuracy: 0.5710 - val_loss: 0.6617
Epoch 2/16
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 392ms/step - accuracy: 0.5962 - loss: 0.6551 - val_accuracy: 0.5996 - val_loss: 0.6516
Epoch 3/16
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 344ms/step - accuracy: 0.6835 - loss: 0.5828 - val_accuracy: 0.6427 - val_loss: 0.6180
Epoch 4/16
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 336ms/step - accuracy: 0.7257 - loss: 0.5312 - val_accuracy: 0.6830 - val_loss: 0.6174
Epoch 5/16
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 326ms/step - accuracy: 0.7909 - loss: 0.4416 - val_accuracy: 0.6700 - val_loss: 0.6158
Epoch 6/16
[1m196/196[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 348ms/step - accuracy: 0.8127 - loss: 0.4124 - val_accuracy: 0.7600 - val_loss: 0.5654
Epoch 7/16

<keras.src.callbacks.history.History at 0x7cc982e0ec00>

In [None]:
# Result: poor. Validation accuracy is still only ~0.76 after six epochs.

Simple RNNs can only backpropagate effectively through ~10 time steps
before the gradient diminishes so much that parameter updates become negligibly small (the vanishing-gradient problem).

Simple RNNs are therefore rarely used in practice. More sophisticated recurrent layer types like LSTMs, which can backpropagate through ~100 time steps, are far more common.