## Trains an LSTM model on the IMDB sentiment classification task
- Source script: https://github.com/keras-team/keras/blob/master/examples/imdb_lstm.py

In [1]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM
from keras.datasets import imdb

Using TensorFlow backend.


### Parameters

In [2]:
batch_size = 32       # Samples per batch, used for both training and evaluation
maxlen = 80           # Every review is padded / truncated to this many word indices
max_features = 20000  # Vocabulary cap passed to the loader (only the most frequent words are kept)

### Load Data

In [23]:
# Load the IMDB reviews, restricted to the `max_features` most frequent words.
#   x_* : object array holding one variable-length list of word indices per review
#   y_* : integer sentiment label per review (0 or 1)
(x_train, y_train), (x_test, y_test) = imdb.load_data(
    num_words=max_features
)
# Sanity-check the sizes and dtypes of both splits before preprocessing.
print(x_train.shape, x_train.dtype, y_train.shape, y_train.dtype)
print(x_test.shape, y_test.shape)

(25000,) object (25000,) int64
(25000,) (25000,)


### Data Preprocessing

In [29]:
# Bring every review to exactly `maxlen` word indices so each split becomes a
# rectangular 2-D array: shorter reviews are padded, longer ones are truncated.
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
print('x_train shape:', x_train.shape)

x_train shape: (25000, 80)


### Build Model

In [31]:
# Embedding -> LSTM -> single sigmoid unit, declared as one layer list.
model = Sequential([
    # Look up a trainable 128-dim vector for each of the `max_features` word indices
    Embedding(max_features, 128),
    # 128 LSTM units; dropout is applied to both the inputs and the recurrent state
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    # One sigmoid output in (0, 1) for the binary sentiment label
    Dense(1, activation='sigmoid'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, None, 128)         2560000   
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 2,691,713
Trainable params: 2,691,713
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as an extra metric alongside the loss.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

### Train

In [None]:
# Train for 15 passes over the padded training reviews.
# NOTE(review): the test split is also used as validation data here, so the
# accuracy reported in the evaluation cell comes from data that was monitored
# during training -- consider a separate validation split.
model.fit(
    x=x_train,
    y=y_train,
    epochs=15,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
)

### Evaluation

In [34]:
# Score the trained model on the test split. Because the model was compiled
# with metrics=['accuracy'], evaluate() yields two values: the loss and the
# accuracy.
# The loss is bound to a real name instead of `_`: in IPython/Jupyter `_` is
# the "last output" variable, and assigning to it shadows that shortcut for
# the rest of the kernel session.
test_loss, acc = model.evaluate(x_test, y_test,
                                batch_size=batch_size)
print('Test accuracy:', acc)

Test accuracy: 0.83396
