In [1]:
import numpy as np
from keras.models import Sequential, Graph
from keras.layers.core import Dense, Activation, Merge, Flatten, Dropout, Highway
from keras.layers.wrappers import TimeDistributed
from keras.layers import Embedding, Merge, LSTM, Dense, GRU, Recurrent
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.optimizers import SGD
from keras.datasets import imdb
from keras.preprocessing import sequence
from keras.utils.np_utils import accuracy
from keras import backend as K
from keras.callbacks import EarlyStopping

Using Theano backend.


In [2]:
# Hyperparameters shared by the data-loading, model and training cells.
max_features = 10000               # vocabulary cap for imdb.load_data and the Embedding layer
sequence_length = 500              # maxlen used when padding; also the model's input length
embedding_size = hidden_dim = 32   # Embedding output width / GRU output_dim
batch_size = 16                    # batch size handed to model.fit

np.random.seed(42)  # for reproducibility

In [3]:
print('Loading data...')
# Keep only the max_features most frequent words and hold out 20% of the
# reviews. The held-out split is named *_dev because the training cell uses it
# as validation data; the printed labels below still call it "test".
(X_train, y_train), (X_dev, y_dev) = imdb.load_data(
    test_split=0.2, nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_dev), 'test sequences')

print('Pad sequences (samples x time)')
# Bring every review to exactly sequence_length tokens so each split becomes a
# rectangular (samples, sequence_length) array.
X_train, X_dev = (sequence.pad_sequences(reviews, maxlen=sequence_length)
                  for reviews in (X_train, X_dev))
print('X_train shape:', X_train.shape)
print('X_test shape:', X_dev.shape)

Loading data...
(20000, 'train sequences')
(5000, 'test sequences')
Pad sequences (samples x time)
('X_train shape:', (20000, 500))
('X_test shape:', (5000, 500))


In [4]:
# Sentiment classifier built with the Graph container:
#   input -> embedding -> {forward GRU, backward GRU}
#         -> concat(bw, fw, embedding) -> per-timestep Highway
#         -> width-1 convolution -> max-pool over the whole sequence
#         -> flatten -> Dense(1) -> sigmoid
model = Graph()
model.add_input(name='input', input_shape=(sequence_length,), dtype='int')

# Integer word ids -> vectors of width embedding_size.
model.add_node(Embedding(max_features, embedding_size,
                         input_length=sequence_length),
               name='embedding', input='input')

# Two GRUs read the embedded sequence, one in each direction, and both emit an
# output per timestep (return_sequences=True).
# NOTE(review): with go_backwards=True the 'bw' outputs are presumably in
# reversed time order, so concatenating them feature-wise with 'fw' and
# 'embedding' below pairs step t with step T-1-t. Confirm against the Keras
# recurrent-layer docs and consider re-reversing 'bw' before the merge.
model.add_node(GRU(output_dim=hidden_dim, return_sequences=True),
               name='fw', input='embedding')
model.add_node(GRU(output_dim=hidden_dim, return_sequences=True,
                   go_backwards=True), name='bw', input='embedding')

# Concatenate the three sequences along the feature axis (axis 2) and apply the
# same Highway layer to every timestep.
model.add_node(TimeDistributed(Highway()), name='highway',
               inputs=['bw', 'fw', 'embedding'], concat_axis=2,
               merge_mode='concat')

# 512 filters of length 1, then a max-pool whose window spans the full
# sequence, leaving one value per filter once flattened.
model.add_node(Convolution1D(512, 1, activation='relu'), name='cnn',
               input='highway')
model.add_node(MaxPooling1D(pool_length=sequence_length),
               name='maxpooling', input='cnn')
model.add_node(Flatten(), name='flatten', input='maxpooling')

# Single-unit output layer. The previous `input_dim=embedding_size` argument
# was dropped: this layer is fed by 'flatten' (the 512 convolution filters),
# not by a 32-wide input, and its input size is taken from that connection.
model.add_node(Dense(1), name='fullconnection', input='flatten')
model.add_node(Activation('sigmoid'), name='sigmoid', input='fullconnection')
model.add_output(name='output', input='sigmoid')

model.compile(loss={'output': 'binary_crossentropy'},
              optimizer='rmsprop', metrics=["accuracy"])

In [5]:
# Graph models take their data as dicts keyed by the input/output node names.
train_data = {'input': X_train, 'output': y_train}
dev_data = {'input': X_dev, 'output': y_dev}

# Watch validation accuracy (higher is better) with a patience of 2 epochs.
early_stopping = EarlyStopping(monitor='val_acc', mode='max',
                               patience=2, verbose=0)

# Train for at most 5 epochs; verbose=2 logs one line per epoch.
model.fit(
    train_data,
    validation_data=dev_data,
    nb_epoch=5,
    batch_size=batch_size,
    callbacks=[early_stopping],
    verbose=2
)

Train on 20000 samples, validate on 5000 samples
Epoch 1/5
704s - loss: 0.3843 - acc: 0.8216 - val_loss: 0.2740 - val_acc: 0.8842
Epoch 2/5
676s - loss: 0.2400 - acc: 0.9049 - val_loss: 0.2515 - val_acc: 0.9008
Epoch 3/5
650s - loss: 0.1904 - acc: 0.9269 - val_loss: 0.2499 - val_acc: 0.9034
Epoch 4/5
974s - loss: 0.1588 - acc: 0.9412 - val_loss: 0.3279 - val_acc: 0.8766
Epoch 5/5
646s - loss: 0.1332 - acc: 0.9525 - val_loss: 0.2565 - val_acc: 0.9040


<keras.callbacks.History at 0x11bb13e10>