In [15]:
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.optimizers import SGD, RMSprop, Adagrad
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM, GRU
from keras.datasets import imdb

from theano import function

In [None]:
'''
    This code was borrowed and modified from https://github.com/fchollet/keras/blob/master/examples/imdb_lstm.py
    
    Train an LSTM on the IMDB sentiment classification task.
    The dataset is actually too small for LSTM to be of any advantage
    compared to simpler, much faster methods such as TF-IDF+LogReg.
    Notes:
    - RNNs are tricky. Choice of batch size is important,
    choice of loss and optimizer is critical, etc.
    Some configurations won't converge.
    - LSTM loss decrease patterns during training can be quite different
    from what you see with CNNs/MLPs/etc.
    GPU command:
        THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_lstm.py
'''

In [23]:
# Data and batching settings shared by the cells below.
max_features = 20000  # vocabulary cap: only the most common words are kept
# Fixed number of time steps per review after padding. Judging by the outputs
# recorded in this notebook, a review longer than maxlen keeps its LAST maxlen
# word indices (the leading words are dropped, not the trailing ones).
maxlen = 100
batch_size = 32

print("Loading data...")
(X_train, y_train), (X_test, y_test) = imdb.load_data(
    nb_words=max_features,
    test_split=0.2,
)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print("Pad sequences (samples x time)")
# Bring both splits to a common width of maxlen columns; train is padded
# first, then test, and both names are rebound to the padded arrays.
X_train, X_test = (
    sequence.pad_sequences(split, maxlen=maxlen) for split in (X_train, X_test)
)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

Loading data...
20000 train sequences
5000 test sequences
Pad sequences (samples x time)
X_train shape: (20000, 100)
X_test shape: (5000, 100)


In [None]:
'''
Sample reviews from the full IMDb movie reviews dataset.

Negative review examples:
* Unfortunately it stays absurd the WHOLE time with no general narrative eventually making it just too off putting.
* Even those from the era should be turned off.
* The cryptic dialogue would make Shakespeare seem easy to a third grader.

Positive review examples:
* I didn't know this came from Canada, but it is very good. Very good!
* I liked this movie a lot. It really intrigued me how Deanna and Alicia became friends over such a tragedy
* When I saw the elaborate DVD box for this and the dreadful Red Queen figurine, 
  I felt certain I was in for a big disappointment, but surprise, surprise, I loved it. 
'''

In [6]:
# Peek at the first training review as stored: a sequence of integer word indices.
# NOTE(review): the recorded output below has more than maxlen entries, so it
# appears to have been captured before the padding cell ran — re-run the
# notebook top to bottom to refresh it.
X_train[0]

[1,
 20,
 28,
 716,
 48,
 495,
 79,
 27,
 493,
 8,
 5067,
 7,
 50,
 5,
 4682,
 13075,
 10,
 5,
 852,
 157,
 11,
 5,
 1716,
 3351,
 10,
 5,
 500,
 7308,
 6,
 33,
 256,
 41,
 13610,
 7,
 17,
 23,
 48,
 1537,
 3504,
 26,
 269,
 929,
 18,
 2,
 7,
 2,
 4284,
 8,
 105,
 5,
 2,
 182,
 314,
 38,
 98,
 103,
 7,
 36,
 2184,
 246,
 360,
 7,
 19,
 396,
 17,
 26,
 269,
 929,
 18,
 1769,
 493,
 6,
 116,
 7,
 105,
 5,
 575,
 182,
 27,
 5,
 1002,
 1085,
 130,
 62,
 17,
 24,
 89,
 17,
 13,
 381,
 1421,
 8,
 5167,
 7,
 5,
 2723,
 38,
 325,
 7,
 17,
 23,
 93,
 9,
 156,
 252,
 19,
 235,
 20,
 28,
 5,
 104,
 76,
 7,
 17,
 169,
 35,
 14764,
 17,
 23,
 1460,
 7,
 36,
 2184,
 934,
 56,
 2134,
 6,
 17,
 891,
 214,
 11,
 5,
 1552,
 6,
 92,
 6,
 33,
 256,
 82,
 7]

In [7]:
# Assemble the classifier as a linear stack:
# embedding -> LSTM -> dropout -> one sigmoid output unit.
print('Build model...')
model = Sequential()
# First layer: sized from max_features with 128-wide vectors; input_length is
# tied to maxlen, the width the sequences were padded to.
model.add(Embedding(max_features, 128, input_length=maxlen))

# Recurrent layer constructed with 128 (presumably the number of hidden units).
model.add(LSTM(128))  # try using a GRU instead, for fun (GRU is already imported)
model.add(Dropout(0.5))
# Single output unit followed by a sigmoid, paired with the binary
# cross-entropy loss chosen below.
model.add(Dense(1))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy', optimizer='adam', class_mode="binary")

Pad sequences (samples x time)
X_train shape: (20000, 100)
X_test shape: (5000, 100)
Build model...


  rval = __import__(module_name, {}, {}, [module_name])


In [16]:
# Compile a Theano function F that takes the model's symbolic input and
# returns the output of the model's first layer (layers[0]), so the values
# produced by that layer can be inspected for concrete inputs.
inp = model.get_input()
embedding = model.layers[0].get_output()
F = function(
    inputs=[inp],
    outputs=embedding,
    allow_input_downcast=True,  # accept NumPy inputs of a wider dtype than the symbolic input
)

In [22]:
# Show the first padded review and what the first layer produces for it.
# Slicing with [:1] (rather than indexing with [0]) keeps the leading batch
# axis, so the input stays 2-D: the recorded shape is (1, 100).
sample = X_train[:1]
# Evaluate the compiled function once and reuse the result; the original cell
# re-sliced X_train four times and ran F twice on the same input.
sample_embedding = F(sample)

print(sample)
print(sample.shape)
print(sample_embedding)
print(sample_embedding.shape)

[[  269   929    18     2     7     2  4284     8   105     5     2   182
    314    38    98   103     7    36  2184   246   360     7    19   396
     17    26   269   929    18  1769   493     6   116     7   105     5
    575   182    27     5  1002  1085   130    62    17    24    89    17
     13   381  1421     8  5167     7     5  2723    38   325     7    17
     23    93     9   156   252    19   235    20    28     5   104    76
      7    17   169    35 14764    17    23  1460     7    36  2184   934
     56  2134     6    17   891   214    11     5  1552     6    92     6
     33   256    82     7]]
(1, 100)
[[[ 0.03351435 -0.0307366  -0.03750445 ...,  0.03813024 -0.01615332
   -0.04805795]
  [-0.00785761 -0.01286278 -0.04133901 ...,  0.02919266 -0.01766369
    0.00309219]
  [ 0.00047992  0.00507891  0.04045267 ..., -0.00836205 -0.03382651
   -0.00756425]
  ..., 
  [ 0.04124896 -0.03509159 -0.0377143  ...,  0.00091615  0.03342613
   -0.04362203]
  [ 0.01263671  0.02567435 

In [None]:
print("Train...")
# Fit for 4 epochs; the held-out split is passed as validation data, so it is
# monitored during training.
model.fit(
    X_train, y_train,
    batch_size=batch_size,
    nb_epoch=4,
    validation_data=(X_test, y_test),
    show_accuracy=True,
)
# Score the same held-out split once more for the final reported numbers.
score, acc = model.evaluate(
    X_test, y_test,
    batch_size=batch_size,
    show_accuracy=True,
)
print('Test score:', score)
print('Test accuracy:', acc)