In [18]:
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Conv1D, GlobalMaxPooling1D
from tensorflow.keras.datasets import imdb

In [19]:
# Hyperparameters for the IMDB 1D-convnet experiment, grouped by role.

# Data: padded sequence length and vocabulary cap (also the embedding input size).
maxlen = 400
max_features = 5000

# Model: embedding width, convolution window / filter count, dense layer width.
embedding_dims = 50
kernel_size = 3
filters = 250
hidden_dims = 250

# Training: number of epochs and mini-batch size passed to model.fit.
epochs = 2
batch_size = 32

In [20]:
# Load the IMDB reviews, keeping only token ids below max_features.
print('Loading data...')
train_split, test_split = imdb.load_data(num_words=max_features)
x_train, y_train = train_split
x_test, y_test = test_split

# Report how many sequences each split contains.
for split, caption in ((x_train, 'train sequences'), (x_test, 'test sequences')):
    print(len(split), caption)

Loading data...
25000 train sequences
25000 test sequences


In [21]:
# Preview only the first two encoded reviews; echoing the whole training array
# floods the notebook with output and buries the narrative.
x_train[:2]

array([list([1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 2, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 2, 19, 178, 32]),
       list([1, 194, 1153, 194, 2, 78, 228, 5, 6, 1463, 4369,

In [22]:
# Peek at the first 21 reviews: the leading 10 token ids next to the label.
# Slicing + zip replaces the manual `while i <= 20` counter, which hid the
# actual row count (21) behind an inclusive bound.
for tokens, label in zip(x_train[:21], y_train[:21]):
    print(tokens[:10], label)

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65] 1
[1, 194, 1153, 194, 2, 78, 228, 5, 6, 1463] 0
[1, 14, 47, 8, 30, 31, 7, 4, 249, 108] 0
[1, 4, 2, 2, 33, 2804, 4, 2040, 432, 111] 1
[1, 249, 1323, 7, 61, 113, 10, 10, 13, 1637] 0
[1, 778, 128, 74, 12, 630, 163, 15, 4, 1766] 0
[1, 2, 365, 1234, 5, 1156, 354, 11, 14, 2] 1
[1, 4, 2, 716, 4, 65, 7, 4, 689, 4367] 0
[1, 43, 188, 46, 5, 566, 264, 51, 6, 530] 1
[1, 14, 20, 47, 111, 439, 3445, 19, 12, 15] 0
[1, 785, 189, 438, 47, 110, 142, 7, 6, 2] 1
[1, 54, 13, 1610, 14, 20, 13, 69, 55, 364] 0
[1, 13, 119, 954, 189, 1554, 13, 92, 459, 48] 0
[1, 259, 37, 100, 169, 1653, 1107, 11, 14, 418] 0
[1, 503, 20, 33, 118, 481, 302, 26, 184, 52] 0
[1, 6, 964, 437, 7, 58, 43, 1402, 11, 6] 0
[1, 2, 1662, 11, 4, 1749, 9, 4, 2165, 4] 1
[1, 33, 4, 2, 7, 4, 2, 194, 2, 3089] 1
[1, 13, 28, 64, 69, 4, 2, 7, 319, 14] 0
[1, 3432, 26, 9, 6, 1220, 731, 939, 44, 6] 1
[1, 617, 11, 3875, 17, 2, 14, 966, 78, 20] 0


In [23]:
# Token counts of the first 11 reviews; iterate over a slice instead of
# maintaining a hand-rolled `while` counter.
for tokens in x_train[:11]:
    print(len(tokens))

218
189
141
550
147
43
123
562
233
130
450


In [24]:
# Pad/truncate every review in both splits to exactly maxlen token ids.
x_train, x_test = (
    sequence.pad_sequences(split, maxlen=maxlen)
    for split in (x_train, x_test)
)

# Show the resulting 2-D shapes.
for caption, split in (('x_train shape:', x_train), ('x_test shape:', x_test)):
    print(caption, split.shape)

x_train shape: (25000, 400)
x_test shape: (25000, 400)


In [25]:
# Row lengths of the first 11 sequences in x_train; iterate over a slice
# instead of maintaining a hand-rolled `while` counter.
for row in x_train[:11]:
    print(len(row))

400
400
400
400
400
400
400
400
400
400
400


In [26]:
# Display the shape of the training label array.
y_train.shape

(25000,)

In [27]:
# Display the first three training labels.
y_train[:3]

array([1, 0, 0])

In [8]:
# Start from an empty Sequential container; layers are appended with model.add().
model = Sequential()

In [9]:
# Build the whole network in one expression so this cell is idempotent:
# re-running it produces a fresh model instead of appending a second copy of
# every layer to an existing one through repeated model.add() calls.
model = Sequential([
    # Embedding layer: maps each vocabulary index to an
    # embedding_dims-dimensional vector.
    Embedding(max_features,
              embedding_dims,
              input_length=maxlen),
    Dropout(0.2),

    # Conv1D learns `filters` word-group filters, each spanning
    # `kernel_size` consecutive tokens.
    Conv1D(filters,
           kernel_size,
           padding='valid',
           activation='relu',
           strides=1),
    # Global max pooling keeps the strongest response of each filter.
    GlobalMaxPooling1D(),

    # Vanilla hidden layer.
    Dense(hidden_dims),
    Dropout(0.2),
    Activation('relu'),

    # Project onto a single output unit and squash it with a sigmoid.
    Dense(1),
    Activation('sigmoid'),
])

In [10]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 400, 50)           250000    
_________________________________________________________________
dropout (Dropout)            (None, 400, 50)           0         
_________________________________________________________________
conv1d (Conv1D)              (None, 398, 250)          37750     
_________________________________________________________________
global_max_pooling1d (Global (None, 250)               0         
_________________________________________________________________
dense (Dense)                (None, 250)               62750     
_________________________________________________________________
dropout_1 (Dropout)          (None, 250)               0         
_________________________________________________________________
activation (Activation)      (None, 250)               0

In [12]:
# Train the model and keep the returned History object so the per-epoch
# metrics stay available afterwards; binding it to a name also stops the
# notebook from echoing the object's repr as the cell output.
# NOTE(review): validation_data is the test split, which is the same data the
# later model.evaluate() call scores, so the validation metrics are not
# independent of the reported test accuracy.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test))

Train on 25000 samples, validate on 25000 samples
Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x63e689c90>

In [15]:
# Score the trained model on the test split without progress output.
test_metrics = model.evaluate(x_test, y_test, verbose=False)
loss, accuracy = test_metrics

# Report the accuracy rounded to four decimal places.
report_template = "Testing Accuracy:  {:.4f}"
print(report_template.format(accuracy))

Testing Accuracy:  0.8794
