In [1]:
from __future__ import division, print_function, absolute_import

In [2]:
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_1d, global_max_pool
from tflearn.layers.merge_ops import merge
from tflearn.layers.estimator import regression
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb


hdf5 is not supported on this machine (please install/reinstall h5py for optimal experience)


In [3]:
# IMDB Dataset loading
train, test, _ = imdb.load_data(path='imdb.pkl', n_words=10000,
                                valid_portion=0.1)
trainX, trainY = train
testX, testY = test

In [4]:
# Data preprocessing
# Sequence padding
trainX = pad_sequences(trainX, maxlen=100, value=0.)
testX = pad_sequences(testX, maxlen=100, value=0.)
# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

In [5]:
# Building convolutional network
network = input_data(shape=[None, 100], name='input')
network = tflearn.embedding(network, input_dim=10000, output_dim=128)
branch1 = conv_1d(network, 128, 3, padding='valid', activation='relu', regularizer="L2")
branch2 = conv_1d(network, 128, 4, padding='valid', activation='relu', regularizer="L2")
branch3 = conv_1d(network, 128, 5, padding='valid', activation='relu', regularizer="L2")
network = merge([branch1, branch2, branch3], mode='concat', axis=1)
network = tf.expand_dims(network, 2)
network = global_max_pool(network)
network = dropout(network, 0.5)
network = fully_connected(network, 2, activation='softmax')
network = regression(network, optimizer='adam', learning_rate=0.001,loss='categorical_crossentropy', name='target')

In [6]:
# Training
model = tflearn.DNN(network, checkpoint_path='CNN-imdb', max_checkpoints=3, tensorboard_verbose=0)
model.fit(trainX, trainY, n_epoch = 5, shuffle=True, validation_set=(testX, testY), show_metric=True, batch_size=32)
model.save('CNN-imdb')

Training Step: 3519  | total loss: [1m[32m0.13663[0m[0m | time: 616.832s
| Adam | epoch: 005 | loss: 0.13663 - acc: 0.9711 -- iter: 22496/22500
Training Step: 3520  | total loss: [1m[32m0.13051[0m[0m | time: 681.188s
| Adam | epoch: 005 | loss: 0.13051 - acc: 0.9708 | val_loss: 0.53883 - val_acc: 0.7972 -- iter: 22500/22500
--


In [7]:
#Evaluation
# model.load()
model.evaluate(testX, testY)

[0.79720000066757202]

In [8]:
# print(metrics.accuracy_score(model.predict(testX), testY))
predictions = model.predict(testX)
labels = testY

In [22]:
predictions

[[0.4319905936717987, 0.5680093765258789],
 [0.6830011606216431, 0.31699880957603455],
 [0.21899816393852234, 0.7810018658638],
 [0.32261016964912415, 0.6773898005485535],
 [0.38154372572898865, 0.618456244468689],
 [0.9880273342132568, 0.011972702108323574],
 [0.9999076128005981, 9.235064499080181e-05],
 [0.9960131645202637, 0.003986859228461981],
 [0.9967138767242432, 0.0032861237414181232],
 [0.9897351264953613, 0.010264920070767403],
 [0.0002289221010869369, 0.9997710585594177],
 [0.01255662739276886, 0.9874433875083923],
 [0.3384641110897064, 0.661535918712616],
 [0.9907097816467285, 0.009290220215916634],
 [0.00687818881124258, 0.9931218028068542],
 [0.6534552574157715, 0.3465447723865509],
 [0.9694105386734009, 0.030589472502470016],
 [0.013488526456058025, 0.9865114688873291],
 [0.5467928647994995, 0.4532071352005005],
 [0.03126932308077812, 0.9687306880950928],
 [0.02696397714316845, 0.9730360507965088],
 [0.01616588979959488, 0.9838340878486633],
 [0.9999094009399414, 9.05747

In [23]:
labels

array([[ 0.,  1.],
       [ 1.,  0.],
       [ 0.,  1.],
       ..., 
       [ 1.,  0.],
       [ 1.,  0.],
       [ 0.,  1.]])

In [24]:
precisions = tf.contrib.metrics.streaming_precision(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name='precision')
