In [1]:
from imdbNet.imdb_utils import load_imdb, imdb_word_dic, pad_data
from imdbNet.dataGenerator import DataGenerator
from imdbNet.imdbModel import imdbModel
from imdbNet.train import Train
import tensorflow as tf

Using TensorFlow backend.


In [2]:
# train parameter
hparam = {
    'train_size': 2000,
    'valid_size': 500,
    'test_size': 500,
    'batch_size': 32,
    'num_epochs': 10,
    'num_steps': 100,
    'hidden_size1': 16,
    'hidden_size2': 32,
    'output_size': 2,
    'keep_prob': 0.5,
    'vocab_size': 10000,
    'embedding_dim': 50,
    'max_grad_norm': 2
    }

# load data
train_x, train_y, valid_x, valid_y, test_x, test_y = load_imdb(num_training=hparam['train_size'],
                                                               num_validation=hparam['valid_size'],
                                                               num_test=hparam['test_size'],
                                                               num_words=hparam['vocab_size'])
word2index, index2word = imdb_word_dic()

# Pad data
train_x, valid_x, test_x = pad_data(train_x, valid_x, test_x, word2index, max_len=hparam['num_steps'])
print(f"\ntrain_x: {train_x.shape}  valid_x: {valid_x.shape} test_x: {test_x.shape}")
print(f"train_y: {train_y.shape} valid_y: {valid_y.shape} test_y: {test_y.shape}")


train_x: (2000, 100)  valid_x: (500, 100) test_x: (500, 100)
train_y: (2000,) valid_y: (500,) test_y: (500,)


In [3]:
tf.reset_default_graph()
# Define Models
with tf.variable_scope("model", reuse=None):
    train_model = imdbModel("is_training", hparam['num_steps'], hparam['hidden_size1'],hparam['hidden_size2'],
                            hparam['output_size'], hparam['vocab_size'], hparam['output_size'],
                            hparam['max_grad_norm'], hparam['keep_prob'])

with tf.variable_scope("model", reuse=True):
    valid_model = imdbModel("is_valid", hparam['num_steps'], hparam['hidden_size1'],hparam['hidden_size2'],
                            hparam['output_size'], hparam['vocab_size'], hparam['output_size'],
                            hparam['max_grad_norm'], keep_prob=1.0)

    test_model = imdbModel("is_testing", hparam['num_steps'], hparam['hidden_size1'],hparam['hidden_size2'],
                            hparam['output_size'], hparam['vocab_size'], hparam['output_size'],
                            hparam['max_grad_norm'], keep_prob=1.0)

# Data Generators
trainGenerator = DataGenerator(train_x, train_y, batch_size=hparam['batch_size'], shuffle=True)
validGenerator = DataGenerator(valid_x, valid_y, batch_size=hparam['batch_size'], shuffle=False)
testGenerator = DataGenerator(test_x, test_y, batch_size=hparam['batch_size'], shuffle=False)

In [4]:
# Train Model
with tf.Session() as sess:
    train = Train(session=sess, num_epochs=hparam['num_epochs'])
    train(trainGenerator, train_model,
          validGenerator, valid_model, verbose=False)

    print("Predictions: \n")
    train.predict(testGenerator, test_model)

epoch: 1/10  train loss: 0.6926 valid loss: 0.6927  time: 4.80 secs
epoch: 2/10  train loss: 0.6654 valid loss: 0.6806  time: 4.92 secs
epoch: 3/10  train loss: 0.4633 valid loss: 0.7533  time: 4.61 secs
epoch: 4/10  train loss: 0.2871 valid loss: 0.9521  time: 4.61 secs
epoch: 5/10  train loss: 0.1691 valid loss: 1.0063  time: 4.55 secs
epoch: 6/10  train loss: 0.1389 valid loss: 1.5289  time: 4.61 secs
epoch: 7/10  train loss: 0.0950 valid loss: 1.7052  time: 4.31 secs
epoch: 8/10  train loss: 0.0465 valid loss: 1.5604  time: 4.32 secs
epoch: 9/10  train loss: 0.0263 valid loss: 1.9200  time: 4.68 secs
epoch: 10/10  train loss: 0.0096 valid loss: 2.2250  time: 4.51 secs
Predictions: 

321 / 500 correct 64.20
