In [10]:
from neon.backends import gen_backend
from neon.data import DataIterator, Text, load_text
from neon.initializers import Uniform, GlorotUniform
from neon.layers import GeneralizedCost, LSTM, Affine, Dropout, LookupTable, RecurrentSum
from neon.models import Model
from neon.optimizers import Adagrad
from neon.transforms import Rectlin, Logistic, Tanh, Softmax, CrossEntropyMulti, Accuracy
from neon.callbacks.callbacks import Callbacks
from neon.util.argparser import NeonArgparser
import numpy as np
import os
import cPickle as pickle
# Every location below is derived from the current user's home directory, so
# the notebook is not tied to one machine's absolute paths.
home_dir = os.path.expanduser("~")

# Directory holding the CSE255 pickles; the trailing '/' of the original
# string is kept so the value is unchanged.
data_root = home_dir + '/data/CSE255/'


class Args():
    """Plain attribute container standing in for parsed command-line arguments."""
    pass


args = Args()

# the command line arguments
args.backend = 'gpu'
args.batch_size = 128
args.epochs = 3

args.config = None
# Previously a hard-coded '/home/linuxthink/nervana/data'; the value is the
# same for that user and now resolves correctly for any other account.
args.data_dir = os.path.join(home_dir, 'nervana', 'data')
args.datatype = np.float32
args.device_id = 0
args.evaluation_freq = 1
args.history = 1
args.log_thresh = 40
args.logfile = None
args.model_file = None
args.no_progress_bar = False
# Output files live inside args.data_dir instead of repeating the absolute path.
args.output_file = os.path.join(args.data_dir, 'neonlog.hd5')
args.progress_bar = True
args.rng_seed = 0
args.rounding = False
args.save_path = os.path.join(args.data_dir, '128128_49_model')
args.serialize = 1
args.verbose = 1

# Number of training epochs, taken from the argument namespace.
num_epochs = args.epochs

# hyperparameters from the reference
# batch_size is read from args (set to 128 there) so that there is a single
# source of truth instead of two literals that could drift apart.
batch_size = args.batch_size
clip_gradients = True
gradient_limit = 15
vocab_size = 20000
sentence_length = 100
embedding_dim = 128
hidden_size = 128
reset_cells = True

# Echo the settings that determine the data and model shapes.
summary_format = 'batch_size: %s \nvocab_size: %s \nsentence_length: %s \nembedding_dim: %s \nhidden_size: %s'
print(summary_format % (batch_size, vocab_size, sentence_length, embedding_dim, hidden_size))

# setup backend
# Gather the backend options in one place, then create the backend from them.
backend_options = dict(
    backend=args.backend,
    batch_size=batch_size,
    rng_seed=args.rng_seed,
    device_id=args.device_id,
    default_dtype=args.datatype,
)
be = gen_backend(**backend_options)

batch_size: 128 
vocab_size: 20000 
sentence_length: 100 
embedding_dim: 128 
hidden_size: 128


In [None]:
# Scratch cell (disabled): load the pickle file to inspect it, then re-save it with the highest pickle protocol
# a = pickle.load(open(os.path.join(data_root, 'train_valid_text_index_in_binary_label.pickle'), "rb"))
# pickle.dump(a, open(os.path.join(data_root, 'train_valid_text_index_in_binary_label.pickle'), "wb"),
#             protocol=pickle.HIGHEST_PROTOCOL)

In [60]:
# load train set
# Text.pad_data receives the pickle path together with the vocabulary and
# sentence-length limits. test_split=0.0 presumably keeps every example in the
# training split -- confirm against Text.pad_data.
train_pickle_path = os.path.join(data_root, 'train_valid_text_index_in_binary_label_complete.pickle')
(X_train, y_train) = Text.pad_data(train_pickle_path,
                                   vocab_size=vocab_size,
                                   sentence_length=sentence_length,
                                   test_split=0.0)
# Single formatted string inside print(...): same text as the former print
# statement, and consistent with the print(...) calls in the other cells.
print("# of train sentences %s" % X_train.shape[0])

# make train_set (for the callbacks)
train_set = DataIterator(X_train, y_train, nclass=2)

# of train sentences 1000000


In [27]:
# Load the test features from their pickle; feature_only=True is passed and
# only a single array is returned (no labels).
test_pickle_path = os.path.join(data_root, 'test_text_index.pickle')
X_test = Text.pad_data(
    test_pickle_path,
    vocab_size=vocab_size,
    sentence_length=sentence_length,
    test_split=0.0,
    feature_only=True,
)

# The test data has no labels here, so an all-zero column with one row per
# test example is supplied as the target array for the iterator.
test_row_count = X_test.shape[0]
y_test = np.zeros((test_row_count, 1))
test_set = DataIterator(X_test, y_test, nclass=2)

In [23]:
# Display the shape of the test feature matrix as a quick sanity check.
X_test.shape

(50000, 100)

In [20]:
# weight initialization
# The embedding table is initialised uniformly within +/- 0.1 / embedding_dim;
# every other weight (and the output bias) uses the Glorot uniform initialiser.
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
init_glorot = GlorotUniform()

# Network: embedding lookup -> LSTM -> RecurrentSum -> dropout -> 2-way softmax.
layers = [
    LookupTable(
        vocab_size=vocab_size, embedding_dim=embedding_dim, init=init_emb),
    # reset_cells now comes from the hyperparameter cell instead of a second
    # hard-coded True, so the setting is controlled in one place.
    LSTM(hidden_size, init_glorot, activation=Tanh(),
         gate_activation=Logistic(), reset_cells=reset_cells),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, init_glorot, bias=init_glorot, activation=Softmax())
]

print(layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
metric = Accuracy()

##########################################################################

model = Model(layers=layers)
# NOTE(review): gradient_limit is defined with the hyperparameters but is not
# passed to the optimizer here -- confirm whether Adagrad should receive it.
optimizer = Adagrad(learning_rate=0.01, clip_gradients=clip_gradients)
# The training set is also used as the evaluation set for the callbacks.
callbacks = Callbacks(model, train_set, args, eval_set=train_set)

# Restore previously saved weights from args.data_dir.
model.load_weights(os.path.join(args.data_dir, '128128_49_model_e0.pkl'))

# NOTE(review): the flag is cleared before initialize() is called, presumably
# so that initialize() runs in full after the weights were loaded -- confirm
# against the neon Model implementation. The statement order is kept as is.
model.initialized = False
model.initialize(train_set, cost=cost)



[<neon.layers.layer.LookupTable object at 0x7fec640dec90>, <neon.layers.recurrent.LSTM object at 0x7fec5e486250>, <neon.layers.recurrent.RecurrentSum object at 0x7fec5e486290>, <neon.layers.layer.Dropout object at 0x7fec5e4863d0>, [<neon.layers.layer.Linear object at 0x7fec5e4864d0>, <neon.layers.layer.Bias object at 0x7fec5e486410>, <neon.layers.layer.Activation object at 0x7fec5e486650>]]


In [59]:
# output result
# Forward every test batch through the model in inference mode and collect
# row 1 of each batch output (presumably the class-1 probability of each
# example in the batch -- confirm the output layout).
test_ratio_predicitons = []
for batch_inputs, _ in test_set:
    # A separate name for the output avoids rebinding the loop variable.
    batch_outputs = model.fprop(batch_inputs, inference=True)
    test_ratio_predicitons += list(batch_outputs.get()[1])
# Keep exactly one prediction per test example. The count is taken from X_test
# rather than a literal 50000, so it stays correct if the test set changes.
# (Presumably the iterator yields full batches and the surplus must be dropped.)
test_ratio_predicitons = test_ratio_predicitons[:X_test.shape[0]]

In [61]:
# output result
# Forward every training batch through the model in inference mode and collect
# row 1 of each batch output (presumably the class-1 probability of each
# example in the batch -- confirm the output layout).
all_ratio_predicitons = []
for batch_inputs, _ in train_set:
    # A separate name for the output avoids rebinding the loop variable.
    batch_outputs = model.fprop(batch_inputs, inference=True)
    all_ratio_predicitons += list(batch_outputs.get()[1])
# Keep exactly one prediction per training example. The count is taken from
# X_train rather than a literal 1000000, so it follows the loaded data.
# (Presumably the iterator yields full batches and the surplus must be dropped.)
all_ratio_predicitons = all_ratio_predicitons[:X_train.shape[0]]

In [65]:
# Report how many predictions were collected for the training and test sets.
all_count = len(all_ratio_predicitons)
test_count = len(test_ratio_predicitons)
print(all_count, test_count)

(1000000, 50000)


In [66]:
# Persist both prediction lists as one (train, test) tuple. The file is opened
# in a with-block so the handle is flushed and closed even if dumping fails;
# the original left the handle returned by open() unclosed.
with open("all_ratio_predict_test_ratio_predict.pickle", "wb") as prediction_file:
    pickle.dump((all_ratio_predicitons, test_ratio_predicitons),
                prediction_file,
                protocol=pickle.HIGHEST_PROTOCOL)