In [1]:
import numpy as np
import tensorflow as tf
import tensorlayer as tl
import gzip
import os
import time

  from ._conv import register_converters as _register_converters


In [2]:
# Log at DEBUG verbosity in both TensorFlow and TensorLayer.
tf.logging.set_verbosity(tf.logging.DEBUG)
tl.logging.set_verbosity(tl.logging.DEBUG)

In [3]:
def make_dataset(images, labels, num_epochs=1, shuffle_data_seed=0):
    """Build an unbatched, shuffled dataset of ``(image, label)`` pairs.

    Parameters
    ----------
    images : array-like
        Sliced along its first axis into one dataset element per example.
    labels : array-like
        Converted to an ``np.int64`` array, then sliced along its first axis.
    num_epochs : int
        How many times the zipped dataset is repeated (the default is 1).
    shuffle_data_seed : int
        Seed handed to ``Dataset.shuffle`` (the default is 0).

    Returns
    -------
    tf.data.Dataset
        Dataset yielding ``(image, label)`` tuples.
    """
    image_ds = tf.data.Dataset.from_tensor_slices(images)
    label_ds = tf.data.Dataset.from_tensor_slices(np.array(labels, dtype=np.int64))
    paired_ds = tf.data.Dataset.zip((image_ds, label_ds))

    # NOTE(review): repeat() is applied before shuffle(), so when
    # num_epochs > 1 the 10000-element shuffle buffer can mix examples from
    # adjacent epochs. Confirm this is intended before changing the order.
    repeated_ds = paired_ds.repeat(num_epochs)
    return repeated_ds.shuffle(buffer_size=10000, seed=shuffle_data_seed)

In [4]:
def model(x, is_train):
    """Build the MLP classifier inside the reusable ``'mlp'`` variable scope.

    The stack is: input -> dropout(keep 0.8) -> 800-unit ReLU ->
    dropout(keep 0.5) -> 800-unit ReLU -> dropout(keep 0.5) -> 10-unit
    output layer with identity activation.

    Parameters
    ----------
    x : tensor
        Input fed to the ``InputLayer``.
    is_train : bool
        Forwarded to every ``DropoutLayer``.

    Returns
    -------
    The last TensorLayer layer of the stack (the ``'output'`` dense layer).
    """
    # (dropout name, keep probability, dense name) for each hidden stage.
    hidden_stages = (
        ('drop1', 0.8, 'relu1'),
        ('drop2', 0.5, 'relu2'),
    )
    # AUTO_REUSE lets the function be called more than once with shared variables.
    with tf.variable_scope('mlp', reuse=tf.AUTO_REUSE):
        net = tl.layers.InputLayer(x, name='input')
        for drop_name, keep_prob, dense_name in hidden_stages:
            net = tl.layers.DropoutLayer(
                net, keep=keep_prob, name=drop_name, is_fix=True, is_train=is_train
            )
            net = tl.layers.DenseLayer(net, n_units=800, act=tf.nn.relu, name=dense_name)
        net = tl.layers.DropoutLayer(net, keep=0.5, name='drop3', is_fix=True, is_train=is_train)
        net = tl.layers.DenseLayer(net, n_units=10, act=tf.identity, name='output')
    return net

In [5]:
def build_train(x, y_):
    """Build the training graph: network, loss and tensors to log.

    Parameters
    ----------
    x : tensor
        Batch of inputs passed to ``model``.
    y_ : tensor
        Integer class labels compared against the arg-max of the outputs.

    Returns
    -------
    tuple
        ``(net, cost, log_tensors)`` where ``log_tensors`` maps ``'cost'``
        and ``'accuracy'`` to the corresponding tensors.
    """
    net = model(x, is_train=True)
    logits = net.outputs

    cost = tl.cost.cross_entropy(logits, y_, name='cost_train')

    # Fraction of examples whose highest-scoring class equals the label.
    is_correct = tf.equal(tf.argmax(logits, 1), y_)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32), name='accuracy_train')

    return net, cost, {'cost': cost, 'accuracy': accuracy}

In [6]:
def build_validation(x, y_):
    """Build the evaluation graph: network plus ``[cost, accuracy]`` metrics.

    Parameters
    ----------
    x : tensor
        Batch of inputs passed to ``model`` with ``is_train=False``.
    y_ : tensor
        Integer class labels compared against the arg-max of the outputs.

    Returns
    -------
    tuple
        ``(net, [cost, accuracy])``.
    """
    net = model(x, is_train=False)
    logits = net.outputs

    cost = tl.cost.cross_entropy(logits, y_, name='cost_test')

    # Fraction of examples whose highest-scoring class equals the label.
    is_correct = tf.equal(tf.argmax(logits, 1), y_)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32), name='accuracy_test')

    metrics = [cost, accuracy]
    return net, metrics

In [7]:
def _load_mnist_dataset(shape, path, name='mnist', url='http://yann.lecun.com/exdb/mnist/'):
    """A generic function to load mnist-like dataset.

    Parameters:
    ----------
    shape : tuple
        The shape of digit images.
    path : str
        The path that the data is downloaded to.
    name : str
        The dataset name you want to use(the default is 'mnist').
    url : str
        The url of dataset(the default is 'http://yann.lecun.com/exdb/mnist/').
    """
    path = os.path.join(path, name)

    # Define functions for loading mnist-like data's images and labels.
    # For convenience, they also download the requested files if needed.
    def load_mnist_images(path, filename):
        #filepath = maybe_download_and_extract(filename, path, url)
        filepath=path+'/'+filename
#         logging.info(filepath)
        # Read the inputs in Yann LeCun's binary format.
        with gzip.open(filepath, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=16)
        # The inputs are vectors now, we reshape them to monochrome 2D images,
        # following the shape convention: (examples, channels, rows, columns)
        data = data.reshape(shape)
        # The inputs come as bytes, we convert them to float32 in range [0,1].
        # (Actually to range [0, 255/256], for compatibility to the version
        # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
        return data / np.float32(256)

    def load_mnist_labels(path, filename):
        #filepath = maybe_download_and_extract(filename, path, url)
        # Read the labels in Yann LeCun's binary format.
        filepath=path+'/'+filename
        with gzip.open(filepath, 'rb') as f:
            data = np.frombuffer(f.read(), np.uint8, offset=8)
        # The labels are vectors of integers now, that's exactly what we want.
        return data

    # Download and read the training and test set images and labels.
#     logging.info("Load or Download {0} > {1}".format(name.upper(), path))
    X_train = load_mnist_images(path, 'train-images-idx3-ubyte.gz')
    y_train = load_mnist_labels(path, 'train-labels-idx1-ubyte.gz')
    X_test = load_mnist_images(path, 't10k-images-idx3-ubyte.gz')
    y_test = load_mnist_labels(path, 't10k-labels-idx1-ubyte.gz')

    # We reserve the last 10000 training examples for validation.
    X_train, X_val = X_train[:-10000], X_train[-10000:]
    y_train, y_val = y_train[:-10000], y_train[-10000:]

    # We just return all the arrays in order, as expected in main().
    # (It doesn't matter how we do this as long as we can read them again.)
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32)
    X_val = np.asarray(X_val, dtype=np.float32)
    y_val = np.asarray(y_val, dtype=np.int32)
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32)
    return X_train, y_train, X_val, y_val, X_test, y_test

In [8]:
# Directory that contains the `mnist/` folder with the four *.gz files.
# Set the MNIST_DATA_DIR environment variable to point at another location;
# the fallback is the path this notebook was originally run with.
MNIST_DATA_DIR = os.environ.get(
    'MNIST_DATA_DIR',
    '/opt/algor/gongxf/python3_pj/nlp_practice/0_greatgod_series/1_tensorlayer/data/',
)

# Each image is flattened to a 784-element vector.
X_train, y_train, X_val, y_val, X_test, y_test = _load_mnist_dataset(
    shape=(-1, 784),
    path=MNIST_DATA_DIR,
)

In [9]:
# Training pipeline with make_dataset's defaults (num_epochs=1, shuffle_data_seed=0).
training_dataset = make_dataset(images=X_train, labels=y_train)

In [10]:
# Distributed trainer built from `build_train` and the training dataset.
# Validation is currently disabled; the two arguments to enable it are kept
# below as a comment.
trainer = tl.distributed.Trainer(
    build_training_func=build_train,
    training_dataset=training_dataset,
    optimizer=tf.train.AdamOptimizer,
    optimizer_args={'learning_rate': 0.001},
    batch_size=500,
    prefetch_size=500,
    max_iteration=1000,
    # validation_dataset=validation_dataset, build_validation_func=build_validation
)

[TL] InputLayer  mlp/input: (?, 784)
[TL] DropoutLayer mlp/drop1: keep: 0.800000 is_fix: True
[TL] DenseLayer  mlp/relu1: 800 relu
[TL] DropoutLayer mlp/drop2: keep: 0.500000 is_fix: True
[TL] DenseLayer  mlp/relu2: 800 relu
[TL] DropoutLayer mlp/drop3: keep: 0.500000 is_fix: True
[TL] DenseLayer  mlp/output: 10 No Activation
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [11]:
# Train until the session asks to stop or the dataset is exhausted, then
# report the elapsed wall-clock seconds and the number of completed steps.
start_time = time.time()
c = 0  # completed training steps
while not trainer.session.should_stop():
    try:
        # Run a training step synchronously.
        trainer.train_on_batch()
    except tf.errors.OutOfRangeError:
        # The dataset would throw the OutOfRangeError when it reaches the end
        break
    else:
        # TODO: do whatever you like to the training session.
        c += 1
print("use_time", time.time() - start_time, c)

INFO:tensorflow:accuracy = 0.112, cost = 9.657014, global_step = 1
INFO:tensorflow:accuracy = 0.174, cost = 6.6361055, global_step = 2 (0.076 sec)
INFO:tensorflow:accuracy = 0.168, cost = 6.464143, global_step = 3 (0.083 sec)
INFO:tensorflow:accuracy = 0.246, cost = 5.466085, global_step = 4 (0.074 sec)
INFO:tensorflow:accuracy = 0.29, cost = 4.6469584, global_step = 5 (0.065 sec)
INFO:tensorflow:accuracy = 0.32, cost = 4.0450735, global_step = 6 (0.069 sec)
INFO:tensorflow:accuracy = 0.352, cost = 3.3064048, global_step = 7 (0.061 sec)
INFO:tensorflow:accuracy = 0.388, cost = 3.0487711, global_step = 8 (0.080 sec)
INFO:tensorflow:accuracy = 0.454, cost = 2.759209, global_step = 9 (0.068 sec)
INFO:tensorflow:accuracy = 0.46, cost = 2.609946, global_step = 10 (0.074 sec)
INFO:tensorflow:accuracy = 0.49, cost = 2.3225017, global_step = 11 (0.073 sec)
INFO:tensorflow:accuracy = 0.54, cost = 1.9011089, global_step = 12 (0.074 sec)
INFO:tensorflow:accuracy = 0.492, cost = 2.1626005, global_

In [13]:
# IPython-only introspection: the trailing `?` shows the help/docstring for
# tl.distributed.Trainer. Interactive aid only; this is not plain-Python
# syntax and produces no value used by later cells.
tl.distributed.Trainer?

In [14]:
import os

In [16]:
# Show the HOROVOD_MPICXX_SHOW environment variable, falling back to
# 'mpicxx -show' when it is not set.
os.getenv('HOROVOD_MPICXX_SHOW', 'mpicxx -show')

'mpicxx -show'

In [None]:
# Alternative loader: TensorLayer's built-in MNIST helper. Running this cell
# rebinds the six split variables assigned by the earlier loading cell.
(X_train, y_train,
 X_val, y_val,
 X_test, y_test) = tl.files.load_mnist_dataset(shape=(-1, 784))