In [1]:
import os

import tensorflow as tf
import tensorlayer as tl

In [2]:
# Session shared by all cells below; it is closed in the last cell once the model has been saved.
sess = tf.InteractiveSession()

In [3]:
# Load MNIST with every image flattened to a 784-element row; the dataset
# files are looked up in (or downloaded to) the MNIST_data directory.
mnist_splits = tl.files.load_mnist_dataset((-1, 784), "MNIST_data")
X_train, y_train, X_val, y_val, X_test, y_test = mnist_splits

Load or Download MNIST > MNIST_data
MNIST_data/train-images-idx3-ubyte.gz
MNIST_data/t10k-images-idx3-ubyte.gz


In [4]:
# Graph inputs: a batch of flattened images and the matching integer labels.
# The leading dimension is left as None so any batch size can be fed.
x = tf.placeholder(dtype=tf.float32, shape=(None, 784), name='x')
y_ = tf.placeholder(dtype=tf.int64, shape=(None,), name='y_')

In [5]:
# Build the MLP: input -> dropout -> 2 x (800-unit ReLU dense -> dropout) -> 10-unit output.
network = tl.layers.InputLayer(x, name='input_layer')
network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1')
for hidden_idx in (1, 2):
    # Hidden block: relu1/drop2 on the first pass, relu2/drop3 on the second.
    network = tl.layers.DenseLayer(network, n_units=800, act=tf.nn.relu,
                                   name='relu%d' % hidden_idx)
    network = tl.layers.DropoutLayer(network, keep=0.5,
                                     name='drop%d' % (hidden_idx + 1))
# The output layer emits raw logits (identity activation): the softmax is
# applied inside tl.cost.cross_entropy(y, y_) to speed up computation.
# See tf.nn.sparse_softmax_cross_entropy_with_logits().
network = tl.layers.DenseLayer(network, n_units=10, act=tf.identity, name='output_layer')

  tensorlayer:Instantiate InputLayer  input_layer: (?, 784)
  tensorlayer:Instantiate DropoutLayer drop1: keep: 0.800000 is_fix: False
  tensorlayer:Instantiate DenseLayer  relu1: 800, relu
  tensorlayer:Instantiate DropoutLayer drop2: keep: 0.500000 is_fix: False
  tensorlayer:Instantiate DenseLayer  relu2: 800, relu
  tensorlayer:Instantiate DropoutLayer drop3: keep: 0.500000 is_fix: False
  tensorlayer:Instantiate DenseLayer  output_layer: 10, identity


In [6]:
# Loss and evaluation metric, both built on the raw logits of the last layer.
y = network.outputs
# Cross-entropy between the logits and the integer class labels.
cost = tl.cost.cross_entropy(y, y_)
# A sample counts as correct when its highest-scoring class equals the label.
predicted_label = tf.argmax(y, 1)
correct_prediction = tf.equal(predicted_label, y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, dtype=tf.float32))
# Predicted class index taken from the softmax probabilities.
class_probs = tf.nn.softmax(y)
y_op = tf.argmax(class_probs, 1)

In [7]:
# Adam optimizer that updates only the parameters owned by the network.
optimizer = tf.train.AdamOptimizer(
    learning_rate=0.0001,
    beta1=0.9,
    beta2=0.999,
    epsilon=1e-08,
    use_locking=False,
)
train_params = network.all_params
train_op = optimizer.minimize(cost, var_list=train_params)

In [8]:
# initialize all variables in the session; this has to run before the
# parameter summary below, which reports statistics of the current values
tl.layers.initialize_global_variables(sess)

# print network information: shape, mean, median and std of every parameter,
# followed by the output tensor of each layer
network.print_params()
network.print_layers()

  param   0: (784, 800)      (mean: -9.653186134528369e-05, median: -0.00011573695519473404, std: 0.08799777179956436)   relu1/W:0
  param   1: (800,)          (mean: 0.0               , median: 0.0               , std: 0.0               )   relu1/b:0
  param   2: (800, 800)      (mean: -1.8094489860231988e-05, median: -1.3125110854161903e-05, std: 0.08800297975540161)   relu2/W:0
  param   3: (800,)          (mean: 0.0               , median: 0.0               , std: 0.0               )   relu2/b:0
  param   4: (800, 10)       (mean: 0.00020693676196970046, median: -0.00037986214738339186, std: 0.08725804090499878)   output_layer/W:0
  param   5: (10,)           (mean: 0.0               , median: 0.0               , std: 0.0               )   output_layer/b:0
  num of params: 1276810
  layer 0: Tensor("drop1/mul:0", shape=(?, 784), dtype=float32)
  layer 1: Tensor("relu1/Relu:0", shape=(?, 800), dtype=float32)
  layer 2: Tensor("drop2/mul:0", shape=(?, 800), dtype=float32)
  layer 3: 

In [13]:
# Train with TensorLayer's fit helper. Validation loss/accuracy is printed on
# the first epoch and then every `print_freq` epochs; evaluation on the
# training set is switched off.
fit_options = dict(
    acc=acc,
    batch_size=5,
    n_epoch=20,
    print_freq=5,
    X_val=X_val,
    y_val=y_val,
    eval_train=False,
)
tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, **fit_options)

Start training the network ...
Epoch 1 of 20 took 91.714205s
   val loss: 0.102000
   val acc: 0.971200
Epoch 5 of 20 took 95.402296s
   val loss: 0.083910
   val acc: 0.975600
Epoch 10 of 20 took 95.267128s
   val loss: 0.073865
   val acc: 0.980200
Epoch 15 of 20 took 100.388078s
   val loss: 0.069307
   val acc: 0.980900
Epoch 20 of 20 took 93.259413s
   val loss: 0.066839
   val acc: 0.983200
Total training time: 1886.120926s


In [14]:
# evaluation: report loss and accuracy on the test split
# NOTE(review): batch_size=None presumably evaluates the whole test set in a
# single pass -- confirm against the tl.utils.test documentation
tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost)

Start testing the network ...
   test loss: 0.062311
   test acc: 0.984100


In [16]:
# Save the trained parameters to an .npz file, then release the session.
model_path = 'logs/model.npz'
model_dir = os.path.dirname(model_path)
# Create the output directory first so this cell also works on a fresh
# checkout where logs/ does not exist yet; otherwise the save would fail
# only after the long training run has finished.
if model_dir and not os.path.isdir(model_dir):
    os.makedirs(model_dir)
tl.files.save_npz(network.all_params, name=model_path)
# The session is closed only after a successful save: if saving raises, the
# trained weights stay available in the open session so the cell can be retried.
sess.close()

Model is saved to: logs/model.npz
