## Multi-layer Perceptron

In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

### Load data

In [2]:
# Read the MNIST data sets from the MNIST_data/ directory, asking for
# one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Training split (images, labels).
trX = mnist.train.images
trY = mnist.train.labels

# Test split (images, labels).
teX = mnist.test.images
teY = mnist.test.labels

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


### Helper functions

In [3]:
def init_weights(shape, name):
    """Create a named TensorFlow variable with small random initial values.

    Args:
        shape: Shape of the variable to create.
        name: Name given to the resulting ``tf.Variable``.

    Returns:
        A ``tf.Variable`` initialised from ``tf.random_normal`` with a
        standard deviation of 0.01.
    """
    initial_values = tf.random_normal(shape, stddev=0.01)
    return tf.Variable(initial_values, name=name)

# This network is the same as the previous one except with an extra hidden layer + dropout
def model(X, w_h1, w_h2, w_o, p_keep_input, p_keep_hidden):
    """Build the forward pass: two ReLU hidden layers with dropout, then a linear output.

    Args:
        X: Input tensor fed to the first layer.
        w_h1: Weights of the first hidden layer.
        w_h2: Weights of the second hidden layer.
        w_o: Weights of the output layer.
        p_keep_input: Keep probability for dropout applied to the input.
        p_keep_hidden: Keep probability for dropout applied to both hidden
            activations.

    Returns:
        The result of the final matmul with ``w_o``. No softmax is applied
        inside this function.
    """
    # Each stage lives in its own name scope so the graph visualization
    # groups its ops together.
    with tf.name_scope("layer1"):
        dropped_input = tf.nn.dropout(X, p_keep_input)
        hidden1_pre = tf.matmul(dropped_input, w_h1)
        hidden1 = tf.nn.relu(hidden1_pre)

    with tf.name_scope("layer2"):
        dropped_hidden1 = tf.nn.dropout(hidden1, p_keep_hidden)
        hidden2_pre = tf.matmul(dropped_hidden1, w_h2)
        hidden2 = tf.nn.relu(hidden2_pre)

    with tf.name_scope("layer3"):
        dropped_hidden2 = tf.nn.dropout(hidden2, p_keep_hidden)
        output = tf.matmul(dropped_hidden2, w_o)

    return output

### Build graph

In [4]:
# Inputs: rows of 784 values and the matching rows of 10 label values.
# The leading (batch) dimension is left unspecified.
X = tf.placeholder("float", shape=[None, 784], name="X")
Y = tf.placeholder("float", shape=[None, 10], name="Y")

# Weight matrices: 784 -> 625 -> 625 -> 10.
w_h1 = init_weights(shape=[784, 625], name="w_h1")
w_h2 = init_weights(shape=[625, 625], name="w_h2")
w_o = init_weights(shape=[625, 10], name="w_o")

# One histogram summary per weight matrix.
tf.histogram_summary("w_h1", w_h1)
tf.histogram_summary("w_h2", w_h2)
tf.histogram_summary("w_o", w_o)

# Dropout keep probabilities are placeholders so that different values can be
# fed for training and for evaluation.
p_keep_input = tf.placeholder("float", name="p_keep_input")
p_keep_hidden = tf.placeholder("float", name="p_keep_hidden")

# Network output for the batch in X.
py_x = model(X, w_h1, w_h2, w_o, p_keep_input, p_keep_hidden)

with tf.name_scope("cost"):
    # Mean softmax cross-entropy between the network output and the labels.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(py_x, Y)
    cost = tf.reduce_mean(cross_entropy)

    # RMSProp (learning rate 0.001, decay 0.9) minimizing the cost.
    optimizer = tf.train.RMSPropOptimizer(0.001, 0.9)
    train_op = optimizer.minimize(cost)

    # Scalar summary so the cost can be tracked over time.
    tf.scalar_summary("cost", cost)

with tf.name_scope("accuracy"):
    # A prediction is correct when the arg-max of the output matches the
    # arg-max of the label row.
    true_class = tf.argmax(Y, 1)
    predicted_class = tf.argmax(py_x, 1)
    correct_pred = tf.equal(true_class, predicted_class)

    # Booleans are cast to float so their mean is the fraction correct.
    acc_op = tf.reduce_mean(tf.cast(correct_pred, "float"))

    # Scalar summary so the accuracy can be tracked over time.
    tf.scalar_summary("accuracy", acc_op)

### Launch graph

In [5]:
# Train for 100 passes over the training arrays and, after each pass, evaluate
# on the test arrays, log the merged summaries and print the accuracy.
with tf.Session() as sess:
    # create a log writer. run 'tensorboard --logdir=./logs/multi-layer-perceptron'
    writer = tf.train.SummaryWriter("./logs/multi-layer-perceptron", sess.graph)
    try:
        # Single op that evaluates every summary registered on the graph.
        merged = tf.merge_all_summaries()

        # you need to initialize all variables
        tf.initialize_all_variables().run()

        for i in range(100):
            # Slice the training arrays into consecutive batches of 128.
            # zip() stops at the shorter range, so trailing examples that do
            # not fill a whole batch are skipped.
            for start, end in zip(range(0, len(trX), 128), range(128, len(trX)+1, 128)):
                # Dropout is active while training.
                sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end],
                                              p_keep_input: 0.8, p_keep_hidden: 0.5})

            # Keep probabilities of 1.0 disable dropout for evaluation.
            summary, acc = sess.run([merged, acc_op], feed_dict={X: teX, Y: teY,
                                              p_keep_input: 1.0, p_keep_hidden: 1.0})
            writer.add_summary(summary, i)
            print(i, acc)
    finally:
        # Flush pending events and release the event file even if training
        # is interrupted or raises.
        writer.close()

(0, 0.93290001)
(1, 0.96249998)
(2, 0.96890002)
(3, 0.97399998)
(4, 0.97530001)
(5, 0.9745)
(6, 0.97750002)
(7, 0.97759998)
(8, 0.9806)
(9, 0.97930002)
(10, 0.98000002)
(11, 0.98110002)
(12, 0.98199999)
(13, 0.98250002)
(14, 0.98150003)
(15, 0.98110002)
(16, 0.98280001)
(17, 0.98400003)
(18, 0.98329997)
(19, 0.98259997)
(20, 0.98199999)
(21, 0.98339999)
(22, 0.98140001)
(23, 0.98280001)
(24, 0.98180002)
(25, 0.98189998)
(26, 0.98220003)
(27, 0.98360002)
(28, 0.98379999)
(29, 0.98400003)
(30, 0.98439997)
(31, 0.98400003)
(32, 0.98199999)
(33, 0.98379999)
(34, 0.98409998)
(35, 0.98320001)
(36, 0.98470002)
(37, 0.98339999)
(38, 0.98280001)
(39, 0.98509997)
(40, 0.98439997)
(41, 0.98269999)
(42, 0.98269999)
(43, 0.98449999)
(44, 0.98439997)
(45, 0.98409998)
(46, 0.98329997)
(47, 0.9835)
(48, 0.98360002)
(49, 0.98430002)
(50, 0.98400003)
(51, 0.98589998)
(52, 0.98369998)
(53, 0.98269999)
(54, 0.98509997)
(55, 0.98360002)
(56, 0.9842)
(57, 0.98479998)
(58, 0.9842)
(59, 0.98180002)
(60, 0.984