In [1]:
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf

# Download MNIST data (labels are loaded one-hot encoded).
mnist = input_data.read_data_sets('/tmp/data/', one_hot=True)

# An InteractiveSession installs itself as the default session, which is what
# lets later cells call .run() / .eval() without passing a session explicitly.
sess = tf.InteractiveSession()

# Graph inputs: flattened 784-pixel images and their 10-way one-hot labels.
x = tf.placeholder(dtype=tf.float32, shape=[None, 784])
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10])

# The same images viewed as 28x28 single-channel maps for the convolutions.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [2]:
def train_mnist(y, iterations=1000, batch_size=100):
    """Train the model whose softmax output is ``y`` and measure its accuracy.

    Relies on module-level state: the placeholders ``x`` and ``y_``, the
    ``mnist`` dataset object, and a default TensorFlow session.

    Args:
        y: tensor of per-class probabilities (softmax output) computed from ``x``.
        iterations: number of optimizer steps to run.
        batch_size: number of training examples fed per step.

    Returns:
        ``(validation_accuracy, test_accuracy)`` as evaluated on
        ``mnist.validation`` and ``mnist.test``.
    """
    # Define loss and optimizer
    # Clip the probabilities away from zero before the log: a softmax output
    # that underflows to exactly 0 would give log(0) = -inf, and 0 * -inf = NaN
    # would then poison the loss and every subsequent update.
    safe_y = tf.clip_by_value(y, 1e-10, 1.0)
    cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(safe_y), reduction_indices=[1]))
    train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

    # Train the model
    # The initializer must run after minimize(), which creates the optimizer's
    # own variables. NOTE: it (re)initializes every variable in the graph, so
    # models trained by earlier calls are reset as well.
    tf.initialize_all_variables().run()
    for i in range(iterations):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        train_step.run({x: batch_xs, y_: batch_ys})

    # Test the model
    # A prediction is correct when the most probable class matches the label.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    validation_accuracy = accuracy.eval({x: mnist.validation.images, y_: mnist.validation.labels})
    test_accuracy = accuracy.eval({x: mnist.test.images, y_: mnist.test.labels})
    return validation_accuracy, test_accuracy

In [3]:
# Grid search for hyper parameters
import itertools
import sys

def optimize_mnist(get_model, *hyperparameters):
    """Grid-search ``get_model`` over every combination of hyperparameter values.

    Each combination is turned into a model with ``get_model(*combination)``,
    trained and scored with ``train_mnist``, and reported on its own line as
    ``validation_accuracy test_accuracy combination``. After the sweep the
    combination with the highest validation accuracy is printed; on ties the
    earliest combination wins.

    Args:
        get_model: callable taking one value per hyperparameter and returning
            the object to pass to ``train_mnist``.
        *hyperparameters: one iterable of candidate values per positional
            argument of ``get_model``, in the same order.

    Returns:
        None. Results are only printed.
    """
    print('validation, test, hyperparameter')
    best = None

    for hyperparameter in itertools.product(*hyperparameters):
        model = get_model(*hyperparameter)
        validation_accuracy, test_accuracy = train_mnist(model)
        print(validation_accuracy, test_accuracy, hyperparameter)
        # Flush so each result is visible immediately during a long sweep.
        sys.stdout.flush()

        # Select on validation accuracy only; test accuracy is just reported.
        if best is None or validation_accuracy > best[0]:
            best = (validation_accuracy, test_accuracy, hyperparameter)

    if best is None:
        # An empty candidate list makes the product empty, so nothing was
        # evaluated; unpacking None in print(*best) would raise TypeError.
        print('no hyperparameter combinations to evaluate')
        return

    print('best setting')
    print(*best)

In [4]:
# "Deep MNIST for Experts" tutorial
# Initialisation settings shared by every layer below.
stddev, b_init = 0.01, 0.01

# Layer 1: 5x5 convolution (1 -> 32 channels) + ReLU, then 2x2 max-pool (28x28 -> 14x14).
W_conv1 = tf.Variable(tf.truncated_normal(shape=[5, 5, 1, 32], stddev=stddev))
b_conv1 = tf.Variable(tf.constant(b_init, shape=[32]))
o_conv1 = tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], padding='SAME')
h_conv1 = tf.nn.relu(tf.add(o_conv1, b_conv1))
h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Layer 2: 5x5 convolution (32 -> 64 channels) + ReLU, then 2x2 max-pool (14x14 -> 7x7).
W_conv2 = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 64], stddev=stddev))
b_conv2 = tf.Variable(tf.constant(b_init, shape=[64]))
o_conv2 = tf.nn.conv2d(h_pool1, W_conv2, strides=[1, 1, 1, 1], padding='SAME')
h_conv2 = tf.nn.relu(tf.add(o_conv2, b_conv2))
h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Fully connected layer: flatten the 7x7x64 feature maps into 1024 hidden units.
W_fc1 = tf.Variable(tf.truncated_normal(shape=[7 * 7 * 64, 1024], stddev=stddev))
b_fc1 = tf.Variable(tf.constant(b_init, shape=[1024]))
h_pool2_flat = tf.reshape(h_pool2, shape=[-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.add(tf.matmul(h_pool2_flat, W_fc1), b_fc1))

# Readout layer: 1024 hidden units -> 10 class probabilities.
W_fc2 = tf.Variable(tf.truncated_normal(shape=[1024, 10], stddev=stddev))
b_fc2 = tf.Variable(tf.constant(b_init, shape=[10]))
y = tf.nn.softmax(tf.add(tf.matmul(h_fc1, W_fc2), b_fc2))

# Left as the cell's last expression so the accuracies are displayed.
train_mnist(y)

(0.98479998, 0.98710001)

In [6]:
# Simple convolution + softmax layer with various settings
# Weight stddev and bias constant; simple_cnn reads both globals when called.
stddev, b_init = 0.01, 0.01

def simple_cnn(channel, patch, stride, pool):
    """Build a one-convolution-layer softmax classifier on ``x_image``.

    The graph is: convolution -> ReLU -> max-pool -> flatten -> fully
    connected softmax. Weights are drawn from a truncated normal with the
    module-level ``stddev`` and biases start at the module-level ``b_init``.

    Args:
        channel: number of convolution output channels.
        patch: side length of the square convolution kernel.
        stride: convolution stride along both spatial axes.
        pool: max-pool window size, also used as the pooling stride.

    Returns:
        Tensor of 10 class probabilities per input image.
    """
    W_conv = tf.Variable(tf.truncated_normal([patch, patch, 1, channel], stddev=stddev))
    b_conv = tf.Variable(tf.constant(b_init, shape=[channel]))
    o_conv = tf.nn.conv2d(x_image, W_conv, strides=[1, stride, stride, 1], padding='SAME')
    h_conv = tf.nn.relu(o_conv + b_conv)
    h_pool = tf.nn.max_pool(h_conv, ksize=[1, pool, pool, 1], strides=[1, pool, pool, 1], padding='SAME')

    # With 'SAME' padding each stage outputs ceil(input / stride) values per
    # spatial axis. Flooring 28 / stride / pool is only right when both divide
    # evenly (pool=3 yields 10x10 maps, not 9x9), so take the ceiling stage by
    # stage. -(-a // b) is integer ceiling division.
    conv_side = -(-28 // stride)
    pool_side = -(-conv_side // pool)
    h_size = pool_side * pool_side * channel
    h_pool_flat = tf.reshape(h_pool, [-1, h_size])

    W_fc = tf.Variable(tf.truncated_normal([h_size, 10], stddev=stddev))
    b_fc = tf.Variable(tf.constant(b_init, shape=[10]))

    y = tf.nn.softmax(tf.matmul(h_pool_flat, W_fc) + b_fc)
    return y

# Candidate values per hyperparameter, named after simple_cnn's arguments.
channel = [16, 32, 64]      # convolution output channels
patch = [5, 10, 20, 27]     # kernel side length
stride = [1]                # convolution stride (held fixed)
pool = [1, 2, 4]            # max-pool window / stride

# Order must match simple_cnn(channel, patch, stride, pool).
search_space = (channel, patch, stride, pool)
optimize_mnist(simple_cnn, *search_space)

validation, test, hyperparameter
0.9726 0.9766 (16, 5, 1, 1)
0.968 0.9711 (16, 5, 1, 2)
0.9614 0.96 (16, 5, 1, 4)
0.9808 0.9813 (16, 10, 1, 1)
0.9786 0.9811 (16, 10, 1, 2)
0.974 0.9767 (16, 10, 1, 4)
0.986 0.986 (16, 20, 1, 1)
0.9832 0.9826 (16, 20, 1, 2)
0.9802 0.9811 (16, 20, 1, 4)
0.9842 0.9819 (16, 27, 1, 1)
0.9816 0.9836 (16, 27, 1, 2)
0.9798 0.9792 (16, 27, 1, 4)
0.9766 0.9788 (32, 5, 1, 1)
0.9766 0.9771 (32, 5, 1, 2)
0.9724 0.9734 (32, 5, 1, 4)
0.9844 0.9811 (32, 10, 1, 1)
0.9796 0.9799 (32, 10, 1, 2)
0.9788 0.9776 (32, 10, 1, 4)
0.9844 0.9861 (32, 20, 1, 1)
0.9852 0.9852 (32, 20, 1, 2)
0.986 0.9822 (32, 20, 1, 4)
0.9836 0.9845 (32, 27, 1, 1)
0.9848 0.9864 (32, 27, 1, 2)
0.9838 0.9837 (32, 27, 1, 4)
0.9806 0.9815 (64, 5, 1, 1)
0.9786 0.9818 (64, 5, 1, 2)
0.9766 0.9766 (64, 5, 1, 4)
0.9836 0.9851 (64, 10, 1, 1)
0.9858 0.9855 (64, 10, 1, 2)
0.9816 0.985 (64, 10, 1, 4)
0.9818 0.9817 (64, 20, 1, 1)
0.9842 0.9861 (64, 20, 1, 2)
0.9864 0.9873 (64, 20, 1, 4)
0.9846 0.9855 (64, 27, 1, 1