In [1]:
# This notebook somewhat follows the tutorial from here:
# https://www.tensorflow.org/versions/0.6.0/tutorials/mnist/pros/index.html
#
# The current working directory is expected to be $PROJECT_ROOT/notebooks

import sys, os
if '../code' not in sys.path: sys.path.append('../code')
import mnist_downloader
import numpy as np
import regression

In [2]:
# Load (downloading if necessary) the MNIST train/test splits with one-hot labels.
train, test = mnist_downloader.read_data_sets('../data', one_hot=True)
import tensorflow as tf
from data_set import DataSet
from tabulate import tabulate

# Summarise the array shapes of both splits in a small grid table, followed by
# the dtypes of the training inputs and labels.
headers = ['Data Set', 'x', 'y']
rows = [
    ['train', train.x.shape, train.y.shape],
    ['test', test.x.shape, test.y.shape],
]
tbl = tabulate(rows, headers, tablefmt='grid')
summary = '\n{}\n\nx {!s} y {!s}'.format(tbl, train.x.dtype, train.y.dtype)
print(summary)

Extracting ../data/train-images-idx3-ubyte.gz
Extracting ../data/train-labels-idx1-ubyte.gz
Extracting ../data/t10k-images-idx3-ubyte.gz
Extracting ../data/t10k-labels-idx1-ubyte.gz

+------------+--------------+-------------+
| Data Set   | x            | y           |
+============+==============+=============+
| train      | (60000, 784) | (60000, 10) |
+------------+--------------+-------------+
| test       | (10000, 784) | (10000, 10) |
+------------+--------------+-------------+

x float32 y float32


In [3]:
# Softmax regression on the flattened pixels, trained with mini-batch SGD.
# Each cross-validation fold is a fresh restart with early stopping on the fold's
# validation error; the per-fold checkpoints are then compared on the training set.
# TODO: switch to tf.reset_default_graph() and use with tf.Session() statements
with tf.Graph().as_default():
    x = tf.placeholder("float", shape=[None, 784]) # any batch size on flattened pixel values
    y = tf.placeholder("float", shape=[None, 10]) # one-hot labels (data loaded with one_hot=True)

    # Project-local model; this cell only relies on its .cross_entropy and .y attributes.
    sm = regression.SoftMax(x, y)

    global_step = tf.Variable(0, trainable=False) # don't train a global step value
    # Staircase schedule: the rate is scaled by decay_fraction once per decay_period steps.
    initial_learning_rate, decay_fraction, decay_period = 0.01, 0.95, 10000
    learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,
                                               decay_period, decay_fraction, staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate) \
                 .minimize(sm.cross_entropy, global_step=global_step)

    NUM_EPOCHS = 100
    NUM_FOLDS = 4
    UPDATE_COARSENESS = 5  # validate every this many epochs (and always on the last one)
    BATCH_SIZE = 50

    saver = tf.train.Saver()
    # Build the initializer once; calling tf.initialize_all_variables() inside the
    # fold loop would add a new op to the graph on every fold.
    init_op = tf.initialize_all_variables()
    top_models = []  # one (fold, epoch, checkpoint path) triple per fold

    sess = tf.InteractiveSession()
    try:
        # Cross validate on random restarts
        # NOTE(review): presumably train.cross_validation() also holds the yielded fold
        # out of train.new_epoch() -- confirm in data_set.
        for fold, validate in enumerate(train.cross_validation(NUM_FOLDS), 1):
            print('Starting fold {}'.format(fold))
            sess.run(init_op) # random restart
            best_in_fold_validation = np.inf
            best_in_fold_epoch = None
            best_in_fold_model = None
            for epoch in range(1, 1 + NUM_EPOCHS):
                for batch in train.new_epoch(BATCH_SIZE):
                    train_step.run(feed_dict={x: batch[0], y: batch[1]})
                if epoch == NUM_EPOCHS or UPDATE_COARSENESS and epoch % UPDATE_COARSENESS == 0:
                    valid_err = validate.multiclass_error(x, sm.y, y)
                    print('  Epoch {:3} error {}'.format(epoch, valid_err))
                    # Early stopping: quit the fold as soon as validation error gets worse,
                    # keeping the checkpoint written at the previous (better) check.
                    if valid_err > best_in_fold_validation: break
                    best_in_fold_validation = valid_err
                    best_in_fold_epoch = epoch
                    best_in_fold_model = saver.save(sess, '/tmp/tf-mnist-batch-sgd-fold-{}.ckpt'.format(fold))
            top_models.append((fold, best_in_fold_epoch, best_in_fold_model))

        # We select the model based on the entire training data set rather than the error from the
        # cross-validation fold because that error was used for early stopping. It's unfair to
        # compare across the folds the same accuracy since some folds might be easier than others.
        # Doing this helps the test accuracy by about 0.5%.
        best_fold, best_epoch, best_model = None, None, None
        best_error = np.inf
        for fold, epoch, model in top_models:
            saver.restore(sess, model)
            error = train.multiclass_error(x, sm.y, y)
            if best_error > error:
                best_error, best_fold, best_epoch, best_model = error, fold, epoch, model

        print('Fold {}/{} epoch {}/{} with inside training dataset-error {}'
              .format(best_fold, NUM_FOLDS, best_epoch, NUM_EPOCHS, best_error))

        # Reload the winning checkpoint before the final evaluation on the test split.
        saver.restore(sess, best_model)

        print('Test error', test.multiclass_error(x, sm.y, y))
    finally:
        # Always release the session, even if training is interrupted or raises.
        sess.close()

Starting fold 1
  Epoch   5 error 0.08633333444595337
  Epoch  10 error 0.08560001850128174
  Epoch  15 error 0.08633333444595337
Starting fold 2
  Epoch   5 error 0.09353333711624146
  Epoch  10 error 0.08240002393722534
  Epoch  15 error 0.08939999341964722
Starting fold 3
  Epoch   5 error 0.09146666526794434
  Epoch  10 error 0.08893334865570068
  Epoch  15 error 0.08619999885559082
  Epoch  20 error 0.0878000259399414
Starting fold 4
  Epoch   5 error 0.07946664094924927
  Epoch  10 error 0.08373332023620605
Fold 3/4 epoch 15/100 with inside training dataset-error 0.07136666774749756
Test error 0.0795999765396


In [4]:
# CNN checkpoints are stored in ../data as <cnn_prefix><epoch><cnn_suffix>.
cnn_prefix, cnn_suffix = 'mnist-cnn-', '.ckpt'

# Collect the names of every file in ../data that matches that pattern.
available_epochs = []
for fname in os.listdir('../data'):
    if fname.startswith(cnn_prefix) and fname.endswith(cnn_suffix):
        available_epochs.append(fname)
print(available_epochs)

def extract_epoch_number(s):
    """Return the epoch number embedded in a checkpoint file name.

    Strips len(cnn_prefix) characters from the front of `s` and len(cnn_suffix)
    characters from the end, then parses what is left with int().
    """
    without_prefix = s[len(cnn_prefix):]
    digits = without_prefix[:-len(cnn_suffix)]
    return int(digits)

# Turn the matching file names into epoch numbers and pick the most recent one;
# max_epoch is None when no checkpoint was found.
available_epochs = list(map(extract_epoch_number, available_epochs))
max_epoch = max(available_epochs) if available_epochs else None
status_msg = '{} saved epoch files; using largest epoch ({}) as start'
print(status_msg.format(len(available_epochs), max_epoch))

def epoch_to_filename(e):
    """Build the checkpoint path under ../data/ for epoch `e`."""
    parts = ['../data/', cnn_prefix, str(e), cnn_suffix]
    return ''.join(parts)


['mnist-cnn-35.ckpt']
1 saved epoch files; using largest epoch (35) as start


In [5]:
def weight_variable(shape):
    """Return a tf.Variable of the given shape, initialised from a truncated
    normal distribution with standard deviation 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve `x` with the filters `W` using a stride of 1 in every
    dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with a stride of 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
# TODO: would be interesting to see how a max filter performs instead

# Convolutional network: two conv + max-pool stages, a 1024-unit dense layer with
# dropout, and a final regression.SoftMax layer. Training resumes from the newest
# checkpoint found by the previous cell, if there is one.
with tf.Graph().as_default():
    x = tf.placeholder("float", shape=[None, 784]) # any batch size on flattened pixel values
    y = tf.placeholder("float", shape=[None, 10])

    # Unflatten every 784-vector into a 28x28 single-channel image.
    x_image = tf.reshape(x, [-1,28,28,1])

    # Stage 1: 5x5 filters, 1 input channel -> 32 output channels, then 2x2 max-pool.
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Stage 2: 5x5 filters, 32 -> 64 channels, then 2x2 max-pool.
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # The two stride-2 pools shrink 28x28 to 7x7, so each example flattens to
    # 7*7*64 features feeding a 1024-unit fully connected layer.
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])

    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    # Don't drop out when testing by setting keep_prob to 1.0
    keep_prob = tf.placeholder("float")
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Project-local model; this cell only relies on its .cross_entropy and .y attributes.
    output = regression.SoftMax(h_fc1_drop, y)

    train_step = tf.train.AdamOptimizer(1e-4).minimize(output.cross_entropy)

    BATCH_SIZE = 100
    NUM_EPOCHS = 35
    UPDATE_COARSENESS = 5  # report training error every this many epochs (and on the last)

    sess = tf.InteractiveSession()
    try:
        saver = tf.train.Saver()
        if max_epoch:
            saver.restore(sess, epoch_to_filename(max_epoch))
            print('Restoring CNN from epoch {}'.format(max_epoch))
        else:
            print('Starting CNN training from scratch')
            sess.run(tf.initialize_all_variables())
            max_epoch = 0

        # Loop-invariant; floored at 1 so a training set smaller than one batch
        # cannot cause a division by zero in the progress computation.
        tot_batches = max(1, train.size // BATCH_SIZE)

        # When max_epoch >= NUM_EPOCHS this range is empty and only the test
        # evaluation below runs.
        for epoch in range(max_epoch + 1, NUM_EPOCHS + 1):
            for i, batch in enumerate(train.new_epoch(BATCH_SIZE), 1):
                # 50-character progress bar: one '-' per 2% of the epoch completed.
                two_percent_done = i * 50 // tot_batches
                print(('\rEpoch {}/{} [' + two_percent_done * '-' + (50 - two_percent_done) * ' '
                       + '] {}/{}').format(epoch, NUM_EPOCHS, i, tot_batches), end='')
                train_step.run(feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})

            # Terminate the progress line. The bar already prints its own closing
            # bracket, so only a newline is needed here.
            print()
            name = saver.save(sess, epoch_to_filename(epoch))
            print('  Saved to', name)
            if epoch == NUM_EPOCHS or UPDATE_COARSENESS and epoch % UPDATE_COARSENESS == 0:
                err = train.multiclass_error(x, output.y, y, feed_dict={keep_prob:1.0})
                print('  Epoch {}/{} training error {}'.format(epoch, NUM_EPOCHS, err))

        print('Test error {}'.format(test.multiclass_error(x, output.y, y, feed_dict={keep_prob:1.0})))
    finally:
        # Always release the session, even if training is interrupted or raises.
        sess.close()

Restoring CNN from epoch 35
Test error 0.008800029754638672
