In [1]:
%pylab inline
import tensorflow as tf
import re

Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 22 days


Populating the interactive namespace from numpy and matplotlib


In [2]:
from fuel.datasets.cifar10 import CIFAR10
from fuel.transformers import ScaleAndShift, Cast, Flatten, Mapping
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme, ShuffledScheme

In [3]:
CIFAR10.default_transformers = (
    (ScaleAndShift, [2.0 / 255.0, -1], {'which_sources': 'features'}),
    (Cast, [np.float32], {'which_sources': 'features'})
)


cifar10_train = CIFAR10(("train",), subset=slice(None, 40000))
cifar10_train_stream = DataStream.default_stream(
    cifar10_train,
    iteration_scheme=ShuffledScheme(cifar10_train.num_examples, 10)
)

cifar10_validation = CIFAR10(("train",), subset=slice(40000, None))
cifar10_validation_stream = DataStream.default_stream(
    cifar10_validation, 
    iteration_scheme=SequentialScheme(cifar10_validation.num_examples, 100)
)

cifar10_test = CIFAR10(("test",))
cifar10_test_stream = DataStream.default_stream(
    cifar10_test,
    iteration_scheme=SequentialScheme(cifar10_test.num_examples, 100)
)

In [4]:
print "The streams return batches containing %s" % (cifar10_train_stream.sources,)

print "Each trainin batch consits of a tuple containing:"
for element in next(cifar10_train_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)
    
print "Validation/test batches consits of tuples containing:"
for element in next(cifar10_test_stream.get_epoch_iterator()):
    print " - an array of size %s containing %s" % (element.shape, element.dtype)

The streams return batches containing (u'features', u'targets')
Each trainin batch consits of a tuple containing:
 - an array of size (10, 3, 32, 32) containing float32
 - an array of size (10, 1) containing uint8
Validation/test batches consits of tuples containing:
 - an array of size (100, 3, 32, 32) containing float32
 - an array of size (100, 1) containing uint8


In [5]:
def affine_layer(X, num_hidden, activation=None, name=None):
    name = name or "affine_layer"
    with tf.variable_scope(None, default_name=name):
        W = tf.get_variable('W', (X.shape[1].value, num_hidden), 'float32',
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('b', (1, num_hidden), 'float32',
                            initializer=tf.zeros_initializer())
        Y = tf.matmul(X, W) + b
        if activation:
            Y = activation(Y)
        return Y

def conv2d_layer(X, filter_shape, num_filters, activation=None, name=None):
    name = name or "conv2d_layer"
    num_channels = X.shape[3].value
    with tf.variable_scope(None, default_name=name):
        F = tf.get_variable('F', (filter_shape[0], filter_shape[1], num_channels, num_filters), 'float32',
                            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        b = tf.get_variable('b', (1, 1, 1, num_filters), 'float32',
                            initializer=tf.zeros_initializer())
        Y = tf.nn.conv2d(X, F, (1, 1, 1, 1), padding='VALID') + b
        if activation:
            Y = activation(Y)
        return Y

In [6]:
X = tf.placeholder(shape=(None, 3, 32, 32), dtype=np.float32, name='X')
X_NHWC = tf.transpose(X, [0, 2, 3, 1])
Y = tf.placeholder(shape=(None, 1), dtype=np.int32, name='Y')

X_flat = tf.reshape(X, (-1, np.prod(X.shape.as_list()[1:])))

In [7]:
if 0:
    L = X_flat
else:
    L = X_NHWC
    print ("Input shape %s" % (L.shape,))
    for filter_size, num_filters in [(5, 32), (3, 64), (3, 128)]:
        L = conv2d_layer(L, (filter_size, filter_size), num_filters, activation=tf.nn.relu)
        print ("After conv shape %s" % (L.shape,))
        L = tf.nn.max_pool(L, (1, 2, 2, 1), (1, 2, 2, 1), padding='VALID')
        L = tf.nn.dropout(L, keep_prob=0.8)
        print ("After pool shape %s" % (L.shape,))
    L = tf.reshape(L, (-1, np.prod(L.shape.as_list()[1:])))
    print ("After flattening %s" % (L.shape,))
    
for layer_dim in [1000, 1000]:
    L = affine_layer(L, layer_dim, activation=tf.nn.relu)
    print ("After affine %s" % (L.shape,))
    L = tf.nn.dropout(L, keep_prob=0.8)
L = affine_layer(L, 10)

per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=L, labels=tf.reshape(Y, (-1,)))
xentropy_loss = tf.reduce_mean(per_example_loss)

weight_decay_loss = 0.0
for V in tf.trainable_variables():
    if re.match(V.name, ".*/W.*"):
        weight_decay_loss += 1.0e-5 * tf.reduce_sum(V**2)


batch_loss = xentropy_loss + weight_decay_loss
classification = tf.argmax(L, axis=1)

Input shape (?, 32, 32, 3)
After conv shape (?, 28, 28, 32)
After pool shape (?, 14, 14, 32)
After conv shape (?, 12, 12, 64)
After pool shape (?, 6, 6, 64)
After conv shape (?, 4, 4, 128)
After pool shape (?, 2, 2, 128)
After flattening (?, 512)
After affine (?, 1000)
After affine (?, 1000)


In [9]:
learning_rate = tf.get_variable('learning_rate', shape=(), dtype='float32', trainable=False)
global_step = tf.get_variable('global_step', shape=(), dtype='int32', trainable=False)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(batch_loss)
train_op = tf.group(train_op, tf.assign_add(global_step, 1))
initialize_op = tf.global_variables_initializer()

In [10]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
sess.run(initialize_op)

train_loss_history = []
valid_loss_history = []
epoch = 0

In [11]:
sess.run(learning_rate.assign(1e-4))

9.9999997e-05

In [12]:
# train for a while
global_step_v = sess.run(global_step)

while global_step_v < 100000:
    epoch += 1
    
    for batch_X, batch_Y in cifar10_train_stream.get_epoch_iterator():
        feed_dict = {X: batch_X, Y: batch_Y}
        _, global_step_v, loss_v = sess.run([train_op, global_step, batch_loss], feed_dict=feed_dict)
        if (global_step_v % 1000) == 1:
            train_loss_history.append((epoch, global_step_v, loss_v,))
            print ("epoch: %d, step: %d, loss: %g" % (epoch, global_step_v, loss_v,))
    
    test_stats = []
    for batch_X, batch_Y in cifar10_validation_stream.get_epoch_iterator():
        feed_dict = {X: batch_X, Y: batch_Y}
        classification_v, batch_loss_v = sess.run([classification, batch_loss], feed_dict=feed_dict)
        batch_accuracy = np.mean(classification_v == batch_Y[:,0])
        test_stats.append((batch_accuracy, batch_loss_v)) 
        
    valid_acc, valid_batch_loss = np.mean(test_stats, axis=0)
    print ("epoch: %d, step: %d, valid_loss: %g, valid_acc: %f" % (epoch, global_step_v, valid_batch_loss, valid_acc))
    valid_loss_history.append((epoch, global_step_v, valid_batch_loss, valid_acc))

epoch: 1, step: 1, loss: 2.34483
epoch: 1, step: 1001, loss: 1.90192
epoch: 1, step: 2001, loss: 1.70728
epoch: 1, step: 3001, loss: 1.46581
epoch: 1, step: 4000, valid_loss: 1.50077, valid_acc: 0.457400
epoch: 2, step: 4001, loss: 2.03333
epoch: 2, step: 5001, loss: 1.88278
epoch: 2, step: 6001, loss: 1.57185
epoch: 2, step: 7001, loss: 1.17679
epoch: 2, step: 8000, valid_loss: 1.29506, valid_acc: 0.539600
epoch: 3, step: 8001, loss: 1.03921
epoch: 3, step: 9001, loss: 1.01623
epoch: 3, step: 10001, loss: 1.02932
epoch: 3, step: 11001, loss: 0.895262
epoch: 3, step: 12000, valid_loss: 1.20752, valid_acc: 0.575800
epoch: 4, step: 12001, loss: 1.53113
epoch: 4, step: 13001, loss: 1.33286
epoch: 4, step: 14001, loss: 1.13804
epoch: 4, step: 15001, loss: 1.0964
epoch: 4, step: 16000, valid_loss: 1.15056, valid_acc: 0.593900
epoch: 5, step: 16001, loss: 0.706062
epoch: 5, step: 17001, loss: 0.725947
epoch: 5, step: 18001, loss: 0.873986
epoch: 5, step: 19001, loss: 1.36948
epoch: 5, step: 

In [13]:
test_accuracy = []
for batch_X, batch_Y in cifar10_test_stream.get_epoch_iterator():
    feed_dict = {X: batch_X, Y: batch_Y}
    classification_v = sess.run(classification, feed_dict=feed_dict)
    batch_accuracy = np.mean(classification_v == batch_Y[:, 0])
    test_accuracy.append(batch_accuracy)
    
print("test accuracy %g" % np.mean(test_accuracy))

test accuracy 0.7205


In [7]:
x = tf.placeholder(tf.float32, shape=[None, 3072])
y_ = tf.placeholder(tf.int64, shape=[None, 10])
W = tf.Variable(tf.zeros([3072, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.matmul(x, W) + b

In [8]:
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

In [9]:
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [10]:
conv1_f = 32
conv2_f = 64
# conv3_f = 128
fc1_w = 1024
fc2_w = 10

af_conv = 8

In [11]:
x_image = tf.reshape(x, [-1, 32, 32, 3])

In [12]:
W_conv1 = weight_variable([5, 5, 3, conv1_f])
b_conv1 = bias_variable([conv1_f])

h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
print h_pool1.shape

(?, 16, 16, 32)


In [13]:
W_conv2 = weight_variable([5, 5, conv1_f, conv2_f])
b_conv2 = bias_variable([conv2_f])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
print h_pool2.shape

(?, 8, 8, 64)


In [14]:
# W_conv3 = weight_variable([5, 5, conv2_f, conv3_f])
# b_conv3 = bias_variable([conv3_f])

# h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
# h_pool3 = max_pool_2x2(h_conv3)

In [15]:
W_fc1 = weight_variable([af_conv * af_conv * conv2_f, fc1_w])
b_fc1 = bias_variable([fc1_w])

h_pool2_flat = tf.reshape(h_pool2, [-1, af_conv * af_conv * conv2_f])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

In [16]:
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

In [17]:
W_fc2 = weight_variable([fc1_w, fc2_w])
b_fc2 = bias_variable([fc2_w])

y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

In [18]:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv)
)
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [19]:
# counter = 0

# try:
#     for b in cifar10_train_stream.get_epoch_iterator():
#         print "batch 1"
#         while 1:
#             print 'n', next(b)
#         counter += 1
#     print counter
# except:
#     print counter, sys.exc_info()[0]

In [20]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [33]:
validation_errors = []

for i in range(10000):
    batch = next(cifar10_train_stream.get_epoch_iterator())
    X = next(batch)
    Y = next(batch)
    # Y = np.array(np.arange(10, 100) == Y[0], dtype=np.uint8)
    Y = np.array(np.arange(10) == Y, dtype=np.uint8)

    if i % 100 == 0:
        train_accuracy = accuracy.eval(
            feed_dict={x: X, y_: Y, keep_prob: 1.0}
        )
        print("step %d, training accuracy %g" % (i, train_accuracy))
        
        val_acc = []
        for v_X, v_Y in cifar10_validation_stream.get_epoch_iterator():
            v_Y = np.array(np.arange(10) == v_Y, dtype=np.uint8)
            acc = accuracy.eval(feed_dict={x: v_X, y_: v_Y, keep_prob: 1.0})
            val_acc.append(acc)
        print("     %d, valid accuracy: %g" % (i, np.mean(val_acc)))
        
    train_step.run(feed_dict={x: X, y_: Y, keep_prob: 0.5})
    
    

# print("test accuracy %g" % accuracy.eval(
#     feed_dict={x: cifar10.test.images, y_: cifar10.test.labels}
# ))

step 0, training accuracy 0.96
     0, valid accuracy: 0.607
step 100, training accuracy 0.76
     100, valid accuracy: 0.6015
step 200, training accuracy 0.8
     200, valid accuracy: 0.6014
step 300, training accuracy 0.84
     300, valid accuracy: 0.6061
step 400, training accuracy 0.76
     400, valid accuracy: 0.6038
step 500, training accuracy 0.8
     500, valid accuracy: 0.61
step 600, training accuracy 0.8
     600, valid accuracy: 0.606
step 700, training accuracy 0.8
     700, valid accuracy: 0.6031
step 800, training accuracy 0.76
     800, valid accuracy: 0.6046
step 900, training accuracy 0.76
     900, valid accuracy: 0.606
step 1000, training accuracy 0.84
     1000, valid accuracy: 0.6013
step 1100, training accuracy 0.92
     1100, valid accuracy: 0.6031
step 1200, training accuracy 0.96
     1200, valid accuracy: 0.6054
step 1300, training accuracy 0.8
     1300, valid accuracy: 0.6063
step 1400, training accuracy 0.92
     1400, valid accuracy: 0.6037
step 1500, tra

In [36]:
test_accuracy = []
for X, Y in cifar10_test_stream.get_epoch_iterator():
    Y = np.array(np.arange(10) == Y, dtype=np.uint8)
    test_acc = accuracy.eval(feed_dict={x: X, y_: Y, keep_prob: 1.0})
    # print("step %d, test accuracy %g" % (len(test_accuracy), test_acc))
    test_accuracy.append(test_acc)
    
print("test accuracy %g" % np.mean(test_accuracy))

NameError: name 'accuracy' is not defined

In [None]:
# subplot(2,1,1)
# train_loss = np.array(train_loss)
# semilogy(train_loss[:,0], train_loss[:,1], label='batch train loss')
# legend()

# subplot(2,1,2)
# train_erros = np.array(train_erros)
# plot(train_erros[:,0], train_erros[:,1], label='batch train error rate')
# validation_errors = np.array(validation_errors)
# plot(validation_errors[:,0], validation_errors[:,1], label='validation error rate', color='r')
# ylim(0,0.2)
# legend()