In [1]:
import tensorflow as tf
import os
import sys
import numpy as np

import math
import tarfile
from six.moves import urllib

In [2]:
def maybe_download_and_extract(dest_directory, url):
    """
    Download the dataset and extract the data
    """
    
    if not os.path.exists(dest_directory):
        os.makedirs(dest_directory)
    file_name = 'cifar-10-binary.tar.gz'
    file_path = os.path.join(dest_directory, file_name)
    # if have not downloaded yet
    if not os.path.exists(file_path):
        def _progress(count, block_size, total_size):
            sys.stdout.write('\r%.1f%%' % 
                (float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()  # flush the buffer

        print('>> Downloading %s ...' % file_name)
        file_path, _ = urllib.request.urlretrieve(url, file_path, _progress)
        file_size = os.stat(file_path).st_size
        print('\r>> Total %d bytes' % file_size)
    extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
    if not os.path.exists(extracted_dir_path):
        # Open for reading with gzip compression, then extract all
        tarfile.open(file_path, 'r:gz').extractall(dest_directory)
    print('>> Done')



In [3]:
DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
DEST_DIRECTORY = 'data/cifar10'
DATA_DIRECTORY = DEST_DIRECTORY + '/cifar-10-batches-bin'

IMAGE_HEIGHT = 32
IMAGE_WIDTH = 32
IMAGE_DEPTH = 3
IMAGE_SIZE_CROPPED = 24

BATCH_SIZE = 128
NUM_CLASSES = 10 
LABEL_BYTES = 1
IMAGE_BYTES = 32 * 32 * 3

NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000

# download it
maybe_download_and_extract(DEST_DIRECTORY, DATA_URL)

>> Done


In [4]:
class CNN_Model(object):
    def __init__(self, model_hps):
        self.image_size = model_hps.image_size
        self.batch_size = model_hps.batch_size
        self.num_classes = model_hps.num_classes
        self.num_training_example = model_hps.num_training_example
        self.num_epoch_per_decay = model_hps.num_epoch_per_decay
        self.init_lr = model_hps.init_lr  # initial learn rate
        self.moving_average_decay = model_hps.moving_average_decay
        self.ckpt_dir = model_hps.ckpt_dir

        self.build_model()

    def build_model(self):
        # op for training
        self.global_step = tf.contrib.framework.get_or_create_global_step()

        with tf.variable_scope('model'):
            self.images = tf.placeholder(tf.float32,[self.batch_size, self.image_size, self.image_size, 3]) 
            self.labels = tf.placeholder(tf.int32)

            self.logits = self.inference(self.images)
            self.top_k_op = tf.nn.in_top_k(self.logits, self.labels, 1) 
            self.total_loss = self.loss(self.logits, self.labels)
            self.total_accu = self.accuracy(self.logits, self.labels)
            self.train_op = self.train(self.total_loss, self.global_step)
            

    def _variable_on_cpu(self, name, shape, initializer):
        with tf.device('/cpu:0'):
            var = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)

        return var

    def _variable_with_weight_decay(self, name, shape, stddev, wd=0.0):
        """ Helper to create an initialized Variable with weight decay.
            Note that the Variable is initialized with a truncated normal 
            distribution. A weight decay is added only if one is specified.
            -----
            Args:
                name: 
                    name of the variable
                shape: 
                    a list of ints
                stddev: 
                    standard deviation of a truncated Gaussian
                wd: 
                    add L2Loss weight decay multiplied by this float. If None, weight
                    decay is not added for this Variable.
            Returns:
                Variable Tensor
        """
        initializer = tf.truncated_normal_initializer(
            stddev=stddev, dtype=tf.float32)
        var = self._variable_on_cpu(name, shape, initializer)
        # deal with weight decay
        weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
        tf.add_to_collection('losses', weight_decay)
        return var

    def _conv_block(self, inp, scope, kernel_width, kernel_height, inp_channel, out_channel, strides = [1, 1, 1, 1], padding='SAME'):
        with tf.variable_scope(scope) as scope:
            kernel = self._variable_with_weight_decay('weights', [kernel_width, kernel_width, inp_channel, out_channel], 5e-2)
            biases = self._variable_on_cpu('bias', [out_channel], tf.constant_initializer(0.0))

            conv = tf.nn.conv2d(inp, kernel, strides=strides, padding=padding)
            pre_activation = tf.nn.bias_add(conv, biases)
            return tf.nn.relu(pre_activation, name=scope.name)

    def _fully_connected_layer(self, inp, scope, in_dim, out_dim, relu = True):
        with tf.variable_scope(scope) as scope:
            weights = self._variable_with_weight_decay('weights', [in_dim, out_dim], 0.04, 0.004)
            biases = self._variable_on_cpu('biases', [out_dim], tf.constant_initializer(0.1))
            if relu:
                return tf.nn.relu(tf.matmul(inp, weights) + biases, name=scope.name)
            else:
                return tf.matmul(inp, weights) + biases

    def inference(self, images):
        """ build the model
            -----
            Args:
                images with shape [batch_size,24,24,3]
            Return:
                logits with shape [batch_size,10]
        """
        conv_1 = self._conv_block(images, 'conv_1', 5, 5, 3, 64)
        # pool_1
        pool_1 = tf.nn.max_pool(conv_1,ksize=[1, 3, 3, 1],strides=[1, 2, 2, 1],padding='SAME',name='pool_1')
        # norm_1 (local_response_normalization)
        norm_1 = tf.nn.lrn(pool_1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm_1')

        # conv2
        conv_2 = self._conv_block(norm_1, 'conv_2', 5, 5, 64, 64)
        # norm2
        norm_2 = tf.nn.lrn(conv_2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm_2')
        # pool2
        pool_2 = tf.nn.max_pool(norm_2,ksize=[1, 3, 3, 1],strides=[1, 2, 2, 1],padding='SAME',name='pool_2')

        # Flatten feature maps before fully connected layers
        flat_features = tf.reshape(pool_2, [self.batch_size, -1])
        dim = flat_features.get_shape()[1].value
        # FC_1 (fully-connected layer)
        fc_1 = self._fully_connected_layer(flat_features, 'fc1', dim, 384)

        # FC_2
        fc_2 = self._fully_connected_layer(fc_1, 'fc2', 384, 192)

        logits = self._fully_connected_layer(fc_2, 'softmax_linear', 192, self.num_classes, relu = False)
        return logits

    def loss(self, logits, labels):
        '''calculate the loss'''
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        # The total loss is defined as the cross entropy loss plus all of the weight
        # decay terms (L2 loss).
        return tf.add_n(tf.get_collection('losses'), name='total_loss')

    def train(self, total_loss, global_step):
        '''Return training operation of one step'''
        num_batches_per_epoch = self.num_training_example / self.batch_size
        decay_steps = int(num_batches_per_epoch * self.num_epoch_per_decay)
        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(
            self.init_lr, global_step, decay_steps, decay_rate=0.1, staircase=True)
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # Track the moving averages of all trainable variables.
        # This step just records the moving average weights but not uses them
        ema = tf.train.ExponentialMovingAverage(self.moving_average_decay,
                                                global_step)
        self.ema = ema
        variables_averages_op = ema.apply(tf.trainable_variables())
        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')
        return train_op
    
    def accuracy(self, logits, labels):
        labels = tf.cast(labels, tf.int64)
        print(labels)
        pred = tf.argmax(tf.nn.softmax(logits),1)
        print(pred)
        accuracy = tf.contrib.metrics.accuracy(pred,labels)
        return accuracy

In [5]:
def cifar10_record_distort_parser(record):
    ''' Parse the record into label, cropped and distorted image
    -----
    Args:
        record: 
            a record containing label and image.
    Returns:
        label: 
            the label in the record.
        image: 
            the cropped and distorted image in the record.
    '''
    record_uint8 = tf.decode_raw(record, tf.uint8)
    # get the label and cast it to int32
    label = tf.cast(
      tf.strided_slice(record_uint8, [0], [LABEL_BYTES]), tf.int32)
    # [depth, height, width], uint8
    depth_major = tf.reshape(
      tf.strided_slice(record_uint8, [LABEL_BYTES],
                       [LABEL_BYTES + IMAGE_BYTES]),
      [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH])
    # change to [height, width, depth], uint8
    image = tf.transpose(depth_major, [1, 2, 0])
    
    # (7) image preprocessing for training
    height = IMAGE_SIZE_CROPPED
    width = IMAGE_SIZE_CROPPED
    float_image = tf.cast(image, tf.float32)
    distorted_image = tf.random_crop(float_image, [height, width, 3])
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(
      distorted_image, lower=0.2, upper=1.8)
    # standardization: subtract off the mean and divide by the variance of the pixels
    distorted_image = tf.image.per_image_standardization(distorted_image)
    # Set the shapes of tensors.
    distorted_image.set_shape([height, width, 3])
    
    return distorted_image, label


def cifar10_record_crop_parser(record):
    ''' Parse the record into label, cropped image
    -----
    Args:
        record: 
            a record containing label and image.
    Returns:
        label: 
            the label in the record.
        image: 
            the cropped image in the record.
    '''
    record_uint8 = tf.decode_raw(record, tf.uint8)
    # get the label and cast it to int32
    label = tf.cast(
      tf.strided_slice(record_uint8, [0], [LABEL_BYTES]), tf.int32)
    # [depth, height, width], uint8
    depth_major = tf.reshape(
      tf.strided_slice(record_uint8, [LABEL_BYTES],
                       [LABEL_BYTES + IMAGE_BYTES]),
      [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH])
    # change to [height, width, depth], uint8
    image = tf.transpose(depth_major, [1, 2, 0])
    
    # image preprocessing for training
    height = IMAGE_SIZE_CROPPED
    width = IMAGE_SIZE_CROPPED
    float_image = tf.cast(image, tf.float32)
    resized_image = tf.image.resize_image_with_crop_or_pad(float_image, height, width)
    image_eval = tf.image.per_image_standardization(resized_image)
    image_eval.set_shape([height, width, 3])

    return image_eval, label



def cifar10_iterator(filenames, batch_size, cifar10_record_parser):
    ''' Create a dataset and return a tf.contrib.data.Iterator 
    which provides a way to extract elements from this dataset.
    -----
    Args:
        filenames: 
            a tensor of filenames.
        batch_size: 
            batch size.
    Returns:
        iterator: 
            an Iterator providing a way to extract elements from the created dataset.
        output_types: 
            the output types of the created dataset.
        output_shapes: 
            the output shapes of the created dataset.
    '''

    # TODO3

    dataset = tf.data.FixedLengthRecordDataset(filenames, IMAGE_BYTES+LABEL_BYTES)
    dataset = dataset.map(cifar10_record_parser, num_parallel_calls=16) #image, label
    dataset = dataset.shuffle(4).batch(batch_size, drop_remainder=True)
    
    iterator = dataset.make_initializable_iterator()

    return iterator, dataset.output_types, dataset.output_shapes

In [6]:
model_hps_cifar = tf.contrib.training.HParams(
  image_size = IMAGE_SIZE_CROPPED,
  batch_size = BATCH_SIZE,
  num_classes = NUM_CLASSES,
  num_training_example = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN,
  num_epoch_per_decay = 350.0,
  init_lr = 0.1,
  moving_average_decay = 0.9999,
  ckpt_dir = './model/'
)

In [7]:
tf.reset_default_graph()

#define training/testing data files
training_files = [os.path.join(DATA_DIRECTORY, 'data_batch_%d.bin' % i) for i in range(1, 6)]
testing_files = [os.path.join(DATA_DIRECTORY, 'test_batch.bin')]
filenames_train = tf.constant(training_files)
filenames_test = tf.constant(testing_files)


# Training data iterator
iterator_train, types, shapes = cifar10_iterator(filenames_train, BATCH_SIZE, cifar10_record_distort_parser)
# Testing data iterator
iterator_test, _, _ = cifar10_iterator(filenames_test, BATCH_SIZE, cifar10_record_crop_parser)

# use to handle training and testing
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle, types, shapes)
labels_images_pairs = iterator.get_next()

In [8]:
# CNN model
model = CNN_Model(model_hps_cifar)

Instructions for updating:
Please switch to tf.train.get_or_create_global_step
Tensor("model/Cast_1:0", dtype=int64)
Tensor("model/ArgMax:0", shape=(128,), dtype=int64)


In [10]:
%%time
# TODO4:
# 1. train the CNN model 10 epochs
# 2. show the loss per epoch
# 3. get the accuracy of this 10-epoch model
# 4. measure the time using '%%time' instruction
# tips:
# use placeholder handle to determine if training or testing.

num_epoch = 10
num_batch_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / BATCH_SIZE

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    training_handle = sess.run(iterator_train.string_handle())
    model.loss_each_epoch = []

    #start training
    for i in range(num_epoch):
        _loss = []
        sess.run( iterator_train.initializer )
        while True:
            try:
                images, labels = sess.run(labels_images_pairs, feed_dict={ handle: training_handle })
                labels = labels.reshape((-1,))
                l, _ = sess.run([model.total_loss, model.train_op], feed_dict = {model.images:images, model.labels:labels})
                _loss.append(l)
            except tf.errors.OutOfRangeError:
                break
        loss_this_epoch = np.sum(_loss)
        gs = model.global_step.eval()
        print('loss of epoch %d: %f' % (i, loss_this_epoch))
        model.loss_each_epoch.append(loss_this_epoch)

    print('Done training %d epochs' %num_epoch)
    
    test_handle = sess.run(iterator_test.string_handle())
    sess.run( iterator_test.initializer )
    _loss = []
    _accu = []
    while True:
        try:
            images, labels = sess.run(labels_images_pairs, feed_dict={ handle: test_handle })
            labels = labels.reshape((-1,))
            l, a = sess.run([model.total_loss, model.total_accu], feed_dict = {model.images:images, model.labels:labels})
            _loss.append(l)
            _accu.append(a)
        except tf.errors.OutOfRangeError:
            break
    test_loss = np.sum(_loss)
    test_accu = np.mean(_accu)
    print('\nfinal test accuracy: %f' % test_accu)
    print('final test lost: %f\n' % test_loss)
    

loss of epoch 0: 1507.272217
loss of epoch 1: 1184.635986
loss of epoch 2: 969.849243
loss of epoch 3: 817.133179
loss of epoch 4: 705.322144
loss of epoch 5: 618.319763
loss of epoch 6: 561.352905
loss of epoch 7: 514.064575
loss of epoch 8: 483.336365
loss of epoch 9: 456.905487
Done training 10 epochs

final test accuracy: 0.713241
final test lost: 82.878860

CPU times: user 1h 7min 9s, sys: 3min 33s, total: 1h 10min 42s
Wall time: 22min 7s
