# CIFAR-10 Image Classification

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import cifar10_lib
from cifar10_lib import *

### Read and save data

In [2]:
# Folder the CIFAR-10 archive is expected to unpack into; its absence triggers
# extraction below, and the path is handed to the test/preprocess helpers.
cifar10_dataset_folder_path = 'cifar-10-batches-py'

class DLProgress(tqdm):
    """tqdm progress bar whose `hook` method is handed to urlretrieve as its progress callback."""

    # Block number seen on the previous hook call (0 before the first call).
    last_block = 0

    def hook(self, block_num=1, block_size=1, total_size=None):
        """
        Advance the bar by the amount transferred since the previous call.
        : block_num: Running count of blocks reported so far.
        : block_size: Size of one block (the bar's unit is bytes in this notebook).
        : total_size: Total size of the download; stored as the bar's total.
        """
        self.total = total_size
        blocks_since_last_call = block_num - self.last_block
        self.update(blocks_since_last_call * block_size)
        self.last_block = block_num

# Download the CIFAR-10 archive only when it is not already on disk.
if not isfile('cifar-10-python.tar.gz'):
    with DLProgress(unit='B', unit_scale=True, miniters=1, desc='CIFAR-10 Dataset') as pbar:
        urlretrieve(
            'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
            'cifar-10-python.tar.gz',
            pbar.hook)

# Unpack the archive only when the dataset folder does not exist yet.
if not isdir(cifar10_dataset_folder_path):
    # The `with` block closes the archive on exit, so no explicit close() is needed.
    # NOTE(review): extractall() trusts the member paths inside the archive; only
    # run this against an archive obtained from the official source.
    with tarfile.open('cifar-10-python.tar.gz') as tar:
        tar.extractall()


# Check the extracted dataset folder with the project's test helper.
tests.test_folder_path(cifar10_dataset_folder_path)

# Run the project's preprocessing step with the normalize / one_hot_encode callables.
# NOTE(review): presumably this writes the preprocess_*.p files read by later cells — confirm in helper.
helper.preprocess_and_save_data(cifar10_dataset_folder_path, normalize, one_hot_encode)


All files found!


### Load saved data

In [3]:
# Load the Preprocessed Validation data
# The file is opened in a `with` block so the handle is closed as soon as the
# pickle has been read (the previous bare open() was never closed).
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files produced by this notebook's own preprocessing step.
with open('preprocess_validation.p', mode='rb') as validation_file:
    valid_features, valid_labels = pickle.load(validation_file)

### Build network

In [4]:
def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, keep_prob=None, dropout=False, relu=True):
    """
    Apply convolution then max pooling to x_tensor
    :param x_tensor: TensorFlow Tensor (the channel count is read from dimension 3)
    :param conv_num_outputs: Number of outputs for the convolutional layer
    :param conv_ksize: kernel size 2-D Tuple for the convolutional layer
    :param conv_strides: Stride 2-D Tuple for convolution
    :param pool_ksize: kernel size 2-D Tuple for pool
    :param pool_strides: Stride 2-D Tuple for pool
    :param keep_prob: keep probability handed to tf.nn.dropout; required when dropout is True
    :param dropout: when True, apply dropout between the convolution and the pooling step
    :param relu: when True, apply ReLU to the convolution output
    : return: A tensor that represents convolution and max pooling of x_tensor
    : raises ValueError: if dropout is True but keep_prob is None
    """
    # Fail fast, before any op is added to the graph: dropout was requested but
    # there is no keep probability to hand to tf.nn.dropout.
    if dropout and keep_prob is None:
        raise ValueError("conv2d_maxpool: keep_prob must be provided when dropout=True")

    # ---- convolution layer ----
    print("Add conv2d_maxpool: ReLU[%d] dropout[%d] conv kernel[%dx%d] stride[%dx%d] pool kernal[%dx%d] stride[%dx%d] output[%d]" % \
           (relu, dropout, \
            conv_ksize[0],conv_ksize[1],conv_strides[0],conv_strides[1],\
            pool_ksize[0], pool_ksize[1],pool_strides[0], pool_strides[1],conv_num_outputs))
    input_shape = x_tensor.get_shape().as_list()
    conv_input_size = getTensorSize(x_tensor)
    # NOTE(review): this estimate uses only dimension 1 and the first stride, so it
    # assumes a square input with equal strides; under Python 3 it is a float.
    conv_output_size = (input_shape[1] / conv_strides[0]) ** 2 * conv_num_outputs
    print("  conv2d_maxpool input: ",input_shape,"\tsize =",conv_input_size)
    n_channels = input_shape[3]

    # Filter weights are [kernel_h, kernel_w, in_channels, out_channels]; one bias per output channel.
    shape_W = [conv_ksize[0],conv_ksize[1],n_channels,conv_num_outputs]
    shape_b = [conv_num_outputs]

    # Both weights and bias come from the project's xavier_init helper, which is
    # given the input/output size figures computed above.
    F_W = tf.Variable(xavier_init(shape_W, conv_input_size, conv_output_size))
    F_b = tf.Variable(xavier_init(shape_b, conv_input_size, conv_output_size))

    x_tensor = tf.nn.conv2d(x_tensor, F_W, [1,conv_strides[0],conv_strides[1],1], 'SAME') + F_b
    if relu:
        x_tensor = tf.nn.relu(x_tensor)
    print("  conv2d_maxpool cov2d: ",x_tensor.get_shape().as_list(),"\tsize =",getTensorSize(x_tensor))
    if dropout:
        print("  conv2d_maxpool add dropout")
        x_tensor = tf.nn.dropout(x_tensor, keep_prob)

    # ---- max pooling layer ----
    x_tensor = tf.nn.max_pool(x_tensor, ksize=[1, pool_ksize[0], pool_ksize[1], 1],\
                              strides=[1, pool_strides[0], pool_strides[1], 1], padding='SAME')
    print("  conv2d_maxpool  pool: ",x_tensor.get_shape().as_list(),"\tsize =",getTensorSize(x_tensor))
    return x_tensor

def flatten(x_tensor):
    """
    Reshape x_tensor into a 2-D tensor of shape (Batch Size, Flattened Image Size).
    : x_tensor: A tensor of size (Batch Size, ...), where ... are the image dimensions.
    : return: A tensor of size (Batch Size, Flattened Image Size).
    """
    # The per-example element count comes from the project's getTensorSize helper.
    n_features = getTensorSize(x_tensor)
    print("Add flatten: size[%d]" % (n_features))
    # -1 leaves the batch dimension to be inferred.
    flat_shape = [-1, n_features]
    return tf.reshape(x_tensor, flat_shape)

def fully_conn(x_tensor, num_outputs):
    """
    Fully connected layer followed by ReLU: relu(x_tensor @ W + b).
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of output that the new tensor should be.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    n_inputs = x_tensor.get_shape().as_list()[1]
    print("Add fully_conn: input[%d] output[%d]" % (n_inputs,num_outputs))

    # Weights first, then bias, both drawn from the project's xavier_init helper.
    weights = tf.Variable(xavier_init([n_inputs, num_outputs], n_inputs, num_outputs))
    biases = tf.Variable(xavier_init([num_outputs], n_inputs, num_outputs))

    pre_activation = tf.add(tf.matmul(x_tensor, weights), biases)
    return tf.nn.relu(pre_activation)

def output(x_tensor, num_outputs):
    """
    Linear output layer (no activation): x_tensor @ W + b.
    : x_tensor: A 2-D tensor where the first dimension is batch size.
    : num_outputs: The number of output that the new tensor should be.
    : return: A 2-D tensor where the second dimension is num_outputs.
    """
    n_inputs = x_tensor.get_shape().as_list()[1]
    print("Add output: input[%d] output[%d]" % (n_inputs,num_outputs))
    # NOTE(review): this layer initialises with tf.random_normal while conv2d_maxpool
    # and fully_conn use xavier_init — confirm whether the difference is intentional.
    weights = tf.Variable(tf.random_normal([n_inputs, num_outputs]))
    biases = tf.Variable(tf.random_normal([num_outputs]))
    return tf.add(tf.matmul(x_tensor, weights), biases)

# Define conv-max-pool layer parameters
def _conv_plan(conv_kernal, conv_stride, base_outputs, growth, n_layers=4):
    """
    Build one plan table with a row per conv/max-pool layer.
    Kernel and stride are constant across layers, pooling is always 2x2 with
    stride 2, and num_output starts at base_outputs and is multiplied by
    growth at each successive layer.
    """
    return pd.DataFrame({
        'layer': range(n_layers),
        'conv_kernal': [conv_kernal] * n_layers,
        'conv_stride': [conv_stride] * n_layers,
        'pool_kernal': [2] * n_layers,
        'pool_stride': [2] * n_layers,
        'num_output': [base_outputs * (growth ** layer_idx) for layer_idx in range(0, n_layers)]
    })


# Plan 1: 5x5 kernels, stride 1, outputs 16 -> 128.
convParams1 = _conv_plan(5, 1, 16, 2)

# Plan 2: 5x5 kernels, stride 1, outputs 32 -> 256.
convParams2 = _conv_plan(5, 1, 32, 2)

# Plan 3: 4x4 kernels, stride 2, outputs 32 -> 2048.
convParams3 = _conv_plan(4, 2, 32, 4)

# conv_net selects from this list with a 1-based conv_plan argument.
convParamsList = [convParams1, convParams2, convParams3]

# Fully connected layer widths per plan; conv_net selects with a 1-based full_plan argument.
fullParams = [[1024], [512], [512, 128], [1024, 128], [1024, 256, 64]]


def conv_net(x, keep_prob, conv_plan=1, conv_depth=1, full_plan=1, dropout_fc=True, dropout_conv=False, relu=True):
    """
    Create a convolutional neural network model
    : x: Placeholder tensor that holds image data.
    : keep_prob: Placeholder tensor that hold dropout keep probability.
    : conv_plan: 1-based index into convParamsList selecting the conv/max-pool plan.
    : conv_depth: Number of conv/max-pool layers to stack (rows 0..conv_depth-1 of the plan).
    : full_plan: 1-based index into fullParams selecting the fully connected layer widths.
    : dropout_fc: When True, apply dropout after the first fully connected layer.
    : dropout_conv: Passed to conv2d_maxpool as its dropout flag.
    : relu: Passed to conv2d_maxpool as its relu flag.
    : return: Tensor that represents logits
    : raises ValueError: if conv_plan or full_plan is less than 1
    """
    # Plan numbers are 1-based. Without this check 0 or a negative number would
    # wrap around through negative list indexing and silently pick another plan.
    if conv_plan < 1:
        raise ValueError("conv_net: conv_plan must be >= 1, got %r" % (conv_plan,))
    if full_plan < 1:
        raise ValueError("conv_net: full_plan must be >= 1, got %r" % (full_plan,))

    print("Build conv_net conv_plan[%d] conv_depth[%d] full_plan[%d]" % (conv_plan,conv_depth,full_plan))

    # A plan number past the end of either list still raises IndexError here.
    convParams = convParamsList[conv_plan-1]
    full_params = fullParams[full_plan-1]

    # ---- convolution + max-pool stack ----
    # Each plan row holds scalar kernel/stride values; they are duplicated into
    # [h, w] pairs because conv2d_maxpool indexes [0] and [1] of each argument.
    for i in range(conv_depth):
        conv_num_outputs = convParams.at[i,'num_output']
        conv_ksize = [convParams.at[i,'conv_kernal']] * 2
        conv_strides = [convParams.at[i,'conv_stride']] * 2
        pool_ksize = [convParams.at[i,'pool_kernal']] * 2
        pool_strides = [convParams.at[i,'pool_stride']] * 2
        x = conv2d_maxpool(x, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides, keep_prob, dropout_conv, relu)

    # ---- flatten ----
    x = flatten(x)

    # ---- fully connected stack ----
    # Dropout, when enabled, is applied once, directly after the first layer.
    for layer_idx, layer_width in enumerate(full_params):
        x = fully_conn(x, layer_width)
        if dropout_fc and layer_idx == 0:
            x = tf.nn.dropout(x, keep_prob)
            print("Add dropout layer")

    # ---- output layer: one logit per class ----
    n_classes = 10
    x = output(x, n_classes)

    print("")
    return x

##############################
## Build the Neural Network ##
##############################

# Remove previous weights, bias, inputs, etc..
# Resetting the default graph lets this cell be re-run without piling up duplicate ops.
tf.reset_default_graph()

# Inputs
# Placeholders come from project helpers. print_stats feeds tensors by the names
# 'x:0', 'y:0' and 'keep_prob:0' — presumably these helpers assign those names; TODO confirm.
x = neural_net_image_input((32, 32, 3))
y = neural_net_label_input(10)
keep_prob = neural_net_keep_prob_input()

# Model
# Earlier configurations are kept commented out for reference; only the last call is active.
# logits = conv_net(x, keep_prob, conv_plan=2, conv_depth=4, full_plan=4, dropout_fc=True, dropout_conv=False, relu=True)
# logits = conv_net(x, keep_prob, conv_plan=3, conv_depth=2, full_plan=4, dropout_fc=True, dropout_conv=True, relu=True)
# logits = conv_net(x, keep_prob, conv_plan=2, conv_depth=3, full_plan=4, dropout_fc=True, dropout_conv=False, relu=False)
logits = conv_net(x, keep_prob, conv_plan=3, conv_depth=3, full_plan=5, dropout_fc=True, dropout_conv=True, relu=True)

# Name logits Tensor, so that is can be loaded from disk after training
logits = tf.identity(logits, name='logits')

# Loss and Optimizer
# Mean softmax cross-entropy over the batch, minimised with Adam at its default settings.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)

# Accuracy
# Fraction of examples whose arg-max logit matches the arg-max of the label row.
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')


Build conv_net conv_plan[3] conv_depth[3] full_plan[5]
Add conv2d_maxpool: ReLU[1] dropout[1] conv kernel[4x4] stride[2x2] pool kernal[2x2] stride[2x2] output[32]
  conv2d_maxpool input:  [None, 32, 32, 3] 	size = 3072
  conv2d_maxpool cov2d:  [None, 16, 16, 32] 	size = 8192
  conv2d_maxpool add dropout
  conv2d_maxpool  pool:  [None, 8, 8, 32] 	size = 2048
Add conv2d_maxpool: ReLU[1] dropout[1] conv kernel[4x4] stride[2x2] pool kernal[2x2] stride[2x2] output[128]
  conv2d_maxpool input:  [None, 8, 8, 32] 	size = 2048
  conv2d_maxpool cov2d:  [None, 4, 4, 128] 	size = 2048
  conv2d_maxpool add dropout
  conv2d_maxpool  pool:  [None, 2, 2, 128] 	size = 512
Add conv2d_maxpool: ReLU[1] dropout[1] conv kernel[4x4] stride[2x2] pool kernal[2x2] stride[2x2] output[512]
  conv2d_maxpool input:  [None, 2, 2, 128] 	size = 512
  conv2d_maxpool cov2d:  [None, 1, 1, 512] 	size = 512
  conv2d_maxpool add dropout
  conv2d_maxpool  pool:  [None, 1, 1, 512] 	size = 512
Add flatten: size[512]
Add fully_

### Train network

In [None]:
# Training hyperparameters used by the training loop in this cell.
epochs = 100  # number of passes over the five training batches
batch_size = 64  # passed to helper.load_preprocess_training_batch
keep_probability = 0.75  # passed to train_neural_network; print_stats always feeds 1.0 instead
def print_stats(session, feature_batch, label_batch, cost, accuracy):
    """
    Print the loss on the given batch and the accuracy on the validation set.
    The validation data is read from the module-level valid_features / valid_labels.
    : session: Current TensorFlow session
    : feature_batch: Batch of Numpy image data
    : label_batch: Batch of Numpy label data
    : cost: TensorFlow cost function
    : accuracy: TensorFlow accuracy function
    """
    # Both evaluations feed keep_prob = 1.0.
    batch_feed = {'x:0': feature_batch, 'y:0': label_batch, 'keep_prob:0': 1.}
    batch_loss = session.run(cost, feed_dict=batch_feed)

    validation_feed = {'x:0': valid_features, 'y:0': valid_labels, 'keep_prob:0': 1.}
    validation_accuracy = session.run(accuracy, feed_dict=validation_feed)

    report = 'Loss: {:>10.4f} Validation Accuracy: {:.6f}'.format(batch_loss, validation_accuracy)
    print(report)

# Echo the hyperparameters so they are recorded next to the training log.
print("epochs = %d" % epochs)
print("batch_size = %d" % batch_size)
print("keep_probability = %.2f" % keep_probability)
# NOTE(review): this message says "Single Batch" but the loop below trains on
# CIFAR-10 batches 1..n_batches — the text looks left over from an earlier cell.
print('Checking the Training on a Single Batch...')
# Wall-clock start; the elapsed time is printed after the session closes.
start_time = time.time()

# Checkpoint path prefix handed to saver.save once training finishes.
save_model_path = './image_class_model_orig'

print('Training...')
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())
    
    # Training cycle
    for epoch in range(epochs):
        # Loop over all batches
        # n_batches does not change between epochs; it is simply re-assigned each time.
        n_batches = 5
        for batch_i in range(1, n_batches + 1):
            # One optimisation step per mini-batch yielded by the helper.
            for batch_features, batch_labels in helper.load_preprocess_training_batch(batch_i, batch_size):
                train_neural_network(sess, optimizer, keep_probability, batch_features, batch_labels)
            print('Epoch {:>2}, CIFAR-10 Batch {}:  '.format(epoch + 1, batch_i), end='')
            # Stats use the last mini-batch left over from the inner loop; if the
            # helper yielded nothing, batch_features/batch_labels would be unset here.
            print_stats(sess, batch_features, batch_labels, cost, accuracy)
            
    # Save Model
    # The saver is created only after training, inside the still-open session.
    saver = tf.train.Saver()
    save_path = saver.save(sess, save_model_path)

print("--- %s seconds ---" % (time.time() - start_time))


epochs = 100
batch_size = 64
keep_probability = 0.75
Checking the Training on a Single Batch...
Training...
Epoch  1, CIFAR-10 Batch 1:  Loss:     2.1929 Validation Accuracy: 0.232600
Epoch  1, CIFAR-10 Batch 2:  Loss:     1.9065 Validation Accuracy: 0.358800
Epoch  1, CIFAR-10 Batch 3:  Loss:     1.4687 Validation Accuracy: 0.365000
Epoch  1, CIFAR-10 Batch 4:  Loss:     1.6329 Validation Accuracy: 0.422000
Epoch  1, CIFAR-10 Batch 5:  Loss:     1.7648 Validation Accuracy: 0.391600
Epoch  2, CIFAR-10 Batch 1:  Loss:     1.8185 Validation Accuracy: 0.411800
Epoch  2, CIFAR-10 Batch 2:  Loss:     1.6295 Validation Accuracy: 0.457400
Epoch  2, CIFAR-10 Batch 3:  Loss:     1.2104 Validation Accuracy: 0.449200
Epoch  2, CIFAR-10 Batch 4:  Loss:     1.3570 Validation Accuracy: 0.482000
Epoch  2, CIFAR-10 Batch 5:  Loss:     1.4765 Validation Accuracy: 0.490200
Epoch  3, CIFAR-10 Batch 1:  Loss:     1.4684 Validation Accuracy: 0.479200
Epoch  3, CIFAR-10 Batch 2:  Loss:     1.4286 Validation