In [1]:
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf
import numpy as np
import os as os
import time
import tarfile
import pickle

cifar10_dataset_tar_gz = 'cifar-10-python.tar.gz'
cifar10_dataset_folder_path = 'cifar-10-batches-py'

# untar cifar10 dataset
if not os.path.isdir(cifar10_dataset_folder_path):
    with tarfile.open(cifar10_dataset_tar_gz) as tar:
        tar.extractall()
        tar.close()

In [2]:
def one_hot_encode(x):
    enc = OneHotEncoder(n_values = 10)
    x = np.array(x).reshape(-1, 1)
    
    return enc.fit_transform(x).toarray()

In [3]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='latin1')
    return dict

In [4]:
# preprocess Training, Validation, and Testing Data
temp_train_features = []
temp_train_labels = []
temp_valid_features = []
temp_valid_labels = []
test_proportion = 0.8

# original data
for i in range(1,6):
    batch = unpickle(cifar10_dataset_folder_path + '/data_batch_' + str(i))
    
    features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = batch['labels']
    test_count = int(len(features) * test_proportion)
    
    temp_train_features.extend(features[:test_count])
    temp_train_labels.extend(labels[:test_count]) 
    temp_valid_features.extend(features[test_count:])
    temp_valid_labels.extend(labels[test_count:])
    
'''
# add data Augmentation: flip train images left and right 
for i in range(1,6):
    batch = unpickle(cifar10_dataset_folder_path + '/data_batch_' + str(i))
    
    features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = batch['labels']
    test_count = int(len(features) * test_proportion)
    
    temp_train_features.extend(np.flip(features[:test_count], axis=2))
    temp_train_labels.extend(labels[:test_count]) 

# add data Augmentation: flip train images up and down
for i in range(1,6):
    batch = unpickle(cifar10_dataset_folder_path + '/data_batch_' + str(i))
    
    features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
    labels = batch['labels']
    test_count = int(len(features) * test_proportion)
    
    temp_train_features.extend(np.flip(features[:test_count], axis=1))
    temp_train_labels.extend(labels[:test_count]) 
'''

train_features = np.array(temp_train_features) / 255
train_labels = one_hot_encode(np.array(temp_train_labels))
valid_features = np.array(temp_valid_features) / 255
valid_labels = one_hot_encode(np.array(temp_valid_labels))

batch = unpickle(cifar10_dataset_folder_path + '/test_batch')
features = batch['data'].reshape((len(batch['data']), 3, 32, 32)).transpose(0, 2, 3, 1)
labels = batch['labels']

test_features = np.array(features) / 255
test_labels = one_hot_encode(np.array(labels))

In [5]:
def conv2d(x_tensor, conv_num_outputs, conv_ksize, conv_strides):
    weight = tf.Variable(tf.truncated_normal([*conv_ksize, x_tensor.shape[3].value, conv_num_outputs], stddev=0.1))
    bias = tf.Variable(tf.zeros(conv_num_outputs))
    
    conv_layer = tf.nn.conv2d(x_tensor, weight, strides=[1, *conv_strides, 1], padding = "SAME")
    conv_layer = tf.nn.bias_add(conv_layer, bias)
    conv_layer = tf.nn.relu(conv_layer)

    return conv_layer

def conv2d_maxpool(x_tensor, conv_num_outputs, conv_ksize, conv_strides, pool_ksize, pool_strides):
    conv_layer = conv2d(x_tensor, conv_num_outputs, conv_ksize, conv_strides)
    conv_layer = tf.nn.max_pool(conv_layer, ksize=[1, *pool_ksize, 1], strides=[1, *pool_strides, 1], padding='SAME')

    return conv_layer

def flatten(x_tensor):
    flattened_image_size = 1
    for shape in x_tensor.get_shape().as_list()[1:]:
        flattened_image_size = flattened_image_size * shape
        
    return tf.reshape(x_tensor, [-1, flattened_image_size])

def fully_connect(x_tensor, num_outputs, keep_prob):
    weight = tf.Variable(tf.truncated_normal([x_tensor.shape[1].value, num_outputs], stddev=0.1))
    bias = tf.Variable(tf.zeros(num_outputs))
    
    fully_connect_layer = tf.matmul(x_tensor, weight)
    fully_connect_layer = tf.add(fully_connect_layer, bias)
    fully_connect_layer = tf.nn.relu(fully_connect_layer)
    fully_connect_layer = tf.nn.dropout(fully_connect_layer, keep_prob)
    
    return fully_connect_layer

def output(x_tensor, num_outputs):
    weight = tf.Variable(tf.truncated_normal([x_tensor.shape[1].value, num_outputs], stddev=0.1))
    bias = tf.Variable(tf.zeros(num_outputs))
    
    output_layer = tf.matmul(x_tensor, weight)
    output_layer = tf.add(output_layer, bias)
    
    return output_layer

In [6]:
def alexnet(x, keep_prob):
    # convolutional layer 1
    x = conv2d_maxpool(x, 32, conv_ksize = (4, 4), conv_strides = (2, 2), pool_ksize = (4, 4), pool_strides = (2,2))
    
    # convolutional layer 2, comment this layer if shallow CNN
    x = conv2d_maxpool(x, 128, conv_ksize = (4, 4), conv_strides = (2, 2), pool_ksize = (4, 4), pool_strides = (2,2))
    
    # convolutioanl layer 3, comment this layer if shallow CNN
    x = conv2d(x, 256, conv_ksize = (4, 4), conv_strides = (2, 2))
    
    # convolutional layer 4, comment this layer if shallow CNN
    x = conv2d(x, 256, conv_ksize = (4, 4), conv_strides = (2, 2))
    
    # convolutional layer 5, comment this layer if shallow CNN
    x = conv2d_maxpool(x, 128, conv_ksize = (4, 4), conv_strides = (2, 2), pool_ksize = (4, 4), pool_strides = (2,2))

    # flatten tensor
    x = flatten(x)    

    # fully connected layer 1, comment this layer if shallow CNN
    x = fully_connect(x, 1024, keep_prob)
    
    # fully connected layer 2
    x = fully_connect(x, 1024, keep_prob)
    
    #  fully connected layer 3 (output layer)
    x = output(x, 10)
    
    return x

In [7]:
# remove previous settings
tf.reset_default_graph()

# placeholders for inputs, outputs, and keep probability
x = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(tf.float32, shape=[None, 10])
keep_prob = tf.placeholder(tf.float32)

# model
logits = alexnet(x, keep_prob)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer().minimize(cost)
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



In [8]:
valid_error_rate_without_data_aug = []
#valid_error_rate_with_left_right_data = []
#valid_error_rate_with_up_down_data = []
#valid_error_rate_shallow = []

In [9]:
# set hyperparameter
epochs = 40
batch_size = 128
keep_probability = 0.5

In [10]:
print("Start training")
start_time = time.clock()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # training alexnet
    for epoch in range(epochs):
        for start in range(0, len(train_features), batch_size):
            end = min(start + batch_size, len(train_features))
            batch_features = train_features[start:end]
            batch_labels = train_labels[start:end]
            sess.run(optimizer, feed_dict={x: batch_features, y: batch_labels, keep_prob: keep_probability})
            
        # validation error rate on validation set
        loss = sess.run(cost, feed_dict={x: batch_features, y: batch_labels, keep_prob: 1.})
        valid_acc = sess.run(accuracy, feed_dict={x: valid_features, y: valid_labels, keep_prob: 1.})
        valid_error_rate_without_data_aug.append(1 - valid_acc)
        #valid_error_rate_with_left_right_data.append(1 - valid_acc)
        #valid_error_rate_with_up_down_data.append(1 - valid_acc)
        #valid_error_rate_shallow.append(1 - valid_acc)
        print("Epoch: %2d Loss: %.8f Validation error rate: %.8f" % (epoch + 1, loss, 1 - valid_acc))
    
    end_time = time.clock()
    print("Training ends")
    # validation error rate on test set
    test_batch_acc_total = 0
    test_batch_count = 0
    
    for start in range(0, len(test_features), batch_size):
        end = min(start + batch_size, len(test_features))
        test_feature_batch = test_features[start:end]
        test_label_batch = test_labels[start:end]
        test_batch_acc_total += sess.run(accuracy, feed_dict={x: test_feature_batch, y: test_label_batch, keep_prob: 1.0})
        test_batch_count += 1
        
    print('Testing error rate : %.8f' % (1 - test_batch_acc_total/test_batch_count))

Start training
Epoch:  1 Loss: 1.38694155 Validation error rate: 0.58109999
Epoch:  2 Loss: 1.08432484 Validation error rate: 0.50690001
Epoch:  3 Loss: 0.95274985 Validation error rate: 0.47899997
Epoch:  4 Loss: 1.00005651 Validation error rate: 0.45169997
Epoch:  5 Loss: 0.89519370 Validation error rate: 0.42479998
Epoch:  6 Loss: 0.89223546 Validation error rate: 0.43519998
Epoch:  7 Loss: 0.72558546 Validation error rate: 0.39600003
Epoch:  8 Loss: 0.75619006 Validation error rate: 0.40890002
Epoch:  9 Loss: 0.55213779 Validation error rate: 0.40020001
Epoch: 10 Loss: 0.51213378 Validation error rate: 0.37519997
Epoch: 11 Loss: 0.40274268 Validation error rate: 0.36849999
Epoch: 12 Loss: 0.31951755 Validation error rate: 0.37199998
Epoch: 13 Loss: 0.32067901 Validation error rate: 0.37239999
Epoch: 14 Loss: 0.31022343 Validation error rate: 0.37279999
Epoch: 15 Loss: 0.27726069 Validation error rate: 0.36739999
Epoch: 16 Loss: 0.24779756 Validation error rate: 0.36129999
Epoch: 17