In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import cv2
import pickle
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf

tf.reset_default_graph()

<h3>Load data</h3>

In [2]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [3]:
def load_cifar10(data_path):

    train_data = None
    train_labels = []
    test_data = None
    test_labels = None


    for i in range(1, 6):
        data_dict = unpickle(data_path + "data_batch_" + str(i))
        if (i == 1):
            train_data = data_dict[b'data']
        else:
            train_data = np.vstack((train_data, data_dict[b'data']))
        train_labels += data_dict[b'labels']

    test_data_dict = unpickle(data_path + "test_batch")
    test_data = test_data_dict[b'data']
    test_labels = test_data_dict[b'labels']

    train_data = train_data.reshape((50000, 3, 32, 32))
    train_data = np.rollaxis(train_data, 1, 4)
    train_labels = np.array(train_labels)
    
    test_data = test_data.reshape((10000, 3, 32, 32))
    test_data = np.rollaxis(test_data, 1, 4)
    test_labels = np.array(test_labels)
    
    return train_data, train_labels, test_data, test_labels

In [19]:
data_path = "../../cifar-10-batches-py/"
train_data, train_labels, test_data, test_labels = load_cifar10(data_path)

In [20]:
print("train_data.shape:", train_data.shape, "train_labels.shape:", train_labels.shape)
print("test_data.shape:", test_data.shape, "test_labels.shape:", test_labels.shape)

train_data.shape: (50000, 32, 32, 3) train_labels.shape: (50000,)
test_data.shape: (10000, 32, 32, 3) test_labels.shape: (10000,)


<h3>Data preprocessing</h3>

In [21]:
def scale_data(data):
    """ 
    Scale the row pixel intensities to the range [0, 1]
    """
    data = data.astype(np.float32) / 255.0
    return data

train_data = scale_data(train_data)
test_data = scale_data(test_data)

In [22]:
def convert_grayscale(data_image):
    """
    Convert image to grayscale
    """
    output = np.zeros((data_image.shape[:-1]))
    for i, image in enumerate(data_image):
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        output[i] = gray_image

    return output.reshape(-1, 32, 32, 1)
    
train_data = convert_grayscale(train_data)
test_data = convert_grayscale(test_data)

print("train_data.shape: ", train_data.shape)
print("test_data.shape: ", test_data.shape)

train_data.shape:  (50000, 32, 32, 1)
test_data.shape:  (10000, 32, 32, 1)


In [13]:
# def one_hot_labels(label_data):
#     """
#     One hot encode the labels
#     """
#     label_data = tf.one_hot(label_data, depth=10)
    
#     return label_data

# train_labels = one_hot_labels(train_labels)
# test_labels = one_hot_labels(test_labels)

In [35]:
# with tf.Session() as sess:
#     print(type(sess.run(train_labels)))
#     print(sess.run(train_labels[:5]))

In [23]:
def one_hot_labels(label_data, num_classes=10):
    """
    One hot encode the labels
    """
    label_data = np.eye(num_classes)[label_data.reshape(-1)]
    
    return label_data

train_labels = one_hot_labels(train_labels)
test_labels = one_hot_labels(test_labels)

In [24]:
train_labels[:5]

array([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]])

<h3>Train/validation split</h3>

In [25]:
X_train, X_valid, y_train, y_valid = train_test_split(train_data, train_labels, \
                                                      test_size=0.1, \
                                                      random_state=42)

print("X_train.shape:", X_train.shape, "y_train.shape", y_train.shape)
print("X_valid.shape:", X_valid.shape, "y_valid.shape", y_valid.shape)

X_train.shape: (45000, 32, 32, 1) y_train.shape (45000, 10)
X_valid.shape: (5000, 32, 32, 1) y_valid.shape (5000, 10)


<h3>LeNet model</h3>

In [26]:
def conv2d(input, num_input_channels, num_filters, filter_shape=[5, 5], strides=[1, 1], name="conv"):
	""" Convolution layer """
	with tf.name_scope(name):
		w = tf.Variable(tf.truncated_normal([filter_shape[0], filter_shape[1], num_input_channels, num_filters], \
			stddev=0.02), name=name+"_W")
		b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name=name+"_b")

		# conv, bias and then activation
		output = tf.nn.conv2d(input, w, strides=[1, strides[0], strides[1], 1], padding='VALID')
		output = tf.nn.bias_add(output, b)
		output = tf.tanh(output)

		return output

def avgpool2d(input, filter_shape=[2, 2], strides=[2, 2], name="avgpool"):
	""" Average Pooling layer """
	with tf.name_scope(name):
		return tf.nn.pool(input, pooling_type='AVG', \
			window_shape=[filter_shape[0], filter_shape[1]], \
			strides=[strides[0], strides[1]], padding='VALID', \
			name=name)

def fully_connected(input, neurons_in, neurons_out, act=True, name="fc"):
	""" Fully connected layer """
	with tf.name_scope(name):
		W = tf.Variable(tf.truncated_normal([neurons_in, neurons_out], stddev=0.02), name=name+"_W")
		b = tf.Variable(tf.constant(0.1, shape=[neurons_out]), name=name+"_b")

		output = tf.add(tf.matmul(input, W), b)

		if act: # if True, use tanh activation function, otherwise do not apply any activation
			return tf.tanh(output)
		else:
			return output

In [27]:
def LeNet(input_image, name="LeNet"):

	with tf.name_scope(name):
		# 1st conv layer : CONV + TANH + AVERAGE POOL 
		conv_1 = conv2d(input_image, num_input_channels=1, num_filters=6, \
			filter_shape=[5, 5], strides=[1, 1], name="conv1")
		avgpool_1 = avgpool2d(conv_1, filter_shape=[2, 2], \
			strides=[2, 2], name="avgpool1")

		# 2nd conv layer : CONV + TANH + AVERAGE POOL
		conv_2 = conv2d(avgpool_1, num_input_channels=6, num_filters=16, \
			filter_shape=[5, 5], strides=[1, 1], name="conv_2")
		avgpool_2 = avgpool2d(conv_2, filter_shape=[2, 2], \
			strides=[2, 2], name="avgpool2")

		# 3rd conv layer : CONV + TANH
		conv_3 = conv2d(avgpool_2, num_input_channels=16, num_filters=120, \
			filter_shape=[5, 5], strides=[1, 1], name="conv3")

		# Flatten 
		flattened = tf.reshape(conv_3, [-1, 1 * 1 * 120])

		# Fully connected layer 1 : DENSE + TANH
		fc_1 = fully_connected(flattened, 120, 84, act=True, name="fc1")

		# Output, Fully connected layer 2 : DENSE + RBF
		fc_2 = fully_connected(fc_1, 84, 10, act=False, name="fc2")

		output = fc_2 
		# output = rbf(fc_2, name="rbf") 

		return output 

Define the TensorFlow operations for training

In [28]:
# tf Graph input
X = tf.placeholder(tf.float32, [None, 32, 32, 1])
y = tf.placeholder(tf.float32, [None, 10])

logits = LeNet(X)
predictions = tf.nn.softmax(logits)

print("logits.shape: ", logits.shape)
# Evaluate model


logits.shape:  (?, 10)


In [29]:
def compute_loss_xent(logits, targets):
	""" Compute cross entropy as our loss function """
	with tf.name_scope("cross_entropy"):
		# Get rid of extra dimensions and cast targets into integers
		targets = tf.squeeze(tf.cast(targets, tf.int32))
		# Calculate cross entropy from logits and targets
		cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2( \
			logits=logits, labels=targets)
		# Take the average loss across batch size
		cross_entropy_mean = tf.reduce_mean(cross_entropy, name="cross_entropy")

		return(cross_entropy_mean)

def train_optimizer(loss_value, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-08):
	""" Use an AdamOptimizer to train the network """
	with tf.name_scope("optimizer"):
		# Create optimizer
		my_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1, \
			beta2=beta2, epsilon=epsilon)
		# Initialize train step
		train_step = my_optimizer.minimize(loss_value)

		return train_step

def compute_accuracy(logits, targets):
	""" Compute the accuracy """
	with tf.name_scope("accuracy"):
		correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(targets, 1)) # or tf.nn.in_top_k(logits, targets, 1)
		accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

		return accuracy


In [30]:
# Define the loss function and optimizer
cost = compute_loss_xent(logits, targets=y)
optimizer = train_optimizer(cost)

# Evaluate model
accuracy = compute_accuracy(logits, targets=y)

In [31]:
## Initializing all variables
init = tf.global_variables_initializer()

<h3>Launch the execution Graph</h3>

In [37]:
epochs = 20
num_batches = 74
batch_size = 128
display_step = 1

with tf.Session() as sess:
    sess.run(init)
    for i in range(epochs):
        for j in range(num_batches):
            batch_x = X_train[(j * batch_size):((j + 1) * batch_size)]
            batch_y = y_train[(j * batch_size):((j + 1) * batch_size)]
            sess.run(optimizer, feed_dict={X:batch_x, y:batch_y})
            loss, acc = sess.run([cost, accuracy], feed_dict={X:batch_x, y:batch_y})
        
        if (epochs % display_step) == 0:
            print("Epoch:", "%03d" % (i + 1), \
                  "loss=", "%.5f" % (loss), \
                  "Training accuracy:", "%.5f" % (acc))
    print("Training complete")

Epoch: 001 loss= 2.12400 Training accuracy: 0.26562
Epoch: 002 loss= 2.10584 Training accuracy: 0.28906
Epoch: 003 loss= 2.09809 Training accuracy: 0.31250
Epoch: 004 loss= 2.10334 Training accuracy: 0.32031
Epoch: 005 loss= 2.09133 Training accuracy: 0.31250
Epoch: 006 loss= 2.06612 Training accuracy: 0.31250
Epoch: 007 loss= 2.03537 Training accuracy: 0.29688
Epoch: 008 loss= 2.00568 Training accuracy: 0.32812
Epoch: 009 loss= 1.97046 Training accuracy: 0.32812
Epoch: 010 loss= 1.93769 Training accuracy: 0.35156
Epoch: 011 loss= 1.90964 Training accuracy: 0.37500
Epoch: 012 loss= 1.88287 Training accuracy: 0.35938
Epoch: 013 loss= 1.85540 Training accuracy: 0.35156
Epoch: 014 loss= 1.82537 Training accuracy: 0.36719
Epoch: 015 loss= 1.79135 Training accuracy: 0.38281
Epoch: 016 loss= 1.75475 Training accuracy: 0.39062
Epoch: 017 loss= 1.71714 Training accuracy: 0.39062
Epoch: 018 loss= 1.67875 Training accuracy: 0.41406
Epoch: 019 loss= 1.63970 Training accuracy: 0.46094
Epoch: 020 l