# Smile Classification

The objective is to classify face images as smiling or non-smiling.

In [2]:
from __future__ import print_function
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import os
import sys
import tarfile
from scipy import ndimage
from six.moves.urllib.request import urlretrieve
from six.moves import cPickle as pickle
import random
import hashlib

Load the data in a more manageable format: load each class into a separate dataset and store it on disk. Later we will merge them into a single dataset.

We will convert the entire dataset into a 4D array (image index, x, y, channel) of floating point values, normalized to have approximately **zero mean** and a **standard deviation of ~0.25** (pixel values are scaled to the range [-0.5, 0.5]) to make training easier.

In [3]:
# Number of target classes (one sub-folder per class below).
num_classes = 2
# One directory per class; the position of a folder in the list is later used
# as its integer label (see enumerate() in merge_datasets).
train_folders = ['datasets/train_folder/0', 'datasets/train_folder/1']
test_folders = ['datasets/test_folder/0', 'datasets/test_folder/1']

# Expected image width and height in pixels; load_image rejects other shapes.
image_size = 64
# Maximum pixel value, used by load_image to normalize pixel intensities.
pixel_depth = 255.0
# Number of colour channels per image.
image_depth = 3

def load_image(folder, min_num_images):
  """Load all images of a single smile/non-smile label into one array.

  Every file in `folder` is read and normalized with
  (pixel - pixel_depth / 2) / pixel_depth, i.e. to roughly [-0.5, 0.5] for
  8-bit images. Files that cannot be read are reported and skipped.

  Args:
    folder: directory containing the images of one class.
    min_num_images: minimum number of successfully loaded images required.

  Returns:
    A float32 array of shape (num_loaded, image_size, image_size, image_depth).

  Raises:
    Exception: if an image does not have the expected shape, or if fewer
      than `min_num_images` images could be loaded.
  """
  # List the directory once so that the buffer size and the loop below are
  # guaranteed to agree, even if the folder changes while we are loading.
  image_files = os.listdir(folder)
  dataset = np.ndarray(shape=(len(image_files), image_size, image_size, image_depth),
                       dtype=np.float32)
  image_index = 0
  for image in image_files:
    image_file = os.path.join(folder, image)
    try:
      # NOTE(review): scipy.ndimage.imread was deprecated in SciPy 1.0 and
      # removed in SciPy 1.2; newer environments need another image reader.
      image_data = (ndimage.imread(image_file).astype(float) -
                    pixel_depth / 2) / pixel_depth
      if image_data.shape != (image_size, image_size, image_depth):
        raise Exception('Unexpected image shape: %s' % str(image_data.shape))
      dataset[image_index, :, :, :] = image_data
      image_index += 1
    except IOError as e:
      print('Could not read:', image_file, ':', e, '- it\'s ok, skipping.')

  # Drop the unused tail of the buffer left by skipped files.
  num_images = image_index
  dataset = dataset[0:num_images, :, :]
  if num_images < min_num_images:
    raise Exception('Many fewer images than expected: %d < %d' %
                    (num_images, min_num_images))

  print('Full dataset tensor:', dataset.shape)
  print('Mean:', np.mean(dataset))
  print('Standard deviation:', np.std(dataset))
  return dataset
# Pickling datasets/train_folder/0.pickle.
# Full dataset tensor: (1238, 64, 64, 3)
# Mean: -0.0335986
# Standard deviation: 0.247544
# Pickling datasets/train_folder/1.pickle.
# Full dataset tensor: (1562, 64, 64, 3)
# Mean: -0.0137995
# Standard deviation: 0.249232
# Pickling datasets/test_folder/0.pickle.
# Full dataset tensor: (600, 64, 64, 3)
# Mean: -0.0210533
# Standard deviation: 0.249451
# Pickling datasets/test_folder/1.pickle.
# Full dataset tensor: (600, 64, 64, 3)
# Mean: -0.00345457
# Standard deviation: 0.249467

def maybe_pickle(data_folders, min_num_images_per_class, force=False):
  """Pickle each class folder to '<folder>.pickle' unless it already exists.

  Args:
    data_folders: iterable of class directories to convert.
    min_num_images_per_class: forwarded to load_image as the minimum number
      of images each class must contain.
    force: when True, regenerate the pickle even if the file is present.

  Returns:
    List of pickle file names, one per folder, in the same order. A name is
    returned even if writing that file failed (the failure is printed).
  """
  dataset_names = []
  for folder in data_folders:
    set_filename = folder + '.pickle'
    dataset_names.append(set_filename)
    if os.path.exists(set_filename) and not force:
      # You may override by setting force=True.
      print('%s already present - Skipping pickling.' % set_filename)
      continue

    print('Pickling %s.' % set_filename)
    dataset = load_image(folder, min_num_images_per_class)
    file_opened = False
    try:
      with open(set_filename, 'wb') as f:
        file_opened = True
        pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
    except Exception as e:
      print('Unable to save data to', set_filename, ':', e)
      # A failed dump leaves a truncated file behind; remove it so the next
      # run does not mistake it for a valid pickle and skip regeneration.
      # Only do so if we actually opened (and thus truncated) the file.
      if file_opened and os.path.exists(set_filename):
        try:
          os.remove(set_filename)
        except OSError as cleanup_error:
          print('Unable to remove partial file', set_filename, ':', cleanup_error)

  return dataset_names

# Convert every class folder to a pickle (skipped when the file already exists).
# The second argument is the minimum number of images required per class.
train_datasets = maybe_pickle(train_folders, 1200)
test_datasets = maybe_pickle(test_folders, 500)

datasets/train_folder/0.pickle already present - Skipping pickling.
datasets/train_folder/1.pickle already present - Skipping pickling.
datasets/test_folder/0.pickle already present - Skipping pickling.
datasets/test_folder/1.pickle already present - Skipping pickling.


One small problem is that the classes are not well balanced: there are 2162 smile images and 1838 non-smile images.

So we will prune the smile dataset to make both classes balanced.
We will also create a validation dataset for later tuning (e.g. early termination).

In [4]:
def make_arrays(nb_rows, img_size, img_depth=3):
  """Allocate uninitialized image and label buffers for `nb_rows` samples.

  Returns a (dataset, labels) pair: a float32 array of shape
  (nb_rows, img_size, img_size, img_depth) and an int32 array of shape
  (nb_rows,). When `nb_rows` is zero (or otherwise falsy) both are None.
  """
  if not nb_rows:
    return None, None
  images = np.ndarray(shape=(nb_rows, img_size, img_size, img_depth),
                      dtype=np.float32)
  targets = np.ndarray(shape=(nb_rows,), dtype=np.int32)
  return images, targets

def merge_datasets(pickle_files, train_size, valid_size=0):
  """Build balanced validation/training arrays from per-class pickles.

  Each pickle is loaded and shuffled in place; its first
  `valid_size // num_classes` images go to the validation set and the next
  `train_size // num_classes` images go to the training set. The label of a
  class is the index of its pickle in `pickle_files`.

  Returns:
    (valid_dataset, valid_labels, train_dataset, train_labels); the two
    validation entries are None when `valid_size` is 0.
  """
  class_count = len(pickle_files)

  valid_dataset, valid_labels = make_arrays(valid_size, image_size)
  train_dataset, train_labels = make_arrays(train_size, image_size)
  valid_per_class = valid_size // class_count
  train_per_class = train_size // class_count
  # Index (within one class) just past the last image used for training.
  used_per_class = valid_per_class + train_per_class

  for label, pickle_file in enumerate(pickle_files):
    # Destination row ranges for this class in the merged arrays.
    valid_lo, valid_hi = label * valid_per_class, (label + 1) * valid_per_class
    train_lo, train_hi = label * train_per_class, (label + 1) * train_per_class

    try:
      with open(pickle_file, 'rb') as f:
        class_images = pickle.load(f)

        # Shuffle within the class so that the validation/training split
        # is random.
        np.random.shuffle(class_images)
        if valid_dataset is not None:
          valid_dataset[valid_lo:valid_hi, :, :] = class_images[:valid_per_class, :, :]
          valid_labels[valid_lo:valid_hi] = label

        train_dataset[train_lo:train_hi, :, :] = (
            class_images[valid_per_class:used_per_class, :, :])
        train_labels[train_lo:train_hi] = label
    except Exception as e:
      print('Unable to process data from', pickle_file, ':', e)
      raise
  return valid_dataset, valid_labels, train_dataset, train_labels

# Total number of samples per split; each is divided evenly between the classes
# inside merge_datasets.
# train_size = 2800
train_size = 2400
valid_size = 600
test_size = 600

# Training set: built from the training pickles only, with no validation part
# (the first two return values are None and are discarded).
_, _, train_dataset, train_labels = merge_datasets(
  train_datasets, train_size)
# Validation and test sets are both carved out of the test pickles: the
# "valid" outputs become the validation set and the "train" outputs of this
# call are used as the test set.
valid_dataset, valid_labels, test_dataset, test_labels = merge_datasets(
  test_datasets, test_size, valid_size)

print('Training:', train_dataset.shape, train_labels.shape)
print('Validation:', valid_dataset.shape, valid_labels.shape)
print('Testing:', test_dataset.shape, test_labels.shape)

Training: (2400, 64, 64, 3) (2400,)
Validation: (600, 64, 64, 3) (600,)
Testing: (600, 64, 64, 3) (600,)


Next, we'll randomize the data. It's important to have the labels well shuffled for the training and test distributions to match.

In [5]:
def randomize(dataset, labels):
  """Shuffle samples and labels together using one random permutation.

  Returns new (dataset, labels) arrays in which row i of both still refers
  to the same sample.
  """
  order = np.random.permutation(labels.shape[0])
  return dataset[order, :, :], labels[order]
# merge_datasets writes the classes in contiguous runs, so shuffle every split
# to interleave the labels.
train_dataset, train_labels = randomize(train_dataset, train_labels)
test_dataset, test_labels = randomize(test_dataset, test_labels)
valid_dataset, valid_labels = randomize(valid_dataset, valid_labels)

# pretty_labels = {0: 'non-smile', 1: 'smile'}
# def disp_sample_dataset(dataset, labels):
#   print(labels)
#   print(labels.shape)
#   print(dataset)
#   print(dataset.shape)
#   items = random.sample(range(len(labels)), 8)
#   for i, item in enumerate(items):
#     print(item)
#     plt.subplot(2, 4, i+1)
#     plt.axis('off')
#     plt.title(pretty_labels[labels[item]])
#     plt.imshow(dataset[item],interpolation='nearest')
#     plt.show()
# disp_sample_dataset(train_dataset, train_labels)

Save the data for later reuse.

In [6]:
# Single file holding all three splits, for reuse without re-reading images.
pickle_file = 'GENKI4K.pickle'

try:
  save = {
    'train_dataset': train_dataset,
    'train_labels': train_labels,
    'valid_dataset': valid_dataset,
    'valid_labels': valid_labels,
    'test_dataset': test_dataset,
    'test_labels': test_labels,
    }
  # The context manager closes the file even when pickle.dump raises.
  with open(pickle_file, 'wb') as f:
    pickle.dump(save, f, pickle.HIGHEST_PROTOCOL)
except Exception as e:
  print('Unable to save data to', pickle_file, ':', e)
  raise

# Report the on-disk size of the saved file in bytes.
statinfo = os.stat(pickle_file)
print('Compressed pickle size:', statinfo.st_size)

Compressed pickle size: 176962046


Reformat into a TensorFlow-friendly shape:
* convolutions need the image data formatted as a cube (width by height by #channels)
* labels as float 1-hot encodings.

In [7]:
num_labels = 2
num_channels = image_depth # = 3 (RGB)
def reformat(dataset, labels):
  """Return (images, one-hot labels) as float32 arrays.

  Images are reshaped to (-1, image_size, image_size, num_channels); integer
  labels become rows of length num_labels with a 1.0 in the label's column.
  """
  images = dataset.reshape((-1, image_size, image_size, num_channels))
  images = images.astype(np.float32)
  # Broadcasting an (N, 1) column against [0 .. num_labels-1] gives the
  # one-hot matrix.
  label_column = labels[:,None]
  one_hot = (np.arange(num_labels) == label_column).astype(np.float32)
  return images, one_hot
# Convert every split in place; labels go from shape (N,) to one-hot (N, num_labels).
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (2400, 64, 64, 3) (2400, 2)
Validation set (600, 64, 64, 3) (600, 2)
Test set (600, 64, 64, 3) (600, 2)


In [8]:
def accuracy(predictions, labels):
  """Percentage of rows whose arg-max prediction equals the arg-max label."""
  predicted_class = np.argmax(predictions, 1)
  true_class = np.argmax(labels, 1)
  num_correct = np.sum(predicted_class == true_class)
  return 100.0 * num_correct / predictions.shape[0]

First let's build a small network with two convolution layers, followed by one fully connected layer:
* conv2d+ReLU - 64x64x16
* maxpooling  - 32x32x16
* conv2d+ReLU - 32x32x16
* maxpooling  - 16x16x16
* FC + ReLU   - 4096x64
* FC +softmax - 64x2

In [10]:
# Hyper-parameters of the small two-convolution network.
batch_size = 16   # samples per training step
patch_size = 5    # convolution kernel is patch_size x patch_size
depth = 16        # number of filters in each convolution layer
num_hidden = 64   # units in the fully connected hidden layer

graph = tf.Graph()

with graph.as_default():

  # Input data.
  # Training batches are fed at run time; validation and test sets are baked
  # into the graph as constants.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  
  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  # depth: number of filters
  # 64x64x16

  layer1_biases = tf.Variable(tf.zeros([depth]))

  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  # 32x32x16

  # Two stride-2 poolings shrink each spatial side by 4 before flattening.
  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  # 16x16x16x64
  # 4096x64

  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  # 64x2
  
  # Model.
  def model(data):
    """Return logits for `data`: conv-pool, conv-pool, FC+ReLU, FC."""
    conv1 = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
    bias1 = tf.nn.relu(conv1 + layer1_biases)
    pool1 = tf.nn.max_pool(bias1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.conv2d(pool1, layer2_weights, [1, 1, 1, 1], padding='SAME')
    bias2 = tf.nn.relu(conv2 + layer2_biases)
    pool2 = tf.nn.max_pool(bias2, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    # Flatten each sample's feature maps into one row for the dense layers.
    shape = pool2.get_shape().as_list()
    reshape = tf.reshape(pool2, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases
  
  # Training computation.
  logits = model(tf_train_dataset)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    
  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
  
  # Predictions for the training, validation, and test data.
  # All three reuse the same weight variables through model().
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

num_steps = 1001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')
  
  for step in range(num_steps):
    # Walk through the training set in consecutive mini-batches, wrapping
    # around so that a full batch is always available.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    # Report progress every 50 steps.
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))

  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 0.771831
Minibatch accuracy: 62.5%
Validation accuracy: 50.0%
Minibatch loss at step 50: 0.706261
Minibatch accuracy: 43.8%
Validation accuracy: 55.8%
Minibatch loss at step 100: 0.600213
Minibatch accuracy: 62.5%
Validation accuracy: 56.8%
Minibatch loss at step 150: 0.759039
Minibatch accuracy: 43.8%
Validation accuracy: 58.5%
Minibatch loss at step 200: 0.685244
Minibatch accuracy: 62.5%
Validation accuracy: 59.8%
Minibatch loss at step 250: 0.636330
Minibatch accuracy: 62.5%
Validation accuracy: 67.8%
Minibatch loss at step 300: 0.512270
Minibatch accuracy: 87.5%
Validation accuracy: 66.7%
Minibatch loss at step 350: 0.528772
Minibatch accuracy: 81.2%
Validation accuracy: 72.3%
Minibatch loss at step 400: 0.658931
Minibatch accuracy: 68.8%
Validation accuracy: 70.8%
Minibatch loss at step 450: 0.400845
Minibatch accuracy: 87.5%
Validation accuracy: 71.2%
Minibatch loss at step 500: 0.399185
Minibatch accuracy: 81.2%
Validation accuracy: 71.2%
M

Next we will implement dropout and learning rate decay:

In [12]:
# Hyper-parameters of the two-convolution network with dropout and
# learning-rate decay.
batch_size = 16   # samples per training step
patch_size = 5    # convolution kernel is patch_size x patch_size
depth = 16        # number of filters in each convolution layer
num_hidden = 64   # units in the fully connected hidden layer
beta_regul = 1e-3 # NOTE(review): defined but not used by this cell
drop_out = 0.5    # passed to tf.nn.dropout as the *keep* probability

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  # Step counter driving the learning-rate schedule; it is incremented by the
  # optimizer and must not be treated as a trainable weight.
  global_step = tf.Variable(0, trainable=False)

  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  # depth: number of filters
  # 64x64x16

  layer1_biases = tf.Variable(tf.zeros([depth]))

  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  # 32x32x16

  # Two stride-2 poolings shrink each spatial side by 4 before flattening.
  layer3_weights = tf.Variable(tf.truncated_normal(
      [image_size // 4 * image_size // 4 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  # 16x16x16x64
  # 4096x64

  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
  # 64x2

  # Model.
  def model(data, keep_prob):
    """Return logits for `data`; `keep_prob` is the dropout keep probability
    applied to the hidden layer (use 1.0 for evaluation)."""
    conv1 = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='SAME')
    bias1 = tf.nn.relu(conv1 + layer1_biases)
    pool1 = tf.nn.max_pool(bias1, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.conv2d(pool1, layer2_weights, [1, 1, 1, 1], padding='SAME')
    bias2 = tf.nn.relu(conv2 + layer2_biases)
    pool2 = tf.nn.max_pool(bias2, [1, 2, 2, 1], [1, 2, 2, 1], padding='SAME')
    # Flatten each sample's feature maps into one row for the dense layers.
    shape = pool2.get_shape().as_list()
    reshape = tf.reshape(pool2, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    drop = tf.nn.dropout(hidden, keep_prob)
    return tf.matmul(drop, layer4_weights) + layer4_biases

  # Training computation.
  logits = model(tf_train_dataset, drop_out)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

  # Optimizer.
  # The decayed rate must actually be handed to the optimizer, and the
  # optimizer must advance global_step; otherwise the schedule never changes
  # and training silently runs at a constant rate.
  learning_rate = tf.train.exponential_decay(0.05, global_step, 1000, 0.85, staircase=True)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)

  # Predictions for the training, validation, and test data.
  # Evaluation uses keep_prob=1.0, i.e. dropout disabled.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset, 1.0))
  test_prediction = tf.nn.softmax(model(tf_test_dataset, 1.0))

num_steps = 5001

with tf.Session(graph=graph) as session:
  tf.global_variables_initializer().run()
  print('Initialized')

  for step in range(num_steps):
    # Walk through the training set in consecutive mini-batches, wrapping
    # around so that a full batch is always available.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))

  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Initialized
Minibatch loss at step 0: 4.148318
Minibatch accuracy: 18.8%
Validation accuracy: 50.0%
Minibatch loss at step 50: 0.657512
Minibatch accuracy: 62.5%
Validation accuracy: 52.7%
Minibatch loss at step 100: 0.713337
Minibatch accuracy: 50.0%
Validation accuracy: 53.7%
Minibatch loss at step 150: 0.701928
Minibatch accuracy: 62.5%
Validation accuracy: 61.0%
Minibatch loss at step 200: 0.709721
Minibatch accuracy: 50.0%
Validation accuracy: 60.0%
Minibatch loss at step 250: 0.657887
Minibatch accuracy: 50.0%
Validation accuracy: 58.2%
Minibatch loss at step 300: 0.765534
Minibatch accuracy: 37.5%
Validation accuracy: 55.2%
Minibatch loss at step 350: 0.631889
Minibatch accuracy: 62.5%
Validation accuracy: 58.3%
Minibatch loss at step 400: 0.746199
Minibatch accuracy: 43.8%
Validation accuracy: 60.2%
Minibatch loss at step 450: 0.781229
Minibatch accuracy: 75.0%
Validation accuracy: 63.5%
Minibatch loss at step 500: 0.502035
Minibatch accuracy: 87.5%
Validation accuracy: 63.7%
M

Next we will try to implement the LeNet5 architecture, slightly modified: it consists of two sets of convolution + pooling layers, followed by 2 fully-connected layers:
* Input - 64x64x3
* conv1 + ReLU - 60x60x16
* avg_pool - 30x30x16
* conv3 + ReLU - 26x26x16
* avg_pool - 13x13x16
* FC 64 + ReLU
* FC 64x2 + softmax

In [None]:
# Hyper-parameters of the LeNet5-style network.
batch_size = 16   # samples per training step
patch_size = 5    # convolution kernel is patch_size x patch_size
depth = 16        # number of filters in each convolution layer
num_hidden = 64   # units in the fully connected hidden layer

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)

  # Variables.
  layer1_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, num_channels, depth], stddev=0.1))
  layer1_biases = tf.Variable(tf.zeros([depth]))
  layer2_weights = tf.Variable(tf.truncated_normal(
      [patch_size, patch_size, depth, depth], stddev=0.1))
  layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
  # Spatial side after conv(VALID) -> pool/2 -> conv(VALID) -> pool/2:
  # 64 -> 60 -> 30 -> 26 -> 13 for image_size=64 and patch_size=5.
  size3 = ((image_size - patch_size + 1) // 2 - patch_size + 1) // 2
  layer3_weights = tf.Variable(tf.truncated_normal(
      [size3 * size3 * depth, num_hidden], stddev=0.1))
  layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
  layer4_weights = tf.Variable(tf.truncated_normal(
      [num_hidden, num_labels], stddev=0.1))
  layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

  # Model.
  def model(data):
    """Return logits for `data`: conv, avg-pool, conv, avg-pool, FC+ReLU, FC.

    The size comments below assume image_size=64 and patch_size=5.
    """
    # C1 input 64 x 64
    conv1 = tf.nn.conv2d(data, layer1_weights, [1, 1, 1, 1], padding='VALID')
    bias1 = tf.nn.relu(conv1 + layer1_biases)
    # S2 input 60 x 60
    pool2 = tf.nn.avg_pool(bias1, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
    # C3 input 30 x 30
    conv3 = tf.nn.conv2d(pool2, layer2_weights, [1, 1, 1, 1], padding='VALID')
    bias3 = tf.nn.relu(conv3 + layer2_biases)
    # S4 input 26 x 26
    pool4 = tf.nn.avg_pool(bias3, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
    # F6 input 13 x 13
    shape = pool4.get_shape().as_list()
    reshape = tf.reshape(pool4, [shape[0], shape[1] * shape[2] * shape[3]])
    hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
    return tf.matmul(hidden, layer4_weights) + layer4_biases

  # Training computation.
  logits = model(tf_train_dataset)
  # TF 1.x only accepts keyword arguments here; the positional form raises
  # an error.
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

  # Optimizer.
  optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

  # Predictions for the training, validation, and test data.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
  test_prediction = tf.nn.softmax(model(tf_test_dataset))

num_steps = 20001

with tf.Session(graph=graph) as session:
  # tf.initialize_all_variables is deprecated in favour of this call, which
  # the earlier cells already use.
  tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    # Walk through the training set in consecutive mini-batches, wrapping
    # around so that a full batch is always available.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

The next version we'll build a model inspired by VGG16 architecture:
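* conv1_1 + ReLU - 64x64x32
* conv1_2 + ReLU - 64x64x32
* MP - 32x32x32
* conv2_1 + ReLU - 32x32x64
* conv2_2 + ReLU - 32x32x64
* MP - 16x16x64
* conv3_1 + ReLU - 16x16x128
* conv3_2 + ReLU - 16x16x128
* conv3_3 + ReLU - 16x16x128
* MP - 8x8x128
* conv4_1 + ReLU - 8x8x256
* conv4_2 + ReLU - 8x8x256
* conv4_3 + ReLU - 8x8x256
* MP - 4x4x256
* FC 4096x4096 + ReLU + dropout
* FC 4096x2 + softmax

(The 4096x4096 and 4096x1000 fully-connected layers of the original VGG16 head are left out of this version; they remain in the code only as commented-out lines.)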

In [11]:
# Hyper-parameters of the VGG16-inspired network.
batch_size = 16   # samples per training step
patch_size = 3    # NOTE(review): kernels below hard-code 3x3 instead of using this
drop_out = 0.5    # passed to tf.nn.dropout as the *keep* probability

graph = tf.Graph()

with graph.as_default():

  # Input data.
  tf_train_dataset = tf.placeholder(
    tf.float32, shape=(batch_size, image_size, image_size, num_channels))
  tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
  tf_valid_dataset = tf.constant(valid_dataset)
  tf_test_dataset = tf.constant(test_dataset)
  # Step counter driving the learning-rate schedule; incremented by the
  # optimizer, so it must not be treated as a trainable weight.
  global_step = tf.Variable(0, trainable=False)

  # Variables
  # Block 1: two 3x3 convolutions with 32 filters.
  layer1_1weights = tf.Variable(tf.truncated_normal(
      [3, 3, 3, 32], stddev=0.1))
  layer1_1biases = tf.Variable(tf.zeros([32]))

  layer1_2weights = tf.Variable(tf.truncated_normal(
      [3, 3, 32, 32], stddev=0.1))
  layer1_2biases = tf.Variable(tf.constant(1.0, shape=[32]))

  # Block 2: two 3x3 convolutions with 64 filters.
  layer2_1weights = tf.Variable(tf.truncated_normal(
      [3, 3, 32, 64], stddev=0.1))
  layer2_1biases = tf.Variable(tf.constant(1.0, shape=[64]))

  layer2_2weights = tf.Variable(tf.truncated_normal(
      [3, 3, 64, 64], stddev=0.1))
  layer2_2biases = tf.Variable(tf.constant(1.0, shape=[64]))

  # Block 3: three 3x3 convolutions with 128 filters.
  layer3_1weights = tf.Variable(tf.truncated_normal(
      [3, 3, 64, 128], stddev=0.1))
  layer3_1biases = tf.Variable(tf.constant(1.0, shape=[128]))

  layer3_2weights = tf.Variable(tf.truncated_normal(
      [3, 3, 128, 128], stddev=0.1))
  layer3_2biases = tf.Variable(tf.constant(1.0, shape=[128]))

  layer3_3weights = tf.Variable(tf.truncated_normal(
      [3, 3, 128, 128], stddev=0.1))
  layer3_3biases = tf.Variable(tf.constant(1.0, shape=[128]))

  # Block 4: three 3x3 convolutions with 256 filters.
  layer4_1weights = tf.Variable(tf.truncated_normal(
      [3, 3, 128, 256], stddev=0.1))
  layer4_1biases = tf.Variable(tf.constant(1.0, shape=[256]))

  layer4_2weights = tf.Variable(tf.truncated_normal(
      [3, 3, 256, 256], stddev=0.1))
  layer4_2biases = tf.Variable(tf.constant(1.0, shape=[256]))

  layer4_3weights = tf.Variable(tf.truncated_normal(
      [3, 3, 256, 256], stddev=0.1))
  layer4_3biases = tf.Variable(tf.constant(1.0, shape=[256]))

  # Size of the flattened output of the last pooling layer. Each of the four
  # SAME-padded stride-2 poolings halves the side length (rounding up), and
  # the last convolution block has 256 filters: 64 -> 4, 4 * 4 * 256 = 4096.
  final_side = image_size
  for _ in range(4):
    final_side = (final_side + 1) // 2
  big_shape = final_side * final_side * 256

  fc1w = tf.Variable(tf.truncated_normal(
      [big_shape, 4096], dtype=tf.float32, stddev=0.1))
  fc1b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32))

  # The second and third fully connected layers of VGG16 (4096x4096 and
  # 4096x1000) are intentionally left out; fc1 feeds the classifier directly.

  fc4w = tf.Variable(tf.truncated_normal(
      [4096, 2], dtype=tf.float32, stddev=0.1))
  fc4b = tf.Variable(tf.constant(1.0, shape=[2], dtype=tf.float32))

  # Model.
  def model(data, keep_prob):
    """Return logits for `data`; `keep_prob` is the dropout keep probability
    applied after the first fully connected layer (use 1.0 for evaluation)."""
    # conv1
    conv1_1 = tf.nn.conv2d(data, layer1_1weights, [1,1,1,1], padding='SAME')
    bias1_1 = tf.nn.relu(conv1_1 + layer1_1biases)

    conv1_2 = tf.nn.conv2d(bias1_1, layer1_2weights, [1,1,1,1], padding='SAME')
    bias1_2 = tf.nn.relu(conv1_2 + layer1_2biases)

    pool1 = tf.nn.max_pool(bias1_2, [1,2,2,1], [1,2,2,1], padding='SAME')

    # conv2
    conv2_1 = tf.nn.conv2d(pool1, layer2_1weights, [1,1,1,1], padding='SAME')
    bias2_1 = tf.nn.relu(conv2_1 + layer2_1biases)

    conv2_2 = tf.nn.conv2d(bias2_1, layer2_2weights, [1,1,1,1], padding='SAME')
    bias2_2 = tf.nn.relu(conv2_2 + layer2_2biases)

    pool2 = tf.nn.max_pool(bias2_2, [1,2,2,1], [1,2,2,1], padding='SAME')

    # conv3
    conv3_1 = tf.nn.conv2d(pool2, layer3_1weights, [1,1,1,1], padding='SAME')
    bias3_1 = tf.nn.relu(conv3_1 + layer3_1biases)

    conv3_2 = tf.nn.conv2d(bias3_1, layer3_2weights, [1,1,1,1], padding='SAME')
    bias3_2 = tf.nn.relu(conv3_2 + layer3_2biases)

    conv3_3 = tf.nn.conv2d(bias3_2, layer3_3weights, [1,1,1,1], padding='SAME')
    bias3_3 = tf.nn.relu(conv3_3 + layer3_3biases)

    pool3 = tf.nn.max_pool(bias3_3, [1,2,2,1], [1,2,2,1], padding='SAME')

    # conv4
    conv4_1 = tf.nn.conv2d(pool3, layer4_1weights, [1,1,1,1], padding='SAME')
    bias4_1 = tf.nn.relu(conv4_1 + layer4_1biases)

    conv4_2 = tf.nn.conv2d(bias4_1, layer4_2weights, [1,1,1,1], padding='SAME')
    bias4_2 = tf.nn.relu(conv4_2 + layer4_2biases)

    conv4_3 = tf.nn.conv2d(bias4_2, layer4_3weights, [1,1,1,1], padding='SAME')
    bias4_3 = tf.nn.relu(conv4_3 + layer4_3biases)

    pool4 = tf.nn.max_pool(bias4_3, [1,2,2,1], [1,2,2,1], padding='SAME')

    # Number of features per sample after the last pooling layer.
    flat_size = int(np.prod(pool4.get_shape().as_list()[1:]))

    # fully-connected layer
    # fc1
    pool4_flat = tf.reshape(pool4, [-1, flat_size])
    fc1 = tf.nn.relu(tf.matmul(pool4_flat, fc1w) + fc1b)
    drop1 = tf.nn.dropout(fc1, keep_prob)

    # fc4 (classifier); softmax is applied by the caller.
    return tf.matmul(drop1, fc4w) + fc4b

  # Training computation.
  logits = model(tf_train_dataset, drop_out)
  loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

  # Optimizer.
  # Learning rate starts at 1e-5 and is multiplied by 0.85 every 1000 steps.
  learning_rate = tf.train.exponential_decay(1e-5, global_step, 1000, 0.85, staircase=True)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

  # Predictions for the training, validation, and test data.
  # Evaluation uses keep_prob=1.0, i.e. dropout disabled.
  train_prediction = tf.nn.softmax(logits)
  valid_prediction = tf.nn.softmax(model(tf_valid_dataset, 1.0))
  test_prediction = tf.nn.softmax(model(tf_test_dataset, 1.0))

num_steps = 20001

with tf.Session(graph=graph) as session:
  # tf.initialize_all_variables is deprecated in favour of this call, which
  # the earlier cells already use.
  tf.global_variables_initializer().run()
  print('Initialized')
  for step in range(num_steps):
    # Walk through the training set in consecutive mini-batches, wrapping
    # around so that a full batch is always available.
    offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
    batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
    batch_labels = train_labels[offset:(offset + batch_size), :]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions = session.run(
      [optimizer, loss, train_prediction], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), valid_labels))
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))

Instructions for updating:
Use `tf.global_variables_initializer` instead.
Initialized
Minibatch loss at step 0: 10884.597656
Minibatch accuracy: 31.2%
Validation accuracy: 50.0%
Minibatch loss at step 50: 164.331238
Minibatch accuracy: 50.0%
Validation accuracy: 49.8%
Minibatch loss at step 100: 115.626846
Minibatch accuracy: 62.5%
Validation accuracy: 50.0%
Minibatch loss at step 150: 105.265442
Minibatch accuracy: 50.0%
Validation accuracy: 50.2%
Minibatch loss at step 200: 53.830330
Minibatch accuracy: 68.8%
Validation accuracy: 52.0%
Minibatch loss at step 250: 128.446320
Minibatch accuracy: 37.5%
Validation accuracy: 50.0%
Minibatch loss at step 300: 73.770050
Minibatch accuracy: 62.5%
Validation accuracy: 54.2%
Minibatch loss at step 350: 87.293640
Minibatch accuracy: 62.5%
Validation accuracy: 50.3%
Minibatch loss at step 400: 32.292915
Minibatch accuracy: 62.5%
Validation accuracy: 60.0%
Minibatch loss at step 450: 32.448914
Minibatch accuracy: 75.0%
Validation accuracy: 49.5%


Validation accuracy: 73.2%
Minibatch loss at step 4350: 7.420481
Minibatch accuracy: 62.5%
Validation accuracy: 73.8%
Minibatch loss at step 4400: 8.748432
Minibatch accuracy: 68.8%
Validation accuracy: 74.0%
Minibatch loss at step 4450: 6.192060
Minibatch accuracy: 62.5%
Validation accuracy: 71.3%
Minibatch loss at step 4500: 8.558382
Minibatch accuracy: 56.2%
Validation accuracy: 71.8%
Minibatch loss at step 4550: 9.273440
Minibatch accuracy: 56.2%
Validation accuracy: 72.2%
Minibatch loss at step 4600: 4.444213
Minibatch accuracy: 81.2%
Validation accuracy: 75.3%
Minibatch loss at step 4650: 10.172523
Minibatch accuracy: 62.5%
Validation accuracy: 74.0%
Minibatch loss at step 4700: 7.580828
Minibatch accuracy: 68.8%
Validation accuracy: 74.0%
Minibatch loss at step 4750: 4.831738
Minibatch accuracy: 75.0%
Validation accuracy: 73.7%
Minibatch loss at step 4800: 6.579143
Minibatch accuracy: 75.0%
Validation accuracy: 73.8%
Minibatch loss at step 4850: 10.035216
Minibatch accuracy: 62.

The result is getting better. But this net has many hyperparameters to tune: initial learning rate, learning rate decay, decay step, decay rate, batch size, weight initialization, ... So far I have basically just set them by guessing. I should find a systematic way to tune all of these parameters.

I will do so in a next version.