In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range


pickle_file = 'notMNIST.pickle'

# Read the whole pickled dictionary first, then unpack it once the file is
# closed.
# NOTE(review): pickle.load can execute arbitrary code; only open a pickle
# file that comes from a trusted source.
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)

train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # hint to help gc free up memory

# Report the array shapes of each split as a quick sanity check.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)


In [2]:
image_size = 28    # height and width of each (square) image
num_labels = 10    # number of classes, i.e. width of the one-hot label rows
num_channels = 1   # grayscale

import numpy as np


def reformat(dataset, labels):
  """Convert images to a 4-D float32 array and labels to one-hot float32 rows.

  Args:
    dataset: array that can be reshaped to
      (-1, image_size, image_size, num_channels).
    labels: array of integer class ids, indexed along its first axis.

  Returns:
    Tuple (dataset, labels): the reshaped float32 images and a float32
    array with a 1.0 at the position of each label and 0.0 elsewhere.
  """
  images = dataset.reshape(-1, image_size, image_size, num_channels)
  images = images.astype(np.float32)
  # Broadcasting a column of labels against the row [0 .. num_labels-1]
  # yields a boolean one-hot matrix, which is then cast to float32.
  class_ids = np.arange(num_labels)
  one_hot = np.equal(class_ids, np.expand_dims(labels, 1))
  return images, one_hot.astype(np.float32)
# Convert every split in place: images become 4-D float32 arrays and labels
# become one-hot rows.
# NOTE(review): the names are rebound to the converted arrays, so running
# this cell a second time would feed already-converted arrays to reformat().
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the new shapes of each split.
print('Training set %s %s' % (train_dataset.shape, train_labels.shape))
print('Validation set %s %s' % (valid_dataset.shape, valid_labels.shape))
print('Test set %s %s' % (test_dataset.shape, test_labels.shape))

def accuracy(predictions, labels):
  """Return the percentage of rows whose arg-max class matches the label.

  Args:
    predictions: 2-D array of per-class scores, one row per example.
    labels: 2-D array with the same row count; the arg-max of each row is
      taken as the true class.

  Returns:
    100 * (number of matching rows) / (number of rows in predictions).
  """
  predicted_classes = np.argmax(predictions, 1)
  true_classes = np.argmax(labels, 1)
  num_correct = np.sum(predicted_classes == true_classes)
  return 100.0 * num_correct / predictions.shape[0]

Training set (200000, 28, 28, 1) (200000, 10)
Validation set (10000, 28, 28, 1) (10000, 10)
Test set (10000, 28, 28, 1) (10000, 10)


In [3]:
# Utils for Tensorboard
def variable_summaries(var):
  """Attach mean/stddev/max/min scalar summaries to a Tensor (for TensorBoard).

  Uses the `tf.summary.scalar` API, matching the rest of this notebook,
  instead of the legacy `tf.scalar_summary` function.

  NOTE: a later cell redefines `variable_summaries` with the signature
  (var, subscope, scope="summaries"); the model-building cell calls that
  two-argument form.

  Args:
    var: the Tensor to summarize.
  """
  with tf.name_scope('summaries'):
    mean = tf.reduce_mean(var)
    tf.summary.scalar('mean', mean)
    with tf.name_scope('stddev'):
      # Standard deviation over all elements of `var`.
      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.summary.scalar('stddev', stddev)
    tf.summary.scalar('max', tf.reduce_max(var))
    tf.summary.scalar('min', tf.reduce_min(var))
    # Histogram summary intentionally left disabled in this version:
    # tf.summary.histogram('histogram', var)



In [43]:
# --- Model / training hyperparameters ---
batch_size = 64        # examples per training step (fixes the placeholder shapes)
patch_size = 5         # height and width of the convolution kernels
depth = 3              # number of output channels of each convolution layer
num_hidden = 64        # units in the fully connected hidden layer
learning_rate = 0.05   # NOTE: rebound to 0.1 inside the graph-building block below

# --- Convolution / pooling geometry ---
conv_stride = 1
maxpool_ksize = 2
maxpool_stride = 2
maxpool_padding = 'VALID'



graph = tf.Graph()

with graph.as_default():

    # Learning-rate schedule: exponential staircase decay driven by
    # global_step. NOTE: this rebinds the notebook-level `learning_rate`;
    # 0.1 is the initial rate actually used by the optimizer.
    learning_rate = 0.1
    decay_rate = 0.9
    decay_steps = 1000
    global_step = tf.Variable(0)  # count the number of steps taken.
    decayed_learning_rate = tf.train.exponential_decay(
        learning_rate=learning_rate, global_step=global_step,
        decay_steps=decay_steps, decay_rate=decay_rate, staircase=True)

    # Input data. Training batches are fed through placeholders; the full
    # validation and test sets are embedded in the graph as constants.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels),
        name="train_data")
    tf_train_labels = tf.placeholder(
        tf.float32, shape=(batch_size, num_labels), name="train_labels")
    tf_valid_dataset = tf.constant(valid_dataset, name="valid_data")
    tf_test_dataset = tf.constant(test_dataset, name="test_data")

    # Variables.
    layer1_weights = tf.Variable(
        tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1),
        name="layer1_weight")
    layer1_biases = tf.Variable(tf.zeros([depth]))
    layer2_weights = tf.Variable(
        tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    # Two max-pool layers (ksize 2, stride 2, 'VALID') each halve the spatial
    # size with floor rounding, so the flattened feature count is
    # (image_size // 4) ** 2 * depth. The explicit parentheses matter: without
    # them the expression is only correct when image_size is divisible by 4.
    layer3_weights = tf.Variable(
        tf.truncated_normal(
            [(image_size // 4) * (image_size // 4) * depth, num_hidden], stddev=0.1))
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    # Model.
    def model_2(data,train=True):
        """Build conv -> pool -> conv -> pool -> dense -> dense and return logits.

        Args:
          data: 4-D image tensor. Its static shape must be fully known,
            because the flattening step reads it via get_shape().
          train: when True, also register TensorBoard summaries for the
            first-layer weights/biases and an image grid of the first-layer
            kernels. Pass False for evaluation towers so these summaries
            are not registered more than once.

        Returns:
          Logits tensor of shape [batch, num_labels].
        """
        with tf.name_scope("hidden_1"):
            conv = tf.nn.conv2d(data, layer1_weights, [1, conv_stride, conv_stride, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer1_biases)
            if train:
                variable_summaries(layer1_weights, "layer1_weights")
                variable_summaries(layer1_biases, "layer1_biases")

                grid = put_kernels_on_grid(layer1_weights)
                tf.summary.image('layer1_weights', grid)

        with tf.name_scope("maxpool_1"):
            maxpool = tf.nn.max_pool(hidden, ksize=[1, maxpool_ksize, maxpool_ksize, 1],
                                     strides=[1, maxpool_stride, maxpool_stride, 1],
                                     padding=maxpool_padding)

        with tf.name_scope("hidden_2"):
            conv = tf.nn.conv2d(maxpool, layer2_weights, [1, conv_stride, conv_stride, 1], padding='SAME')
            hidden = tf.nn.relu(conv + layer2_biases)

        maxpool = tf.nn.max_pool(hidden, ksize=[1, maxpool_ksize, maxpool_ksize, 1],
                                 strides=[1, maxpool_stride, maxpool_stride, 1],
                                 padding=maxpool_padding)

        # Flatten each example's feature maps into one row for the dense layers.
        shape = maxpool.get_shape().as_list()
        reshape = tf.reshape(maxpool, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    # Training computation.
    logits = model_2(tf_train_dataset)

    with tf.name_scope('total'):
        # Keyword arguments keep the call valid regardless of the positional
        # order of (labels, logits), which differs between TensorFlow versions.
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    tf.summary.scalar('loss',loss)

    # Optimizer. Passing global_step makes minimize() increment it each step,
    # which in turn drives the learning-rate decay.
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=decayed_learning_rate).minimize(loss, global_step=global_step)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)

    correct_train_predictions = tf.equal(tf.argmax(train_prediction, 1), tf.argmax(tf_train_labels, 1))
    with tf.name_scope('accuracy_train'):
        train_accuracy = tf.reduce_mean(tf.cast(correct_train_predictions, tf.float32))

    valid_prediction = tf.nn.softmax(model_2(tf_valid_dataset, train=False))
    correct_valid_predictions = tf.equal(tf.argmax(valid_prediction, 1), tf.argmax(valid_labels, 1))
    with tf.name_scope('accuracy_valid'):
        valid_accuracy = tf.reduce_mean(tf.cast(correct_valid_predictions, tf.float32))

    tf.summary.scalar('accuracy_train',train_accuracy)
    tf.summary.scalar('accuracy_valid',valid_accuracy)

    # train=False: the test tower must not register the layer-1 summaries and
    # kernel grid a second time (they are already added by the training tower).
    test_prediction = tf.nn.softmax(model_2(tf_test_dataset, train=False))

    merged = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

Who would enter a prime number of filters
grid: 3 = (1, 3)
Who would enter a prime number of filters
grid: 3 = (1, 3)


In [44]:
num_steps = 51

run_id="test44"
# NOTE(review): hard-coded absolute local path; consider deriving it from a
# configurable base directory so the notebook runs on other machines.
logdir='/Users/dabravolski/github/machine-learning/projects/deep_learning/logs'

with tf.Session(graph=graph) as session:
    session.run(init_op)
    train_writer = tf.summary.FileWriter(logdir_util(logdir,run_id),session.graph)

    # try/finally guarantees the event file is flushed and closed even if a
    # training step raises.
    try:
        print('Initialized')
        for step in range(num_steps):
            # Slide a batch-sized window over the training set, wrapping around.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
            # One optimizer step; also fetch the merged summaries, the
            # minibatch loss and the validation predictions.
            summary,_,l, predictions = session.run([merged,optimizer, loss, valid_prediction], feed_dict=feed_dict)
            train_writer.add_summary(summary, step)

            if (step % 10 == 0):
                # Report the minibatch loss every 10 steps.
                print('Minibatch loss at step %d: %f' % (step, l))
    finally:
        train_writer.close()

Initialized
Minibatch loss at step 0: 2.689489
Minibatch loss at step 10: 2.113735
Minibatch loss at step 20: 1.753071
Minibatch loss at step 30: 1.485521
Minibatch loss at step 40: 1.084816
Minibatch loss at step 50: 1.432543


In [7]:
import os
def logdir_util(parent_dir, run_id):
    """Return the log directory for a run, creating it when necessary.

    The directory is `parent_dir/run_id`. A run id may be reused only while
    its directory tree contains no files, so event files from different runs
    are never mixed.

    Args:
      parent_dir: directory under which all run directories live.
      run_id: name of the sub-directory for this run.

    Returns:
      Path of the (existing or newly created) run directory.

    Raises:
      ValueError: if the path exists but is not a directory, or if the
        directory tree already contains at least one file.
    """
    new_dir = os.path.join(parent_dir, run_id)
    if not os.path.exists(new_dir):
        os.makedirs(new_dir)
        return new_dir
    if not os.path.isdir(new_dir):
        # Without this check os.walk would yield nothing and the function
        # would fall through and return None.
        raise ValueError("Run path exists but is not a directory: %s" % new_dir)
    # Inspect the whole tree, not only the top level, before allowing reuse.
    for _dirpath, _dirnames, files in os.walk(new_dir):
        if files:
            raise ValueError("Non unique run id. Please make it unique.")
    return new_dir


In [29]:
def variable_summaries(var,subscope,scope="summaries"):
    """Register TensorBoard summaries describing a Tensor.

    Adds scalar summaries for the mean, standard deviation, maximum and
    minimum of `var`, plus a histogram summary, all created under the
    name scope `scope/subscope`.

    Args:
      var: the Tensor to summarize.
      subscope: inner name scope, used to tell summarized tensors apart.
      scope: outer name scope shared by the summaries.
    """
    with tf.name_scope(scope), tf.name_scope(subscope):
        average = tf.reduce_mean(var)
        tf.summary.scalar('mean', average)
        with tf.name_scope('stddev'):
            squared_deviation = tf.square(var - average)
            spread = tf.sqrt(tf.reduce_mean(squared_deviation))
        tf.summary.scalar('stddev', spread)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

In [40]:
from math import sqrt

def put_kernels_on_grid (kernel, pad = 1):

    '''Visualize conv. features as an image (mostly for the 1st layer).
    Place kernel into a grid, with some paddings between adjacent filters.

    The grid shape (grid_Y, grid_X) is chosen automatically so that
    NumKernels == grid_Y * grid_X, with grid_Y the largest divisor of
    NumKernels that does not exceed sqrt(NumKernels). A prime NumKernels
    therefore gives a single row, and a message is printed in that case.

    Args:
      kernel:            tensor of shape [Y, X, NumChannels, NumKernels];
                           NumKernels must be statically known.
      pad:               number of black pixels around each filter (between them)
    Return:
      Tensor of shape [1, (Y+2*pad)*grid_Y, (X+2*pad)*grid_X, NumChannels],
      i.e. a batch of one image as expected by tf.summary.image.
    '''
    # get shape of the grid. NumKernels == grid_Y * grid_X
    def factorization(n):
        for i in range(int(sqrt(float(n))), 0, -1):
            if n % i == 0:
                if i == 1: print('Who would enter a prime number of filters')
                # i divides n here, so integer division is exact.
                return (i, n // i)

    num_kernels = kernel.get_shape()[3].value
    (grid_Y, grid_X) = factorization (num_kernels)
    print ('grid: %d = (%d, %d)' % (num_kernels, grid_Y, grid_X))

    # Rescale all kernel values linearly using the global min and max.
    x_min = tf.reduce_min(kernel)
    x_max = tf.reduce_max(kernel)

    kernel1 = (kernel - x_min) / (x_max - x_min)

    # pad X and Y
    x1 = tf.pad(kernel1, tf.constant( [[pad,pad],[pad, pad],[0,0],[0,0]] ), mode = 'CONSTANT')

    # X and Y dimensions, w.r.t. padding
    Y = kernel1.get_shape()[0] + 2 * pad
    X = kernel1.get_shape()[1] + 2 * pad

    channels = kernel1.get_shape()[2]

    # put NumKernels to the 1st dimension
    x2 = tf.transpose(x1, (3, 0, 1, 2))
    # organize grid on Y axis
    # (tf.stack is the replacement for the removed tf.pack)
    x3 = tf.reshape(x2, tf.stack([grid_X, Y * grid_Y, X, channels]))

    # switch X and Y axes
    x4 = tf.transpose(x3, (0, 2, 1, 3))
    # organize grid on X axis
    x5 = tf.reshape(x4, tf.stack([1, X * grid_X, Y * grid_Y, channels]))

    # back to normal order (not combining with the next step for clarity)
    x6 = tf.transpose(x5, (2, 1, 3, 0))

    # to tf.summary.image order [batch_size, height, width, channels],
    #   where in this case batch_size == 1
    x7 = tf.transpose(x6, (3, 0, 1, 2))

    # scaling to [0, 255] is not necessary for tensorboard
    return x7