In [1]:
import tensorflow as tf
import xml
import numpy as np
from pylab import *
%matplotlib inline
import xml.etree.ElementTree as ET
import cv2
import os
from matplotlib import patches
from random import shuffle, randint

In [2]:
home_dir = os.path.expanduser('~') + "/"
datasets_dir = home_dir + "external_drive/"
cifar_dir = datasets_dir + "cifar10/cifar-10-batches-bin"

NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
min_fraction_of_examples_in_queue = 0.1

# TURN THESE INTO TENSORFLOW VARIABLES SO THEY CAN BE DYNAMICALLY CHANGED

# The width and height of the image
image_size = 24 # Must be divisible by the pooling layers
# Image depth
image_depth = 3
# The batch size
batch_size = 128
test_batch_size = 256
# number of classes
num_classes = 10

In [4]:

"""Reads and parses examples from CIFAR10 data files.
Recommendation: if you want N-way read parallelism, call this function
N times.  This will give you N independent Readers reading different
files & positions within those files, which will give better mixing of
examples.
Args:
filename_queue: A queue of strings with the filenames to read from.
Returns:
An object representing a single example, with the following fields:
  height: number of rows in the result (32)
  width: number of columns in the result (32)
  depth: number of color channels in the result (3)
  key: a scalar string Tensor describing the filename & record number
    for this example.
  label: an int32 Tensor with the label in the range 0..9.
  uint8image: a [height, width, depth] uint8 Tensor with the image data
"""
class CIFAR10Record(object):
    pass






"""Construct input for CIFAR evaluation using the Reader ops.
Args:
data_dir: Path to the CIFAR-10 data directory.
batch_size: Number of images per batch.
Returns:
images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
labels: Labels. 1D tensor of [batch_size] size.
"""
    
filenames = [os.path.join(cifar_dir, 'data_batch_%d.bin' % i)
             for i in range(1, 6)]

num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN

for f in filenames:
    if not tf.gfile.Exists(f):
        raise ValueError('Failed to find file: ' + f)
        
# Create a queue that produces the filenames to read.
filename_queue = tf.train.string_input_producer(filenames) # this doesn't work at the moment! bug?
"""filename_queue = tf.RandomShuffleQueue(1000, 1, tf.string)
enqueue_ops = []
for x in range(100):
    for item in filenames:
        enqueue_ops.append(filename_queue.enqueue(item))
"""


result = CIFAR10Record()

# Dimensions of the images in the CIFAR-10 dataset.
# See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
# input format.
label_bytes = 1  # 2 for CIFAR-100
result.height = 32
result.width = 32
result.depth = 3
image_bytes = result.height * result.width * result.depth # 3072



# Every record consists of a label followed by the image, with a
# fixed number of bytes for each.
record_bytes = label_bytes + image_bytes #3073



# Read a record, getting filenames from the filename_queue.  No
# header or footer in the CIFAR-10 format, so we leave header_bytes
# and footer_bytes at their default of 0.
reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
result.key, value = reader.read(filename_queue)



# Convert from a string to a vector of uint8 that is record_bytes long.
record_bytes = tf.decode_raw(value, tf.uint8)

# The first bytes represent the label, which we convert from uint8->int32.
result.label = tf.cast(
  tf.slice(record_bytes, [0], [label_bytes]), tf.int32)

# The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                       [result.depth, result.height, result.width])
# Convert from [depth, height, width] to [height, width, depth].
result.uint8image = tf.transpose(depth_major, [1, 2, 0])





# Read examples from files in the filename queue.
read_input = result
reshaped_image = tf.cast(read_input.uint8image, tf.float32)



height = image_size
width = image_size

# Image processing for evaluation.
# Crop the central [height, width] of the image.
resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                     width, height)

# Subtract off the mean and divide by the variance of the pixels.
float_image = tf.image.per_image_whitening(resized_image)

# Ensure that the random shuffling has good mixing properties.

min_queue_examples = int(num_examples_per_epoch *
                       min_fraction_of_examples_in_queue)

num_preprocess_threads = 16
x, y_ = tf.train.shuffle_batch(
  [float_image, read_input.label],
  batch_size=batch_size,
  num_threads=num_preprocess_threads,
  capacity=min_queue_examples + 3 * batch_size,
  min_after_dequeue=min_queue_examples)

y_ = tf.reshape(y_, [batch_size])

# Display the training images in the visualizer.
tf.image_summary('images', x)



<tf.Tensor 'ImageSummary:0' shape=() dtype=string>

In [5]:
result.label

<tf.Tensor 'Cast:0' shape=(1,) dtype=int32>

In [7]:
tf.convert_to_tensor([1])

<tf.Tensor 'Const_1:0' shape=(1,) dtype=int32>

In [4]:
# Helper functions for defining networks
def weight_variable(shape, wd):
    initial = tf.truncated_normal(shape, stddev=0.01)
    weight_decay = tf.mul(tf.nn.l2_loss(initial), wd)
    tf.add_to_collection('losses', weight_decay)
    return tf.Variable(initial)

def bias_variable(shape, value):
    initial = tf.constant(float(value), shape=shape)
    return tf.Variable(initial)

def conv2d(x, W, stride):
    return tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

In [5]:
# Need to implement image resizing!!! (and maybe also various augmentations)
# Bring from 0, 255 to 0, 1
# Subtract mean
# Then, bounding boxes will need re-scaled also..

# Mostly, but not completely, done. Also, image means might be slightly wrong due to BGR vs RGB ordering

In [6]:
# Something like Alexnet
sess = tf.InteractiveSession()

# Placeholder for the image size
#image_size_variable = tf.placeholder(tf.float32)

# Batched input
#x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, image_depth], name="Input_Image_Batch") # batch size, image size, image size, image depth
#y_ = tf.placeholder(tf.float32, shape=[None, num_classes], name="Input_Classes") # batch size, num_classes

#x, y_ = inputs(cifar_dir, batch_size)

# First Convolutional Layer
# Variables
W_conv1 = weight_variable([5, 5, image_depth, 64], 0) # filter size, filter size, input channels (image depth), output channels
b_conv1 = bias_variable([64], 0)
# Layers
h_conv1 = tf.nn.relu(conv2d(x, W_conv1, stride=1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second Convolutional Layer
# Variables
W_conv2 = weight_variable([5, 5, 64, 64], 0)
b_conv2 = bias_variable([64], 0)
# Layers
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, stride=1) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# They did some tricky flattening shit here - might need to do that. Might also be faster that way. jk we might already do that

# Fully Connected 1
# Weights
W_fc1 = weight_variable([int(image_size/4) * int(image_size/4) * 64, 384], .004)
b_fc1 = bias_variable([384], 0)
# Layers
h_pool5_flat = tf.reshape(h_pool2, [-1, int(image_size/4) * int(image_size/4)*64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool5_flat, W_fc1) + b_fc1)

# Fully Connected 2
# Weights
W_fc2 = weight_variable([384, 192], .004)
b_fc2 = bias_variable([192], 0)
# Layers
h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)

# Softmax
# Weights
W_fc3 = weight_variable([192, num_classes], 0)
b_fc3 = bias_variable([num_classes], 0)
# Layers
softmax_linear = tf.matmul(h_fc2, W_fc3) + b_fc3
y_conv=tf.nn.softmax(softmax_linear)


In [7]:
#x, w, b = sess.run([h_pool5_flat, W_fc1, b_fc1])

In [8]:
#np.dot(x, w) + b

In [9]:
# Loss function 
#cross_entropy = -tf.reduce_sum(y_*tf.log(y_conv))
#cross_entropy = -tf.reduce_sum(tf.cast(y_, tf.float32)*tf.log(tf.clip_by_value(y_conv,1e-10,1.0))) # this fix was needed for some reason..
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
      softmax_linear, tf.to_int64(y_), name='cross_entropy_per_example')
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy_thingy')


# Learning rate 
learning_rate = tf.placeholder(tf.float32)

# Optimization Algorithm
#train_step = tf.train.MomentumOptimizer(learning_rate, .9).minimize(cross_entropy_mean)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy) #should be mean????
#train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# Accuracy function
#correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
#accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
correct_prediction = tf.nn.in_top_k(y_conv, y_, 1)
accuracy = tf.reduce_sum(tf.cast(correct_prediction, tf.int32)) / batch_size

In [10]:
# Make summary of cross entropy loss
tf.scalar_summary("cross entropy", cross_entropy_mean)

# Make a summary of training accuracy
tf.scalar_summary("accuracy", accuracy)

# Learning rate summary
tf.scalar_summary("learning rate", learning_rate)

# Make a bunch of histogram summaries
tf.histogram_summary("image inputs", x)
tf.histogram_summary("class inputs", y_)
tf.histogram_summary("conv1_weights", W_conv1)
tf.histogram_summary("conv1_biases", b_conv1)
tf.histogram_summary("conv2_weights", W_conv2)
tf.histogram_summary("conv2_biases", b_conv2)
tf.histogram_summary("fc1_weights", W_fc1)
tf.histogram_summary("fc1_biases", b_fc1)
tf.histogram_summary("fc2_weights", W_fc2)
tf.histogram_summary("fc2_biases", b_fc2)
tf.histogram_summary("softmax_weights", W_fc3)
tf.histogram_summary("softmax_biases", b_fc3)
tf.histogram_summary("conv1_outputs", h_conv1)
tf.histogram_summary("conv2_outputs", h_conv2)
tf.histogram_summary("pool1_outputs", h_pool1)
tf.histogram_summary("pool2_outputs", h_pool2)
tf.histogram_summary("fc1_outputs", h_fc1)
tf.histogram_summary("fc2_outputs", h_fc2)
tf.histogram_summary("softmax_linear_outputs", softmax_linear)
tf.histogram_summary("final_predictions", y_conv)

# Merge all the summaries and write them out to /tmp/mnist_logs
merged = tf.merge_all_summaries()


In [11]:
# Fix log directory name

In [12]:
sess.run(tf.initialize_all_variables())
tf.train.start_queue_runners(sess=sess)
i = 0
save_name = "cifar_7"
writer = tf.train.SummaryWriter(home_dir + "projects/deep_learning/tensorflow/tmp/alexnet_logs/"+save_name, sess.graph_def, flush_secs=10)
#saver = tf.train.Saver()

In [19]:
learning_rate_value = .0001
dropout = .5

while(True):
    i += 1
    if i%5 == 0:
        summary, accuracy_value = sess.run([merged, accuracy], 
                    feed_dict={learning_rate: learning_rate_value})
        writer.add_summary(summary, i)
        print("Accuracy at step %s: %s" % (i, accuracy_value))
    else:
        train_step.run(feed_dict={learning_rate: learning_rate_value})

Accuracy at step 10605: 1.0
Accuracy at step 10610: 1.0
Accuracy at step 10615: 1.0
Accuracy at step 10620: 1.0
Accuracy at step 10625: 1.0
Accuracy at step 10630: 1.0
Accuracy at step 10635: 1.0
Accuracy at step 10640: 1.0
Accuracy at step 10645: 1.0
Accuracy at step 10650: 1.0
Accuracy at step 10655: 1.0
Accuracy at step 10660: 1.0
Accuracy at step 10665: 1.0
Accuracy at step 10670: 1.0
Accuracy at step 10675: 1.0
Accuracy at step 10680: 1.0
Accuracy at step 10685: 1.0
Accuracy at step 10690: 1.0
Accuracy at step 10695: 1.0
Accuracy at step 10700: 1.0
Accuracy at step 10705: 1.0
Accuracy at step 10710: 1.0
Accuracy at step 10715: 1.0
Accuracy at step 10720: 1.0
Accuracy at step 10725: 1.0
Accuracy at step 10730: 1.0
Accuracy at step 10735: 1.0


KeyboardInterrupt: 

In [15]:
saver = tf.train.Saver()

In [21]:
# Save weights to file
save_number = 1
save_path = saver.save(sess, "tmp/checkpoints/{save_name}_{save_number}.ckpt".format(save_name=save_name, save_number=save_number))
print("Model saved in file: %s" % save_path)

Model saved in file: tmp/checkpoints/cifar_7_1.ckpt


In [None]:
# Restore weights from file
#saver.restore(sess, "/tmp/model.ckpt")

In [None]:
# TODOS
# Weight decay, batch normalization, bigger network, different filter sizes, inception filters
# sgd with momentum seems standard

In [None]:
def inputs(data_dir, batch_size):
    """Construct input for CIFAR evaluation using the Reader ops.
    Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.
    Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
    """
    
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                 for i in range(1, 6)]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
    
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('Failed to find file: ' + f)

    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames)

    # Read examples from files in the filename queue.
    read_input = read_cifar10(filename_queue)
    reshaped_image = tf.cast(read_input.uint8image, tf.float32)

    height = image_size
    width = image_size

    # Image processing for evaluation.
    # Crop the central [height, width] of the image.
    resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                         width, height)

    # Subtract off the mean and divide by the variance of the pixels.
    float_image = tf.image.per_image_whitening(resized_image)

    # Ensure that the random shuffling has good mixing properties.
    
    min_queue_examples = int(num_examples_per_epoch *
                           min_fraction_of_examples_in_queue)

    # Generate a batch of images and labels by building up a queue of examples.
    return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size)

In [None]:
def read_cifar10(filename_queue):
    """Reads and parses examples from CIFAR10 data files.
    Recommendation: if you want N-way read parallelism, call this function
    N times.  This will give you N independent Readers reading different
    files & positions within those files, which will give better mixing of
    examples.
    Args:
    filename_queue: A queue of strings with the filenames to read from.
    Returns:
    An object representing a single example, with the following fields:
      height: number of rows in the result (32)
      width: number of columns in the result (32)
      depth: number of color channels in the result (3)
      key: a scalar string Tensor describing the filename & record number
        for this example.
      label: an int32 Tensor with the label in the range 0..9.
      uint8image: a [height, width, depth] uint8 Tensor with the image data
    """
    class CIFAR10Record(object):
        pass
    result = CIFAR10Record()

    # Dimensions of the images in the CIFAR-10 dataset.
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    label_bytes = 1  # 2 for CIFAR-100
    result.height = 32
    result.width = 32
    result.depth = 3
    image_bytes = result.height * result.width * result.depth
    # Every record consists of a label followed by the image, with a
    # fixed number of bytes for each.
    record_bytes = label_bytes + image_bytes

    # Read a record, getting filenames from the filename_queue.  No
    # header or footer in the CIFAR-10 format, so we leave header_bytes
    # and footer_bytes at their default of 0.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    result.key, value = reader.read(filename_queue)

    # Convert from a string to a vector of uint8 that is record_bytes long.
    record_bytes = tf.decode_raw(value, tf.uint8)

    # The first bytes represent the label, which we convert from uint8->int32.
    result.label = tf.cast(
      tf.slice(record_bytes, [0], [label_bytes]), tf.int32)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(tf.slice(record_bytes, [label_bytes], [image_bytes]),
                           [result.depth, result.height, result.width])
    # Convert from [depth, height, width] to [height, width, depth].
    result.uint8image = tf.transpose(depth_major, [1, 2, 0])

    return result


In [None]:

def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size):
    """Construct a queued batch of images and labels.
    Args:
    image: 3-D Tensor of [height, width, 3] of type.float32.
    label: 1-D Tensor of type.int32
    min_queue_examples: int32, minimum number of samples to retain
      in the queue that provides of batches of examples.
    batch_size: Number of images per batch.
    Returns:
    images: Images. 4D tensor of [batch_size, height, width, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
    """
    # Create a queue that shuffles the examples, and then
    # read 'batch_size' images + labels from the example queue.
    num_preprocess_threads = 16
    images, label_batch = tf.train.shuffle_batch(
      [image, label],
      batch_size=batch_size,
      num_threads=num_preprocess_threads,
      capacity=min_queue_examples + 3 * batch_size,
      #min_after_dequeue=min_queue_examples
        min_after_dequeue=1)

    # Display the training images in the visualizer.
    tf.image_summary('images', images)

    #return images, tf.reshape(label_batch, [batch_size])
    return images, label_batch