In [91]:
import tensorflow as tf
import xml
import numpy as np
from pylab import *
%matplotlib inline
import xml.etree.ElementTree as ET
import cv2
import os
from matplotlib import patches
from random import shuffle, randint

In [3]:
# ImageNet data locations (all paths are built relative to the user's home directory)
home_dir = os.path.expanduser('~') + "/"
datasets_dir = home_dir + "external_drive/"
imagenet_dir = datasets_dir + "imagenet/"

# Read the list of training image filenames, one per line of the listing file.
# Surrounding whitespace (including the trailing newline) is stripped from each entry.
train_filenames = []
with open(imagenet_dir + "2012_train_filenames.txt", "r") as listing:
    train_filenames.extend(entry.strip() for entry in listing)

In [4]:
# Get all class names from filenames, and index them for use in one-hot labeling.
# The class name is the part of the filename before the first underscore.
classes = {filename.split("_")[0] for filename in train_filenames}

# Sort the class names so that the class <-> index mapping is identical on
# every kernel restart. Iterating a set of strings directly gives an order
# that can change between Python processes, which would silently change the
# meaning of every label index from one run to the next.
index_to_class = sorted(classes)
class_to_index = {classname: index for index, classname in enumerate(index_to_class)}

In [None]:
%%timeit
# How long does it take to load a batch?
# NOTE: get_batch, batch_size, image_size and num_classes are defined in cells
# further down this notebook; those cells must have been run first.
# Every timed call also advances the module-level image_index used by get_batch.
x_1, y_1 = get_batch(train_filenames, batch_size, image_size, num_classes)

In [103]:
%%time
for x in range(10):
    i = randint(0, len(train_filenames))
    filename = train_filenames[i]
    image = read_jpeg(filename)
    #image = crop_resize_image(image, 224)

CPU times: user 92 ms, sys: 4 ms, total: 96 ms
Wall time: 359 ms


In [85]:
def crop_resize_image(image, desired_dimension):
    """Center-crop an image to a square and resize it.

    Args:
        image: array indexed as [rows, cols, channels] (three axes are unpacked
            from image.shape).
        desired_dimension: side length, in pixels, of the square output.

    Returns:
        The result of cv2.resize on the square crop, with spatial size
        desired_dimension x desired_dimension.
    """
    rows, cols, _ = image.shape
    if rows >= cols:
        # Taller than wide: keep every column, take `cols` rows from the middle.
        start = (rows - cols) // 2
        cropped_image = image[start:start + cols, :, :]
    else:
        # Wider than tall: keep every row, take `rows` columns from the middle.
        start = (cols - rows) // 2
        cropped_image = image[:, start:start + rows, :]
    # The crop is exactly square (also for odd side lengths), so the resize
    # does not change the aspect ratio.
    return cv2.resize(cropped_image, (desired_dimension, desired_dimension))

# Returns a [height, width, depth] image in RGB pixel order
def read_jpeg(filename):
    """Load one training image, convert it to RGB, mean-subtract and scale it.

    Args:
        filename: name of a file inside imagenet_dir + "2012_train/".

    Returns:
        Float array in RGB channel order: (pixel - per-channel mean) / 255.

    Raises:
        IOError: if OpenCV could not read the file (cv2.imread returned None).
    """
    path = imagenet_dir + "2012_train/{}".format(filename)
    image = cv2.imread(path)
    if image is None:
        # Fail here with the offending path instead of letting cvtColor raise
        # an opaque error on a None input.
        raise IOError("Failed to read image: {}".format(path))
    # Convert BGR to RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Subtract per-channel means. The values 104/116/122 match the widely
    # published ImageNet means in B, G, R order, so after the conversion above
    # they have to be applied reversed, i.e. as R, G, B.
    rgb_means = np.array([122, 116, 104])
    return (image - rgb_means) / 255.0

# Plots an image from either filename or numpy array.
def show_image(image):
    """Display an image on a new matplotlib figure.

    Args:
        image: either a filename (str), which is loaded through read_jpeg,
            or an array that is passed to imshow as-is.
    """
    pixels = read_jpeg(image) if isinstance(image, str) else image
    # One subplot with equal aspect so pixels are drawn square.
    axes = plt.figure().add_subplot(111, aspect='equal')
    axes.imshow(pixels)

# Position of the next unread entry in the filename list (shared across calls).
image_index = 0
# Gets a batch of images from a given list of filenames
def get_batch(filenames, num_images, img_size, num_classes):
    """Load the next `num_images` images and their one-hot labels.

    Walks through `filenames` sequentially using the module-level
    `image_index`. The list is shuffled IN PLACE at the start of every epoch,
    including the first one, so batches are not drawn in file-list order
    (NOTE(review): the listing file is presumably grouped by class, which
    would otherwise put a single class in each batch - confirm).

    Args:
        filenames: list of image filenames; the class name is the part before
            the first underscore and must be a key of class_to_index.
        num_images: number of images in the batch.
        img_size: side length passed to crop_resize_image.
        num_classes: width of the one-hot label rows.

    Returns:
        (images, labels): np.array of the processed images and the one-hot
        label matrix produced by dense_to_one_hot.

    Raises:
        ValueError: if `num_images` is larger than len(filenames).
    """
    global image_index
    if num_images > len(filenames):
        raise ValueError("Requested a batch of {} images but only {} filenames are available"
                         .format(num_images, len(filenames)))

    # An epoch is over only when the remaining entries cannot fill this batch.
    epoch_finished = image_index + num_images > len(filenames)
    if epoch_finished:
        print("Finished epoch, shuffling filenames!")
    if epoch_finished or image_index == 0:
        shuffle(filenames)
        image_index = 0

    batch_filenames = filenames[image_index:image_index + num_images]
    # Advance before loading so an unreadable file is not retried forever.
    image_index += num_images

    images = [crop_resize_image(read_jpeg(name), img_size) for name in batch_filenames]
    labels = [class_to_index[name.split("_")[0]] for name in batch_filenames]
    return np.array(images), dense_to_one_hot(np.array(labels), num_classes)

def dense_to_one_hot(labels_dense, num_classes=10):
    """Build a one-hot matrix from an array of integer class labels.

    Args:
        labels_dense: numpy array of integer labels; its first axis gives the
            number of rows and the values are flattened with ravel().
        num_classes: number of columns in the result (default 10).

    Returns:
        A (num_labels, num_classes) float array of zeros with a 1 at each
        row's label position.
    """
    row_count = labels_dense.shape[0]
    one_hot = np.zeros((row_count, num_classes))
    # In the flattened matrix, row r starts at r * num_classes; adding the
    # label gives the flat position of that row's hot entry.
    row_starts = num_classes * np.arange(row_count)
    hot_positions = row_starts + labels_dense.ravel()
    one_hot.flat[hot_positions] = 1
    return one_hot

In [80]:
def weight_variable(shape):
    """Create a tf.Variable of the given shape, initialised from a truncated normal (stddev 0.01)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01))

def bias_variable(shape, value):
    """Create a tf.Variable of the given shape with every element set to float(value)."""
    fill = float(value)
    return tf.Variable(tf.constant(fill, shape=shape))

def conv2d(x, W, stride):
    """2-D convolution of x with filters W using 'SAME' padding.

    The same `stride` is applied along both middle (spatial) axes; the first
    and last stride entries are fixed at 1.
    """
    strides = [1, stride, stride, 1]
    return tf.nn.conv2d(x, W, strides=strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool x with a 2x2 window and stride 2, using 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [81]:
# TODO: image resizing still needs to be implemented (and maybe also various augmentations):
#   - Bring pixel values from [0, 255] to [0, 1]
#   - Subtract the mean
#   - Bounding boxes will then need to be re-scaled as well

In [82]:
# TURN THESE INTO TENSORFLOW VARIABLES SO THEY CAN BE DYNAMICALLY CHANGED

# The width and height of the (square) input image, in pixels
image_size = 224 # Must be divisible by 32: the AlexNet-like cell sizes its first fully connected layer from image_size/32
# Image depth (number of colour channels of the input placeholder)
image_depth = 3
# The batch size used for ordinary training steps
batch_size = 128
# The (larger) batch size used on every 10th step, when accuracy is measured and summaries are written
test_batch_size = 256
# number of classes (width of the one-hot label placeholder and of the final layer)
num_classes = 1000


In [9]:
# Something like Alexnet: five convolutional layers followed by three fully
# connected layers, with dropout after the first two fully connected layers.
sess = tf.InteractiveSession()

# Batched input
x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, image_depth]) # batch size, image size, image size, image depth
y_ = tf.placeholder(tf.float32, shape=[None, num_classes]) # batch size, num_classes

# Dropout keep probability. It is fed on every run: a value below 1 while
# training and 1.0 when evaluating, which turns dropout off.
keep_prob = tf.placeholder(tf.float32)

# First Convolutional Layer (stride 4, then 2x2 max pool)
W_conv1 = weight_variable([11, 11, image_depth, 96]) # filter size, filter size, input channels (image depth), output channels
b_conv1 = bias_variable([96], 0)
h_conv1 = tf.nn.relu(conv2d(x, W_conv1, stride=4) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second Convolutional Layer (then 2x2 max pool)
W_conv2 = weight_variable([5, 5, 96, 256])
b_conv2 = bias_variable([256], 1)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, stride=1) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Third Convolutional Layer (no pooling)
W_conv3 = weight_variable([3, 3, 256, 384])
b_conv3 = bias_variable([384], 0)
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3, stride=1) + b_conv3)

# Fourth Convolutional Layer (no pooling)
W_conv4 = weight_variable([3, 3, 384, 384])
b_conv4 = bias_variable([384], 1)
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4, stride=1) + b_conv4)

# Fifth Convolutional Layer (then 2x2 max pool)
W_conv5 = weight_variable([3, 3, 384, 256])
b_conv5 = bias_variable([256], 1)
h_conv5 = tf.nn.relu(conv2d(h_conv4, W_conv5, stride=1) + b_conv5)
h_pool5 = max_pool_2x2(h_conv5)

# Fully Connected 1
# The stride-4 convolution and the three 2x2 pools shrink each spatial side by
# 4 * 2 * 2 * 2 = 32, leaving (image_size/32)^2 positions with 256 channels.
fc_input_size = (image_size // 32) * (image_size // 32) * 256
W_fc1 = weight_variable([fc_input_size, 4096])
b_fc1 = bias_variable([4096], 1)
h_pool5_flat = tf.reshape(h_pool5, [-1, fc_input_size])
h_fc1 = tf.nn.relu(tf.matmul(h_pool5_flat, W_fc1) + b_fc1)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Fully Connected 2
W_fc2 = weight_variable([4096, 4096])
b_fc2 = bias_variable([4096], 1)
h_fc2 = tf.nn.relu(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
h_fc2_drop = tf.nn.dropout(h_fc2, keep_prob)

# Softmax output layer: class probabilities per example
W_fc3 = weight_variable([4096, num_classes])
b_fc3 = bias_variable([num_classes], 0)
y_conv = tf.nn.softmax(tf.matmul(h_fc2_drop, W_fc3) + b_fc3)


In [10]:
  
# Cross-entropy loss, averaged over the batch.
# The softmax output is clipped away from 0 before the log so the loss cannot
# become NaN/inf. The per-example losses are averaged (not summed) so that the
# size of the gradient step and the logged loss value do not depend on how
# many examples are in the batch (training and evaluation batches differ).
per_example_loss = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)), 1)
cross_entropy = tf.reduce_mean(per_example_loss)

# The learning rate is a placeholder so it can be changed between steps.
learning_rate = tf.placeholder(tf.float32)
train_step = tf.train.MomentumOptimizer(learning_rate, .9).minimize(cross_entropy)
# Alternatives that were tried:
#train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
#train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# Fraction of examples whose highest-scoring class matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [11]:
# Scalar summary of the cross entropy loss
ce_summ = tf.scalar_summary("cross entropy", cross_entropy)

# Scalar summary of the `accuracy` op; it reports accuracy on whatever batch
# is fed when the merged summary op is run.
accuracy_summary = tf.scalar_summary("accuracy", accuracy)

# Learning rate summary (records the value fed into the learning_rate placeholder)
learning_rate_summary = tf.scalar_summary("learning rate", learning_rate)

# Merge all the summaries into a single op; where they are written is decided
# by the SummaryWriter that receives the result.
merged = tf.merge_all_summaries()


In [12]:
# Initialise every variable in the graph before any other op is run.
sess.run(tf.initialize_all_variables())
# Step counter; starts at 0 here and is also passed to writer.add_summary as the step.
i = 0
# Run name, used as the last component of the summary log directory below.
save_name = "imagenet_1"
# Summary writer for this run; the graph definition is passed in as well, and flush_secs is set to 10.
writer = tf.train.SummaryWriter(home_dir + "projects/deep_learning/tensorflow/tmp/mnist_logs/"+save_name, sess.graph_def, flush_secs=10)

In [None]:
learning_rate_value = .01
dropout = .5

# Runs until the kernel is interrupted. Nine out of every ten steps are
# training steps; every tenth step instead draws a larger batch, measures
# accuracy on it and writes the merged summaries.
while True:
    i += 1
    if i % 10 != 0:
        # Ordinary training step with dropout enabled.
        batch = get_batch(train_filenames, batch_size, image_size, num_classes)
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: dropout, learning_rate: learning_rate_value})
        continue

    # Reporting step: keep_prob is 1 so nothing is dropped.
    batch = get_batch(train_filenames, test_batch_size, image_size, num_classes)
    report_feed = {x: batch[0], y_: batch[1], keep_prob: 1., learning_rate: learning_rate_value}
    summary, accuracy_value = sess.run([merged, accuracy], feed_dict=report_feed)
    writer.add_summary(summary, i)
    print("Accuracy at step %s: %s" % (i, accuracy_value))

#print("test accuracy %g"%accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0}))

Accuracy at step 20: 0.0
Accuracy at step 30: 0.0
Accuracy at step 40: 0.0
Accuracy at step 50: 0.0
Accuracy at step 60: 0.0
Accuracy at step 70: 0.0
Accuracy at step 80: 0.0
Accuracy at step 90: 0.0
Accuracy at step 100: 0.0
Accuracy at step 110: 0.0
Accuracy at step 120: 0.0
Accuracy at step 130: 0.0
Accuracy at step 140: 0.0
Accuracy at step 150: 0.0
Accuracy at step 160: 0.0
Accuracy at step 170: 0.0
Accuracy at step 180: 0.0
Accuracy at step 190: 0.0
Accuracy at step 200: 0.0
Accuracy at step 210: 0.0
Accuracy at step 220: 0.0
Accuracy at step 230: 0.0
Accuracy at step 240: 0.0
Accuracy at step 250: 0.0
Accuracy at step 260: 0.0
Accuracy at step 270: 0.0
Accuracy at step 280: 0.0
Accuracy at step 290: 0.0
Accuracy at step 300: 0.0
Accuracy at step 310: 0.0
Accuracy at step 320: 0.0
Accuracy at step 330: 0.0
Accuracy at step 340: 0.0
Accuracy at step 350: 0.0
Accuracy at step 360: 0.0
Accuracy at step 370: 0.0
Accuracy at step 380: 0.0
Accuracy at step 390: 0.0
Accuracy at step 400

In [None]:
def one_hot_to_class(one_hot):
    """Map a one-hot label row, or a row of class scores, to its class name.

    The class is the position of the largest entry, so this gives the labelled
    class for a one-hot row and the highest-scoring class for a row of
    probabilities.

    Args:
        one_hot: 1-D sequence/array with one entry per class.

    Returns:
        The matching name from index_to_class, or None when no entry is
        greater than zero.
    """
    scores = np.asarray(one_hot)
    if scores.size == 0 or not np.any(scores > 0):
        return None
    return index_to_class[int(np.argmax(scores))]

In [None]:
# Load one batch and run it through the network with dropout disabled.
# get_batch takes exactly four arguments (filenames, num_images, img_size, num_classes).
x_1, y_1 = get_batch(train_filenames, batch_size, image_size, num_classes)
results = y_conv.eval(feed_dict={x: x_1, y_: y_1, keep_prob: 1., })

In [None]:
# Show up to the first 20 images of the batch with their true and predicted class.
for a in range(min(20, len(x_1))):
    show_image(x_1[a,:,:,:])
    # format() is used instead of string concatenation so that a None result
    # from one_hot_to_class is printed rather than raising a TypeError.
    print("Actual: {}".format(one_hot_to_class(y_1[a])))
    print("Guess: {}".format(one_hot_to_class(results[a])))

In [15]:
%%timeit
# Time loading one training batch (read, crop/resize and one-hot label).
# Every timed call also advances the module-level image_index used by get_batch.
x_1, y_1 = get_batch(train_filenames, batch_size, image_size, num_classes)

1 loop, best of 3: 3.16 s per loop


In [None]:
# Ideas to try: weight decay, batch normalization, a bigger network, different filter sizes, inception filters.
# SGD with momentum seems to be the standard optimizer.

In [None]:
# Simple network definition (intended for CIFAR-10?)
# Two 5x5 convolution + 2x2 max-pool stages, one fully connected layer with
# dropout, and a softmax output.
# WARNING: running this cell rebinds sess, x, y_, keep_prob, y_conv and the
# W_*/b_*/h_* names that the AlexNet-like cell also defines.
sess = tf.InteractiveSession()

# bias_variable requires an explicit initial value and conv2d an explicit stride.
# NOTE(review): 0.1 is an assumed small positive initial bias - the original
# calls passed no value at all; confirm the intended number.
simple_bias_init = 0.1

# Batched input
x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, image_depth]) # batch size, image size, image size, image depth
y_ = tf.placeholder(tf.float32, shape=[None, num_classes]) # batch size, num_classes

# First convolution + pool
W_conv1 = weight_variable([5, 5, image_depth, 32]) # filter size, filter size, input channels (image depth), output channels
b_conv1 = bias_variable([32], simple_bias_init)
h_conv1 = tf.nn.relu(conv2d(x, W_conv1, stride=1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolution + pool
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64], simple_bias_init)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2, stride=1) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer. The stride-1 convolutions keep the spatial size and
# each of the two pools halves it, leaving (image_size/4)^2 positions with 64 channels.
simple_fc_input_size = (image_size // 4) * (image_size // 4) * 64
W_fc1 = weight_variable([simple_fc_input_size, 1024])
b_fc1 = bias_variable([1024], simple_bias_init)
h_pool2_flat = tf.reshape(h_pool2, [-1, simple_fc_input_size])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Softmax
W_fc2 = weight_variable([1024, num_classes])
b_fc2 = bias_variable([num_classes], simple_bias_init)

y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)


In [88]:
%%timeit
# How long does it take to load a batch?
# Every timed call also advances the module-level image_index used by get_batch.
x_1, y_1 = get_batch(train_filenames, batch_size, image_size, num_classes)

1 loop, best of 3: 1.82 s per loop


In [89]:
# Show how far get_batch has advanced through the filename list.
image_index

1536