This is a very basic tutorial on convolutional neural networks (ConvNets) in Tensorflow. The reason for this is that I had an extremely challenging time getting my head wrapped around it and wanted to help anyone out that was in a similar boat. This is not meant to be a be all end all on ConvNets but rather a supplimentary guide that is specific to Tensorflow. I will not use the mnist dataset but rather a dataset of my own images. The model code will probably be very familiar, as it will be pieced together from other examples and tutorials. This is both for time saving and also to help bridge the gap between various tutorials. Code and comments will be made about how to import your own images into a TFRecords file and the process of running it through a ConvNet. Current expectations is not to create a highly accurate model but rather a model that runs. In addition to TFRecords, this tutorial will also attempt to cover Tensorboards as well. 

## Resize images

Depending on your setup, you may not be able to train on full size images, so I wrote some code to help get all your images into one place.

In [1]:
# some of the initial imports
import PIL
from PIL import Image, ImageOps
from tqdm import tqdm
import pandas as pd
import os
# dataframe of filepaths & image labels, the base size to resize too, place to store the resized images
# TODO: make the cropping be dynamic
def image_resizer(df,size, filepath):
    open = 0
    closed = 0
    for i in tqdm(range(df.shape[0])):
        basewidth = size
        img = Image.open(df.ix[i,0])
        wpercent = (basewidth/float(img.size[0]))
        hsize = int((float(img.size[1])*float(wpercent)))
        img = img.resize((basewidth,hsize), PIL.Image.ANTIALIAS)
        #crop the sides off to make a square image
        img = ImageOps.fit(img, (100,100), Image.ANTIALIAS) #Change this if you want to crop to a different size
        label = df.ix[i,1]
        if label == str(0):
            closed += 1
            img.save(filepath + '/closed/' + str(label) + 'closed' + str(closed) + '.jpg')

        elif label == str(1):
            open += 1
            img.save(filepath + '/open/' + str(label) + 'open' + str(open) + '.jpg')

#path to all the full res pictures
filepath1 = '/media/mcamp/Local SSHD/Python Projects/Garage Door Project/KaicongWiFiCameraControl-master/images/open/home/'
filepath2 = '/media/mcamp/Local SSHD/Python Projects/Garage Door Project/KaicongWiFiCameraControl-master/images/open/away/'
filepath3 = '/media/mcamp/Local SSHD/Python Projects/Garage Door Project/KaicongWiFiCameraControl-master/images/closed/home/'
filepath4 = '/media/mcamp/Local SSHD/Python Projects/Garage Door Project/KaicongWiFiCameraControl-master/images/closed/away/'

#put all the paths+filenames into a list

image_files = os.listdir(filepath1)
filenames = []
filelables = []
for image in tqdm(image_files):
    image_file = os.path.join(filepath1, image)
    filelables.append('1')
    filenames.append(image_file)
image_files = os.listdir(filepath2)
for image in tqdm(image_files):
    image_file = os.path.join(filepath2, image)
    filelables.append('1')
    filenames.append(image_file)
image_files = os.listdir(filepath3)
for image in tqdm(image_files):
    image_file = os.path.join(filepath3, image)
    filelables.append('0')
    filenames.append(image_file)
image_files = os.listdir(filepath4)
for image in tqdm(image_files):
    image_file = os.path.join(filepath4, image)
    filelables.append('0')
    filenames.append(image_file)


df = pd.DataFrame({'file' : filenames, 'label' : filelables})
image_resizer(df, 150, '/home/mcamp/Documents/GarageImagesResized')

100%|██████████| 2345/2345 [00:00<00:00, 858033.92it/s]
100%|██████████| 3203/3203 [00:00<00:00, 878866.66it/s]
100%|██████████| 2155/2155 [00:00<00:00, 703895.73it/s]
100%|██████████| 4601/4601 [00:00<00:00, 616706.91it/s]
100%|██████████| 12304/12304 [02:02<00:00, 100.42it/s]


# Create Queue Text File

Simple code to throw all the filepaths and labels into a single text file (CSV)

In [2]:
#path to all the  pictures
filepathlist = ['/home/mcamp/Documents/GarageImagesResized/open/',
                '/home/mcamp/Documents/GarageImagesResized/closed/']

#put all the paths+filenames into a list
def q_file_maker(filepath):
    filenames = []
    for path in filepathlist:
        image_files = os.listdir(path)
        for image in tqdm(image_files):
            image_file = os.path.join(path, image)
            image_file = image_file + ',' + os.path.basename(os.path.normpath(path))
            # image_file = image_file
            filenames.append(image_file)

    #write
    new_writefile = open("queue.txt", "w") #dunno why I made it a "txt" and not a "csv" but whatevs
    for k in filenames:
        new_writefile.write("%s\n" % k)
    new_writefile.close()

q_file_maker(filepathlist)

100%|██████████| 5548/5548 [00:00<00:00, 170276.59it/s]
100%|██████████| 6756/6756 [00:00<00:00, 238512.51it/s]


# Make TFRecords Files

In [4]:
# Some additional imports
import pandas as pd  # I like pandas
import os
import numpy as np
import tensorflow as tf
from PIL import Image
from sklearn.model_selection import train_test_split

In [11]:
def _int64_feature(value):
  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

#takes a csv file in the format of: path,label
def read_qfile(qfile):
    q = pd.read_csv(qfile, header = None)
    q.columns = ['path', 'label']
    labels = q.label.tolist()
    path = q.path.tolist()
    return path, labels

'''takes the list of plane text labels (ie Open, Closed) and converts to 1 and 0.
this will return an array of all the labels.
Example:
labels = ['open','open','open','closed','closed','open','open','closed']
int_classes = [0,0,0,1,1,0,0,1]
index = ['open', 'closed']'''
def label_to_int(labels=None, index=None): #labels is a list of labels
    class_index = index
    int_classes = []
    for label in labels:
      int_classes.append(class_index.index(label)) #the class_index.index() looks values up in the list label
    int_classes = np.array(int_classes, dtype=np.uint32)
    return int_classes

#just a helper function to read in all the images to an array
def read_images(pathlist=None):
    images = []
    labels = []
    for file in pathlist:
        im = Image.open(file)
        im = np.asarray(im, np.uint8)
        image_name = file.split('/')[-1].split('.')[0]
         if 'open' in image_name:
             image_name = 1
         elif 'closed' in image_name:
             image_name = 0
         else:
              image_name = 99
        images.append([image_name,im])
    images = sorted(images, key = lambda image: image[0])
    images_only = [np.asarray(image[1].flatten(), np.uint8) for image in images]
    images_only = np.array(images_only)
    labels_only = [np.asarray(key[0], np.int32) for key in images]
    labels_only = np.array(labels_only)
    return images_only, labels_only #flat images


def convert_to_TF(images, labels, name):
    label_count = labels.shape[0]
    print('There are %d images in this dataset.' % (label_count))
    if images.shape[0] != label_count:
        raise ValueError('WTF! Devil! There are %d images and %d labels. Go fix yourself!' %
                         (images.shape[0], label_count))
    #TODO: make this either dynamic or more easily changed
    rows = 100
    cols = 100
    depth = 3

    filename = os.path.join(name + '.tfrecords')
    print('Writing', filename)
    writer = tf.python_io.TFRecordWriter(filename)
    for index in range(label_count):
        image_raw = images[index].tostring()
        example = tf.train.Example(features=tf.train.Features(feature={
            'height': _int64_feature(rows),
            'width': _int64_feature(cols),
            'depth': _int64_feature(depth),
            'label': _int64_feature(int(labels[index])),
            'image_raw': _bytes_feature(image_raw)}))
        writer.write(example.SerializeToString())

In [12]:
path, labels = read_qfile('queue.txt')
print(path[:5])
print(labels[:5])

['/home/mcamp/Documents/GarageImagesResized/open/1open3160.jpg', '/home/mcamp/Documents/GarageImagesResized/open/1open5530.jpg', '/home/mcamp/Documents/GarageImagesResized/open/1open4681.jpg', '/home/mcamp/Documents/GarageImagesResized/open/1open3971.jpg', '/home/mcamp/Documents/GarageImagesResized/open/1open2312.jpg']
['open', 'open', 'open', 'open', 'open']


In [13]:
X_train, X_test, y_train, y_test = train_test_split(
    path, labels, test_size=0.15)

label_index = ['open', 'closed'] #this is so we have a consistent labeling
X_train, y_train = read_images(pathlist=X_train)
print('Images:\n',X_train[:5])
X_test, y_test = read_images(pathlist=X_test)
print('Train_Size:', X_train.shape)
print('Test Size:', X_test.shape)
convert_to_TF(X_train, y_train, 'garage_door100_TRAIN')
convert_to_TF(X_test, y_test, 'garage_door100_TEST')

Labels:
 [0 1 0 1 1]
Image Shape: (10458, 30000)
Images:
 [[ 27  28  32 ...,  42  37  69]
 [ 32  32  32 ...,  42  36  64]
 [193 139 129 ...,   4   0   1]
 [196 145  98 ...,   4   0   1]
 [198 145 111 ...,   4   0   1]]
Image Shape: (1846, 30000)
Train_Size: (10458, 30000)
Test Size: (1846, 30000)
There are 10458 images in this dataset.
Writing garage_door100_TRAIN.tfrecords
There are 1846 images in this dataset.
Writing garage_door100_TEST.tfrecords


# Make Tensorflow ConvNet

In [18]:
n_classes = 2 # classes: open, closed
batch_size = 700 # play with this if you don't have much VRAM
#x is a placeholder for the images, so make it the size of a flattened image. 
#In my case it is 100x100x3 = 30000
x = tf.placeholder('float', [None, 30000], name='x-input')
y = tf.placeholder('float', name='y-input')

keep_rate = 0.8 # drop out threshold
keep_prob = tf.placeholder(tf.float32)
'''I found it easier to put the number of filters up front 
as it meant fewer numbers to change in the code.'''
CFilter1 = 16 
CFilter2 = 32
FCFilter = 720

# where is my data located
TRAIN_FILE = 'garage_door100_TRAIN.tfrecords'
VALIDATION_FILE = 'garage_door100_TEST.tfrecords'
train_dir = './'

The following two functions are simply used to import the TFRecords data, and put it into a queue on the graph. The functions out put Tensors that can not be directly fed into a feed_dict. This means they need to be run() or eval() first. One problem I ran into here was that only one place on the internet (so it seemed) said that you need to initialize local variables in order to get them to work. Long story short, I was making the data queue and it wasn't loading any data because it wasn't initialized. 

In [19]:
def read_and_decode(filename_queue):
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        #if you want to import the other variables from the 
        #TFRecords you can, but they aren't needed
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image.set_shape([30000])
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    label = tf.cast(features['label'], tf.int32)
    #Note: this returns Tensors not numpy arrays or lists...
    return image, label

In [20]:
def inputs(train_dir, train, batch_size, num_epochs, one_hot_labels=False):

    if not num_epochs: num_epochs = None
    filename = os.path.join(train_dir,
                            TRAIN_FILE if train else VALIDATION_FILE)

    with tf.name_scope('input'):
        filename_queue = tf.train.string_input_producer(
            [filename], num_epochs=num_epochs)

        image, label = read_and_decode(filename_queue)

        if one_hot_labels:
            label = tf.one_hot(label, 2, dtype=tf.int32)

        example_batch, label_batch = tf.train.shuffle_batch(
            [image, label], batch_size=batch_size, num_threads=1,
            capacity=1000, enqueue_many=False,
            # Ensures a minimum amount of shuffling of examples.
            min_after_dequeue=100)

    return example_batch, label_batch

# Make ConvNet

In [21]:
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')
#strides = [batch, x, y, depth] for some reason depth needs to be 1 even for color images

def maxpool2d(x):
    #                        size of window         movement of window
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


'''the weights format is: 
filter size, filter size, input size (3 here for RGB color), the number of filters
the number of filters is independent of the filter size. This was a major misconception I had
for a long time, and caused me to be lost. Think of the number of filters as the number of hidden nuerons 
in an MLP. The W_fc layer is 25 * 25 because that what size the image was reduced too by the convolution. 
The filter size must be able to fit accross the image space a whole number of times. 
The equation is  ((W−F+2P)/S)+1 with P =(F−1)/2. See http://cs231n.github.io/convolutional-networks/ for a 
detailed explaination of ConvNets.
'''
def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(tf.random_normal([5, 5, 3, CFilter1])),
               'W_conv2': tf.Variable(tf.random_normal([5, 5, CFilter1, CFilter2])),
               'W_fc': tf.Variable(tf.random_normal([25 * 25 * CFilter2, FCFilter])),
               'out': tf.Variable(tf.random_normal([FCFilter, n_classes]))}

    biases = {'b_conv1': tf.Variable(tf.random_normal([CFilter1])),
              'b_conv2': tf.Variable(tf.random_normal([CFilter2])),
              'b_fc': tf.Variable(tf.random_normal([FCFilter])),
              'out': tf.Variable(tf.random_normal([n_classes]))}

    x = tf.reshape(x, shape=[-1, 100,100, 3], name='X')

    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'], name='conv1')
    conv1 = maxpool2d(conv1)

    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'], name='conv2')
    conv2 = maxpool2d(conv2)

    fc = tf.reshape(conv2, [-1, 25 * 25 * CFilter2])

    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'], name='FC')
    fc = tf.nn.dropout(fc, keep_rate)

    output = tf.matmul(fc, weights['out']) + biases['out']

    return output

# Train ConvNet

In [22]:
def train_neural_network():
    prediction = convolutional_neural_network(x)
    # prediction = mlp(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    hm_epochs = 100
    with tf.Session() as sess:
        example_batch, label_batch = inputs(train_dir, True, batch_size, hm_epochs, one_hot_labels=True)
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter('./train', sess.graph)
        test_writer = tf.summary.FileWriter('./test')
        testx, testy = inputs(train_dir, False, 100, 1, one_hot_labels=True)
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        for i in range(hm_epochs):

            epoch_loss = 0
            example, label = sess.run([example_batch, label_batch])


            _, c = sess.run([optimizer, cost], feed_dict={x: example, y: label})
            epoch_loss += c
            print(epoch_loss)
            if i % 10 == 0:

                correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
                accuracy = tf.reduce_mean(tf.cast(correct, 'float'))


                X, Y = sess.run([testx, testy])
                print('Accuracy:', accuracy.eval({x: X, y: Y}))
                summary, acc = sess.run([merged, accuracy], feed_dict={x: X, y: Y})
                test_writer.add_summary(summary, i)
            #TODO: Add/Fix Tensorboard
        coord.request_stop()
        coord.join(threads)

In [23]:
train_neural_network()

107817.53125
Accuracy: 0.56
32550.6953125
79816.03125
37529.1484375
29496.5859375
37192.5703125
34762.0234375
40861.1875
50435.8984375
49480.5546875
55593.1523438
Accuracy: 0.44
34304.8945312
29300.6289062
22512.359375
15767.9179688
37853.171875
35118.0585938
47607.046875
53573.140625
43093.9726562
47579.09375
Accuracy: 0.52
56849.1796875
56837.0664062
31376.8359375
45445.6875
75650.7421875
41951.9140625
30513.9921875
22845.3847656
22988.4003906
29384.6835938
Accuracy: 0.49
57064.9609375
27504.78125
49801.9765625
57483.78125
46560.125
66907.796875
71388.3671875
54984.8789062
32552.2070312
21879.5820312
Accuracy: 0.53
21122.1425781
24958.796875
26096.1113281
39626.7578125
59528.3945312
66790.4921875
52696.6132812
33177.734375
32352.296875
31832.1132812
Accuracy: 0.49
55996.9414062
46617.3203125
26349.6054688
38451.5820312
57167.5390625
81941.4375
70855.4609375
43071.703125
31429.3730469
23518.5917969
Accuracy: 0.51
20330.1054688
19476.0117188
17373.6328125
19110.9394531
22002.0566406
41