This project is from one of my assignments for STAT5242 Advanced Machine Learning. The task is to use a small collection of photos (500 to be exact) to construct a flower-image classifier capable of discerning differences between daisies, roses, dandelions, sunflowers, and tulips. <br>
My approach is to leverage what a pre-trained convolutional neural network has already learned about important image features from the ImageNet dataset. I remove the last layer, fix the weights of the remaining layers, use what remains as a black-box function that transforms images into derived feature vectors, and finally fit a new classifier on those derived feature vectors.

In [1]:
import tensorflow as tf
import numpy as np
import os
import os.path
import transfer_learning

  return f(*args, **kwds)


In [2]:
# Directory that the tf.summary.FileWriter instances in this notebook log to.
INCEPTION_LOG_DIR = '/Users/youyang/tmp/inception_v3_log'
# exist_ok=True creates the directory only when it is missing and avoids the
# race between a separate os.path.exists() check and the makedirs() call.
os.makedirs(INCEPTION_LOG_DIR, exist_ok=True)

In [3]:
# Scan the flower photo folders and split them into training/testing lists.
# label_maps is later used (via len) as the number of classes.
training_images, testing_images, label_maps = transfer_learning.create_image_lists(
    './data/flower_photos',
    testing_percentage=10,
    max_number_images=100,
)

INFO:tensorflow:Looking for images in 'daisy'
INFO:tensorflow:Looking for images in 'dandelion'
INFO:tensorflow:Looking for images in 'roses'
INFO:tensorflow:Looking for images in 'sunflowers'
INFO:tensorflow:Looking for images in 'tulips'


In [7]:
# Build the model graph and keep handles to the tensors used below:
# the bottleneck output, the resized-image input and the softmax output.
(
    graph,
    bottleneck,
    resized_input,
    softmax,
) = transfer_learning.create_model()

In [9]:
with graph.as_default():
    # Add the JPEG decoding ops to the model graph; jpeg_data is fed with raw
    # image bytes and decoded_image is fetched (see compute_bottleneck).
    jpeg_data, decoded_image = transfer_learning.make_jpeg_decoding()

    # Write the graph definition to the log directory. Writing the graph does
    # not need a session, so none is opened here. The writer is closed right
    # away so the event file is flushed to disk and the handle is released.
    summary_writer = tf.summary.FileWriter(INCEPTION_LOG_DIR, graph)
    summary_writer.close()

In [10]:
def compute_bottleneck(session, image_data):
    """Run one image through the network and return its bottleneck output.

    Parameters
    ----------
    session: the tensorflow session used to evaluate the graph.
    image_data: the value fed to the `jpeg_data` placeholder
        (presumably the raw JPEG bytes of one image -- confirm with caller).

    Returns
    -------
    The evaluated value of the `bottleneck` tensor for this image.
    """
    # Step 1: decode the JPEG into the image format the network expects.
    decoded = session.run(decoded_image, feed_dict={jpeg_data: image_data})
    # Step 2: feed the decoded image into the network and read the bottleneck.
    return session.run(bottleneck, feed_dict={resized_input: decoded})

In [11]:
# Compute the bottleneck for every training image inside a fresh session on
# the model graph, handing compute_bottleneck to the caching helper.
with graph.as_default(), tf.Session() as session:
    transfer_learning.cache_bottlenecks(compute_bottleneck, session, training_images)

Saved 1/456 bottlenecks
Saved 21/456 bottlenecks
Saved 41/456 bottlenecks
Saved 61/456 bottlenecks
Saved 81/456 bottlenecks
Saved 101/456 bottlenecks
Saved 121/456 bottlenecks
Saved 141/456 bottlenecks
Saved 161/456 bottlenecks
Saved 181/456 bottlenecks
Saved 201/456 bottlenecks
Saved 221/456 bottlenecks
Saved 241/456 bottlenecks
Saved 261/456 bottlenecks
Saved 281/456 bottlenecks
Saved 301/456 bottlenecks
Saved 321/456 bottlenecks
Saved 341/456 bottlenecks
Saved 361/456 bottlenecks
Saved 381/456 bottlenecks
Saved 401/456 bottlenecks
Saved 421/456 bottlenecks
Saved 441/456 bottlenecks
Done computing bottlenecks!


In [12]:
# Build the training set from the training images. The result is indexed
# with the keys 'bottlenecks' and 'labels' in execute_train_step.
training_data_set = transfer_learning.create_training_dataset(training_images)

In [13]:
def make_final_layers(bottleneck_tensor, num_classes):
    """Attach a new linear classification layer on top of the bottleneck.

    Parameters
    ----------
    bottleneck_tensor: the bottleneck output tensor of the network; its
        second dimension must be statically known.
    num_classes: the number of output classes of the new classifier.

    Returns
    -------
    A tuple (bottleneck_input, label_input, logits, train_step, loss_summary):
    the bottleneck placeholder, the label placeholder, the logits tensor,
    the op performing one gradient-descent step, and the loss summary op.
    """
    # Use the tensor passed in (not a module-level global) so the function
    # works with whatever bottleneck tensor the caller supplies.
    bottleneck_tensor_size = int(bottleneck_tensor.shape[1])

    with tf.variable_scope('input'):
        # Input for the bottleneck: defaults to the network's own bottleneck
        # output, but precomputed bottleneck values can be fed in directly.
        bottleneck_input = tf.placeholder_with_default(
            bottleneck_tensor,
            [None, bottleneck_tensor_size],
            'bottleneck_input')

        # Input for the label: integer class index, 0 to num_classes - 1,
        # as required by the sparse softmax cross entropy loss below.
        label_input = tf.placeholder(tf.int64, [None], name='label_input')

    # Linear layer (weights + biases, no activation) producing the logits
    logits = tf.layers.dense(bottleneck_input, num_classes)
    # Cross entropy loss between integer labels and logits
    loss = tf.losses.sparse_softmax_cross_entropy(labels=label_input, logits=logits)
    # Summary so the loss can be tracked in the event log
    loss_summary = tf.summary.scalar('cross_entropy', loss)
    # Plain gradient descent with learning rate 0.1
    optimizer = tf.train.GradientDescentOptimizer(0.1)
    # Op which performs a single training step when run
    train_step = optimizer.minimize(loss)

    return bottleneck_input, label_input, logits, train_step, loss_summary


In [14]:
def compute_accuracy(labels, logits):
    """Compute the accuracy for the predicted output.

    Parameters
    ----------
    labels: The true labels as integer class indices (not one-hot encoded),
        one per example.
    logits: The unnormalized class scores, one row per example.

    Returns
    -------
    A tuple (accuracy, accuracy_summary): a scalar tensor holding the
    proportion of correctly classified examples, and its summary op.
    """
    with tf.name_scope('accuracy'):
        # Predicted class is the index of the largest logit (int64).
        predicted_output = tf.argmax(logits, 1, name='pred_class')

        # Compare against the integer labels directly. This avoids the
        # one_hot/argmax round trip with a hard-coded depth of 5, which only
        # worked for five classes and silently mapped out-of-range labels
        # to class 0.
        true_label = tf.cast(labels, tf.int64, name='true_class')
        accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted_output, true_label), tf.float32))

    accuracy_summary = tf.summary.scalar('accuracy_summary', accuracy)

    return accuracy, accuracy_summary

In [15]:
with graph.as_default():
    # New classifier head on top of the bottleneck, one output per label.
    (
        bottleneck_input,
        label_input,
        logits,
        train_step,
        loss_summary,
    ) = make_final_layers(bottleneck, num_classes=len(label_maps))
    # Accuracy of the new head against the fed labels.
    accuracy, accuracy_summary = compute_accuracy(labels=label_input, logits=logits)
    # Single op that evaluates both summaries at once.
    summary_op = tf.summary.merge([loss_summary, accuracy_summary])

In [16]:
def execute_train_step(session: tf.Session, summary_writer: tf.summary.FileWriter, current_step: int):
    """Run one training step on the full training set and log its summary.

    Parameters
    ----------
    session: the tensorflow session to use to run the training step.
    summary_writer: the summary file writer the step's summary is added to.
    current_step: the current step count (starting from zero)
    """
    # Every step uses the whole cached training set as a single batch.
    feed = {
        bottleneck_input: training_data_set['bottlenecks'],
        label_input: training_data_set['labels'],
    }
    _, step_accuracy, step_summary = session.run(
        (train_step, accuracy, summary_op), feed_dict=feed)

    summary_writer.add_summary(step_summary, current_step)

    # Only report progress on every tenth step.
    if current_step % 10 != 0:
        return
    print('Accuracy at step {0} is {1}'.format(current_step, step_accuracy))

In [17]:
def evaluate_images(session: tf.Session, images_jpeg_data: [bytes], labels: [int]):
    """Evaluate the accuracy of the model on the given images.

    Each image is decoded and scored on its own; the per-image accuracies
    are then averaged.

    Parameters
    ----------
    session: the tensorflow session to use to run the evaluation step.
    images_jpeg_data: a list with one element per image, each holding the
        jpeg-encoded data for that image.
    labels: a list of integers, one per image, holding the label of the
        corresponding image.

    Returns
    -------
    The mean of the per-image accuracies (proportion of correctly
    classified instances, between 0 and 1).
    """
    def _score(jpeg, label):
        # Decode the jpeg first, then run the full network on the pixels.
        pixels = session.run(decoded_image, feed_dict={jpeg_data: jpeg})
        return session.run(accuracy, feed_dict={resized_input: pixels, label_input: [label]})

    per_image = [_score(jpeg, label) for label, jpeg in zip(labels, images_jpeg_data)]

    return np.mean(per_image)

In [18]:
# Train the new classifier for 100 full-batch steps, then measure accuracy
# on the held-out testing images.
with graph.as_default():
    with tf.Session() as session:
        print('------------- Starting training ----------------')
        session.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter(os.path.join(INCEPTION_LOG_DIR, 'retrained'), graph)
        try:
            for i in range(100):
                execute_train_step(session, summary_writer, i)
        finally:
            # Close (and flush) the event file even if a training step fails.
            summary_writer.close()
        print('------------- Training done! -------------------')
        print('---------- Loading testing data ----------------')
        tlabels, timages = transfer_learning.get_testing_data(testing_images)
        print('----------- Evaluating on testing --------------')

        eval_accuracy = evaluate_images(session, timages, tlabels)
        print('Evaluation accuracy was: {0}'.format(eval_accuracy))

------------- Starting training ----------------
Accuracy at step 0 is 0.2017543911933899
Accuracy at step 10 is 0.5197368264198303
Accuracy at step 20 is 0.7039473652839661
Accuracy at step 30 is 0.9320175647735596
Accuracy at step 40 is 0.9451754093170166
Accuracy at step 50 is 0.9583333134651184
Accuracy at step 60 is 0.9758771657943726
Accuracy at step 70 is 0.9758771657943726
Accuracy at step 80 is 0.9824561476707458
Accuracy at step 90 is 0.9868420958518982
------------- Training done! -------------------
---------- Loading testing data ----------------
----------- Evaluating on testing --------------
Evaluation accuracy was: 0.9090909361839294
