In [1]:
import tensorflow as tf
import os
from datetime import datetime
import time
import numpy as np
import segmentation_input
import segmentation_model
from six.moves import xrange

In [2]:
# ---------------------------------------------------------------------------
# Input data: both values are taken from the segmentation_input module.
# ---------------------------------------------------------------------------
PATH_TO_RECORD = os.path.join(
    segmentation_input.DIRECTORY,
    segmentation_input.TF_RECORDS,
)
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = (
    segmentation_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
)

# ---------------------------------------------------------------------------
# Optimisation schedule (consumed by train_helper / _add_loss_summaries).
# ---------------------------------------------------------------------------
NUM_EPOCHS_PER_DECAY = 10          # epochs between learning-rate decay steps
INITIAL_LEARNING_RATE = 0.1        # starting learning rate
LEARNING_RATE_DECAY_FACTOR = 0.1   # multiplier applied at each decay step
MOVING_AVERAGE_DECAY = 0.9         # decay of the exponential moving averages

# ---------------------------------------------------------------------------
# Reporting cadence of the training loop, expressed in steps.
# ---------------------------------------------------------------------------
STEPS_PER_DISPLAY = 10      # print loss / accuracy / throughput
STEPS_PER_SUMMARY = 100     # write merged summaries to the event log
STEPS_PER_CHECKPT = 1000    # save a model checkpoint

# ---------------------------------------------------------------------------
# Run-time flags.
# ---------------------------------------------------------------------------
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string(
    'train_dir', 'segmentation_train',
    'Directory where to write event logs and checkpoint.')
tf.app.flags.DEFINE_integer(
    'max_steps', 1500,
    'Number of batches to run.')
tf.app.flags.DEFINE_integer(
    'batch_size', 32,
    'Batch size.')
tf.app.flags.DEFINE_boolean(
    'log_device_placement', False,
    'Whether to log device placement.')

In [3]:
def _add_loss_summaries(total_loss):
    """Attach raw and smoothed scalar summaries to every tracked loss.

    The tracked losses are the tensors stored in the 'losses' graph
    collection followed by `total_loss`. An exponential moving average
    (decay MOVING_AVERAGE_DECAY, name 'avg') is maintained for each of them.

    Args:
        total_loss: Total loss tensor of the model.
    Returns:
        The op that updates the moving averages of the tracked losses.
    """
    ema = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, name='avg')
    tracked_losses = tf.get_collection('losses') + [total_loss]
    update_averages_op = ema.apply(tracked_losses)

    for loss_tensor in tracked_losses:
        tag = loss_tensor.op.name
        # The unsmoothed value gets the ' (raw)' suffix; the moving average
        # is published under the loss' own op name.
        tf.scalar_summary(tag + ' (raw)', loss_tensor)
        tf.scalar_summary(tag, ema.average(loss_tensor))

    return update_averages_op

def train_helper(total_loss, global_step):
    """Build the op that performs one optimisation step.

    The step applies plain gradient descent with a staircase
    exponentially-decayed learning rate, refreshes the moving averages of
    the losses and of all trainable variables, and registers scalar and
    histogram summaries along the way.

    Args:
        total_loss: Total loss tensor to minimise.
        global_step: Integer Variable counting the number of training steps
          processed; it is incremented when the gradients are applied.
    Returns:
        train_op: a no-op named 'train' that depends on the gradient update
          and on the variable moving-average update.
    """
    # Number of steps between two learning-rate drops.
    decay_steps = int(
        NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size * NUM_EPOCHS_PER_DECAY)

    learning_rate = tf.train.exponential_decay(
        INITIAL_LEARNING_RATE, global_step, decay_steps,
        LEARNING_RATE_DECAY_FACTOR, staircase=True)
    tf.scalar_summary('learning_rate', learning_rate)

    # Loss moving averages plus their scalar summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Gradient computation is gated on the loss-average update.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    with tf.control_dependencies([loss_averages_op]):
        grads_and_vars = optimizer.compute_gradients(total_loss)

    # Applying the gradients also increments global_step.
    apply_gradient_op = optimizer.apply_gradients(
        grads_and_vars, global_step=global_step)

    trainable = tf.trainable_variables()

    # Histograms of the trainable variables ...
    for variable in trainable:
        tf.histogram_summary(variable.op.name, variable)

    # ... and of every gradient that exists.
    for gradient, variable in grads_and_vars:
        if gradient is None:
            continue
        tf.histogram_summary(variable.op.name + '/gradients', gradient)

    # Moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(trainable)

    # Bundle both updates behind a single named op.
    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

In [4]:
def train():
    """Build the segmentation training graph and run the training loop.

    A fresh graph is created containing the input pipeline, the inference
    model, the loss/accuracy ops and the training op. The loop then runs
    FLAGS.max_steps training steps and, along the way:

      * prints loss, accuracy and throughput every STEPS_PER_DISPLAY steps;
      * writes merged summaries every STEPS_PER_SUMMARY steps;
      * saves a checkpoint every STEPS_PER_CHECKPT steps and on the last step.

    After the loop the graph definition is written to FLAGS.train_dir as the
    binary protobuf 'segmentation.pb'.

    The session honours FLAGS.log_device_placement. The queue-runner
    threads, the summary writer and the session are always shut down, even
    when training aborts, so nothing keeps running in the notebook kernel
    after this function returns.

    Raises:
        AssertionError: if the loss becomes NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = segmentation_input.distorted_inputs([PATH_TO_RECORD],
                                                             FLAGS.batch_size)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = segmentation_model.inference(images, is_train=True)

        # Calculate loss and accuracy.
        loss, acc = segmentation_model.loss_and_accuracy(logits, labels)

        # Op that updates the model parameters.
        train_op = train_helper(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Pass the log_device_placement flag through to the session; it was
        # previously defined but never used.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        # The coordinator lets us stop and join the input threads on exit.
        coord = tf.train.Coordinator()
        threads = []
        summary_writer = None
        try:
            sess.run(init)

            # Start the queue runners under the coordinator's control.
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                _, loss_value, acc_value = sess.run([train_op, loss, acc])
                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % STEPS_PER_DISPLAY == 0:
                    num_examples_per_step = FLAGS.batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = ('%s: step %d, loss = %.2f, accuracy = %.2f, (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print (format_str % (datetime.now(), step, loss_value, acc_value,
                            examples_per_sec, sec_per_batch))

                if step % STEPS_PER_SUMMARY == 0:
                    # NOTE(review): this is a separate sess.run call, so the
                    # summaries presumably come from a different input batch
                    # than the training step above -- confirm if that matters.
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step % STEPS_PER_CHECKPT == 0 or (step + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

            # Export the graph definition as a binary protobuf.
            tf.train.write_graph(sess.graph_def, FLAGS.train_dir, "segmentation.pb", False)
        finally:
            # Ask the input threads to stop and wait for them, then release
            # the writer and the session regardless of how joining went.
            coord.request_stop()
            try:
                coord.join(threads)
            finally:
                if summary_writer is not None:
                    summary_writer.close()
                sess.close()
        
        

In [5]:
# Always start from an empty output directory: remove whatever is already in
# FLAGS.train_dir (event logs, checkpoints), recreate it, then launch training.
train_dir = FLAGS.train_dir
if tf.gfile.Exists(train_dir):
    tf.gfile.DeleteRecursively(train_dir)
tf.gfile.MakeDirs(train_dir)
train()


Filling queue with 3198 images before starting to train. This will take a few minutes.
2016-10-10 11:48:24.053114: step 0, loss = 1.39, accuracy = 0.25, (13.9 examples/sec; 2.301 sec/batch)
2016-10-10 11:48:31.150984: step 10, loss = 1.39, accuracy = 0.26, (60.3 examples/sec; 0.531 sec/batch)
2016-10-10 11:48:36.842344: step 20, loss = 1.39, accuracy = 0.26, (49.2 examples/sec; 0.651 sec/batch)
2016-10-10 11:48:42.350301: step 30, loss = 1.39, accuracy = 0.27, (57.6 examples/sec; 0.556 sec/batch)
2016-10-10 11:48:47.987727: step 40, loss = 1.39, accuracy = 0.27, (57.0 examples/sec; 0.561 sec/batch)
2016-10-10 11:48:53.832714: step 50, loss = 1.39, accuracy = 0.28, (56.7 examples/sec; 0.564 sec/batch)
2016-10-10 11:48:59.660985: step 60, loss = 1.39, accuracy = 0.29, (55.0 examples/sec; 0.582 sec/batch)
2016-10-10 11:49:05.552546: step 70, loss = 1.39, accuracy = 0.30, (54.5 examples/sec; 0.587 sec/batch)
2016-10-10 11:49:11.381988: step 80, loss = 1.39, accuracy = 0.31, (55.6 examples/