In [54]:
%load_ext autoreload
%autoreload 2

# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import re
import sys
import time
import math
import numpy as np
from datetime import datetime
import tensorflow as tf

sys.path.append(os.path.join(os.getcwd(), '..', 'utils'))
from utils import *
from tf_utils import *
import cifar10_loader

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


This TensorFlow tutorial is written based on https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10, which is the tutorial for the image classification task on CIFAR-10 dataset.

In [2]:
# Basic model parameters.
batch_size = 128
data_dir = 'cifar10_data'
data_url = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'

# Global constants describing the CIFAR-10 data set.
IMAGE_SIZE = cifar10_loader.IMAGE_SIZE
NUM_CLASSES = cifar10_loader.NUM_CLASSES
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = cifar10_loader.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = cifar10_loader.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL

# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1       # Initial learning rate.

# If a model is trained with multiple GPUs, prefix all Op names with tower_name
# to differentiate the operations. Note that this prefix is removed from the
# names of the summaries when visualizing a model.
TOWER_NAME = 'tower'

### Download the CIFAR-10 dataset

In [20]:
maybe_download_and_extract(data_url, data_dir, 'cifar-10-batches-bin')

>> Downloading cifar-10-binary.tar.gz 100.0%('Successfully downloaded', 'cifar-10-binary.tar.gz', 170052171, 'bytes.')


### Get variable initialization function

In [3]:
initializer = get_initializer('normal', **{'stddev':0.01})

### Functions for constructing model (graph)

In [7]:
def build_inputs(data_dir, eval_data=False):
    """Construct input for CIFAR evaluation using the Reader ops.
    Args:
        data_dir: path to directory including data
        eval_data: bool, indicating if one should use the train or eval data set.
    Returns:
        images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
        labels: Labels. 1D tensor of [batch_size] size.
    Raises:
        ValueError: If no data_dir
    """
    if not data_dir:
        raise ValueError('Please supply a data_dir')
    data_dir = os.path.join(data_dir, 'cifar-10-batches-bin')
    images, labels = cifar10_loader.inputs(eval_data=eval_data,
                                        data_dir=data_dir,
                                        batch_size=batch_size)
    
    #images, labels = cifar10_loader.distorted_inputs(data_dir=data_dir,
    #                                              batch_size=batch_size)
    
    return images, labels


def build_model(images):
    """Build the CIFAR-10 model.
    Args:
        images: Images returned from distorted_inputs() or inputs().
    Returns:
        Logits.
    """
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training runs.
    # If we only ran this model on a single GPU, we could simplify this function
    # by replacing all instances of tf.get_variable() with tf.Variable().
    
    # conv1
    with tf.variable_scope('conv1') as scope:
        conv1 = get_conv2D_layer(images, 3, 64, 5, 1, initializer, 0.0, 'relu', scope, True)

    # pool1
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                         padding='SAME', name='pool1')
    # norm1
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm1')

    # conv2
    with tf.variable_scope('conv2') as scope:
        conv2 = get_conv2D_layer(norm1, 64, 64, 5, 1, initializer, 0.0, 'relu', scope, True)

    # norm2
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm2')
    # pool2
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    # fc1
    with tf.variable_scope('fc1') as scope:
        fc1 = get_fully_connected_layer(pool2, -1, 384, initializer, 0.004, 'relu', 
                                        True, batch_size, scope, True)

    # fc2
    with tf.variable_scope('fc2') as scope:
        fc2 = get_fully_connected_layer(fc1, 384, 192, initializer, 0.004, 'relu', 
                                        False, -1, scope, True)

    # linear layer(WX + b),
    # We don't apply softmax here because
    # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
    # and performs the softmax internally for efficiency.
    with tf.variable_scope('softmax_linear') as scope:
        softmax_linear = get_fully_connected_layer(fc2, 192, NUM_CLASSES, initializer, 
                                                   0.0, 'None', False, -1, scope, True)

    return softmax_linear


def build_loss(logits, labels):
    """Add L2Loss to all the trainable variables.
    Add summary for "Loss" and "Loss/avg".
    Args:
        logits: Logits from inference().
        labels: Labels from distorted_inputs or inputs(). 1-D tensor
            of shape [batch_size]
    Returns:
        Loss tensor of type float.
    """
    # Calculate the average cross entropy loss across the batch.
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    # The total loss is defined as the cross entropy loss plus all of the weight
    # decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')


def _add_loss_summaries(total_loss):
    """Add summaries for losses in CIFAR-10 model.
    Generates moving average for all losses and associated summaries for
    visualizing the performance of the network.
    Args:
        total_loss: Total loss from loss().
    Returns:
        loss_averages_op: op for generating moving averages of losses.
    """
    # Compute the moving average of all individual losses and the total loss.
    loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
    losses = tf.get_collection('losses')
    loss_averages_op = loss_averages.apply(losses + [total_loss])

    # Attach a scalar summary to all individual losses and the total loss; do the
    # same for the averaged version of the losses.
    for l in losses + [total_loss]:
        # Name each loss as '(raw)' and name the moving average version of the loss
        # as the original loss name.
        tf.summary.scalar(l.op.name + ' (raw)', l)
        tf.summary.scalar(l.op.name, loss_averages.average(l))

    return loss_averages_op


def build_train_operation(total_loss, global_step):
    """Train CIFAR-10 model.
    Create an optimizer and apply to all trainable variables. Add moving
    average for all trainable variables.
    Args:
        total_loss: Total loss from loss().
        global_step: Integer Variable counting the number of training steps
            processed.
    Returns:
        train_op: op for training.
    """
    # Variables that affect learning rate.
    num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / batch_size
    decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                  global_step,
                                  decay_steps,
                                  LEARNING_RATE_DECAY_FACTOR,
                                  staircase=True)
    tf.summary.scalar('learning_rate', lr)

    # Generate moving averages of all losses and associated summaries.
    loss_averages_op = _add_loss_summaries(total_loss)

    # Compute gradients.
    with tf.control_dependencies([loss_averages_op]):
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)

    # Apply gradients.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
        tf.summary.histogram(var.op.name, var)

    # Add histograms for gradients.
    for grad, var in grads:
        if grad is not None:
            tf.summary.histogram(var.op.name + '/gradients', grad)

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
        train_op = tf.no_op(name='train')

    return train_op

### Training the model

In [None]:
train_dir = 'cifar10_train'
max_steps = 10000
log_device_placement = False
log_frequency = 10

if tf.gfile.Exists(train_dir):
    tf.gfile.DeleteRecursively(train_dir)
else:
    tf.gfile.MakeDirs(train_dir)

In [21]:
"""Train CIFAR-10 for a number of steps."""
with tf.Graph().as_default():
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Get images and labels for CIFAR-10.
    # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
    # GPU and resulting in a slow down.
    with tf.device('/cpu:0'):
        images, labels = build_inputs(data_dir, False)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = build_model(images)

    # Calculate loss.
    loss = build_loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = build_train_operation(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
        """Logs loss and runtime."""

        def begin(self):
            self._step = -1
            self._start_time = time.time()

        def before_run(self, run_context):
            self._step += 1
            return tf.train.SessionRunArgs(loss)  # Asks for loss value.

        def after_run(self, run_context, run_values):
            if self._step % log_frequency == 0:
                current_time = time.time()
                duration = current_time - self._start_time
                self._start_time = current_time

                loss_value = run_values.results
                examples_per_sec = log_frequency * batch_size / duration
                sec_per_batch = float(duration / log_frequency)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print (format_str % (datetime.now(), self._step, loss_value,
                    examples_per_sec, sec_per_batch))

    with tf.train.MonitoredTrainingSession(checkpoint_dir=train_dir,
            hooks=[tf.train.StopAtStepHook(last_step=max_steps),
            tf.train.NanTensorHook(loss), _LoggerHook()],
            config=tf.ConfigProto(log_device_placement=log_device_placement)) as mon_sess:
        while not mon_sess.should_stop():
            mon_sess.run(train_op)


INFO:tensorflow:Summary name conv1/weight_loss (raw) is illegal; using conv1/weight_loss__raw_ instead.


INFO:tensorflow:Summary name conv1/weight_loss (raw) is illegal; using conv1/weight_loss__raw_ instead.


INFO:tensorflow:Summary name conv2/weight_loss (raw) is illegal; using conv2/weight_loss__raw_ instead.


INFO:tensorflow:Summary name conv2/weight_loss (raw) is illegal; using conv2/weight_loss__raw_ instead.


INFO:tensorflow:Summary name fc1/weight_loss (raw) is illegal; using fc1/weight_loss__raw_ instead.


INFO:tensorflow:Summary name fc1/weight_loss (raw) is illegal; using fc1/weight_loss__raw_ instead.


INFO:tensorflow:Summary name fc2/weight_loss (raw) is illegal; using fc2/weight_loss__raw_ instead.


INFO:tensorflow:Summary name fc2/weight_loss (raw) is illegal; using fc2/weight_loss__raw_ instead.


INFO:tensorflow:Summary name softmax_linear/weight_loss (raw) is illegal; using softmax_linear/weight_loss__raw_ instead.


INFO:tensorflow:Summary name softmax_linear/weight_loss (raw) is illegal; using softmax_linear/weight_loss__raw_ instead.


INFO:tensorflow:Summary name cross_entropy (raw) is illegal; using cross_entropy__raw_ instead.


INFO:tensorflow:Summary name cross_entropy (raw) is illegal; using cross_entropy__raw_ instead.


INFO:tensorflow:Summary name total_loss (raw) is illegal; using total_loss__raw_ instead.


INFO:tensorflow:Summary name total_loss (raw) is illegal; using total_loss__raw_ instead.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Saving checkpoints for 1 into cifar10_train/model.ckpt.


INFO:tensorflow:Saving checkpoints for 1 into cifar10_train/model.ckpt.


2017-06-16 02:16:04.283500: step 0, loss = 2.45 (3101.3 examples/sec; 0.041 sec/batch)
INFO:tensorflow:global_step/sec: 22.3354


INFO:tensorflow:global_step/sec: 22.3354


2017-06-16 02:16:07.876035: step 100, loss = 2.44 (3562.9 examples/sec; 0.036 sec/batch)
INFO:tensorflow:global_step/sec: 29.5187


INFO:tensorflow:global_step/sec: 29.5187


2017-06-16 02:16:11.253180: step 200, loss = 2.43 (3790.2 examples/sec; 0.034 sec/batch)
INFO:tensorflow:global_step/sec: 30.0151


INFO:tensorflow:global_step/sec: 30.0151


2017-06-16 02:16:14.583938: step 300, loss = 2.42 (3843.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0202


INFO:tensorflow:global_step/sec: 30.0202


2017-06-16 02:16:17.913611: step 400, loss = 2.41 (3844.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0305


INFO:tensorflow:global_step/sec: 30.0305


2017-06-16 02:16:21.244854: step 500, loss = 2.40 (3842.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 29.9986


INFO:tensorflow:global_step/sec: 29.9986


2017-06-16 02:16:24.578071: step 600, loss = 2.39 (3840.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0166


INFO:tensorflow:global_step/sec: 30.0166


2017-06-16 02:16:27.910379: step 700, loss = 2.39 (3841.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0252


INFO:tensorflow:global_step/sec: 30.0252


2017-06-16 02:16:31.242549: step 800, loss = 2.38 (3841.3 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0166


INFO:tensorflow:global_step/sec: 30.0166


2017-06-16 02:16:34.570937: step 900, loss = 2.38 (3845.7 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1511


INFO:tensorflow:global_step/sec: 30.1511


2017-06-16 02:16:37.887165: step 1000, loss = 2.37 (3859.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1269


INFO:tensorflow:global_step/sec: 30.1269


2017-06-16 02:16:41.208443: step 1100, loss = 2.36 (3854.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 29.9616


INFO:tensorflow:global_step/sec: 29.9616


2017-06-16 02:16:44.545770: step 1200, loss = 2.36 (3835.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0665


INFO:tensorflow:global_step/sec: 30.0665


2017-06-16 02:16:47.870003: step 1300, loss = 2.36 (3850.5 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.197


INFO:tensorflow:global_step/sec: 30.197


2017-06-16 02:16:51.182999: step 1400, loss = 2.34 (3863.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1902


INFO:tensorflow:global_step/sec: 30.1902


2017-06-16 02:16:54.496309: step 1500, loss = 2.08 (3863.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0134


INFO:tensorflow:global_step/sec: 30.0134


2017-06-16 02:16:57.830224: step 1600, loss = 1.97 (3839.3 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 29.9214


INFO:tensorflow:global_step/sec: 29.9214


2017-06-16 02:17:01.170653: step 1700, loss = 1.96 (3831.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0973


INFO:tensorflow:global_step/sec: 30.0973


2017-06-16 02:17:04.491874: step 1800, loss = 1.97 (3854.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1777


INFO:tensorflow:global_step/sec: 30.1777


2017-06-16 02:17:07.807444: step 1900, loss = 1.92 (3860.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1145


INFO:tensorflow:global_step/sec: 30.1145


2017-06-16 02:17:11.128983: step 2000, loss = 1.96 (3853.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 29.9845


INFO:tensorflow:global_step/sec: 29.9845


2017-06-16 02:17:14.463250: step 2100, loss = 1.71 (3838.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0257


INFO:tensorflow:global_step/sec: 30.0257


2017-06-16 02:17:17.794754: step 2200, loss = 1.64 (3842.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 29.9335


INFO:tensorflow:global_step/sec: 29.9335


2017-06-16 02:17:21.135140: step 2300, loss = 1.54 (3831.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0907


INFO:tensorflow:global_step/sec: 30.0907


2017-06-16 02:17:24.457405: step 2400, loss = 1.52 (3852.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.039


INFO:tensorflow:global_step/sec: 30.039


2017-06-16 02:17:27.787355: step 2500, loss = 1.34 (3843.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1476


INFO:tensorflow:global_step/sec: 30.1476


2017-06-16 02:17:31.103936: step 2600, loss = 1.34 (3859.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0907


INFO:tensorflow:global_step/sec: 30.0907


2017-06-16 02:17:34.426686: step 2700, loss = 1.54 (3852.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1244


INFO:tensorflow:global_step/sec: 30.1244


2017-06-16 02:17:37.747162: step 2800, loss = 1.37 (3854.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1313


INFO:tensorflow:global_step/sec: 30.1313


2017-06-16 02:17:41.064529: step 2900, loss = 1.24 (3858.5 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2626


INFO:tensorflow:global_step/sec: 30.2626


2017-06-16 02:17:44.369785: step 3000, loss = 1.11 (3872.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3552


INFO:tensorflow:global_step/sec: 30.3552


2017-06-16 02:17:47.663736: step 3100, loss = 1.37 (3885.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1879


INFO:tensorflow:global_step/sec: 30.1879


2017-06-16 02:17:50.976258: step 3200, loss = 1.31 (3864.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2295


INFO:tensorflow:global_step/sec: 30.2295


2017-06-16 02:17:54.284409: step 3300, loss = 1.12 (3869.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0435


INFO:tensorflow:global_step/sec: 30.0435


2017-06-16 02:17:57.613291: step 3400, loss = 1.19 (3845.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.0096


INFO:tensorflow:global_step/sec: 30.0096


2017-06-16 02:18:00.946104: step 3500, loss = 1.27 (3840.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1738


INFO:tensorflow:global_step/sec: 30.1738


2017-06-16 02:18:04.259411: step 3600, loss = 1.06 (3863.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1236


INFO:tensorflow:global_step/sec: 30.1236


2017-06-16 02:18:07.579475: step 3700, loss = 1.01 (3855.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2143


INFO:tensorflow:global_step/sec: 30.2143


2017-06-16 02:18:10.888304: step 3800, loss = 0.98 (3868.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.086


INFO:tensorflow:global_step/sec: 30.086


2017-06-16 02:18:14.213827: step 3900, loss = 0.86 (3849.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.187


INFO:tensorflow:global_step/sec: 30.187


2017-06-16 02:18:17.525346: step 4000, loss = 1.16 (3865.3 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2032


INFO:tensorflow:global_step/sec: 30.2032


2017-06-16 02:18:20.835731: step 4100, loss = 1.01 (3866.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3076


INFO:tensorflow:global_step/sec: 30.3076


2017-06-16 02:18:24.135420: step 4200, loss = 1.02 (3879.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2922


INFO:tensorflow:global_step/sec: 30.2922


2017-06-16 02:18:27.437886: step 4300, loss = 0.82 (3875.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1202


INFO:tensorflow:global_step/sec: 30.1202


2017-06-16 02:18:30.756615: step 4400, loss = 0.95 (3856.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2465


INFO:tensorflow:global_step/sec: 30.2465


2017-06-16 02:18:34.062787: step 4500, loss = 0.82 (3871.5 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.198


INFO:tensorflow:global_step/sec: 30.198


2017-06-16 02:18:37.375651: step 4600, loss = 0.85 (3863.7 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.255


INFO:tensorflow:global_step/sec: 30.255


2017-06-16 02:18:40.680451: step 4700, loss = 0.67 (3873.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1266


INFO:tensorflow:global_step/sec: 30.1266


2017-06-16 02:18:43.999941: step 4800, loss = 0.71 (3856.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.197


INFO:tensorflow:global_step/sec: 30.197


2017-06-16 02:18:47.311649: step 4900, loss = 0.84 (3865.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3316


INFO:tensorflow:global_step/sec: 30.3316


2017-06-16 02:18:50.607525: step 5000, loss = 0.89 (3883.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3317


INFO:tensorflow:global_step/sec: 30.3317


2017-06-16 02:18:53.905142: step 5100, loss = 1.03 (3881.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3106


INFO:tensorflow:global_step/sec: 30.3106


2017-06-16 02:18:57.204483: step 5200, loss = 0.89 (3879.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2082


INFO:tensorflow:global_step/sec: 30.2082


2017-06-16 02:19:00.514252: step 5300, loss = 0.62 (3867.3 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1914


INFO:tensorflow:global_step/sec: 30.1914


2017-06-16 02:19:03.826808: step 5400, loss = 0.64 (3864.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3085


INFO:tensorflow:global_step/sec: 30.3085


2017-06-16 02:19:07.125716: step 5500, loss = 0.70 (3880.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.4128


INFO:tensorflow:global_step/sec: 30.4128


2017-06-16 02:19:10.413881: step 5600, loss = 0.65 (3892.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.363


INFO:tensorflow:global_step/sec: 30.363


2017-06-16 02:19:13.707703: step 5700, loss = 0.84 (3886.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3134


INFO:tensorflow:global_step/sec: 30.3134


2017-06-16 02:19:17.006173: step 5800, loss = 0.62 (3880.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.4263


INFO:tensorflow:global_step/sec: 30.4263


2017-06-16 02:19:20.291954: step 5900, loss = 0.49 (3895.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.4162


INFO:tensorflow:global_step/sec: 30.4162


2017-06-16 02:19:23.581072: step 6000, loss = 0.68 (3891.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2336


INFO:tensorflow:global_step/sec: 30.2336


2017-06-16 02:19:26.888901: step 6100, loss = 0.84 (3869.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1958


INFO:tensorflow:global_step/sec: 30.1958


2017-06-16 02:19:30.198169: step 6200, loss = 0.90 (3867.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.4488


INFO:tensorflow:global_step/sec: 30.4488


2017-06-16 02:19:33.482299: step 6300, loss = 0.61 (3897.5 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3553


INFO:tensorflow:global_step/sec: 30.3553


2017-06-16 02:19:36.778589: step 6400, loss = 0.62 (3883.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3387


INFO:tensorflow:global_step/sec: 30.3387


2017-06-16 02:19:40.075861: step 6500, loss = 0.77 (3882.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.208


INFO:tensorflow:global_step/sec: 30.208


2017-06-16 02:19:43.387980: step 6600, loss = 0.92 (3864.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.129


INFO:tensorflow:global_step/sec: 30.129


2017-06-16 02:19:46.704839: step 6700, loss = 0.62 (3859.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1196


INFO:tensorflow:global_step/sec: 30.1196


2017-06-16 02:19:50.027795: step 6800, loss = 0.68 (3852.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.22


INFO:tensorflow:global_step/sec: 30.22


2017-06-16 02:19:53.334093: step 6900, loss = 0.75 (3871.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2092


INFO:tensorflow:global_step/sec: 30.2092


2017-06-16 02:19:56.644067: step 7000, loss = 0.64 (3867.1 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1917


INFO:tensorflow:global_step/sec: 30.1917


2017-06-16 02:19:59.956717: step 7100, loss = 0.44 (3864.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1939


INFO:tensorflow:global_step/sec: 30.1939


2017-06-16 02:20:03.268152: step 7200, loss = 0.53 (3865.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1431


INFO:tensorflow:global_step/sec: 30.1431


2017-06-16 02:20:06.585252: step 7300, loss = 0.62 (3858.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2035


INFO:tensorflow:global_step/sec: 30.2035


2017-06-16 02:20:09.896367: step 7400, loss = 0.67 (3865.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2218


INFO:tensorflow:global_step/sec: 30.2218


2017-06-16 02:20:13.205332: step 7500, loss = 0.59 (3868.3 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.175


INFO:tensorflow:global_step/sec: 30.175


2017-06-16 02:20:16.519963: step 7600, loss = 0.58 (3861.7 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.175


INFO:tensorflow:global_step/sec: 30.175


2017-06-16 02:20:19.834289: step 7700, loss = 0.55 (3862.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2443


INFO:tensorflow:global_step/sec: 30.2443


2017-06-16 02:20:23.139527: step 7800, loss = 0.62 (3872.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3674


INFO:tensorflow:global_step/sec: 30.3674


2017-06-16 02:20:26.432179: step 7900, loss = 0.55 (3887.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2184


INFO:tensorflow:global_step/sec: 30.2184


2017-06-16 02:20:29.741506: step 8000, loss = 0.57 (3867.9 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1389


INFO:tensorflow:global_step/sec: 30.1389


2017-06-16 02:20:33.060503: step 8100, loss = 0.57 (3856.6 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.107


INFO:tensorflow:global_step/sec: 30.107


2017-06-16 02:20:36.382116: step 8200, loss = 0.56 (3853.5 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1375


INFO:tensorflow:global_step/sec: 30.1375


2017-06-16 02:20:39.699013: step 8300, loss = 0.62 (3859.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2694


INFO:tensorflow:global_step/sec: 30.2694


2017-06-16 02:20:43.004599: step 8400, loss = 0.64 (3872.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2121


INFO:tensorflow:global_step/sec: 30.2121


2017-06-16 02:20:46.312753: step 8500, loss = 0.59 (3869.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2548


INFO:tensorflow:global_step/sec: 30.2548


2017-06-16 02:20:49.617669: step 8600, loss = 0.52 (3873.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2305


INFO:tensorflow:global_step/sec: 30.2305


2017-06-16 02:20:52.926157: step 8700, loss = 0.48 (3868.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2923


INFO:tensorflow:global_step/sec: 30.2923


2017-06-16 02:20:56.227848: step 8800, loss = 0.50 (3876.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.308


INFO:tensorflow:global_step/sec: 30.308


2017-06-16 02:20:59.527255: step 8900, loss = 0.52 (3879.5 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2471


INFO:tensorflow:global_step/sec: 30.2471


2017-06-16 02:21:02.832865: step 9000, loss = 0.56 (3872.2 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2513


INFO:tensorflow:global_step/sec: 30.2513


2017-06-16 02:21:06.138281: step 9100, loss = 0.84 (3872.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3767


INFO:tensorflow:global_step/sec: 30.3767


2017-06-16 02:21:09.430602: step 9200, loss = 0.51 (3887.8 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3741


INFO:tensorflow:global_step/sec: 30.3741


2017-06-16 02:21:12.723062: step 9300, loss = 0.55 (3887.7 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1949


INFO:tensorflow:global_step/sec: 30.1949


2017-06-16 02:21:16.036177: step 9400, loss = 0.56 (3863.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.1043


INFO:tensorflow:global_step/sec: 30.1043


2017-06-16 02:21:19.357093: step 9500, loss = 0.50 (3854.4 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2893


INFO:tensorflow:global_step/sec: 30.2893


2017-06-16 02:21:22.656933: step 9600, loss = 0.55 (3879.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3924


INFO:tensorflow:global_step/sec: 30.3924


2017-06-16 02:21:25.947381: step 9700, loss = 0.76 (3890.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.3464


INFO:tensorflow:global_step/sec: 30.3464


2017-06-16 02:21:29.242728: step 9800, loss = 0.44 (3884.3 examples/sec; 0.033 sec/batch)
INFO:tensorflow:global_step/sec: 30.2834


INFO:tensorflow:global_step/sec: 30.2834


2017-06-16 02:21:32.545947: step 9900, loss = 0.48 (3875.0 examples/sec; 0.033 sec/batch)
INFO:tensorflow:Saving checkpoints for 10000 into cifar10_train/model.ckpt.


INFO:tensorflow:Saving checkpoints for 10000 into cifar10_train/model.ckpt.


### Evaluating the model

In [49]:
eval_dir = 'cifar10_eval'
eval_data = 'test'
checkpoint_dir = 'cifar10_train'
eval_interval_secs = 1
num_examples = 10000
run_once = False

In [50]:
def eval_once(saver, summary_writer, top_k_op, summary_op):
    """Run Eval once.
    Args:
    saver: Saver.
    summary_writer: Summary writer.
    top_k_op: Top K op.
    summary_op: Summary op.
    """
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        print(ckpt)
        if ckpt and ckpt.model_checkpoint_path:
            # Restores from checkpoint
            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/cifar10_train/model.ckpt-0,
            # extract global_step from it.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        else:
            print('No checkpoint file found')
            return

        # Start the queue runners.
        coord = tf.train.Coordinator()
        try:
            threads = []
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord, daemon=True,
                                                 start=True))

            num_iter = int(math.ceil(num_examples / batch_size))
            true_count = 0  # Counts the number of correct predictions.
            total_sample_count = num_iter * batch_size
            step = 0
            while step < num_iter and not coord.should_stop():
                predictions = sess.run([top_k_op])
                true_count += np.sum(predictions)
                step += 1

            # Compute precision @ 1.
            precision = true_count / total_sample_count
            print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))

            summary = tf.Summary()
            summary.ParseFromString(sess.run(summary_op))
            summary.value.add(tag='Precision @ 1', simple_value=precision)
            summary_writer.add_summary(summary, global_step)
        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)

In [55]:
with tf.Graph().as_default() as g:
    eval_data = eval_data == 'test'
    images, labels = build_inputs(data_dir, eval_data)
    
    logits = build_model(images)
    top_k_op = tf.nn.in_top_k(logits, labels, 1)
    
    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    summary_writer = tf.summary.FileWriter(eval_dir, g)

    while True:
        eval_once(saver, summary_writer, top_k_op, summary_op)
        if run_once:
            break
        time.sleep(eval_interval_secs)

model_checkpoint_path: "cifar10_train/model.ckpt-10000"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-1"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-10000"

2017-06-16 03:01:18.671515: precision @ 1 = 0.996
model_checkpoint_path: "cifar10_train/model.ckpt-10000"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-1"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-10000"

2017-06-16 03:01:24.496865: precision @ 1 = 0.997
model_checkpoint_path: "cifar10_train/model.ckpt-10000"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-1"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-10000"

2017-06-16 03:01:30.323077: precision @ 1 = 0.997
model_checkpoint_path: "cifar10_train/model.ckpt-10000"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-1"
all_model_checkpoint_paths: "cifar10_train/model.ckpt-10000"

2017-06-16 03:01:36.167443: precision @ 1 = 0.995
model_checkpoint_path: "cifar10_train/model.ckpt-10000"
all_model_checkpoint_paths: "cifar10_train/m

INFO:tensorflow:Error reported to Coordinator: <type 'exceptions.RuntimeError'>, Attempted to use a closed Session.


KeyboardInterrupt: 