In [1]:
import time
import tensorflow as tf

import input_data
import mnist

In [2]:
# 基础的数据

flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 2000, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.  '
                     'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', 'MNIST_data', 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
                     'for unit testing.')

In [3]:
# placeholder_inputs()函数将生成两个tf.placeholder 操作，定义传入图表中的shape
# 参数，shape 参数中包括batch_size值，后续还会将实际的训练用例传入图表。

# 在训练循环（training loop）的后续步骤中，传入的整个图像和标签数据集会被切片，
# 以符合每一个操作所设置的batch_size值，占位符操作将会填补以符合这个batch_size
# 值。然后使用feed_dict参数，将数据传入sess.run()函数。
def placeholder_inputs(batch_size):
  """Generate placeholder variables to represent the input tensors.

  These placeholders are used as inputs by the rest of the model building
  code and will be fed from the downloaded data in the .run() loop, below.

  Args:
    batch_size: The batch size will be baked into both placeholders.

  Returns:
    images_placeholder: Images placeholder.
    labels_placeholder: Labels placeholder.
  """
  # Note that the shapes of the placeholders match the shapes of the full
  # image and label tensors, except the first dimension is now batch_size
  # rather than the full size of the train or test data sets.
  images_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
                                                         mnist.IMAGE_PIXELS))
  labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))
  return images_placeholder, labels_placeholder

In [7]:
def fill_feed_dict(data_set, images_pl, labels_pl):
  """Fills the feed_dict for training the given step.

  A feed_dict takes the form of:
  feed_dict = {
      <placeholder>: <tensor of values to be passed for placeholder>,
      ....
  }

  Args:
    data_set: The set of images and labels, from input_data.read_data_sets()
    images_pl: The images placeholder, from placeholder_inputs().
    labels_pl: The labels placeholder, from placeholder_inputs().

  Returns:
    feed_dict: The feed dictionary mapping from placeholders to values.
  """
  # Create the feed_dict for the placeholders filled with the next
  # `batch size ` examples.
  images_feed, labels_feed = data_set.next_batch(FLAGS.batch_size,
                                                 FLAGS.fake_data)
  feed_dict = {
      images_pl: images_feed,
      labels_pl: labels_feed,
  }
  return feed_dict

In [8]:
def do_eval(sess,
            eval_correct,
            images_placeholder,
            labels_placeholder,
            data_set):
  """Runs one evaluation against the full epoch of data.

  Args:
    sess: The session in which the model has been trained.
    eval_correct: The Tensor that returns the number of correct predictions.
    images_placeholder: The images placeholder.
    labels_placeholder: The labels placeholder.
    data_set: The set of images and labels to evaluate, from
      input_data.read_data_sets().
  """
  # And run one epoch of eval.
  true_count = 0  # Counts the number of correct predictions.
  steps_per_epoch = data_set.num_examples // FLAGS.batch_size
  num_examples = steps_per_epoch * FLAGS.batch_size
  for step in xrange(steps_per_epoch):
    feed_dict = fill_feed_dict(data_set,
                               images_placeholder,
                               labels_placeholder)
    true_count += sess.run(eval_correct, feed_dict=feed_dict)
  precision = true_count / num_examples
  print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
        (num_examples, true_count, precision))
    

In [9]:
def run_training():
  """Train MNIST for a number of steps."""
  # Get the sets of images and labels for training, validation, and
  # test on MNIST.
  data_sets = input_data.read_data_sets(FLAGS.train_dir, FLAGS.fake_data)

  # 在run_training()这个函数的一开始，是一个Python 语言中的with命令，这个命令表明所有已经构建的操作都要与默认的[`tf.Graph`] 全局实例关联起来。
  with tf.Graph().as_default():
    # Generate placeholders for the images and labels.
    images_placeholder, labels_placeholder = placeholder_inputs(
        FLAGS.batch_size)

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images_placeholder,
                             FLAGS.hidden1,
                             FLAGS.hidden2)

    # Add to the Graph the Ops for loss calculation.
    loss = mnist.loss(logits, labels_placeholder)

    # Add to the Graph the Ops that calculate and apply gradients.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = mnist.evaluation(logits, labels_placeholder)
#   状态可视化为了释放[TensorBoard] 所使用的事件文件（events file），所有的即时数据
#  （在这里只有一个）都要在图表构建阶段合并至一个操作（op）中。
    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    # Add the variable initializer Op.
    init = tf.initialize_all_variables()
#   保存检查点(checkpoint) 为了得到可以用来后续恢复模型以进一步训练或评估的检查
#   点文件（checkpoint file），我们实例化一个 tf.train.Saver
    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()
#   在创建好会话（session）之后，可以实例化一个tf.train.SummaryWriter ，用于写入
#   包含了图表本身和即时数据具体值的事件文件。
    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.
    for step in xrange(FLAGS.max_steps):
      start_time = time.time()

#     执行每一步时，我们的代码会生成一个反馈字典（feed dictionary），其中包含对应
#     步骤中训练所要使用的例子，这些例子的哈希键就是其所代表的占位符操作。
#     fill_feed_dict函数会查询给定的DataSet，索要下一批次batch_size的图像和标签，
#     与占位符相匹配的Tensor 则会包含下一批次的图像和标签。
      feed_dict = fill_feed_dict(data_sets.train,
                                 images_placeholder,
                                 labels_placeholder)

      # Run one step of the model.  The return values are the activations
      # from the `train_op` (which is discarded) and the `loss` Op.  To
      # inspect the values of your Ops or variables, you may include them
      # in the list passed to sess.run() and the value tensors will be
      # returned in the tuple from the call.
      _, loss_value = sess.run([train_op, loss],
                               feed_dict=feed_dict)

      duration = time.time() - start_time

      # Write the summaries and print an overview fairly often.
      if step % 100 == 0:
        # Print status to stdout.
        print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
        # Update the events file.
#       最后，每次运行summary_op时，都会往事件文件中写入最新的即时数据，函数的输
#       出会传入事件文件读写器（writer）的add_summary()函数。。
        summary_str = sess.run(summary_op, feed_dict=feed_dict)
        summary_writer.add_summary(summary_str, step)
        summary_writer.flush()

#     每隔一千个训练步骤，我们的代码会尝试使用训练数据集与测试数据集，对模型进
#     行评估。do_eval函数会被调用三次，分别使用训练数据集、验证数据集合测试数据集。
# 注意，更复杂的使用场景通常是，先隔绝data_sets.test测试数据集，只有在大量
# 的超参数优化调整（hyperparameter tuning）之后才进行检查。但是，由于MNIST 问题
# 比较简单，我们在这里一次性评估所有的数据。
      # Save a checkpoint and evaluate the model periodically.
      if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
#       在训练循环中，将定期调用saver.save() 方法，向训练文件夹中写入包含了当前所有可训练变量值得检查点文件。
        saver.save(sess, FLAGS.train_dir, global_step=step)
        # Evaluate against the training set.
        print('Training Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.train)
        # Evaluate against the validation set.
        print('Validation Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.validation)
        # Evaluate against the test set.
        print('Test Data Eval:')
        do_eval(sess,
                eval_correct,
                images_placeholder,
                labels_placeholder,
                data_sets.test)

In [11]:
def main(_):
  run_training()

if __name__ == '__main__':
    tf.app.run()

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Step 0: loss = 2.35 (0.006 sec)
Step 100: loss = 2.12 (0.004 sec)
Step 200: loss = 1.83 (0.003 sec)
Step 300: loss = 1.50 (0.003 sec)
Step 400: loss = 1.19 (0.007 sec)
Step 500: loss = 0.80 (0.005 sec)
Step 600: loss = 0.87 (0.003 sec)
Step 700: loss = 0.72 (0.004 sec)
Step 800: loss = 0.65 (0.003 sec)
Step 900: loss = 0.64 (0.004 sec)
Training Data Eval:
  Num examples: 55000  Num correct: 47508  Precision @ 1: 0.0000
Validation Data Eval:
  Num examples: 5000  Num correct: 4345  Precision @ 1: 0.0000
Test Data Eval:
  Num examples: 10000  Num correct: 8700  Precision @ 1: 0.0000
Step 1000: loss = 0.54 (0.009 sec)
Step 1100: loss = 0.38 (0.100 sec)
Step 1200: loss = 0.52 (0.007 sec)
Step 1300: loss = 0.50 (0.003 sec)
Step 1400: loss = 0.46 (0.003 sec)
Step 1500: loss = 0.52 (0.003 sec)
Step 160

SystemExit: 

To exit: use 'exit', 'quit', or Ctrl-D.
