In [2]:
"""A very simple MNIST classifier.
See extensive documentation at
https://www.tensorflow.org/get_started/mnist/beginners
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import sys

from tensorflow.examples.tutorials.mnist import input_data

import tensorflow as tf

# Import data
data_dir = '/tmp/tensorflow/mnist/input_data'
mnist = input_data.read_data_sets(data_dir, one_hot=True)
# Define loss and optimizer
x = tf.placeholder(tf.float32, [None, 784])
y_ = tf.placeholder(tf.float32, [None, 10])
learning_rate = tf.placeholder(tf.float32)

with tf.name_scope('reshape'):
  x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolutional layer - maps one grayscale image to 32 feature maps.
with tf.name_scope('conv1'):
  shape = [5, 5, 1, 32]
  W_conv1 = tf.Variable(tf.truncated_normal(shape, stddev=0.1),
                        collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])
  shape = [32]
  b_conv1 = tf.Variable(tf.constant(0.1, shape=shape))
  l_conv1 = tf.nn.conv2d(x_image, W_conv1, strides=[1, 1, 1, 1], 
                         padding='SAME') + b_conv1
  h_conv1 = tf.nn.relu(l_conv1)

# Pooling layer - downsamples by 2X.
with tf.name_scope('pool1'):
  h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='VALID')

# Second convolutional layer -- maps 32 feature maps to 64.
with tf.name_scope('conv2'):
  W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1),
                        
                        collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])
  b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]))
  l_conv2 = tf.nn.conv2d(h_pool1, W_conv2, strides=[1, 1, 1, 1], 
                         padding='SAME') + b_conv2
  h_conv2 = tf.nn.relu(l_conv2)

# Second pooling layer.
with tf.name_scope('pool2'):
  h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='VALID')

# Fully connected layer 1 -- after 2 round of downsampling, our 28x28 image
# is down to 7x7x64 feature maps -- maps this to 1024 features.
with tf.name_scope('fc1'):
  W_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1),
                      
                      collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])
  b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]))

  h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
  h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Dropout - controls the complexity of the model, prevents co-adaptation of
# features.
with tf.name_scope('dropout'):
  keep_prob = tf.placeholder(tf.float32)
  h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Map the 1024 features to 10 classes, one for each digit
with tf.name_scope('fc2'):
  W_fc2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1),
                      collections=[tf.GraphKeys.GLOBAL_VARIABLES,'WEIGHTS'])
  b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]))

  y = tf.matmul(h_fc1_drop, W_fc2) + b_fc2





# The raw formulation of cross-entropy,
#
#   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
#                                 reduction_indices=[1]))
#
# can be numerically unstable.
#
# So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
# outputs of 'y', and then average across the batch.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

l2_loss = tf.add_n( [tf.nn.l2_loss(w) for w in tf.get_collection('WEIGHTS')] )
total_loss = cross_entropy + 7e-5*l2_loss
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)

sess = tf.Session()
init_op = tf.global_variables_initializer()
sess.run(init_op)
# Train
for step in range(3000):
  batch_xs, batch_ys = mnist.train.next_batch(100)
  lr = 0.01
  _, loss, l2_loss_value, total_loss_value = sess.run(
               [train_step, cross_entropy, l2_loss, total_loss], 
               feed_dict={x: batch_xs, y_: batch_ys, learning_rate:lr, keep_prob:0.5})
  
  if (step+1) % 100 == 0:
    print('step %d, entropy loss: %f, l2_loss: %f, total loss: %f' % 
            (step+1, loss, l2_loss_value, total_loss_value))
    # Test trained model
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(sess.run(accuracy, feed_dict={x: batch_xs, y_: batch_ys, keep_prob:0.5}))
  if (step+1) % 1000 == 0:
    print(sess.run(accuracy, feed_dict={x: mnist.test.images,
                                    y_: mnist.test.labels, keep_prob:0.5}))

Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz
step 100, entropy loss: 0.758398, l2_loss: 25324.212891, total loss: 2.531092
0.73
step 200, entropy loss: 0.636668, l2_loss: 25320.984375, total loss: 2.409137
0.79
step 300, entropy loss: 0.506227, l2_loss: 25317.562500, total loss: 2.278456
0.83
step 400, entropy loss: 0.408348, l2_loss: 25314.169922, total loss: 2.180340
0.89
step 500, entropy loss: 0.384058, l2_loss: 25310.726562, total loss: 2.155809
0.89
step 600, entropy loss: 0.377220, l2_loss: 25307.304688, total loss: 2.148731
0.87
step 700, entropy loss: 0.352998, l2_loss: 25303.867188, total loss: 2.124269
0.92


KeyboardInterrupt: 