In [1]:
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

##### MNIST数据集相关常数

In [2]:
# Number of input-layer nodes; for MNIST this equals the number of pixels per image.
INPUT_NODE = 784
# Number of output-layer nodes; one per class, i.e. the ten digits 0-9.
OUTPUT_NODE = 10

##### 配置神经网络的参数

In [3]:
# The network has a single hidden layer with this many nodes.
LAYER1_NODE = 500
# Number of training examples per mini-batch.
BATCH_SIZE = 100
# Initial learning rate, before exponential decay is applied.
LEARNING_RATE_BASE = 0.8
# Decay rate of the learning rate.
LEARNING_RATE_DECAY = 0.99
# Coefficient of the regularization (model-complexity) term in the loss.
REGULARIZATION_RATE = 0.0001
# Number of training steps.
TRAINING_STEPS = 30000
# Decay rate of the exponential moving average of the parameters.
MOVING_AVERAGE_DECAY = 0.99

In [4]:
def inference(input_tensor,avg_class,weights1,biases1,weights2,biases2):
    """Forward pass of the network: one ReLU hidden layer, then a linear output layer.

    Args:
        input_tensor: Input batch fed to the hidden layer.
        avg_class: ``None`` to use the current parameter values, otherwise an
            object whose ``average(var)`` returns the averaged counterpart of
            ``var`` (in this file, a ``tf.train.ExponentialMovingAverage``).
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        The output-layer result (logits); no softmax is applied here.
    """
    # Identity test on purpose: `== None` dispatches to the operand's __eq__,
    # which may be overloaded and give a misleading answer.
    if avg_class is None:
        # No averaging helper supplied: use each parameter exactly as given.
        def resolve(variable):
            return variable
    else:
        # Look up the moving-average counterpart of each parameter.
        resolve = avg_class.average

    hidden = tf.nn.relu(tf.matmul(input_tensor, resolve(weights1)) + resolve(biases1))
    return tf.matmul(hidden, resolve(weights2)) + resolve(biases2)

In [5]:
def train(mnist):
    """Build, train and evaluate the two-layer MNIST classifier.

    The model is trained with gradient descent on the mean softmax cross-entropy
    plus L2 regularization of both weight matrices, using an exponentially
    decaying learning rate. An exponential moving average of the trainable
    variables is updated together with every training step, and every accuracy
    that is reported is computed from the averaged parameters.

    Args:
        mnist: Dataset object exposing ``train.next_batch(batch_size)``,
            ``train.num_examples`` and ``validation`` / ``test`` splits with
            ``images`` and ``labels``. Labels are expected to be one-hot encoded
            (they are reduced with ``argmax`` over axis 1).

    Returns:
        None. Validation accuracy is printed every 1000 steps and test accuracy
        is printed once after the last step.
    """
    # Build everything in a private graph. Without this, calling train() again
    # (e.g. re-running the notebook cell) keeps adding variables to the default
    # graph, and tf.trainable_variables() below would pick up the stale ones.
    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

        # Hidden-layer parameters; weights start from a truncated normal distribution.
        weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
        biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
        # Output-layer parameters.
        weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
        biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))

        # Forward pass on the current (non-averaged) parameters; this feeds the loss.
        y = inference(x, None, weights1, biases1, weights2, biases2)

        # Step counter. It is not trainable, so it is excluded from the moving
        # averages; minimize() below increments it on every training step.
        global_step = tf.Variable(0, trainable=False)

        # Moving averages of all trainable variables, and the forward pass that
        # reads those averaged values (used only for computing accuracy).
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
        average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

        # Mean cross-entropy over the batch. The sparse variant takes class
        # indices, hence the argmax over the one-hot labels.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        cross_entropy_mean = tf.reduce_mean(cross_entropy)

        # L2 penalty on the weight matrices only (biases are not regularized).
        regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
        regularization = regularizer(weights1) + regularizer(weights2)

        # Total loss = cross-entropy + regularization.
        loss = cross_entropy_mean + regularization

        # The decay period is the number of batches needed to go through the
        # training set once.
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE,
            global_step,
            mnist.train.num_examples/BATCH_SIZE,
            LEARNING_RATE_DECAY)

        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

        # One run() of train_op performs both the gradient step and the
        # moving-average update. NOTE(review): tf.group does not impose an order
        # between the two ops.
        train_op = tf.group(train_step, variables_averages_op)

        # Accuracy of the averaged model: cast the per-example booleans to 0/1
        # and take their mean, which is the fraction of correct predictions.
        correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        with tf.Session() as sess:
            tf.global_variables_initializer().run()

            # Validation data, evaluated periodically during training.
            validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
            # Test data, evaluated once after training.
            test_feed = {x: mnist.test.images, y_: mnist.test.labels}

            for i in range(TRAINING_STEPS):
                if i % 1000 == 0:
                    validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                    print("after %d training step(s),validation accuracy using average model is %g"
                          % (i, validate_acc))
                xs, ys = mnist.train.next_batch(BATCH_SIZE)
                sess.run(train_op, feed_dict={x: xs, y_: ys})

            # This figure comes from the test split, so it is labelled as test
            # accuracy rather than validation accuracy.
            test_acc = sess.run(accuracy, feed_dict=test_feed)
            print("after %d training step(s),test accuracy using average model is %g"
                  % (TRAINING_STEPS, test_acc))
            print()

In [6]:
# Directory where the MNIST archives are downloaded and read from. The default
# is the original machine-specific location; set the MNIST_DATA_DIR environment
# variable to run the notebook elsewhere without editing this cell.
MNIST_DATA_DIR = os.environ.get("MNIST_DATA_DIR", "/Users/zhouzelun/Documents/python/mnist_data")

# one_hot=True: train() expects one-hot labels (it applies argmax over axis 1).
mnist = input_data.read_data_sets(MNIST_DATA_DIR, one_hot=True)
train(mnist)


Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting /Users/zhouzelun/Documents/python/mnist_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting /Users/zhouzelun/Documents/python/mnist_data/train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting /Users/zhouzelun/Documents/python/mnist_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting /Users/zhouzelun/Documents/python/mnist_data/t10k-labels-idx1-ubyte.gz
after 0 training step(s),validation accuracy using average model is 0.1614
after 1000 training step(s),validation accuracy using average model is 0.977
after 2000 training step(s),validation accuracy using average model is 0.9816
after 3000 training step(s),validation accuracy using average model is 0.9824
after 4000 training step(s),validation accuracy using average model is 0.9848
after 5000 training step(s)