# Deep Learning
##  Assignment 3
Previously in 2_fullyconnected.ipynb, you trained a logistic regression and a neural network model.

The goal of this assignment is to explore regularization techniques.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import math

In [2]:
pickle_file = 'notMNIST.pickle'

# NOTE: unpickling can execute arbitrary code, so only load a pickle file that
# comes from a trusted source.
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)

# Pull the three splits out of the loaded dictionary.
train_dataset, train_labels = save['train_dataset'], save['train_labels']
valid_dataset, valid_labels = save['valid_dataset'], save['valid_labels']
test_dataset, test_labels = save['test_dataset'], save['test_labels']
del save  # drop the container reference so it can be garbage collected

# Report the shapes of the data exactly as loaded.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

image_size = 28  # images are image_size x image_size pixels
num_labels = 10  # number of distinct classes


def reformat(dataset, labels):
    """Flatten the images and one-hot encode the labels.

    Args:
        dataset: array that can be reshaped to rows of
            ``image_size * image_size`` values.
        labels: 1-D array of integer class indices.

    Returns:
        A ``(flat_images, one_hot_labels)`` tuple of ``float32`` arrays with
        shapes ``(N, image_size * image_size)`` and ``(N, num_labels)``.
    """
    pixels_per_image = image_size * image_size
    flat_images = dataset.reshape((-1, pixels_per_image)).astype(np.float32)

    # Broadcast each label (as a column) against the row of class indices:
    # label 0 -> [1.0, 0.0, 0.0 ...], label 1 -> [0.0, 1.0, 0.0 ...]
    class_ids = np.arange(num_labels)
    one_hot_labels = np.equal(class_ids, labels[:, None]).astype(np.float32)

    return flat_images, one_hot_labels


# Flatten each split's images and one-hot encode its labels.
# NOTE: the inputs are rebound to the reformatted arrays, so this cell must be
# run exactly once after loading; running it again would re-encode labels that
# are already one-hot.
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)

# Report the shapes after reformatting.
for _split_name, _split_data, _split_labels in (
        ('Training set', train_dataset, train_labels),
        ('Validation set', valid_dataset, valid_labels),
        ('Test set', test_dataset, test_labels)):
    print(_split_name, _split_data.shape, _split_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (10000, 28, 28) (10000,)
Test set (10000, 28, 28) (10000,)
Training set (200000, 784) (200000, 10)
Validation set (10000, 784) (10000, 10)
Test set (10000, 784) (10000, 10)


### 加入正则化、学习率指数衰减、dropout

In [5]:
# Hyper-parameters read by train() below.

# Width of the input placeholder (one value per pixel of a flattened 28x28 image).
INPUT_NODE = 784
# Width of the label placeholder / number of output logits.
OUTPUT_NODE = 10

# Sizes of the first and second hidden layers.
LAYER1_NODE = 1024
LAYER2_NODE = 256

# Number of training examples fed per optimisation step.
BATCH_SIZE = 128

# Initial learning rate and decay rate passed to tf.train.exponential_decay.
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99


# Scale handed to the L2 regularizer applied to the weight matrices.
REGULATION_RATE = 5e-4
# Number of optimisation steps executed by the training loop.
TRAINING_STEP = 5001
# NOTE(review): not referenced anywhere in the visible code; the name looks like
# a typo of MOVING_AVERAGE_RATE and is presumably a moving-average decay - confirm.
MOVING_AVERAG_EATE = 0.99

# train_dataset = train_dataset[0:BATCH_SIZE*2]
# train_labels = train_labels[0:BATCH_SIZE*2]

def inference(x, avg_class, w1, b1, w2, b2):
    """Forward pass of a two-layer network: ReLU hidden layer, linear output.

    Args:
        x: input batch; it is matrix-multiplied with the first weight matrix.
        avg_class: ``None`` to use the parameters as given. Otherwise an object
            whose ``average(param)`` method returns the value to use in place
            of each parameter (presumably a
            ``tf.train.ExponentialMovingAverage`` - confirm against the caller).
        w1, b1: weights and biases of the hidden layer.
        w2, b2: weights and biases of the output layer.

    Returns:
        The un-normalised output (logits) ``relu(x @ w1 + b1) @ w2 + b2``,
        computed with either the raw or the averaged parameters.
    """
    # Identity test: ``== None`` can be hijacked by an overloaded ``__eq__``.
    if avg_class is not None:
        # Use the shadow values maintained by the moving average.
        w1, b1, w2, b2 = [avg_class.average(param) for param in (w1, b1, w2, b2)]

    layer1 = tf.nn.relu(tf.matmul(x, w1) + b1)
    return tf.matmul(layer1, w2) + b2


def train():
    """Build, train and evaluate a three-layer fully connected classifier.

    Architecture: INPUT_NODE -> LAYER1_NODE (ReLU) -> LAYER2_NODE (ReLU) ->
    OUTPUT_NODE logits. The loss is the mean softmax cross-entropy plus an L2
    penalty (scaled by REGULATION_RATE) on the three weight matrices. It is
    minimised with plain gradient descent whose learning rate decays
    exponentially with the global step.

    Reads the module-level arrays ``train_dataset``/``train_labels``,
    ``valid_dataset``/``valid_labels`` and ``test_dataset``/``test_labels``
    (labels one-hot encoded) and the hyper-parameter constants of this module.

    Every 500 steps the minibatch loss, minibatch accuracy and validation
    accuracy are printed; after the last step the test accuracy is printed.
    All accuracies are fractions in [0, 1]. Returns ``None``.
    """

    def _dense_params(fan_in, fan_out):
        """Create the weight matrix and bias vector of one dense layer."""
        weights = tf.Variable(tf.truncated_normal(
            [fan_in, fan_out], stddev=2.0 / math.sqrt(fan_in * fan_out)))
        biases = tf.Variable(tf.constant(0.1, shape=[fan_out]))
        return weights, biases

    num_train = train_labels.shape[0]

    # Build everything in a private graph. Using the process-wide default
    # graph would add a duplicate set of nodes and variables every time this
    # function (or the notebook cell) is re-run.
    with tf.Graph().as_default():
        # Inputs: flattened images and one-hot labels.
        x = tf.placeholder(tf.float32, shape=[None, INPUT_NODE])
        y_ = tf.placeholder(tf.float32, shape=[None, OUTPUT_NODE])

        # Two ReLU hidden layers followed by a linear output layer.
        hidden_w1, hidden_b1 = _dense_params(INPUT_NODE, LAYER1_NODE)
        layer1 = tf.nn.relu(tf.matmul(x, hidden_w1) + hidden_b1)

        hidden_w2, hidden_b2 = _dense_params(LAYER1_NODE, LAYER2_NODE)
        layer2 = tf.nn.relu(tf.matmul(layer1, hidden_w2) + hidden_b2)

        hidden_w3, hidden_b3 = _dense_params(LAYER2_NODE, OUTPUT_NODE)
        y = tf.matmul(layer2, hidden_w3) + hidden_b3

        # Incremented by the optimizer on every step; drives the decay below.
        global_step = tf.Variable(0, trainable=False)

        # Cross-entropy against the class index recovered from the one-hot labels.
        true_class = tf.argmax(y_, 1)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=true_class, logits=y)
        cross_entropy_mean = tf.reduce_mean(cross_entropy)

        # L2 regularisation on the weight matrices only (biases are excluded).
        regularizer = tf.contrib.layers.l2_regularizer(REGULATION_RATE)
        regularization = (regularizer(hidden_w1) + regularizer(hidden_w2)
                          + regularizer(hidden_w3))

        # Exponentially decaying learning rate; the decay period is the number
        # of batches in one pass over the training set.
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE, global_step,
            train_dataset.shape[0] / BATCH_SIZE, LEARNING_RATE_DECAY)

        # Total loss and the gradient-descent update op.
        loss = cross_entropy_mean + regularization
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(
            loss, global_step=global_step)

        # Fraction of examples whose arg-max logit matches the true class.
        correct_prediction = tf.equal(tf.argmax(y, 1), true_class)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        init_op = tf.global_variables_initializer()

        # Train the network.
        with tf.Session() as sess:
            sess.run(init_op)

            validate_feed = {x: valid_dataset, y_: valid_labels}
            test_feed = {x: test_dataset, y_: test_labels}

            # Clamp the modulus to >= 1 so that a training set with at most
            # BATCH_SIZE rows does not divide by zero or go negative.
            offset_span = max(num_train - BATCH_SIZE, 1)

            for step in range(TRAINING_STEP):
                # Slide a BATCH_SIZE window over the training data.
                offset = (step * BATCH_SIZE) % offset_span
                batch_data = train_dataset[offset:(offset + BATCH_SIZE), :]
                batch_labels = train_labels[offset:(offset + BATCH_SIZE), :]
                train_feed = {x: batch_data, y_: batch_labels}

                _, batch_loss, train_accuracy = sess.run(
                    [optimizer, loss, accuracy], feed_dict=train_feed)

                if step % 500 == 0:
                    print("Minibatch loss at step %d: %f" % (step, batch_loss))
                    # Accuracy is a fraction, so "%.1f" would round away
                    # almost all of the information.
                    print("Minibatch accuracy: %f" % train_accuracy)
                    valid_accuracy = sess.run(accuracy, feed_dict=validate_feed)
                    print("Validation accuracy: %f" % valid_accuracy)

            test_accuracy = sess.run(accuracy, feed_dict=test_feed)
            print("Test accuracy: %f" % test_accuracy)

        
# Build the model, run the TRAINING_STEP optimisation steps and print the
# periodic minibatch/validation report followed by the final test accuracy.
train()


Minibatch loss at step 0: 2.300287
Minibatch accuracy: 0.1
Validation accuracy: 0.100000
Minibatch loss at step 500: 0.682174
Minibatch accuracy: 0.8
Validation accuracy: 0.850800
Minibatch loss at step 1000: 0.481413
Minibatch accuracy: 0.9
Validation accuracy: 0.855100
Minibatch loss at step 1500: 0.543780
Minibatch accuracy: 0.8
Validation accuracy: 0.870800
Minibatch loss at step 2000: 0.491746
Minibatch accuracy: 0.8
Validation accuracy: 0.875200
Minibatch loss at step 2500: 0.535162
Minibatch accuracy: 0.9
Validation accuracy: 0.874500
Minibatch loss at step 3000: 0.475902
Minibatch accuracy: 0.9
Validation accuracy: 0.880900
Minibatch loss at step 3500: 0.441360
Minibatch accuracy: 0.9
Validation accuracy: 0.887900
Minibatch loss at step 4000: 0.504814
Minibatch accuracy: 0.9
Validation accuracy: 0.888300
Minibatch loss at step 4500: 0.360647
Minibatch accuracy: 0.9
Validation accuracy: 0.886800
Minibatch loss at step 5000: 0.352049
Minibatch accuracy: 0.9
Validation accuracy: 0