Deep Learning
=============

Assignment 3
------------

Previously in `2_fullyconnected.ipynb`, you trained a logistic regression and a neural network model.

The goal of this assignment is to explore regularization techniques.

In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import matplotlib.pyplot as plt

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


First reload the data we generated in `1_notmnist.ipynb`.

In [2]:
pickle_file = 'notMNIST.pickle'

with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 28, 28) (200000,)
Validation set (40000, 28, 28) (40000,)
Test set (10000, 28, 28) (10000,)


Reformat into a shape that's more adapted to the models we're going to train:
- data as a flat matrix,
- labels as float 1-hot encodings.

In [3]:
image_size = 28
num_labels = 10

def reformat(dataset, labels):
  dataset = dataset.reshape((-1, image_size * image_size)).astype(np.float32)
  # Map 1 to [0.0, 1.0, 0.0 ...], 2 to [0.0, 0.0, 1.0 ...]
  labels = (np.arange(num_labels) == labels[:,None]).astype(np.float32)
  return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)

Training set (200000, 784) (200000, 10)
Validation set (40000, 784) (40000, 10)
Test set (10000, 784) (10000, 10)


In [4]:
def accuracy(predictions, labels):
  return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [5]:
batch_size = 128

In [78]:

#传入创建层的回调函数，构造图然后运行模型
def run_model(train_data,train_labels,valid_data,valid_labels,test_data,test_labels,layers_callback,wd=0):
    graph = tf.Graph()
    with graph.as_default():

        # Input data. For the training data, we use a placeholder that will be fed
        # at run time with a training minibatch.
        tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_data)
        tf_test_dataset = tf.constant(test_data)

        # 创建隐藏层的回调
        train_logits,valid_logits,test_logits,regular = layers_callback(
            [tf_train_dataset,tf_valid_dataset,tf_test_dataset],wd)
        
        t = tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=train_logits)
#         print(t.shape)
        loss = tf.reduce_mean(tf.reduce_mean(t)+regular)
  
        # Optimizer.
        optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(train_logits)
        valid_prediction = tf.nn.softmax(valid_logits)
        test_prediction = tf.nn.softmax(test_logits)
        
        num_steps = 3001
        with tf.Session(graph=graph) as session:
            tf.global_variables_initializer().run()
            print("Initialized")
            for step in range(num_steps):
                # Pick an offset within the training data, which has been randomized.
                # Note: we could use better randomization across epochs.
                offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
                #     print(offset,step)
                # Generate a minibatch.
                batch_data = train_data[offset:(offset + batch_size), :]
                batch_labels = train_labels[offset:(offset + batch_size), :]
                # Prepare a dictionary telling the session where to feed the minibatch.
                # The key of the dictionary is the placeholder node of the graph to be fed,
                # and the value is the numpy array to feed to it.
                feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
                _, l, predictions = session.run(
                      [optimizer, loss, train_prediction], feed_dict=feed_dict)
                if (step % 500 == 0):
                    print("Minibatch loss at step %d: %f" % (step, l))
                    print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
                    print("Validation accuracy: %.1f%%" % accuracy(
                        valid_prediction.eval(), valid_labels))
                    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

In [74]:
# 增加一层全连接层
def layer_fullconnect(dataset,wd):
    nodes = 1024
    [tf_train_dataset,tf_valid_dataset,tf_test_dataset]=dataset
    # Variables.
    w1 = tf.truncated_normal([image_size * image_size, nodes])
    tf.add_to_collection('regular',tf.multiply(tf.nn.l2_loss(w1),wd))
    weights1 = tf.Variable(w1)
    w2 = tf.truncated_normal([nodes, num_labels])
    tf.add_to_collection('regular',tf.multiply(tf.nn.l2_loss(w2),wd))
    weights2 = tf.Variable(w2)
    biases1 = tf.Variable(tf.zeros([nodes]))
    biases2 = tf.Variable(tf.zeros([num_labels]))

    # Hidden Layer
    train_hidden_layer = tf.nn.relu(tf.matmul(tf_train_dataset, weights1)+biases1)
    valid_hidden_layer = tf.nn.relu(tf.matmul(tf_valid_dataset, weights1)+biases1)
    test_hidden_layer = tf.nn.relu(tf.matmul(tf_test_dataset, weights1)+biases1) 
    # Training computation.
    train_logits = tf.matmul(train_hidden_layer,weights2) + biases2
    valid_logits = tf.matmul(valid_hidden_layer, weights2) + biases2
    test_logits = tf.matmul(test_hidden_layer, weights2) + biases2
    
#     regular = tf.add_n(tf.get_collection("regular"))
    regular = tf.nn.l2_loss(w1)+tf.nn.l2_loss(w2)

    return (train_logits,valid_logits,test_logits,wd*regular)

#没有隐藏层
def layer_default(dataset,wd):
    [tf_train_dataset,tf_valid_dataset,tf_test_dataset]=dataset
    # Variables.
    w = tf.truncated_normal([image_size * image_size, num_labels])
    tf.add_to_collection('regular',tf.multiply(tf.nn.l2_loss(w),wd))
    weights = tf.Variable(w)
    biases = tf.Variable(tf.zeros([num_labels]))

    # Training computation.
    train_logits = tf.matmul(tf_train_dataset, weights) + biases
    valid_logits = tf.matmul(tf_valid_dataset, weights) + biases
    test_logits = tf.matmul(tf_test_dataset, weights) + biases
    
    regular = tf.add_n(tf.get_collection("regular"))
    return (train_logits,valid_logits,test_logits,regular)


---
Problem 1
---------

Introduce and tune L2 regularization for both logistic and neural network models. Remember that L2 amounts to adding a penalty on the norm of the weights to the loss. In TensorFlow, you can compute the L2 loss for a tensor `t` using `nn.l2_loss(t)`. The right amount of regularization should improve your validation / test accuracy.

---

In [70]:
run_model(train_dataset,train_labels,valid_dataset,valid_labels,test_dataset,test_labels,layer_default)

Initialized
Minibatch loss at step 0: 19.263342
Minibatch accuracy: 7.0%
Validation accuracy: 10.7%
Test accuracy: 10.1%
Minibatch loss at step 500: 1.712344
Minibatch accuracy: 75.0%
Validation accuracy: 75.7%
Test accuracy: 83.2%
Minibatch loss at step 1000: 1.338128
Minibatch accuracy: 76.6%
Validation accuracy: 76.3%
Test accuracy: 84.1%
Minibatch loss at step 1500: 1.124742
Minibatch accuracy: 77.3%
Validation accuracy: 77.0%
Test accuracy: 84.9%
Minibatch loss at step 2000: 0.999392
Minibatch accuracy: 81.2%
Validation accuracy: 77.9%
Test accuracy: 85.6%
Minibatch loss at step 2500: 1.184443
Minibatch accuracy: 73.4%
Validation accuracy: 78.2%
Test accuracy: 85.9%
Minibatch loss at step 3000: 0.906283
Minibatch accuracy: 78.1%
Validation accuracy: 78.9%
Test accuracy: 86.2%


In [71]:
run_model(train_dataset,train_labels,valid_dataset,valid_labels,test_dataset,test_labels,layer_default,0.003)

Initialized
Minibatch loss at step 0: 26.589451
Minibatch accuracy: 11.7%
Validation accuracy: 13.3%
Test accuracy: 13.7%
Minibatch loss at step 500: 10.530587
Minibatch accuracy: 78.1%
Validation accuracy: 75.9%
Test accuracy: 83.8%
Minibatch loss at step 1000: 10.472232
Minibatch accuracy: 76.6%
Validation accuracy: 76.8%
Test accuracy: 84.0%
Minibatch loss at step 1500: 10.195402
Minibatch accuracy: 75.0%
Validation accuracy: 77.4%
Test accuracy: 85.0%
Minibatch loss at step 2000: 10.036562
Minibatch accuracy: 85.9%
Validation accuracy: 78.3%
Test accuracy: 85.7%
Minibatch loss at step 2500: 10.178033
Minibatch accuracy: 78.9%
Validation accuracy: 78.5%
Test accuracy: 85.6%
Minibatch loss at step 3000: 9.967793
Minibatch accuracy: 75.8%
Validation accuracy: 79.0%
Test accuracy: 86.1%


In [66]:
run_model(train_dataset,train_labels,valid_dataset,valid_labels,test_dataset,test_labels,layer_fullconnect)

Initialized
Minibatch loss at step 0: 392.809631
Minibatch accuracy: 4.7%
Validation accuracy: 31.5%
Test accuracy: 35.0%
Minibatch loss at step 500: 10.879689
Minibatch accuracy: 78.9%
Validation accuracy: 80.3%
Test accuracy: 87.3%
Minibatch loss at step 1000: 8.535004
Minibatch accuracy: 81.2%
Validation accuracy: 80.7%
Test accuracy: 87.3%
Minibatch loss at step 1500: 10.787449
Minibatch accuracy: 82.8%
Validation accuracy: 80.6%
Test accuracy: 87.4%
Minibatch loss at step 2000: 1.750031
Minibatch accuracy: 85.2%
Validation accuracy: 81.2%
Test accuracy: 88.2%
Minibatch loss at step 2500: 3.716812
Minibatch accuracy: 76.6%
Validation accuracy: 81.0%
Test accuracy: 88.2%
Minibatch loss at step 3000: 2.497025
Minibatch accuracy: 78.9%
Validation accuracy: 82.1%
Test accuracy: 89.1%


In [79]:
run_model(train_dataset,train_labels,valid_dataset,valid_labels,test_dataset,test_labels,layer_fullconnect,0.003)

Initialized
Minibatch loss at step 0: 1313.163330
Minibatch accuracy: 7.0%
Validation accuracy: 31.4%
Test accuracy: 34.5%
Minibatch loss at step 500: 954.466797
Minibatch accuracy: 82.8%
Validation accuracy: 80.7%
Test accuracy: 87.8%
Minibatch loss at step 1000: 951.375671
Minibatch accuracy: 82.0%
Validation accuracy: 81.5%
Test accuracy: 89.0%
Minibatch loss at step 1500: 953.182007
Minibatch accuracy: 80.5%
Validation accuracy: 80.7%
Test accuracy: 87.9%
Minibatch loss at step 2000: 948.219788
Minibatch accuracy: 85.2%
Validation accuracy: 82.0%
Test accuracy: 88.7%
Minibatch loss at step 2500: 949.304077
Minibatch accuracy: 78.9%
Validation accuracy: 80.4%
Test accuracy: 87.7%
Minibatch loss at step 3000: 947.265137
Minibatch accuracy: 81.2%
Validation accuracy: 82.6%
Test accuracy: 89.4%


---
Problem 2
---------
Let's demonstrate an extreme case of overfitting. Restrict your training data to just a few batches. What happens?

---

In [83]:
run_model(train_dataset[:10000],train_labels[:10000],valid_dataset,valid_labels,test_dataset,test_labels,layer_fullconnect)

Initialized
Minibatch loss at step 0: 311.969971
Minibatch accuracy: 8.6%
Validation accuracy: 30.7%
Test accuracy: 32.9%
Minibatch loss at step 500: 17.832119
Minibatch accuracy: 91.4%
Validation accuracy: 74.4%
Test accuracy: 81.6%
Minibatch loss at step 1000: 0.419055
Minibatch accuracy: 98.4%
Validation accuracy: 80.9%
Test accuracy: 87.8%
Minibatch loss at step 1500: 0.000418
Minibatch accuracy: 100.0%
Validation accuracy: 81.9%
Test accuracy: 88.9%
Minibatch loss at step 2000: 0.000749
Minibatch accuracy: 100.0%
Validation accuracy: 82.0%
Test accuracy: 88.9%
Minibatch loss at step 2500: 0.000002
Minibatch accuracy: 100.0%
Validation accuracy: 82.1%
Test accuracy: 88.9%
Minibatch loss at step 3000: 0.025618
Minibatch accuracy: 99.2%
Validation accuracy: 81.7%
Test accuracy: 88.7%


In [86]:
run_model(train_dataset[:10000],train_labels[:10000],valid_dataset,valid_labels,test_dataset,test_labels,layer_fullconnect,0.3)

Initialized
Minibatch loss at step 0: 94473.273438
Minibatch accuracy: 10.2%
Validation accuracy: 41.7%
Test accuracy: 45.5%
Minibatch loss at step 500: 94167.226562
Minibatch accuracy: 93.8%
Validation accuracy: 81.5%
Test accuracy: 89.3%
Minibatch loss at step 1000: 94154.523438
Minibatch accuracy: 97.7%
Validation accuracy: 82.2%
Test accuracy: 89.6%
Minibatch loss at step 1500: 94434.101562
Minibatch accuracy: 97.7%
Validation accuracy: 81.9%
Test accuracy: 89.5%
Minibatch loss at step 2000: 94393.609375
Minibatch accuracy: 99.2%
Validation accuracy: 82.4%
Test accuracy: 90.0%
Minibatch loss at step 2500: 94057.500000
Minibatch accuracy: 100.0%
Validation accuracy: 82.3%
Test accuracy: 89.7%
Minibatch loss at step 3000: 94289.210938
Minibatch accuracy: 99.2%
Validation accuracy: 82.6%
Test accuracy: 90.0%


---
Problem 3
---------
Introduce Dropout on the hidden layer of the neural network. Remember: Dropout should only be introduced during training, not evaluation, otherwise your evaluation results would be stochastic as well. TensorFlow provides `nn.dropout()` for that, but you have to make sure it's only inserted during training.

What happens to our extreme overfitting case?

---

In [6]:
def layer_fullconnect_with_dropout(dataset,wd,tf_is_training):
    nodes = 1024
    [tf_train_dataset,tf_valid_dataset,tf_test_dataset]=dataset
    # Variables.
    w1 = tf.truncated_normal([image_size * image_size, nodes])
    tf.add_to_collection('regular',tf.multiply(tf.nn.l2_loss(w1),wd))
    weights1 = tf.Variable(w1)
    w2 = tf.truncated_normal([nodes, num_labels])
    tf.add_to_collection('regular',tf.multiply(tf.nn.l2_loss(w2),wd))
    weights2 = tf.Variable(w2)
    biases1 = tf.Variable(tf.zeros([nodes]))
    biases2 = tf.Variable(tf.zeros([num_labels]))
    
    # Hidden Layer
    train_hidden_layer = tf.nn.relu(tf.matmul(tf_train_dataset, weights1)+biases1)
    train_hidden_layer = tf.layers.dropout(train_hidden_layer,rate=0.5, training=tf_is_training)
    valid_hidden_layer = tf.nn.relu(tf.matmul(tf_valid_dataset, weights1)+biases1)
    test_hidden_layer = tf.nn.relu(tf.matmul(tf_test_dataset, weights1)+biases1) 
    # Training computation.
    train_logits = tf.matmul(train_hidden_layer,weights2) + biases2
    valid_logits = tf.matmul(valid_hidden_layer, weights2) + biases2
    test_logits = tf.matmul(test_hidden_layer, weights2) + biases2
    
#     regular = tf.add_n(tf.get_collection("regular"))
    regular = tf.nn.l2_loss(w1)+tf.nn.l2_loss(w2)

    return (train_logits,valid_logits,test_logits,wd*regular)

def run_model_with_dropout(train_data,train_labels,valid_data,valid_labels,test_data,test_labels,layers_callback,wd=0):
    graph = tf.Graph()
    with graph.as_default():
        # Input data. For the training data, we use a placeholder that will be fed
        # at run time with a training minibatch.
        tf_train_dataset = tf.placeholder(tf.float32,
                                    shape=(batch_size, image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_data)
        tf_test_dataset = tf.constant(test_data)
        # control dropout layer
        tf_is_training = tf.placeholder(tf.bool, None)

        # 创建隐藏层的回调
        train_logits,valid_logits,test_logits,regular = layers_callback(
            [tf_train_dataset,tf_valid_dataset,tf_test_dataset],wd,tf_is_training)
        
        t = tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=train_logits)
#         print(t.shape)
        loss = tf.reduce_mean(tf.reduce_mean(t)+regular)
  
        # Optimizer.
#         global_step = tf.Variable(0)  # count the number of steps taken.
#         learning_rate = tf.train.exponential_decay(0.5, global_step,decay_steps=1200,decay_rate=0.5)
#         optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
        optimizer = tf.train.AdamOptimizer(0.001).minimize(loss)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(train_logits)
        valid_prediction = tf.nn.softmax(valid_logits)
        test_prediction = tf.nn.softmax(test_logits)
        
        num_steps = 6001
        train_acc=np.zeros(num_steps)
        valid_acc=np.zeros(num_steps)
        test_acc=np.zeros(num_steps)
        losses=np.zeros(num_steps)
        with tf.Session(graph=graph) as session:
            tf.global_variables_initializer().run()
            print("Initialized")
            for step in range(num_steps):
                # Pick an offset within the training data, which has been randomized.
                # Note: we could use better randomization across epochs.
                offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
                #     print(offset,step)
                # Generate a minibatch.
                batch_data = train_data[offset:(offset + batch_size), :]
                batch_labels = train_labels[offset:(offset + batch_size), :]
                # Prepare a dictionary telling the session where to feed the minibatch.
                # The key of the dictionary is the placeholder node of the graph to be fed,
                # and the value is the numpy array to feed to it.
                feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels,tf_is_training:True}
                _, l, predictions = session.run(
                      [optimizer, loss, train_prediction], feed_dict=feed_dict)
                
                if np.isnan(l): break
                if (step % 500 == 0):
                    losses[step]=l
                    train_accuracy = accuracy(predictions, batch_labels)
                    train_acc[step]=train_accuracy
                    valid_accuracy = accuracy(valid_prediction.eval(), valid_labels)
                    valid_acc[step]=valid_accuracy
                    test_accuracy = accuracy(test_prediction.eval(), test_labels)
                    test_acc[step]=test_accuracy
                    print("Minibatch loss at step %d: %f" % (step, l))
                    print("Minibatch accuracy: %.1f%%" % train_accuracy)
                    print("Validation accuracy: %.1f%%" % valid_accuracy)
                    print("Test accuracy: %.1f%%" % test_accuracy)
#                     break
#         sub_axix = range(len(test_acc))
#         ax1 = plt.subplot(1,2,1)
#         plt.sca(ax1)
#         plt.plot(sub_axix,train_acc,color='red',label='train accuracy')
#         plt.plot(sub_axix,valid_acc,color='green',label='valid accuracy')
#         plt.plot(sub_axix,test_acc,color='blue',label='test accuracy')
#         plt.legend()
#         ax2 = plt.subplot(1,2,2)
#         plt.sca(ax2)
#         plt.plot(sub_axix[:10],losses[:10],color='skyblue',label='loss')
#         plt.legend()
#         plt.show()

In [93]:
run_model_with_dropout(
    train_dataset[:10000],train_labels[:10000],valid_dataset,valid_labels,test_dataset,test_labels,
    layer_fullconnect_with_dropout,0.003)

Initialized
Minibatch loss at step 0: 1410.657227
Minibatch accuracy: 9.4%
Validation accuracy: 31.6%
Test accuracy: 34.7%
Minibatch loss at step 500: 963.703735
Minibatch accuracy: 82.8%
Validation accuracy: 80.9%
Test accuracy: 87.9%
Minibatch loss at step 1000: 955.687500
Minibatch accuracy: 87.5%
Validation accuracy: 81.9%
Test accuracy: 89.0%
Minibatch loss at step 1500: 959.057983
Minibatch accuracy: 90.6%
Validation accuracy: 82.3%
Test accuracy: 89.3%
Minibatch loss at step 2000: 947.640625
Minibatch accuracy: 92.2%
Validation accuracy: 82.7%
Test accuracy: 89.9%
Minibatch loss at step 2500: 944.683594
Minibatch accuracy: 90.6%
Validation accuracy: 82.8%
Test accuracy: 89.8%
Minibatch loss at step 3000: 946.646301
Minibatch accuracy: 93.8%
Validation accuracy: 82.8%
Test accuracy: 89.9%


---
Problem 4
---------

Try to get the best performance you can using a multi-layer model! The best reported test accuracy using a deep network is [97.1%](http://yaroslavvb.blogspot.com/2011/09/notmnist-dataset.html?showComment=1391023266211#c8758720086795711595).

One avenue you can explore is to add multiple layers.

Another one is to use learning rate decay:

    global_step = tf.Variable(0)  # count the number of steps taken.
    learning_rate = tf.train.exponential_decay(0.5, global_step, ...)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
 
 ---


In [7]:
def deep_layer_with_dropout(dataset,wd,tf_is_training):
    # 使用3个隐层
    nodes_1 = 4096*2
    nodes_2 = 4096
    nodes_3 = 4096*2
    [tf_train_dataset,tf_valid_dataset,tf_test_dataset]=dataset
    # Variables.
    w1 = tf.truncated_normal([image_size * image_size, nodes_1])
#     tf.add_to_collection('regular',tf.multiply(tf.nn.l2_loss(w1),wd))
    weights1 = tf.Variable(w1)
    w2 = tf.truncated_normal([nodes_1, nodes_2])
#     tf.add_to_collection('regular',tf.multiply(tf.nn.l2_loss(w2),wd))
    weights2 = tf.Variable(w2)
    w3 = tf.truncated_normal([nodes_2, nodes_3])
    weights3 = tf.Variable(w3)
    w4 = tf.truncated_normal([nodes_3, num_labels])
    weights4 = tf.Variable(w4)
    biases1 = tf.Variable(tf.zeros([nodes_1]))
    biases2 = tf.Variable(tf.zeros([nodes_2]))
    biases3 = tf.Variable(tf.zeros([nodes_3]))
    biases4 = tf.Variable(tf.zeros([num_labels]))
    
    # Hidden Layer
#     train_hidden_layer_1 = tf.layers.dense( tf_train_dataset, units = nodes_1, activation = tf.nn.relu )
    train_hidden_layer_1 = tf.nn.relu(tf.matmul(tf_train_dataset, weights1)+biases1)
    train_hidden_layer_2 = tf.nn.relu(tf.matmul(train_hidden_layer_1, weights2)+biases2)
#     train_hidden_layer_2 = tf.layers.dropout(train_hidden_layer_2,rate=0.1, training=tf_is_training)
#     train_hidden_layer_3 = tf.nn.relu(tf.matmul(train_hidden_layer_2, weights3)+biases3)
    train_hidden_layer_4 = tf.nn.relu(tf.matmul(train_hidden_layer_2, weights3)+biases3)
    
    valid_hidden_layer_1 = tf.nn.relu(tf.matmul(tf_valid_dataset, weights1)+biases1)
    valid_hidden_layer_2 = tf.nn.relu(tf.matmul(valid_hidden_layer_1, weights2)+biases2)
#     valid_hidden_layer_3 = tf.nn.relu(tf.matmul(valid_hidden_layer_2, weights3)+biases3)
    valid_hidden_layer_4 = tf.nn.relu(tf.matmul(valid_hidden_layer_2, weights3)+biases3)
    
    test_hidden_layer_1 = tf.nn.relu(tf.matmul(tf_test_dataset, weights1)+biases1) 
    test_hidden_layer_2 = tf.nn.relu(tf.matmul(test_hidden_layer_1, weights2)+biases2)
#     test_hidden_layer_3 = tf.nn.relu(tf.matmul(test_hidden_layer_2, weights3)+biases3)
    test_hidden_layer_4 = tf.nn.relu(tf.matmul(test_hidden_layer_2, weights3)+biases3)
    
    # Training computation.
    train_logits = tf.matmul(train_hidden_layer_4,weights4) + biases4
    valid_logits = tf.matmul(valid_hidden_layer_4, weights4) + biases4
    test_logits = tf.matmul(test_hidden_layer_4, weights4) + biases4
    
#     regular = tf.add_n(tf.get_collection("regular"))
#     regular = tf.nn.l2_loss(w1)+tf.nn.l2_loss(w2)+tf.nn.l2_loss(w3)
    regular = 0
    return (train_logits,valid_logits,test_logits,wd*regular)

In [8]:
from collections import defaultdict
#查看样本种类是否均衡
def check_balance():
    train_cnt = len(train_labels)
#     print(train_labels[:10])
    label_count=defaultdict(int)
    for label in train_labels:
        index = np.argwhere(label)
        label_count[index[0][0]]+=1
    print(label_count)
    for label in range(10):
        print("label %d count:%d, pecentage: %.3f" % (label,label_count[label],label_count[label]*1.0/train_cnt))
              
check_balance()

defaultdict(<class 'int'>, {2: 40000, 5: 40000, 7: 40000, 8: 40000, 9: 40000, 3: 40000, 6: 40000, 4: 40000, 1: 40000, 0: 40000})
label 0 count:40000, pecentage: 0.100
label 1 count:40000, pecentage: 0.100
label 2 count:40000, pecentage: 0.100
label 3 count:40000, pecentage: 0.100
label 4 count:40000, pecentage: 0.100
label 5 count:40000, pecentage: 0.100
label 6 count:40000, pecentage: 0.100
label 7 count:40000, pecentage: 0.100
label 8 count:40000, pecentage: 0.100
label 9 count:40000, pecentage: 0.100


In [8]:
run_model_with_dropout(
    train_dataset,train_labels,valid_dataset,valid_labels,test_dataset,test_labels,
    deep_layer_with_dropout,0.003)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Initialized
Minibatch loss at step 0: 2111297.500000
Minibatch accuracy: 11.7%
Validation accuracy: 20.3%
Test accuracy: 21.9%
Minibatch loss at step 500: 104270.632812
Minibatch accuracy: 84.4%
Validation accuracy: 83.2%
Test accuracy: 88.8%
Minibatch loss at step 1000: 81650.835938
Minibatch accuracy: 79.7%
Validation accuracy: 83.7%
Test accuracy: 89.2%
Minibatch loss at step 1500: 64506.429688
Minibatch accuracy: 82.0%
Validation accuracy: 85.4%
Test accuracy: 90.6%
Minibatch loss at step 2000: 30587.343750
Minibatch accuracy: 87.5%
Validation accuracy: 86.1%
Test accuracy: 91.5%
Minibatch loss at step 2500: 22427.207031
Minibatch accuracy: 87.5%
Validation accuracy: 85.9%
Test accuracy: 91.9%
Minibatch loss at step 3000: 26242.650391
Minibatch accuracy: 86.7%
Validation accuracy: 86.4%
Test ac