## ReLU

tf.nn.relu() 放到隐藏层，就像开关一样把负权重关掉了。在激活函数之后，添加像输出层这样额外的层，就把模型变成了非线性函数。这个非线性的特征使得网络可以解决更复杂的问题。

In [42]:
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')

output = None
hidden_layer_weights = [
    [0.1, 0.2, 0.4],
    [0.4, 0.6, 0.6],
    [0.5, 0.9, 0.1],
    [0.8, 0.2, 0.8]]
out_weights = [
    [0.1, 0.6],
    [0.2, 0.1],
    [0.7, 0.9]]

# Weights and biases
weights = [
    tf.Variable(hidden_layer_weights),
    tf.Variable(out_weights)]
biases = [
    tf.Variable(tf.zeros(3)),
    tf.Variable(tf.zeros(2))]

# Input
features = tf.Variable([[1.0, 2.0, 3.0, 4.0], [-1.0, -2.0, -3.0, -4.0], [11.0, 12.0, 13.0, 14.0]])


# TODO: Create Model
hidden_layer = tf.nn.relu(tf.add(tf.matmul(features, weights[0]), biases[0]))
logits = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])

# TODO: Print session results
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = sess.run(logits)
    print(output)


[[ 5.11      8.440001]
 [ 0.        0.      ]
 [24.010002 38.239998]]


## Deep Neural Network

In [43]:
from tensorflow.examples.tutorials.mnist import input_data
import warnings
warnings.filterwarnings('ignore')
mnist = input_data.read_data_sets(".", one_hot=True, reshape=False)

# 参数 Parameters
learning_rate = 0.001
training_epochs = 20
batch_size = 128  # 如果没有足够内存，可以降低 batch size
display_step = 1

n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

n_hidden_layer = 256 # layer number of features 特征的层数

# Store layers weight & bias
weights = {
    'hidden_layer': tf.Variable(tf.random_normal([n_input, n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_hidden_layer, n_classes]))
}
biases = {
    'hidden_layer': tf.Variable(tf.random_normal([n_hidden_layer])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

Extracting .\train-images-idx3-ubyte.gz
Extracting .\train-labels-idx1-ubyte.gz
Extracting .\t10k-images-idx3-ubyte.gz
Extracting .\t10k-labels-idx1-ubyte.gz


In [44]:
# tf Graph input
x = tf.placeholder("float", [None, 28, 28, 1])
y = tf.placeholder("float", [None, n_classes])

x_flat = tf.reshape(x, [-1, n_input])

# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x_flat, weights['hidden_layer']), biases['hidden_layer'])
layer_1 = tf.nn.relu(layer_1)

# Output layer with linear activation
logits = tf.add(tf.matmul(layer_1, weights['out']), biases['out'])

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

In [45]:
# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    
    # Training cycle
    for epoch in range(training_epochs):
        total_batch = int(mnist.train.num_examples/batch_size)
        
        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})

## Save & Load

TensorFlow 可以通过一个叫 `tf.train.Saver` 的类把进程保存下来。这个类可以把任何 `tf.Variable` 存到你的文件系统。

In [49]:
import tensorflow as tf

# The file path to save the data
save_file = './model.ckpt'

# Two Tensor Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3]))
bias = tf.Variable(tf.truncated_normal([3]))

# Class used to save and/or restore Tensor Variables
saver = tf.train.Saver()

with tf.Session() as sess:
    # Initialize all the Variables
    sess.run(tf.global_variables_initializer())

    # Show the values of weights and bias
    print('Weights:')
    print(sess.run(weights))
    print('Bias:')
    print(sess.run(bias))

    # Save the model
    # 保存模型
    saver.save(sess, save_file)

Weights:
[[-0.17909837 -1.9843892  -1.2348624 ]
 [-1.0586609   0.30565867  0.23285024]]
Bias:
[-0.4386942  -0.87117994 -0.12426009]


如果使用 `TensorFlow 0.11.0RC1` 或者更新的版本，还会生成一个包含了 TensorFlow graph 的文件 "model.ckpt.meta"。

In [50]:
# Remove the previous weights and bias
tf.reset_default_graph()

# Two Variables: weights and bias
weights = tf.Variable(tf.truncated_normal([2, 3]))
bias = tf.Variable(tf.truncated_normal([3]))

# Class used to save and/or restore Tensor Variables
saver = tf.train.Saver()

with tf.Session() as sess:
    # Load the weights and bias
    saver.restore(sess, "./model.ckpt")

    # Show the values of weights and bias
    print('Weight:')
    print(sess.run(weights))
    print('Bias:')
    print(sess.run(bias))

INFO:tensorflow:Restoring parameters from ./model.ckpt
Weight:
[[-0.15739983 -0.86331886  0.2773363 ]
 [ 0.45793948 -0.86050624 -0.7860226 ]]
Bias:
[-0.42900687  0.88805526  0.28762463]


## Save & Load on real model

In [51]:
# Remove previous Tensors and Operations
tf.reset_default_graph()

from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

learning_rate = 0.001
n_input = 784  
n_classes = 10 

mnist = input_data.read_data_sets('.', one_hot=True)

features = tf.placeholder(tf.float32, [None, n_input])
labels = tf.placeholder(tf.float32, [None, n_classes])

weights = tf.Variable(tf.random_normal([n_input, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))

logits = tf.add(tf.matmul(features, weights), bias)


cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)

correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Extracting .\train-images-idx3-ubyte.gz
Extracting .\train-labels-idx1-ubyte.gz
Extracting .\t10k-images-idx3-ubyte.gz
Extracting .\t10k-labels-idx1-ubyte.gz


In [52]:
import math

save_file = './train_model.ckpt'
batch_size = 128
n_epochs = 100

saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(n_epochs):
        total_batch = math.ceil(mnist.train.num_examples / batch_size)

        for i in range(total_batch):
            batch_features, batch_labels = mnist.train.next_batch(batch_size)
            sess.run(
                optimizer,
                feed_dict={features: batch_features, labels: batch_labels})


        if epoch % 10 == 0:
            valid_accuracy = sess.run(
                accuracy,
                feed_dict={
                    features: mnist.validation.images,
                    labels: mnist.validation.labels})
            print('Epoch {:<3} - Validation Accuracy: {}'.format(
                epoch,
                valid_accuracy))

    saver.save(sess, save_file)
    print('Trained Model Saved.')

Epoch 0   - Validation Accuracy: 0.11580000072717667
Epoch 10  - Validation Accuracy: 0.2685999870300293
Epoch 20  - Validation Accuracy: 0.4099999964237213
Epoch 30  - Validation Accuracy: 0.5067999958992004
Epoch 40  - Validation Accuracy: 0.5654000043869019
Epoch 50  - Validation Accuracy: 0.6123999953269958
Epoch 60  - Validation Accuracy: 0.6492000222206116
Epoch 70  - Validation Accuracy: 0.6690000295639038
Epoch 80  - Validation Accuracy: 0.6919999718666077
Epoch 90  - Validation Accuracy: 0.7085999846458435
Trained Model Saved.


In [53]:
saver = tf.train.Saver()

with tf.Session() as sess:
    saver.restore(sess, save_file)

    test_accuracy = sess.run(
        accuracy,
        feed_dict={features: mnist.test.images, labels: mnist.test.labels})

print('Test Accuracy: {}'.format(test_accuracy))

INFO:tensorflow:Restoring parameters from ./train_model.ckpt
Test Accuracy: 0.7236999869346619


## Fine-tune

“微调”一个你已经训练并保存了的模型。但是，把保存的变量直接加载到已经修改过的模型会产生错误。<br>
<br>
命名报错: <br>
TensorFlow 对 Tensor 和计算使用一个叫 `name` 的字符串辨识器，如果没有定义 `name`，TensorFlow 会自动创建一个。TensorFlow 会把第一个节点命名为 `<Type>`，把后续的命名为`<Type>_<number>`。让我们看看这对加载一个有不同顺序权重和偏置项的模型有哪些影响：

In [54]:
import tensorflow as tf

tf.reset_default_graph()

save_file = 'model.ckpt'

weights = tf.Variable(tf.truncated_normal([2, 3]))
bias = tf.Variable(tf.truncated_normal([3]))

saver = tf.train.Saver()

print('Save Weights: {}'.format(weights.name))
print('Save Bias: {}'.format(bias.name))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, save_file)

tf.reset_default_graph()

bias = tf.Variable(tf.truncated_normal([3]))
weights = tf.Variable(tf.truncated_normal([2, 3]))

saver = tf.train.Saver()

print('Load Weights: {}'.format(weights.name))
print('Load Bias: {}'.format(bias.name))

with tf.Session() as sess:
    #saver.restore(sess, save_file)           # error
    
    #InvalidArgumentError (see above for traceback): Assign requires shapes of both tensors to match.

SyntaxError: unexpected EOF while parsing (<ipython-input-54-17f523b03ab7>, line 32)

`weights` 和 `bias` 的 `name` 属性与你保存的模型不同。这是为什么代码报 “Assign requires shapes of both tensors to match” 这个错误。<br>
<br>
`saver.restore(sess, save_file)` 代码试图把权重数据加载到`bias`里，把偏置项数据加载到`weights`里。与其让 TensorFlow 来设定 `name` 属性，不如让我们来手动设定:

In [None]:
import tensorflow as tf

tf.reset_default_graph()

save_file = 'model.ckpt'

weights = tf.Variable(tf.truncated_normal([2, 3]), name='weights_0')
bias = tf.Variable(tf.truncated_normal([3]), name='bias_0')

saver = tf.train.Saver()

# Print the name of Weights and Bias
print('Save Weights: {}'.format(weights.name))
print('Save Bias: {}'.format(bias.name))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, save_file)


tf.reset_default_graph()

bias = tf.Variable(tf.truncated_normal([3]), name='bias_0')
weights = tf.Variable(tf.truncated_normal([2, 3]) ,name='weights_0')

saver = tf.train.Saver()

print('Load Weights: {}'.format(weights.name))
print('Load Bias: {}'.format(bias.name))

with tf.Session() as sess:
    saver.restore(sess, save_file)

print('Loaded Weights and Bias successfully.')

Tensor 名称匹配正确，数据被正确加载。

## Dropout

Dropout 是一个降低过拟合的正则化技术。它在网络中暂时的丢弃一些单元（神经元），以及与它们的前后相连的所有节点。<br>
<br>
TensorFlow 提供了一个 tf.nn.dropout() 函数，你可以用来实现 dropout。<br>
<br>
在训练时，一个好的keep_prob初始值是0.5;<br>
在测试时，把 keep_prob 值设为1.0 ，这样保留所有的单元，最大化模型的能力。

In [55]:
'''
keep_prob = tf.placeholder(tf.float32) # probability to keep units

hidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])
hidden_layer = tf.nn.relu(hidden_layer)
hidden_layer = tf.nn.dropout(hidden_layer, keep_prob)

logits = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])
'''

'\nkeep_prob = tf.placeholder(tf.float32) # probability to keep units\n\nhidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])\nhidden_layer = tf.nn.relu(hidden_layer)\nhidden_layer = tf.nn.dropout(hidden_layer, keep_prob)\n\nlogits = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])\n'

In [56]:
import tensorflow as tf

hidden_layer_weights = [
    [0.1, 0.2, 0.4],
    [0.4, 0.6, 0.6],
    [0.5, 0.9, 0.1],
    [0.8, 0.2, 0.8]]
out_weights = [
    [0.1, 0.6],
    [0.2, 0.1],
    [0.7, 0.9]]

# Weights and biases
weights = [
    tf.Variable(hidden_layer_weights),
    tf.Variable(out_weights)]
biases = [
    tf.Variable(tf.zeros(3)),
    tf.Variable(tf.zeros(2))]

# Input
features = tf.Variable([[0.0, 2.0, 3.0, 4.0], [0.1, 0.2, 0.3, 0.4], [11.0, 12.0, 13.0, 14.0]])

# TODO: Create Model with Dropout
keep_prob = tf.placeholder(tf.float32)
hidden_layer = tf.add(tf.matmul(features, weights[0]), biases[0])
hidden_layer = tf.nn.relu(hidden_layer)
hidden_layer = tf.nn.dropout(hidden_layer, keep_prob)

logits = tf.add(tf.matmul(hidden_layer, weights[1]), biases[1])

# TODO: Print logits from a session
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(logits, feed_dict={keep_prob: 0.5}))



Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
[[ 1.1         6.6000004 ]
 [ 0.11200001  0.6720001 ]
 [48.020004   76.479996  ]]
