In [1]:
import tensorflow as tf

简单的两个数组相加案例

In [2]:
# Two constant 1-D tensors and the op that adds them element-wise.
a = tf.constant([1, 2], name='a')
b = tf.constant([3, 4], name='b')
result = a + b

# Evaluate the sum, then release the session explicitly: a bare
# `tf.Session()` that is never closed keeps its resources for the whole
# lifetime of the kernel.
sess = tf.Session()
try:
    output = sess.run(result)
finally:
    sess.close()

# Last expression of the cell, so the notebook displays the computed array.
output

array([4, 6])

# 1 计算图Graph
---
```python
import tensorflow as tf
tf.Graph()
```

## 1.1 计算图的使用
---
每个计算图相互独立

In [5]:
# Build two independent graphs. Each one owns a variable named 'v', but the
# two variables use different initializers (zeros vs. ones).
g1 = tf.Graph()
with g1.as_default():
    v = tf.get_variable('v', shape=[1], initializer=tf.zeros_initializer)

g2 = tf.Graph()
with g2.as_default():
    v = tf.get_variable('v', shape=[1], initializer=tf.ones_initializer)

# Read 'v' back from each graph in turn. A session bound to a graph only sees
# that graph's variables, so the same name resolves to different values.
for label, graph in (('Graph1 v = ', g1), ('Graph2 v = ', g2)):
    with tf.Session(graph=graph) as sess:
        tf.global_variables_initializer().run()
        # reuse=True makes get_variable look up the existing 'v' instead of
        # trying to create a new one.
        with tf.variable_scope('', reuse=True):
            print(label, sess.run(tf.get_variable('v')))

Graph1 v =  [0.]
Graph2 v =  [1.]


指定图在GPU计算

In [9]:
# Build the ops inside graph `g` so that the device scope below really applies
# to them. Previously `a`, `b` and `a + b` were created in the default graph,
# which `g.device(...)` does not affect, so the GPU request was silently
# ignored.
g = tf.Graph()
with g.as_default():
    a = tf.constant([1, 2], name='a')
    b = tf.constant([3, 4], name='b')

    # Request placement of the addition on the first GPU.
    with g.device('/gpu:0'):
        result = a + b

# Run against `g` explicitly. allow_soft_placement lets TensorFlow fall back to
# another device when '/gpu:0' cannot run the op (e.g. on a CPU-only machine).
with tf.Session(graph=g,
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    print(sess.run(result))

[4 6]


## 1.2 Tensor
---
### 1.2.1 introduction
---
1. tensor
    - 0阶张量为标量(scalar)，对应一个数
    - 一阶张量为向量(vector)，对应一维数组
    - n阶张量对应n维数组
2. tensor的数据结构
    - name: `name:src_output`
    - shape:
    - dtype:
3. tensor支持的数据类型
    - tf.float32, tf.float64
    - tf.int8, tf.int16, tf.int32, tf.int64, tf.uint8
    - tf.bool
    - tf.complex64, tf.complex128
4. 张量的使用
    - 对中间计算结果的引用
    - 可以通过name获取保存的计算结果

In [31]:
# Two constant vectors, plus an add op that is given the explicit node name
# 'add_ab' so it can be recognised in the graph.
a = tf.constant([1,2], name='a')
b = tf.constant([3,4], name='b')
result = tf.add(a, b, name='add_ab')

In [34]:
result
# The tensor name has the form 'node_name:output_index'. Re-running the
# definition does not overwrite the earlier node; a new node is created and
# the name becomes `add_ab_1:0`.

<tf.Tensor 'add_ab:0' shape=(2,) dtype=int32>

In [57]:
# Display the tensor's repr (name, shape and dtype).
a

<tf.Tensor 'a_12:0' shape=(2,) dtype=int32>

## 1.3 Session
---
tensorflow 通过使用Session来执行定义好的运算。
```python
sess = tf.Session()
# 执行定义好的张量
sess.run()

sess.close()
```
or
```python
with tf.Session() as sess:
    sess.run()
```

### 1.3.1 定义默认会话
---


**方法1**：使用as_default创建一个context manager，在内部可以直接使用tensor_name.eval()执行。

In [83]:
# as_default() installs `sess` as the default session inside the `with` block,
# so tensors can be evaluated there with `tensor.eval()`.
sess = tf.Session()
try:
    with sess.as_default():
        print(result.eval())
finally:
    # Leaving the as_default() context does NOT close the session, so it has
    # to be closed explicitly to release its resources.
    sess.close()

[4 6]


**方法2**：使用`tf.InteractiveSession()`函数,直接创建一个默认session，不用再去指定。

In [86]:
# An InteractiveSession becomes the default session as soon as it is created,
# so `eval()` needs no explicit session argument.
sess = tf.InteractiveSession()
print(result.eval())

# Release the session once the result has been printed.
sess.close()

[4 6]


**方法3**：使用`ConfigProto()`创建一个config对象，在session创建的时候指定config属性

In [89]:
# Session options:
#   allow_soft_placement - let TensorFlow fall back to another device when the
#                          requested one cannot be used.
#   log_device_placement - log which device each op is placed on.
config = tf.ConfigProto(allow_soft_placement=True,
                       log_device_placement=True)

# NOTE(review): this session is never closed; later cells call `.eval()`
# without passing a session and presumably rely on it as the default session.
sess1 = tf.InteractiveSession(config=config)
# The same config can be passed to a regular session:
# sess2 = tf.Session(config=config)

## 1.4 实现神经网络
---


In [100]:
# Network input: two values arranged as a (2, 1) column vector.
x = tf.constant([0.7, 0.9], shape=(2, 1))

In [101]:
# First-layer weights, shape (3, 2); multiplied with the (2, 1) input below.
w1 = tf.constant([[0.2, 0.3],[0.1,-0.5],[0.4,0.2]], shape=(3, 2))

In [102]:
# Second-layer weights, shape (1, 3); maps the hidden vector to one output.
w2 = tf.constant([0.6, 0.1, -0.2], shape=(1, 3))

In [103]:
# Forward pass: hidden layer a = w1 . x (3, 1), then output y = w2 . a (1, 1).
a = tf.matmul(w1, x)
y = tf.matmul(w2, a)

In [104]:
# Evaluate the output. No session is passed, so a default session must
# already be installed when this cell runs.
y.eval()

array([[0.11600002]], dtype=float32)

### 1.4.1 variable
---
在tensorflow中，一个变量值在被使用之前，这个变量的**初始化过程**要被明确调用。变量定义类似于python中的函数定义，想要使用，需要调用。通过session调用。
```python
# 创建variable,并将变量加入到GraphKeys.GLOBAL_VARIABLES集合;如果参数trainable=True(默认为True),则变量还会被加入到GraphKeys.TRAINABLE_VARIABLES集合
v = tf.Variable()
# 初始化
v.initializer

# 获取已定义的变量列表
tf.global_variables()

# 获取可训练的变量列表
tf.trainable_variables()

```

>  随机生成函数
---
```python
tf.random_normal
tf.truncated_normal
tf.random_uniform
tf.random.gamma
```

> 常数生成函数
---
```python
tf.zeros
tf.ones
tf.fill
tf.constant
```
### 1.4.2 前向传播
---
1. 定义输入值
2. 定义隐藏层1的权重
3. 定义隐藏层2的权重
4. 计算

In [14]:
import tensorflow as tf

# Input: a single sample with two features, stored as a (2, 1) column vector.
x = tf.constant([0.7, 0.9], shape=(2, 1), dtype=tf.float32)

# Weights drawn from a normal distribution (stddev 1) with a fixed op seed.
w1 = tf.Variable(tf.random_normal((3, 2), stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal((1, 3), stddev=1, seed=1))

# Forward pass: hidden layer first, then the output layer.
h = tf.matmul(w1, x)
y = tf.matmul(w2, h)

# A single op that initializes every variable defined so far.
init_op = tf.global_variables_initializer()

# Inside the `with` block `sess` is the default session, so ops and tensors
# can be run through `.run()` / `.eval()` directly.
with tf.Session() as sess:
    # Alternative: initialize each variable on its own via
    # `w1.initializer` and `w2.initializer`.
    init_op.run()
    print(y.eval())

[[-3.7798672]]


### 1.4.3 placeholder
---
用placeholder管理常量
```python
# 创建
v = tf.placeholder(dtype, shape=None, name=None)
```

In [3]:
import tensorflow as tf

# Input is supplied at run time through a placeholder of shape (2, 1).
x = tf.placeholder(tf.float32, shape=(2, 1), name='input')

# Weights drawn from a normal distribution (stddev 1) with a fixed op seed.
w1 = tf.Variable(tf.random_normal((3, 2), stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal((1, 3), stddev=1, seed=1))

# Forward pass: hidden layer first, then the output layer.
h = tf.matmul(w1, x)
y = tf.matmul(w2, h)

# A single op that initializes every variable defined so far.
init_op = tf.global_variables_initializer()

# Inside the `with` block `sess` is the default session.
with tf.Session() as sess:
    init_op.run()
    # Feed one sample (a column vector) into the placeholder.
    sample = [[0.7], [0.9]]
    print(y.eval(feed_dict={x: sample}))

[[-3.7798672]]


定义多维输入值,这里定义三个样本

In [8]:
import tensorflow as tf
import numpy as np

# Placeholder for three samples at once: one column per sample, two features
# per column.
x = tf.placeholder(tf.float32, shape=(2, 3), name='input')

# Weights drawn from a normal distribution (stddev 1) with a fixed op seed.
w1 = tf.Variable(tf.random_normal((3, 2), stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal((1, 3), stddev=1, seed=1))

# Forward pass: hidden layer first, then the output layer.
h = tf.matmul(w1, x)
y = tf.matmul(w2, h)

# A single op that initializes every variable defined so far.
init_op = tf.global_variables_initializer()

# The samples are written one per row and then transposed so that they match
# the (2, 3) placeholder layout.
input_data = np.transpose(np.array([[0.7, 0.9],
                                    [0.1, 0.4],
                                    [0.5, 0.8]]))

# Inside the `with` block `sess` is the default session.
with tf.Session() as sess:
    init_op.run()
    print(y.eval(feed_dict={x: input_data}))

[[-3.7798674 -1.8407464 -3.4529805]]


### 1.4.4 反向传播
---
这里采用交叉熵
$$H(p,q)=-\sum_{i=1}^np(x_i)log(q(x_i))$$
其中p为target值的概率，q为预测值的概率

### 1.4.5 完整的代码
---

In [46]:
import tensorflow as tf
import numpy as np

# Number of samples per training step.
batch_size = 8

# Inputs: any number of samples, two features each.
x = tf.placeholder(tf.float32, shape=(None, 2), name='input')
# Target labels: one value per sample.
y_ = tf.placeholder(tf.float32, shape=(None, 1), name='target')

# Weights drawn from a normal distribution (stddev 1) with a fixed op seed.
w1 = tf.Variable(tf.random_normal((2, 3), stddev=1, seed=1))
w2 = tf.Variable(tf.random_normal((3, 1), stddev=1, seed=1))

# Forward pass; the sigmoid squashes the output into (0, 1).
h = tf.matmul(x, w1)
y = tf.sigmoid(tf.matmul(h, w2))

# Binary cross-entropy. Both probabilities are clipped to [1e-10, 1.0] before
# the log so that log(0) can never occur.
log_y = tf.log(tf.clip_by_value(y, 1e-10, 1.0))
log_not_y = tf.log(tf.clip_by_value(1 - y, 1e-10, 1.0))
cross_entropy = -tf.reduce_mean(y_ * log_y + (1 - y_) * log_not_y)

train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

# Synthetic dataset: 128 random points in the unit square (fixed seed).
rdm = np.random.RandomState(1)
dataset_size = 128
X = rdm.rand(dataset_size, 2)

# Label is 1 when the two features sum to less than 1, otherwise 0.
Y = [[int(x1 + x2 < 1)] for (x1, x2) in X]

# Created after minimize() so the optimizer's own variables are included.
init_op = tf.global_variables_initializer()
steps = 5000

with tf.Session() as sess:
    sess.run(init_op)
    print('w1-->\n', sess.run(w1))
    print('w2-->\n', sess.run(w2))

    for i in range(steps):
        # Walk through the dataset in consecutive windows, wrapping around.
        start = (i * batch_size) % dataset_size
        end = min(start + batch_size, dataset_size)
        batch = {x: X[start:end], y_: Y[start:end]}
        sess.run(train_step, feed_dict=batch)

        # Report the loss over the whole dataset every 1000 steps.
        if i % 1000 != 0:
            continue
        total_cross_entropy = sess.run(cross_entropy, feed_dict={x: X, y_: Y})
        print('After %d , cross entropy is %g' % (i, total_cross_entropy))

    print('new w1\n', sess.run(w1))
    print('new w2\n', sess.run(w2))

w1-->
 [[-0.8113182   1.4845988   0.06532937]
 [-2.4427042   0.0992484   0.5912243 ]]
w2-->
 [[-0.8113182 ]
 [ 1.4845988 ]
 [ 0.06532937]]
After 0 , cross entropy is 1.89805
After 1000 , cross entropy is 0.655075
After 2000 , cross entropy is 0.626172
After 3000 , cross entropy is 0.615096
After 4000 , cross entropy is 0.610309
new w1
 [[ 0.02476974  0.56948686  1.6921943 ]
 [-2.1977353  -0.23668927  1.1143897 ]]
new w2
 [[-0.45544702]
 [ 0.49110925]
 [-0.98110336]]


# 2 深层NN
---
## 2.1 常用的非线性激活函数
---
```python
tf.nn.relu()
tf.nn.sigmoid()
tf.nn.tanh()
```

## 2.2 损失函数
---
1. 分类问题
    - 交叉熵
        - 先用softmax将输出结果转成概率分布的形式`tf.nn.softmax()`
        - $$H(p, q) = - \sum_x p(x) \log q(x)$$
        - p为正确答案，q为预测答案，这里都可以用概率表示。整体的意义为用q的概率分布来表示p的概率分布的困难程度。
        - p和q越接近，交叉熵的值越小。
        ```python
        # softmax + cross_entropy
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        # 只有一个正确答案的分类问题
        tf.nn.sparse_softmax_cross_entropy_with_logits()
        ```
2. 回归问题
    - MSE(mean squared error)
    - $$MSE(y, y^{'}) = \frac{\sum_{i=1}^{n}(y_i - y_i^{'})^2}{n}$$
    - MSE也常用于分类问题的损失函数
    - `tf.reduce_mean(tf.square(y_ - y))`
3. 自定义

## 2.3 网络优化
---
### 1. 学习率
- 指数衰减法
```python
    decayed_learning_rate = learning_rate *
                    decay_rate ^ (global_step / decay_steps)
```
- learning_rate:初始学习率，decay_rate:衰减率，global_step:当前迭代次数，常为一个初始值为0的tensor。decay_steps:衰减速度。eg:每迭代十万次，学习率乘以0.96
```python
...
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.1
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           100000, 0.96, staircase=True)
# Passing global_step to minimize() will increment it at each step.
learning_step = (
    tf.train.GradientDescentOptimizer(learning_rate)
    .minimize(...my loss..., global_step=global_step)
)
```
- 对应tf中的函数为：`tf.train.exponential_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)`
    - staircase为False，表示连续衰减
    - staircase为True，表示阶梯衰减。
    
### 2. 过拟合
- 正则化。正则化是处理过拟合常用的方法。**正则化是针对权重进行操作的**
- L1正则化$$R(w) = \lVert{w}\rVert_1 = \sum_i\lvert{w_i}\rvert$$
- L2正则化$$R(w) = \lVert{w}\rVert_2^2 = \sum_i\lvert{w_i^2}\rvert$$
- L1和L2同时使用：$$R(w) = \sum_i\alpha\lvert{w_i}\rvert + (1 - \alpha)w_i^2$$
- 如果损失函数为$J(\theta)$，加入正则化后就会变成$J(\theta) + \lambda{R(w)}$
- eg
```python
weights = tf.constant([[1.0, -2.0],[-3.0, 4.0]])
with tf.Session() as sess:
    print(sess.run(tf.contrib.layers.l1_regularizer(0.5)(weights)))
    print(sess.run(tf.contrib.layers.l2_regularizer(0.5)(weights)))
    print(sess.run(tf.contrib.layers.l1_l2_regularizer(0.5)(weights)))
```

下面是一个五层神经网络(3个隐藏层)，带有L2正则化的实现方法。(只是部分实现代码，不做具体实现)

In [55]:
import tensorflow as tf

# Creates a randomly initialized weight variable for the given shape and
# registers its L2 penalty (scaled by `lamb`) in the 'losses' collection.
def get_weight(shape, lamb):
    """Create a weight variable and register its L2 regularization term.

    Args:
        shape: Shape of the weight tensor to create.
        lamb: Scale passed to the L2 regularizer.

    Returns:
        The new weight variable. As a side effect, its L2 regularization
        term is added to the 'losses' collection.
    """
    # Normally distributed initial values for the requested shape.
    initial = tf.random_normal(shape)
    var = tf.Variable(initial, dtype=tf.float32)

    # Build the regularizer first, then apply it to the new variable.
    l2_regularizer = tf.contrib.layers.l2_regularizer(lamb)
    tf.add_to_collection('losses', l2_regularizer(var))
    return var

# Input features: any number of samples, two features each.
x = tf.placeholder(tf.float32, shape=(None, 2))
# Target values: one value per sample.
y_ = tf.placeholder(tf.float32, shape=(None, 1))

batch_size = 8
# Number of nodes in each layer, input layer first.
layer_dimension = [2, 10, 10, 10, 1]
# Total number of layers, including the input layer.
n_layers = len(layer_dimension)

# Output of the most recently built layer; starts out as the raw input.
cur_layer = x
in_dimension = layer_dimension[0]

# NOTE(review): no activation function is applied between layers, so the
# stacked layers remain a linear map - confirm whether a nonlinearity such as
# tf.nn.relu was intended.
for i in range(1, n_layers):
    out_dimension = layer_dimension[i]
    # get_weight also adds this layer's L2 penalty to the 'losses' collection.
    weight = get_weight((in_dimension, out_dimension), 0.01)
    # One bias per output node. Shape (out_dimension,) broadcasts across the
    # batch dimension of the (batch, out_dimension) product. The previous
    # shape (out_dimension, 1) broadcast the sum to (out_dimension,
    # out_dimension), which corrupted the batch dimension and failed for
    # most batch sizes.
    bias = tf.Variable(tf.constant(0.1, shape=(out_dimension,)))
    cur_layer = tf.matmul(cur_layer, weight) + bias
    # The next layer's input width is this layer's output width.
    in_dimension = out_dimension

# Data term of the loss: mean squared error against the targets.
mse_loss = tf.reduce_mean(tf.square(y_ - cur_layer))
tf.add_to_collection('losses', mse_loss)

# Total loss = MSE + every L2 penalty collected in 'losses'.
loss = tf.add_n(tf.get_collection('losses'))

## ing


### 3. 滑动平均模型（作用？？）
- 在每层的计算中，对权重和偏移量计算一个滑动平均(也理解为对所有神经网络参数的变量上使用滑动平均)
- (可以使模型在测试数据上更加的robust).
- 适用于**随机梯度下降**训练神经网络时
- `tf.train.ExponentialMovingAverage(decay, num_updates=None, zero_debias=False, name='ExponentialMovingAverage')`
    - decay越大模型越趋于稳定。
    - 如果提供了num_updates，实际使用的衰减率为 `min(decay, (1 + num_updates) / (10 + num_updates))`，因此在训练初期滑动平均值更新得更快。

In [59]:
# 实现移动平均模型
import tensorflow as tf

# Variable whose moving average is tracked, and a step counter that is passed
# to the moving-average object as its update count.
v1 = tf.Variable(0, dtype=tf.float32)
step = tf.Variable(0, trainable=False)

ema = tf.train.ExponentialMovingAverage(0.99, step)
# Running this op refreshes the moving average of every listed variable.
maintain_average_op = ema.apply([v1])

# Fetch list reused below: the raw value and its current moving average.
snapshot = [v1, ema.average(v1)]
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    print('v1=0,step=0')
    print(sess.run(snapshot))

    # Change v1 and refresh the average while step is still 0.
    sess.run(tf.assign(v1, 5))
    sess.run(maintain_average_op)
    print('\nv1=5,step=0')
    print(sess.run(snapshot))

    # Advance the step counter, change v1 again and refresh the average.
    sess.run(tf.assign(step, 1000))
    sess.run(tf.assign(v1, 10))
    sess.run(maintain_average_op)
    print('\nv1=10,step=1000')
    print(sess.run(snapshot))

    # Refresh once more without changing v1 or step.
    print('\nv1=10, step=1000')
    sess.run(maintain_average_op)
    print(sess.run(snapshot))
          

v1=0,step=0
[0.0, 0.0]

v1=5,step=0
[5.0, 4.5]

v1=10,step=1000
[10.0, 4.555]

v1=10, step=1000
[10.0, 4.60945]


# 3 MNIST
---


In [60]:
from tensorflow.examples.tutorials.mnist import input_data

import os

# Directory holding the MNIST archives. Set the MNIST_DATA_DIR environment
# variable to use another location; the fallback is the original
# machine-specific absolute path, so existing setups keep working.
MNIST_DATA_DIR = os.environ.get('MNIST_DATA_DIR',
                                'D:/softfiles/workspace/tensorflow/data/')

# Load the MNIST datasets with one-hot encoded labels.
mnist = input_data.read_data_sets(MNIST_DATA_DIR, one_hot=True)

Extracting D:/softfiles/workspace/tensorflow/data/train-images-idx3-ubyte.gz
Extracting D:/softfiles/workspace/tensorflow/data/train-labels-idx1-ubyte.gz
Extracting D:/softfiles/workspace/tensorflow/data/t10k-images-idx3-ubyte.gz
Extracting D:/softfiles/workspace/tensorflow/data/t10k-labels-idx1-ubyte.gz


In [61]:
# Number of examples in the training split.
print(mnist.train.num_examples)

55000


In [63]:
# IPython help lookup for tf.argmax (interactive aid only).
tf.argmax?

In [65]:
import numpy as np

In [66]:
# 2 x 4 sample matrix used to illustrate argmax along each axis.
a = np.array([[1, 2, 3, 4], [5, 4, 3, 2]])

In [67]:
# Row index of the largest value in each column (one result per column).
a.argmax(axis=0)

array([1, 1, 0, 0], dtype=int64)

In [68]:
# Column index of the largest value in each row (one result per row).
a.argmax(axis=1)

array([3, 0], dtype=int64)