# 2-3自动微分机制

神经网络通常依赖反向传播求梯度来更新网络参数，求梯度过程通常是一件非常复杂而容易出错的事情。

而深度学习框架可以帮助我们自动地完成这种求梯度运算。

Tensorflow一般使用梯度磁带tf.GradientTape来记录正向运算过程，然后反向播放磁带（即反向传播）自动得到梯度值。

这种利用tf.GradientTape求微分的方法叫做Tensorflow的自动微分机制。

## 一、利用梯度磁带求导数

In [1]:
import tensorflow as tf
import numpy as np 

# Derivative of f(x) = a*x**2 + b*x + c

a, b, c = tf.constant(1.0), tf.constant(-2.0), tf.constant(1.0)
x = tf.Variable(0.0, dtype=tf.float32, name="x")

with tf.GradientTape() as tape:
    y = a * tf.pow(x, 2) + b * x + c

dy_dx = tape.gradient(y, x)
print(dy_dx)  # x was initialised to 0, so this is the derivative at x = 0

tf.Tensor(-2.0, shape=(), dtype=float32)


In [5]:
# Gradients with respect to constant tensors are available too, but the
# tape has to be told to watch them.

with tf.GradientTape() as tape:
    tape.watch([a, b, c])
    y = a * tf.pow(x, 2) + b * x + c

dy_dx, dy_da, dy_db, dy_dc = tape.gradient(y, [x, a, b, c])
# One tensor per line, in the order a, b, c, x.
print(dy_da, dy_db, dy_dc, dy_dx, sep="\n")

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(1.0, shape=(), dtype=float32)
tf.Tensor(-2.0, shape=(), dtype=float32)


In [3]:
# Second derivatives can be obtained by nesting two gradient tapes.
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:   
        y = a*tf.pow(x,2) + b*x + c
    # First derivative; computed inside tape2's context, outside tape1's.
    dy_dx = tape1.gradient(y,x)   
# Differentiate the first derivative with the outer tape.
dy2_dx2 = tape2.gradient(dy_dx,x)

print(dy2_dx2)

tf.Tensor(2.0, shape=(), dtype=float32)


In [4]:
# Gradient tapes can also be used inside autograph (tf.function).

@tf.function
def f(x):
    """Evaluate y = x**2 - 2*x + 1 and its derivative at ``x``.

    Args:
        x: Point of evaluation; it is cast to tf.float32 before use.

    Returns:
        A ``(dy_dx, y)`` tuple.
    """
    coef_a = tf.constant(1.0)
    coef_b = tf.constant(-2.0)
    coef_c = tf.constant(1.0)

    # Convert the argument to tf.float32 and have the tape watch the
    # converted tensor explicitly.
    x_f32 = tf.cast(x, tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(x_f32)
        value = coef_a * tf.pow(x_f32, 2) + coef_b * x_f32 + coef_c
    slope = tape.gradient(value, x_f32)

    return slope, value

# Print (dy_dx, y) at x = 0 and at x = 1.
for x0 in (0.0, 1.0):
    tf.print(f(tf.constant(x0)))

(-2, 1)
(0, 0)


## 二、利用梯度磁带和优化器求最小值

In [13]:
# Find the minimum of f(x) = a*x**2 + b*x + c
# using optimizer.apply_gradients
x = tf.Variable(0.0,name = "x",dtype = tf.float32)
a = tf.constant(1.0)
b = tf.constant(-16.0)
c = tf.constant(1.0)

optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
for i in range(100):
    with tf.GradientTape() as tape:
        y = a*tf.pow(x,2) + b*x + c
    dy_dx = tape.gradient(y,x)
    # Log step index, gradient, objective and x as they are *before* this
    # step's update is applied.
    print(i,"-"*10,dy_dx.numpy(),"\t",y.numpy(),"\t",x.numpy())
    optimizer.apply_gradients(grads_and_vars=[(dy_dx,x)])

# The y left over from the loop was evaluated before the final update of x.
# Re-evaluate it so the reported y corresponds to the reported x.
y = a*tf.pow(x,2) + b*x + c
tf.print("y =",y,"; x =",x)

0 ---------- -16.0 	 1.0 	 0.0
1 ---------- -12.8 	 -22.04 	 1.6
2 ---------- -10.24 	 -36.785603 	 2.88
3 ---------- -8.191999 	 -46.222786 	 3.9040003
4 ---------- -6.5535994 	 -52.262585 	 4.7232003
5 ---------- -5.24288 	 -56.12805 	 5.37856
6 ---------- -4.1943035 	 -58.601955 	 5.9028482
7 ---------- -3.355443 	 -60.18525 	 6.3222785
8 ---------- -2.6843548 	 -61.19856 	 6.6578226
9 ---------- -2.1474838 	 -61.847076 	 6.926258
10 ---------- -1.7179871 	 -62.26213 	 7.1410065
11 ---------- -1.3743896 	 -62.527763 	 7.312805
12 ---------- -1.0995121 	 -62.69777 	 7.450244
13 ---------- -0.87961006 	 -62.806572 	 7.560195
14 ---------- -0.70368767 	 -62.876205 	 7.648156
15 ---------- -0.56295013 	 -62.920773 	 7.718525
16 ---------- -0.4503603 	 -62.949295 	 7.77482
17 ---------- -0.36028862 	 -62.96755 	 7.8198557
18 ---------- -0.2882309 	 -62.97923 	 7.8558846
19 ---------- -0.2305851 	 -62.986706 	 7.8847075
20 ---------- -0.18446827 	 -62.991493 	 7.907766
21 ---------- -0.14

In [14]:
# Find the minimum of f(x) = a*x**2 + b*x + c
# using optimizer.minimize, which is equivalent to taking the gradient
# with a tape and then calling apply_gradients.

x = tf.Variable(0.0, dtype=tf.float32, name="x")


# Note: f() takes no arguments; it reads the module-level variable x.
def f():
    """Return x**2 - 16*x + 1 evaluated at the current value of ``x``."""
    coef_a = tf.constant(1.0)
    coef_b = tf.constant(-16.0)
    coef_c = tf.constant(1.0)
    return coef_a * tf.pow(x, 2) + coef_b * x + coef_c

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
num_steps = 1000
for _ in range(num_steps):
    # One optimizer.minimize call per iteration, updating x.
    optimizer.minimize(f, [x])

tf.print("y =", f(), "; x =", x)

y = -63 ; x = 7.99998856


In [15]:
# Solve the minimisation inside autograph
# using optimizer.apply_gradients

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
x = tf.Variable(0.0, dtype=tf.float32, name="x")


@tf.function
def minimizef():
    """Run 1000 SGD updates on ``x`` and return the objective afterwards.

    The objective is x**2 - 16*x + 1; ``x`` and ``optimizer`` are the
    module-level objects defined above.
    """
    coef_a = tf.constant(1.0)
    coef_b = tf.constant(-16.0)
    coef_c = tf.constant(1.0)

    # Note: under autograph iterate with tf.range(1000), not range(1000).
    for _ in tf.range(1000):
        with tf.GradientTape() as tape:
            loss = coef_a * tf.pow(x, 2) + coef_b * x + coef_c
        grad = tape.gradient(loss, x)
        optimizer.apply_gradients(grads_and_vars=[(grad, x)])

    # Evaluate the objective once more at the final x.
    return coef_a * tf.pow(x, 2) + coef_b * x + coef_c

# Run the optimisation, then show the final objective value and x.
final_y = minimizef()
tf.print(final_y)
tf.print(x)

-63
7.99998856


In [16]:
# Solve the minimisation inside autograph
# using optimizer.minimize

optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)
x = tf.Variable(0.0, dtype=tf.float32, name="x")


@tf.function
def f():
    """Return x**2 - 16*x + 1 evaluated at the current value of ``x``."""
    coef_a = tf.constant(1.0)
    coef_b = tf.constant(-16.0)
    coef_c = tf.constant(1.0)
    return coef_a * tf.pow(x, 2) + coef_b * x + coef_c

@tf.function
def train(epoch):
    """Call ``optimizer.minimize(f, [x])`` ``epoch`` times.

    Args:
        epoch: Number of minimize calls to perform.

    Returns:
        The value of ``f()`` after the last call.
    """
    for _step in tf.range(epoch):
        optimizer.minimize(f, [x])
    return f()


# Train for 1000 steps, then show the final objective value and x.
final_y = train(1000)
tf.print(final_y)
tf.print(x)


-63
7.99998856
