# 自定义求导

In [27]:
import tensorflow as tf
from tensorflow import keras

In [3]:
#近似求导数derivative
def f(x):
    return 3. * x ** 2 + 2. * x - 1
def approximate_derivative(f, x, eps =1e-3):
    return(f(x+eps)- f(x-eps))/ (2. * eps)

print(approximate_derivative(f,1.))

7.999999999999119


In [5]:
def g(x1, x2):
    return(x1 + 5) * (x2 **2)

def approximate_qradient(g, x1, x2, eps = 1e-3):
    dg_x1 = approximate_derivative(lambda x: g(x, x2), x1, eps)#定义g对x1的偏导
    dg_x2 = approximate_derivative(lambda x: g(x1, x), x2, eps)#定义g对x2的偏导
    return dg_x1, dg_x2
print(approximate_qradient(g, 2., 3.))

(8.999999999993236, 41.999999999994486)


In [6]:
# GradientTape求偏导，求解梯度
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
    
dz_x1 = tape.gradient(z, x1)#求z对x1的偏导
print(dz_x1)
try:
    dz_x1 = tape.gradient(z, x1)#GradientTape.gradient can only be called once on non-persistent tapes.
except RuntimeError as ex:
    print(ex)


    

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [9]:
# GradientTape求偏导，求解梯度
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent= True) as tape:
    z = g(x1, x2)
    
dz_x1 = tape.gradient(z, x1)#求z对x1的偏导
print(dz_x1)
try:
    dz_x2 = tape.gradient(z, x2)
    print(dz_x2)
except RuntimeError as ex:
    print(ex)
    
del tape#用了persistent= True要手动释放资源

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)


In [10]:
# GradientTape求偏导，求解梯度
x1 = tf.Variable(2.0)
x2 = tf.Variable(3.0)
with tf.GradientTape(persistent= True) as tape:
    z = g(x1, x2)
    
dz_x1x2 = tape.gradient(z, [x1,x2])#求z对x1和x2的偏导
print(dz_x1x2)


[<tf.Tensor: id=177, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=183, shape=(), dtype=float32, numpy=42.0>]


In [13]:
# 对常量求偏导,这样不可以
x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    z = g(x1, x2)
    
dz_x1x2 = tape.gradient(z, [x1,x2])#求z对x1和x2的偏导
print(dz_x1x2)

[None, None]


In [14]:
# 对常量求偏导,这样不可以, 要用tape.watch()关注常量就可以了

x1 = tf.constant(2.0)
x2 = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)
    
dz_x1x2 = tape.gradient(z, [x1,x2])#求z对x1和x2的偏导
print(dz_x1x2)

[<tf.Tensor: id=221, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=227, shape=(), dtype=float32, numpy=42.0>]


In [19]:
x = tf.Variable(5.)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
dz_z1z2 = tape.gradient([z1, z2],x)
print(dz_z1z2)

tf.Tensor(13.0, shape=(), dtype=float32)


In [22]:
# 求二阶导数要使用嵌套
x1 = tf.Variable(2.0)
x2 =tf.Variable(3.0)
with tf.GradientTape(persistent=True) as out_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
out_grads = [out_tape.gradient(z, [x1, x2]) for inner_grad in inner_grads]
print(out_grads)

del out_tape
del inner_tape



[[<tf.Tensor: id=443, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=449, shape=(), dtype=float32, numpy=42.0>], [<tf.Tensor: id=451, shape=(), dtype=float32, numpy=9.0>, <tf.Tensor: id=457, shape=(), dtype=float32, numpy=42.0>]]


In [24]:
#模拟梯度下降
def f(x):
    return 3. * x ** 2 + 2. * x - 1

learn_rate = 0.1
x = tf.Variable(0.0)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learn_rate*dz_dx)  #梯度下降过程，x - learn_rate * 梯度
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>


In [30]:
#如何与optimizer结合使用
learn_rate = 0.1
x = tf.Variable(0.0)

optimizer = keras.optimizers.SGD(lr = learn_rate)
for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])
print(x)

<tf.Variable 'Variable:0' shape=() dtype=float32, numpy=-0.3333333>
