In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import sklearn
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd, sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.3.1
sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)
matplotlib 3.3.2
numpy 1.18.4
pandas 1.1.4
sklearn 0.23.2
tensorflow 2.3.1
tensorflow.keras 2.4.0


In [3]:
def g(x1, x2):
    return (x1 + 5) * (x2 ** 2)

In [6]:
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)

# Tape 调用一次即被消解
with tf.GradientTape() as tape:
    z = g(x1, x2)
    
dz_x1 = tape.gradient(z, x1)
print(dz_x1)

try:
    dz_x2 = tape.gradient(z, x1)
except RuntimeError as ex:
    print(ex)

tf.Tensor(9.0, shape=(), dtype=float32)
GradientTape.gradient can only be called once on non-persistent tapes.


In [9]:
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)

# Tape 调用一次即被消解
with tf.GradientTape(persistent=True) as tape:
    z = g(x1, x2)
    
dz_x1 = tape.gradient(z, x1)
print(dz_x1)

try:
    dz_x2 = tape.gradient(z, x1)
except RuntimeError as ex:
    print(ex)
    
# 手动释放资源
del tape

tf.Tensor(9.0, shape=(), dtype=float32)


In [14]:
x1 = tf.constant(2.)
x2 = tf.constant(3.)

# Tape 调用一次即被消解
with tf.GradientTape(persistent=True) as tape:
    tape.watch(x1)
    tape.watch(x2)
    z = g(x1, x2)
    
dz_x1 = tape.gradient(z, x1)
print(dz_x1)

try:
    dz_x2 = tape.gradient(z, x2)
    print(dz_x2)
except RuntimeError as ex:
    print(ex)
    
# 手动释放资源
del tape

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(42.0, shape=(), dtype=float32)


In [15]:
x = tf.Variable(5.)
with tf.GradientTape() as tape:
    z1 = 3 * x
    z2 = x ** 2
tape.gradient([z1, z2], x)

<tf.Tensor: shape=(), dtype=float32, numpy=13.0>

In [16]:
# 高阶求导
x1 = tf.Variable(2.)
x2 = tf.Variable(3.)

with tf.GradientTape(persistent=True) as outer_tape:
    with tf.GradientTape(persistent=True) as inner_tape:
        z = g(x1, x2)
    inner_grads = inner_tape.gradient(z, [x1, x2])
outer_grads = [outer_tape.gradient(inner_grad[x1, x2])
               for inner_grad in inner_grads]
print(outer_grads)

# 手动释放资源
del inner_tape
del outer_tape

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got <tf.Variable 'Variable:0' shape=() dtype=float32, numpy=2.0>

In [17]:
learning_rate = 0.1
x = tf.Variable(0.0)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    x.assign_sub(learning_rate * dz_dx)
print(x)

NameError: name 'f' is not defined

In [None]:
learning_rate = 0.1
x = tf.Variable(0.0)

optimizer = keras.optimizers.SGD(lr = learning_rate)

for _ in range(100):
    with tf.GradientTape() as tape:
        z = f(x)
    dz_dx = tape.gradient(z, x)
    optimizer.apply_gradients([(dz_dx, x)])
print(x)