<a href="https://colab.research.google.com/github/yeesem/Deep_Learning/blob/main/Gradient_Tape_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import tensorflow as tf

In [6]:
# Start from a 2x2 tensor filled with ones.
x = tf.ones(shape=(2, 2))

with tf.GradientTape() as t:
    # x is a plain tensor (not a tf.Variable), so ask the tape to track it.
    t.watch(x)

    # y is the sum of all four entries of x (y = 4 here).
    y = tf.reduce_sum(x)

    # z = y^2 = (sum of x)^2 (z = 16 here).
    z = tf.square(y)
    print(z)

# Chain rule: dz/dx_ij = 2 * y * dy/dx_ij = 2 * sum(x) = 8 for every entry.
dz_dx = t.gradient(target=z, sources=x)

print(dz_dx)

tf.Tensor(16.0, shape=(), dtype=float32)
tf.Tensor(
[[8. 8.]
 [8. 8.]], shape=(2, 2), dtype=float32)


In [7]:
x = tf.constant(3.0)

with tf.GradientTape() as t:
    # x is a constant tensor, so it has to be watched explicitly.
    t.watch(x)

    y = x * x  # y = x^2
    z = y * y  # z = y^2 = x^4

# dz/dx = 4 * x^3, which evaluates to 108.0 at x = 3.
dz_dx = t.gradient(target=z, sources=x)
print(dz_dx)

tf.Tensor(108.0, shape=(), dtype=float32)


In [8]:
# The tape `t` from the previous cell is non-persistent and has already served
# one gradient() call, so asking it for dy/dx now raises a RuntimeError.
# Create the tape with persistent=True to compute several gradients from it.
try:
    dy_dx = t.gradient(target=y, sources=x)  # would be 6.0 on a persistent tape
    print(dy_dx)
except RuntimeError as err:
    print("The error message you get is:", err, sep="\n")

The error message you get is:
A non-persistent GradientTape can only be used to compute one set of gradients (or jacobians)


In [10]:
x = tf.constant(3.0)

# persistent=True allows gradient() to be called on this tape more than once.
with tf.GradientTape(persistent=True) as t:
    t.watch(x)

    y = x * x  # y = x^2
    z = y * y  # z = x^4

# dz/dx = 4 * x^3 --> 108.0 at x = 3
dz_dx = t.gradient(target=z, sources=x)
print(dz_dx)

# A second gradient() call on the same tape succeeds because it is persistent.
# dy/dx = 2 * x --> 6.0 at x = 3
dy_dx = t.gradient(target=y, sources=x)
print(dy_dx)

tf.Tensor(108.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [11]:
# Drop the reference to the persistent tape `t` now that it is no longer needed.
# NOTE(review): presumably this lets the tape's recorded state be
# garbage-collected -- confirm against the tf.GradientTape documentation.
del t

In [12]:
x = tf.Variable(1.0)

# Nest two tapes: the inner one yields dy/dx, while the outer one records that
# gradient computation so it can be differentiated a second time.
with tf.GradientTape() as tape2:
    with tf.GradientTape() as tape1:
        y = x * x * x  # y = x^3

    # First derivative, taken while tape2 is still recording.
    # dy/dx = 3 * x^2 --> 3.0 at x = 1
    dy_dx = tape1.gradient(target=y, sources=x)

# Second derivative: d2y/dx2 = 6 * x --> 6.0 at x = 1
dy2_dx2 = tape2.gradient(target=dy_dx, sources=x)

print(dy_dx)
print(dy2_dx2)

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [13]:
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3

# dy/dx is computed only after BOTH with-blocks have closed, so tape_2 is no
# longer recording and never sees the operations that produce dy_dx.
dy_dx = tape_1.gradient(target=y, sources=x)

# tape_2 holds no recorded path from x to dy_dx, so the result is `None`.
d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

print(dy_dx)
print(d2y_dx2)

tf.Tensor(3.0, shape=(), dtype=float32)
None


In [15]:
x = tf.Variable(1.0)

# persistent=True only permits repeated gradient() calls on a tape; it does not
# make the tape record operations that run after its with-block has ended.
with tf.GradientTape(persistent=True) as tape_2:
    with tf.GradientTape(persistent=True) as tape_1:
        y = x * x * x  # y = x^3

# As before, the first derivative is computed outside the outer with-block,
# so tape_2 does not record how dy_dx was obtained from x.
dy_dx = tape_1.gradient(target=y, sources=x)

# The second derivative is therefore still `None`.
d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

print(dy_dx)
print(d2y_dx2)

tf.Tensor(3.0, shape=(), dtype=float32)
None


In [17]:
# Proper indentation for the second gradient calculation (variant 1):
# both gradient() calls sit inside the inner with-block.
#
# Create a fresh variable with an explicit value instead of wrapping whatever
# `x` an earlier cell left in the kernel, so this cell produces the same
# result regardless of which cells ran before it.
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3

        # dy/dx = 3 * x^2 --> 3.0 at x = 1
        dy_dx = tape_1.gradient(y, x)

        # This is acceptable: dy_dx was computed inside tape_2's with-block,
        # so tape_2 can differentiate it again.
        # d2y/dx2 = 6 * x --> 6.0 at x = 1
        d2y_dx2 = tape_2.gradient(dy_dx, x)

print(dy_dx)
print(d2y_dx2)

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [18]:
# Variant 2: first gradient inside the inner with-block, second gradient
# inside the outer with-block after the inner one has closed.
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3

        # dy/dx = 3 * x^2 --> 3.0 at x = 1
        dy_dx = tape_1.gradient(target=y, sources=x)

    # Also acceptable: d2y/dx2 = 6 * x --> 6.0 at x = 1
    d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

print(dy_dx)
print(d2y_dx2)

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)


In [19]:
# Variant 3: first gradient inside the inner with-block, second gradient
# after both with-blocks have closed.
x = tf.Variable(1.0)

with tf.GradientTape() as tape_2:
    with tf.GradientTape() as tape_1:
        y = x * x * x  # y = x^3

        # dy/dx = 3 * x^2 --> 3.0 at x = 1
        dy_dx = tape_1.gradient(target=y, sources=x)

# Also acceptable: dy_dx was produced inside tape_2's with-block, so tape_2
# can still differentiate it here. d2y/dx2 = 6 * x --> 6.0 at x = 1
d2y_dx2 = tape_2.gradient(target=dy_dx, sources=x)

print(dy_dx)
print(d2y_dx2)

tf.Tensor(3.0, shape=(), dtype=float32)
tf.Tensor(6.0, shape=(), dtype=float32)
