[Medium Source](https://medium.com/analytics-vidhya/tf-gradienttape-explained-for-keras-users-cc3f06276f22).

In [None]:
# When x is a constant, the tape must be told to track it explicitly with tape.watch(x)

# Usage

## when variable is a constant

In [22]:
import tensorflow as tf

# A constant is not tracked by the tape automatically, so it has to be
# registered explicitly with tape.watch().
x = tf.constant(5.0)
with tf.GradientTape() as tape:
    tape.watch(x)
    y = x ** 3

# dy/dx = 3 * x**2; the result is typically an eager tensor, so gd.numpy()
# gives the plain value.
gd = tape.gradient(y, x)

In [23]:
# Display the gradient of x**3 evaluated at x = 5.0
gd

<tf.Tensor: shape=(), dtype=float32, numpy=75.0>

## when variable is trainable instead of constant

In [26]:
import tensorflow as tf

# A trainable variable is tracked by the tape without an explicit
# tape.watch() call.
x = tf.Variable(6.0, trainable=True)
with tf.GradientTape() as tape:
    y = x ** 3

# dy/dx = 3 * x**2; the result is typically an eager tensor, so gd.numpy()
# gives the plain value.
gd = tape.gradient(y, x)

In [30]:
# Display the gradient of x**3 evaluated at x = 6.0
gd

<tf.Tensor: shape=(), dtype=float32, numpy=108.0>

# Higher Order derivative

In [28]:
x = tf.Variable(3.0, trainable=True)
# Two nested tapes: the inner tape produces dy/dx, while the outer tape
# records that gradient computation so it can be differentiated once more.
with tf.GradientTape() as tape1:
    with tf.GradientTape() as tape2:
        y = x ** 3
    # Computed inside tape1's context so the outer tape can see it.
    order_1 = tape2.gradient(y, x)
order_2 = tape1.gradient(order_1, x)

print(order_2.numpy())  # second derivative: 6 * x -> 18.0
print(order_1.numpy())  # first derivative: 3 * x**2 -> 27.0

18.0
27.0


# Persistent

In [37]:

a = tf.Variable(6.0, trainable=True)
b = tf.Variable(2.0, trainable=True)
# Without persistent=True, tape.gradient() throws an exception when it is
# called a second time on the same tape.
with tf.GradientTape(persistent=True) as tape:
    y1 = a ** 2
    y2 = b ** 3

print(tape.gradient(y1, a).numpy())  # 2 * a -> 12.0
print(tape.gradient(y2, b).numpy())  # 3 * b**2 -> 12.0

# A persistent tape holds on to its resources until it is garbage-collected,
# so drop the reference once all gradients have been taken.
del tape

12.0
12.0


# Real example on Linear Regression

## init

In [49]:
import numpy as np
import random

# Loss function
def loss(real_y, pred_y):
    """Return the element-wise absolute difference between targets and predictions."""
    error = real_y - pred_y
    return tf.abs(error)

# Training data: y = 10x + 5 sampled at x = 0, 1, ..., 10
x_train = np.arange(11)
y_train = x_train * 10 + 5

# Trainable variables: slope `a` and intercept `b`, each initialised with a
# random float drawn from [0, 1)
a, b = (tf.Variable(random.random(), trainable=True) for _ in range(2))

## step function

In [50]:
def step(real_x, real_y, lr_a=0.01, lr_b=0.001):
    """Apply one gradient-descent update to the linear model ``a * x + b``.

    Args:
        real_x: Input values.
        real_y: Target values corresponding to ``real_x``.
        lr_a: Learning rate used for the update of ``a``.
        lr_b: Learning rate used for the update of ``b``.

    The module-level variables ``a`` and ``b`` are updated in place;
    nothing is returned.
    """
    # gradient() is called only once, so a regular (non-persistent) tape is enough.
    with tf.GradientTape() as tape:
        # Make prediction
        pred_y = a * real_x + b
        # Calculate loss
        reg_loss = loss(real_y, pred_y)

    # Calculate gradients
    a_gradients, b_gradients = tape.gradient(reg_loss, (a, b))

    # Update variables
    a.assign_sub(a_gradients * lr_a)
    b.assign_sub(b_gradients * lr_b)

In [51]:
# Run the update step repeatedly on the full training set, then report the fit
n_steps = 2000
for _ in range(n_steps):
    step(x_train, y_train)

print(f'y ≈ {a.numpy()}x + {b.numpy()}')

y ≈ 9.973946571350098x + 4.98994779586792


# Polynomial Regression

In [76]:
# Loss function
def loss(real_y, pred_y):
    """Return the element-wise absolute difference between targets and predictions."""
    error = real_y - pred_y
    return tf.abs(error)

# Training data: y = 6x^2 + 8x + 2 sampled at x = 0, 1, ..., 10
x_train = np.arange(11)
y_train = 6 * x_train**2 + 8 * x_train + 2

# Trainable variables: the three polynomial coefficients, each initialised
# with a random float drawn from [0, 1)
a, b, c = (tf.Variable(random.random(), trainable=True) for _ in range(3))
# Learning rate shared by all three coefficients
lr = 0.001

# Step function
def step(x_real, y_real):
    """Apply one gradient-descent update to ``a * x**2 + b * x + c``.

    Args:
        x_real: Input values.
        y_real: Target values corresponding to ``x_real``.

    The module-level variables ``a``, ``b`` and ``c`` are updated in place
    using the module-level learning rate ``lr``; nothing is returned.
    """
    coefficients = (a, b, c)
    # gradient() is called only once, so a regular (non-persistent) tape is enough.
    with tf.GradientTape() as tape:
        y_pred = a * x_real**2 + b * x_real + c
        closs = loss(y_real, y_pred)
    gradients = tape.gradient(closs, coefficients)
    for coefficient, gradient in zip(coefficients, gradients):
        coefficient.assign_sub(gradient * lr)


# Training loop: repeat the update step on the full training set, then report the fit
n_steps = 3000
for _ in range(n_steps):
    step(x_train, y_train)

print(f'y ≈ {a.numpy()}x^2 + {b.numpy()}x + {c.numpy()}')

y ≈ 6.427342414855957x^2 + 7.563826560974121x + 2.010007619857788


# Real example in nn

## init

In [82]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import RandomNormal 

# Load and pre-process training data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Images: divide by 255 and add a trailing channel axis -> (-1, 28, 28, 1)
x_train, x_test = (
    (images / 255).reshape((-1, 28, 28, 1)) for images in (x_train, x_test)
)
# Labels: convert to categorical (one-hot) vectors over 10 classes
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

# Hyperparameters
batch_size = 128
# NOTE: the training-loop cell reassigns `epochs` (to 3) before it is used.
epochs = 50
# `lr` is a deprecated alias that newer Keras releases no longer accept;
# `learning_rate` is the supported argument name.
optimizer = Adam(learning_rate=0.001)
weight_init = RandomNormal()



## build model

In [84]:
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D
from tensorflow.keras.models import Sequential

# Build model: two conv layers -> max-pool -> dropout -> dense -> dropout -> softmax over 10 classes
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer=weight_init, input_shape=(28, 28, 1)),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer=weight_init),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer=weight_init),
    Dropout(0.5),
    Dense(10, activation='softmax', kernel_initializer=weight_init),
])



## define step function

In [87]:
def step(real_x, real_y):
    """Run one optimizer update of ``model`` on a single batch.

    Args:
        real_x: Batch of images; must expose ``.reshape`` (e.g. a NumPy
            array) and be reshapeable to ``(-1, 28, 28, 1)``.
        real_y: Categorical (one-hot) labels for the batch.

    The weights of the module-level ``model`` are updated through the
    module-level ``optimizer``; nothing is returned.
    """
    with tf.GradientTape() as tape:
        # Make prediction. training=True is required here: without it the
        # model runs in inference mode and the Dropout layers are inactive.
        pred_y = model(real_x.reshape((-1, 28, 28, 1)), training=True)
        # Calculate loss
        model_loss = tf.keras.losses.categorical_crossentropy(real_y, pred_y)

    # Calculate gradients
    model_gradients = tape.gradient(model_loss, model.trainable_variables)
    # Update model
    optimizer.apply_gradients(zip(model_gradients, model.trainable_variables))

## define training loop

In [90]:
import math
# Training loop
# Only whole batches are used; a trailing partial batch is skipped.
bat_per_epoch = len(x_train) // batch_size
epochs = 3
for epoch in range(epochs):
    print('=', end='')
    for start in range(0, bat_per_epoch * batch_size, batch_size):
        stop = start + batch_size
        step(x_train[start:stop], y_train[start:stop])

# Calculate accuracy
# Compile just for evaluation: evaluate() then returns [loss, acc], and index 1 is the accuracy.
model.compile(optimizer=optimizer, loss=tf.keras.losses.categorical_crossentropy, metrics=['acc'])
print('\n', model.evaluate(x_test, y_test, verbose=0)[1])

===
 0.9860000014305115


# What's next

[advanced style transfer](https://www.tensorflow.org/tutorials/generative/style_transfer)

[adversarial attacks](https://medium.com/analytics-vidhya/implementing-adversarial-attacks-and-defenses-in-keras-tensorflow-2-0-cab6120c5715)

[transform the world with cycleGAN](https://medium.com/analytics-vidhya/transforming-the-world-into-paintings-with-cyclegan-6748c0b85632)