In [None]:
import tensorflow as tf
import numpy as np

Let's create some variables. A `tf.Variable` can be initialized from a tensor or from any value that converts to one.

In [None]:
# Wrap an existing constant tensor in a variable; the variable starts out
# with the tensor's values.
my_tensor = tf.constant([[1.0, 2.0],
                         [3.0, 4.0]])
my_variable = tf.Variable(my_tensor)

# Like tensors, variables are not limited to floats: booleans and complex
# numbers work as well.
bool_variable = tf.Variable([False, False, False, True])
complex_variable = tf.Variable([5 + 4j, 6 + 1j])


Change a variable's value with `assign`.

In [None]:
a = tf.Variable([2.0, 3.0])

# assign() replaces the stored values; the integers are converted, so the
# variable keeps its float32 dtype.
a.assign([1, 2])

# Assigning three values to a two-element variable would resize it, which is
# not allowed. Catch the error and print it so the cell still runs to the end.
try:
 a.assign([1.0, 2.0, 3.0])
except Exception as err:
 print(f"{type(err).__name__}: {err}")


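Variables follow normal Python object lifetimes: once no references to a variable remain, it is deallocated automatically. You can drop a reference explicitly with `del`.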

In [None]:
w = tf.Variable([1.0, 2.0, 3.0])
b = tf.Variable(2.0)
x = tf.constant([2.0, 3.0, 4.0])

# Element-wise w * x, then the scalar b is added to every element:
# [4., 8., 14.] (float32)
y = w * x + b

# Drop the Python name `w`. The result `y` was already computed above and
# stays usable.
del w


Calculus

In [None]:
x = tf.Variable([3.0, 3.0])

# Element-wise square of x -> [9.0, 9.0]
y = x ** 2

print(y)

In [None]:
x = tf.Variable([3.0, 3.0])

# The tape records the operations executed inside the `with` block so they
# can be differentiated afterwards.
with tf.GradientTape() as tape:
  y = x ** 2  # [9.0, 9.0]

# d(x^2)/dx = 2x, so at x = 3 each element of the gradient is 6.
dy_dx = tape.gradient(y, x)
print(dy_dx.numpy())  # [6.0, 6.0]

Gradient tapes for a small model.

In [None]:
layer = tf.keras.layers.Dense(2, activation='relu')
x = tf.constant([[1., 2., 3.]])

with tf.GradientTape() as tape:
 # Run the layer on the input, then reduce its output to a scalar loss
 # (mean of the squared activations).
 y = layer(x)
 loss = tf.reduce_mean(y**2)

# One gradient per trainable variable of the layer, in the same order as
# layer.trainable_variables.
grad = tape.gradient(loss, layer.trainable_variables)

# Show each variable's name next to the shape of its gradient.
for var, g in zip(layer.trainable_variables, grad):
 print(f'{var.name}, shape: {g.shape}')

Gradients from intermediate values.

In [None]:
x = tf.constant(3.0)

with tf.GradientTape() as tape:
 # x is a constant, so it must be watched explicitly to be recorded.
 tape.watch(x)
 r = x * x
 z = r * r

# Differentiate z with respect to the intermediate value r (not x):
# z = r ** 2, so dz/dr = 2 * r, and r = x ** 2 = 9 gives 18.
dz_dr = tape.gradient(z, r)
print(dz_dr.numpy()) # 18

In [None]:
x = tf.constant(1.0)
v0 = tf.Variable(2.0)
v1 = tf.Variable(2.0)

with tf.GradientTape(persistent=True) as tape:
 tape.watch(x)
 # Plain Python control flow: only the branch that actually executes is
 # seen by the tape.
 result = v0 if x > 0.0 else v1**2

# x is 1.0, so result is v0 itself: the gradient is 1.0 for v0 and None for
# v1, which never took part in the computation.
dv0, dv1 = tape.gradient(result, [v0, v1])
print(dv0, dv1)

Lots more details, like:
* Watching
* Stopping gradient computation
* Gradients of non-scalar targets
* Jacobians
* 2nd order gradients


## Exercise: y = mx + b

Try writing a function, y = mx + b.

Send some float tensors into it.



In [None]:
def yeqmxb(x, m, b):
  """Evaluate the straight line y = m * x + b.

  Args:
    x: Input value(s).
    m: Slope.
    b: Intercept.

  Returns:
    The result of `m * x + b`, using whatever `*` and `+` mean for the
    argument types.
  """
  scaled = m * x
  return scaled + b


# Scalar float inputs for the line y = m * x + b.
the_x = tf.constant(2.0)
the_m = tf.constant(2.0)
the_b = tf.constant(4.0)

# yeqmxb is defined once at the top of this cell; a second, identical
# definition here would only shadow it, so it is not repeated.
y = yeqmxb(the_x, the_m, the_b)  # 2.0 * 2.0 + 4.0 = 8.0

print(y)


Can you figure out the gradient?

## Solution

In [None]:
with tf.GradientTape() as tape:
  # the_x is a tf.constant, so the tape is told explicitly to watch it.
  tape.watch(the_x)
  y = yeqmxb(the_x, the_m, the_b)

# For y = m * x + b, dy/dx = m. The source is passed as a list, so the
# result comes back as a list holding one gradient.
tape.gradient(y, [the_x])

Now, find the gradient of y with respect to x!

In [None]:
with tf.GradientTape() as tape:
  # the_x is a tf.constant, so it has to be watched explicitly.
  tape.watch(the_x)
  # Pass the slope the_m as `m`. Passing the_b in both the slope and the
  # intercept positions would evaluate y = b * x + b and give dy/dx = 4.0
  # instead of m = 2.0.
  y = yeqmxb(the_x, the_m, the_b)

# dy/dx for y = m * x + b is m, i.e. 2.0 here.
tape.gradient(y, the_x)

Bonus: Why does this return `None` as a derivative?

In [None]:
another_x = tf.constant(40.)
another_m = tf.constant(30.)
another_b = tf.constant(20.)

# A tf.constant is not watched automatically (only trainable tf.Variables
# are), so a tape that never watches another_x has no path from y back to it
# and reports None.
with tf.GradientTape() as unwatched_tape:
  y = yeqmxb(another_x, another_m, another_b)

print(unwatched_tape.gradient(y, [another_x]))  # [None]

# Watching the constant explicitly fixes that: dy/dx = m = 30.0
with tf.GradientTape() as tape:
  tape.watch(another_x)
  y = yeqmxb(another_x, another_m, another_b)

print(tape.gradient(y, [another_x]))

In [None]:
# Render batch