In [5]:
# Automatic differentiation pipeline
# 1. Mark the data whose gradients need to be computed.
# 2. The framework builds a computational graph from the variables, for example
#    x = tensor(...), y = operation(x); the graph now has nodes recording x, y and the function linking them.
# 3. Call backward to compute the gradients. The graph is traversed backwards through every
#    result and parameter, and a gradient is computed on every correlated node.

import torch

# Scalar-valued function of a vector
# ----------------------------------
# Build the input vector x that y will be evaluated on.
print('Scalar to Vector differentiation:')
x = torch.arange(4.0)
print(x)

# Switch on autograd tracking for x (requires_grad_ defaults to True).
x.requires_grad_()
print(x.grad)  # still None: no backward pass has been run yet

# Define the function: y = 2 * <x, x>, a single scalar.
y = torch.dot(x, x) * 2
print(y)

# Back-propagate from y; the result is stored in x.grad.
y.backward()
print(x.grad)  # dy/dx = 4 * x

# backward() accumulates into x.grad, so reset it in place before
# differentiating a different function of the same x.
x.grad.zero_()

# Vector-valued function of a vector
# ----------------------------------
print('\nVector to Vector differentiation:')
y = x * x  # element-wise product: y is a vector with the same shape as x

# backward() with no arguments needs a scalar, so reduce y with a sum first.
torch.sum(y).backward()
print(x.grad)  # d(sum(x * x))/dx = 2 * x

Scalar to Vector differentiation:
tensor([0., 1., 2., 3.])
None
tensor(28., grad_fn=<MulBackward0>)
tensor([ 0.,  4.,  8., 12.])

Vector to Vector differentiation:
tensor([0., 2., 4., 6.])


In [8]:
# detach
# ------
# In a chain z(y), y(x) we sometimes want y to act as a constant while
# differentiating z with respect to x. y.detach() gives a tensor u holding the
# values of y but cut off from the graph, so the backward pass through
# z = u * x does not propagate into y = x * x.

x.grad.zero_()   # reset x.grad so earlier backward passes do not add to this one
y = x * x        # element-wise square of x
u = y.detach()   # same values as y, treated by autograd as a constant
z = u * x

torch.sum(z).backward()
print(z)         # values equal x * x * x, but autograd only sees the element-wise product u * x
print(x.grad)    # dz/dx = u, not 3 * x * x
print(u)         # identical to x.grad above: u entered the product as a constant factor

tensor([ 0.,  1.,  8., 27.], grad_fn=<MulBackward0>)
tensor([0., 1., 4., 9.])
tensor([0., 1., 4., 9.])
