In [1]:
import torch

### Simple gradient

In [2]:
a = torch.tensor([2, 3], dtype=torch.float, requires_grad=True)
b = a + 3
c = b * b * 3
out = c.mean()
out.backward()

In [3]:
print('*' * 10)
print('=====simple gradient======')
print('input')
print(a)
print('compute result is')
print(out)
print('input gradients are')
print(a.grad)

**********
input
tensor([ 2.,  3.])
compute result is
tensor(91.5000)
input gradients are
tensor([ 15.,  18.])


### Backward on non-scalar output

In [4]:
m = torch.tensor([[3, 4]], dtype=torch.float, requires_grad=True)
n = torch.zeros(1, 2)
n[0, 0] = m[0, 0] ** 2
n[0, 1] = m[0, 1] ** 3
n.backward(torch.FloatTensor([[1, 1]]))
print('*' * 10)
print('=====non scalar output======')
print('input')
print(m)
print('input gradients are')
print(m.grad)

**********
input
tensor([[ 3.,  4.]])
input gradients are
tensor([[  6.,  48.]])


In [5]:
# a wrong example of computing gradient
m = torch.tensor([[2, 3]], dtype=torch.float, requires_grad=True)
j = torch.zeros(2, 2)
k = torch.zeros(1, 2)
k[0, 0] = m[0, 0] ** 2 + 3 * m[0, 1]
k[0, 1] = m[0, 1] ** 3 + 2 * m[0, 0]
k.backward(torch.FloatTensor([[1, 1]])) 
print(m.grad)

tensor([[  6.,  30.]])


### Jacobian

In [6]:
j = torch.zeros(2, 2)
k = torch.zeros(1, 2)
m.grad.zero_()
k[0, 0] = m[0, 0] ** 2 + 3 * m[0, 1]
k[0, 1] = m[0, 1] ** 3 + 2 * m[0, 0]
k.backward(torch.FloatTensor([[1, 0]]), retain_graph=True)
j[0, :] = m.grad
m.grad.zero_()
k.backward(torch.FloatTensor([[0, 1]]))
j[1, :] = m.grad
print('Jacobian matrix is')
print(j)

tensor([[ 4.,  3.]])
Jacobian matrix is
tensor([[  4.,   2.],
        [  3.,  27.]])


### Compute jacobian matrix

In [14]:
x = torch.tensor([[2, 1]], dtype=torch.float, requires_grad=True)
y = torch.tensor([[1, 2], [3, 4]], dtype=torch.float)

In [15]:
z = torch.mm(x, y) # mm means matrix multiplication
jacobian = torch.zeros((2, 2))
z.backward(torch.tensor([[1, 0]], dtype=torch.float), retain_graph=True) # dz1/dx1, dz2/dx1
jacobian[:, 0] = x.grad
x.grad.zero_()
z.backward(torch.tensor([[0, 1]], dtype=torch.float)) # dz1/dx2, dz2/dx2
jacobian[:, 1] = x.grad
print('=========Jacobian=========')
print('x')
print(x)
print('y')
print(y)
print('compute result')
print(z)
print('jacobian matrix is')
print(jacobian)

x
tensor([[ 2.,  1.]])
y
tensor([[ 1.,  2.],
        [ 3.,  4.]])
compute result
tensor([[ 5.,  8.]])
jacobian matrix is
tensor([[ 1.,  2.],
        [ 3.,  4.]])
