https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html

In [71]:
import torch

## x 0-d, y 0-d 

In [150]:
# Scalar (0-d) leaf tensor; its .grad stays None until a backward pass has run.
x = torch.tensor(1.0).requires_grad_()
print(f'x.shape: {x.shape}, x.grad: {x.grad}')
# y = 2x + x^2 is built from a 0-d input, so y is 0-d as well.
y = x.mul(2) + x.pow(2)
print(f'y: {y}; y.shape: {y.shape}')

x.shape: torch.Size([]), x.grad: None
y: 3.0; y.shape: torch.Size([])


In [151]:
# For y = 2x + x^2 the derivative is dy/dx = 2 + 2x, i.e. 4 at x = 1.
x = torch.tensor(1.0).requires_grad_()
y = x.mul(2) + x.pow(2)
# y is 0-d, so backward() is called without an explicit `gradient`.
y.backward()
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {(2+2*x)}')

x.grad: 4.0; type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([]), manual result: 4.0


In [152]:
# Same function as before, but the upstream gradient is passed explicitly.
# With gradient = 1 the result matches the plain y.backward() call.
x = torch.tensor(1.0).requires_grad_()
y = x.mul(2) + x.pow(2)
gradient = torch.tensor(1.0)
y.backward(gradient)
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {gradient*(2+2*x)}')

x.grad: 4.0; type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([]), manual result: 4.0


In [153]:
# The `gradient` argument scales the result: x.grad = gradient * dy/dx = 100 * 4.
x = torch.tensor(1.0).requires_grad_()
y = x.mul(2) + x.pow(2)
gradient = torch.tensor(100.0)
y.backward(gradient)
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {gradient*(2+2*x)}')

x.grad: 400.0; type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([]), manual result: 400.0


## x.shape=[1], y.shape=[1]

In [154]:
# Same function, but x is now a 1-d tensor holding a single element.
x = torch.tensor([1.0]).requires_grad_()
print(f'x.shape: {x.shape}')
# Element-wise ops keep the shape, so y is also of shape [1].
y = x.mul(2) + x.pow(2)
print(f'y: {y}; y.shape: {y.shape}')

x.shape: torch.Size([1])
y: tensor([3.], grad_fn=<AddBackward0>); y.shape: torch.Size([1])


In [155]:
# y holds exactly one element, so backward() still works without a `gradient`.
x = torch.tensor([1.0]).requires_grad_()
y = x.mul(2) + x.pow(2)
y.backward()
# The manual result is computed from x, so its printout carries a grad_fn.
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {(2+2*x)}')

x.grad: tensor([4.]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([1]), manual result: tensor([4.], grad_fn=<AddBackward0>)


In [156]:
# Explicit upstream gradient with the same shape as y ([1]); a value of 1
# reproduces the result of the argument-free backward() call.
x = torch.tensor([1.0]).requires_grad_()
y = x.mul(2) + x.pow(2)
gradient = torch.tensor([1.0])
y.backward(gradient)
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {gradient*(2+2*x)}')

x.grad: tensor([4.]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([1]), manual result: tensor([4.], grad_fn=<MulBackward0>)


In [157]:
# Upstream gradient of 100 with y's shape ([1]): x.grad = 100 * (2 + 2x) = 400.
x = torch.tensor([1.0]).requires_grad_()
y = x.mul(2) + x.pow(2)
gradient = torch.tensor([100.0])
y.backward(gradient)
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {gradient*(2+2*x)}')

x.grad: tensor([400.]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([1]), manual result: tensor([400.], grad_fn=<MulBackward0>)


## x.shape=[2], y 0-d - Example 1

In [158]:
# Two-element input; y combines both entries into a single 0-d value:
# y = 2*x[0] + x[1]^2.
x = torch.tensor([1.0, 2.0]).requires_grad_()
print(f'x: {x}, x.shape: {x.shape}, x.grad: {x.grad}')
y = x[0].mul(2) + x[1].pow(2)
print(f'y.shape: {y.shape}')

x: tensor([1., 2.], requires_grad=True), x.shape: torch.Size([2]), x.grad: None
y.shape: torch.Size([])


In [159]:
# Partial derivatives of y = 2*x[0] + x[1]^2 are [2, 2*x[1]] = [2, 4] at x = [1, 2].
x = torch.tensor([1.0, 2.0]).requires_grad_()
y = x[0].mul(2) + x[1].pow(2)
y.backward()
# x.grad has the same shape as x: one partial derivative per input element.
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {[2,2*x[1]]}')

x.grad: tensor([2., 4.]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2]), manual result: [2, tensor(4., grad_fn=<MulBackward0>)]


In [160]:
# y is 0-d, so the upstream gradient is 0-d too; it scales every partial
# derivative: x.grad = 100 * [2, 2*x[1]] = [200, 400].
x = torch.tensor([1.0, 2.0]).requires_grad_()
y = x[0].mul(2) + x[1].pow(2)
gradient = torch.tensor(100.0)
y.backward(gradient)
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {[2*gradient,2*x[1]*gradient]}')

x.grad: tensor([200., 400.]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2]), manual result: [tensor(200.), tensor(400., grad_fn=<MulBackward0>)]


## x.shape=[2], y 0-d - Example 2

In [161]:
# Second 0-d example: y is the sum of all elements of x.
x = torch.tensor([1.0, 2.0]).requires_grad_()
print(f'x: {x}, x.shape: {x.shape}, x.grad: {x.grad}')
y = torch.sum(x)
print(f'y.shape: {y.shape}')

x: tensor([1., 2.], requires_grad=True), x.shape: torch.Size([2]), x.grad: None
y.shape: torch.Size([])


In [162]:
# d(sum(x))/dx[i] = 1 for every element, so x.grad is expected to be [1, 1].
x = torch.tensor([1.0, 2.0]).requires_grad_()
y = torch.sum(x)
y.backward()
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {[1,1]}')

x.grad: tensor([1., 1.]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2]), manual result: [1, 1]


In [163]:
# With an upstream gradient of 100 each unit partial derivative is scaled:
# x.grad = 100 * [1, 1].
x = torch.tensor([1.0, 2.0]).requires_grad_()
y = torch.sum(x)
gradient = torch.tensor(100.0)
y.backward(gradient)
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}, manual result: {[1*gradient,1*gradient]}')

x.grad: tensor([100., 100.]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2]), manual result: [tensor(100.), tensor(100.)]


## x.shape=[2, 1], y.shape=[1, 1]

In [218]:
# Shape check for the matrix product: a is [1, 2], x is [2, 1], so y = a @ x is [1, 1].
# In this cell `a` is the tensor that tracks gradients; x is a plain tensor.
a = torch.tensor([[1.0, 2.0]]).requires_grad_()
print(f'a.shape: {a.shape}')
x = torch.tensor([[3.0], [5.0]])
print(f'x.shape: {x.shape}')
y = a.mm(x)
print(f'y: {y}; y.shape: {y.shape}')

a.shape: torch.Size([1, 2])
x.shape: torch.Size([2, 1])
y: tensor([[13.]], grad_fn=<MmBackward>); y.shape: torch.Size([1, 1])


In [219]:
# y = a @ x = 1*x[0] + 2*x[1], so dy/dx is a transposed: [[1], [2]].
a = torch.tensor([[1.0, 2.0]])
x = torch.tensor([[3.0], [5.0]]).requires_grad_()
y = a.mm(x)
# y has shape [1, 1] (a single element), so no `gradient` argument is passed.
y.backward()
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}')

x.grad: tensor([[1.],
        [2.]]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2, 1])


In [220]:
# Upstream gradient with the same shape as y ([1, 1]):
# x.grad = 100 * a^T = [[100], [200]].
a = torch.tensor([[1.0, 2.0]])
x = torch.tensor([[3.0], [5.0]]).requires_grad_()
y = a.mm(x)
gradient = torch.tensor([[100.0]])
print(f'gradient.shape: {gradient.shape}')
y.backward(gradient)
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}')

gradient.shape: torch.Size([1, 1])
x.grad: tensor([[100.],
        [200.]]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2, 1])


In [221]:
# Experiment: what happens when `gradient` ([1, 2]) does not match y's shape ([1, 1])?
# Current PyTorch rejects such a call with a "Mismatch in shape" RuntimeError.
# The output recorded for this cell, [[400.], [800.]] = (100 + 300) * a^T, is
# consistent with the gradient entries being summed down to y's shape first, so
# that reduction is written out explicitly to keep the cell runnable.
a = torch.tensor([[1., 2.]])
x = torch.tensor([[3.],[5]],requires_grad=True)
y = torch.mm(a, x)
gradient = torch.tensor([[100., 300]])
print(f'gradient.shape: {gradient.shape}')
# Collapse the [1, 2] gradient to y's [1, 1] shape: 100 + 300 = 400.
y.backward(gradient=gradient.sum().reshape(y.shape))
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}') 

gradient.shape: torch.Size([1, 2])
x.grad: tensor([[400.],
        [800.]]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2, 1])


In [222]:
# Experiment: `gradient` of shape [2, 1] passed for a y of shape [1, 1].
# Current PyTorch rejects such a call with a "Mismatch in shape" RuntimeError.
# The output recorded for this cell, [[400.], [800.]] = (100 + 300) * a^T, is
# consistent with the gradient entries being summed down to y's shape first, so
# that reduction is written out explicitly to keep the cell runnable.
a = torch.tensor([[1., 2.]])
x = torch.tensor([[3.],[5]],requires_grad=True)
y = torch.mm(a, x)
gradient = torch.tensor([[100.], [300]])
print(f'gradient.shape: {gradient.shape}')
# Collapse the [2, 1] gradient to y's [1, 1] shape: 100 + 300 = 400.
y.backward(gradient=gradient.sum().reshape(y.shape))
print(f'x.grad: {x.grad}; type(x.grad): {type(x.grad)}, x.grad.shape: {x.grad.shape}') 

gradient.shape: torch.Size([2, 1])
x.grad: tensor([[400.],
        [800.]]); type(x.grad): <class 'torch.Tensor'>, x.grad.shape: torch.Size([2, 1])


## x.shape=[2], y.shape=[2]

In [166]:
# Element-wise function of a two-element input: y = 2x keeps x's shape ([2]).
x = torch.tensor([1.0, 2.0]).requires_grad_()
print(f'x: {x}, x.shape: {x.shape}, x.grad: {x.grad}')
y = x.mul(2)
print(f'y.shape: {y.shape}')

x: tensor([1., 2.], requires_grad=True), x.shape: torch.Size([2]), x.grad: None
y.shape: torch.Size([2])


In [167]:
# backward() without a `gradient` argument only works for scalar outputs.
# Here y has two elements, so the call fails; the error is caught and printed
# so the demonstration runs instead of staying commented out.
x = torch.tensor([1., 2.],requires_grad=True)
y = 2*x
try:
    y.backward()
except RuntimeError as err:
    # Expected message: "grad can be implicitly created only for scalar outputs"
    print(f'RuntimeError: {err}')

In [168]:
# For a non-scalar y an explicit gradient with y's shape is supplied.
# With all-ones weights, x.grad is simply dy/dx = 3.14 per element.
x = torch.tensor([1.0, 2.0]).requires_grad_()
y = x.mul(3.14)
y.backward(gradient=torch.ones(2))
print(f'x.grad:{x.grad}; type(x.grad):{type(x.grad)}, x.grad.shape:{x.grad.shape}')

x.grad:tensor([3.1400, 3.1400]); type(x.grad):<class 'torch.Tensor'>, x.grad.shape:torch.Size([2])


In [169]:
# Each element of the supplied gradient weights the matching element of y:
# x.grad = [10 * 3.14, 100 * 3.14].
x = torch.tensor([1.0, 2.0]).requires_grad_()
y = x.mul(3.14)
y.backward(gradient=torch.tensor([10.0, 100.0]))
print(f'x.grad:{x.grad}; type(x.grad):{type(x.grad)}, x.grad.shape:{x.grad.shape}')

x.grad:tensor([ 31.4000, 314.0000]); type(x.grad):<class 'torch.Tensor'>, x.grad.shape:torch.Size([2])
