In [None]:
import torch

We want to calculate the gradient of some function with respect to x.

By default, tensors are created with **requires_grad=False**, so it has to be set to **True** explicitly.

In [None]:
# Three standard-normal samples; requires_grad=True asks autograd to record
# the operations applied to this tensor.
x = torch.randn(3, requires_grad=True)
x

tensor([-0.4415,  0.0681, -0.4358], requires_grad=True)

A computational graph will be created.

Since the + operation is used, the result has **grad_fn=\<AddBackward0\>**

In [None]:
# Adding a constant to the tracked tensor x; the printed result carries the
# grad_fn of the addition.
y = torch.add(x, 2)
print(y)

tensor([1.5585, 2.0681, 1.5642], grad_fn=<AddBackward0>)


### Calculating gradients

For a scalar output, the gradient argument of `backward()` is created implicitly.

For a vector output, we have to pass `backward()` a vector of the same size as the output.

In [None]:
z = 2 * (y * y)
print(z)

# z is a vector, so backward() is given an explicit vector of the same shape.
# NOTE: with an all-zero vector the resulting x.grad is all zeros as well.
# A non-trivial alternative:
# v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
v = torch.zeros_like(z)
z.backward(gradient=v)
print(x.grad)

tensor([4.8577, 8.5541, 4.8936], grad_fn=<MulBackward0>)
tensor([0., 0., 0.])


In [None]:
# dz/dx through a scalar: reducing z with mean() lets backward() run
# without an explicit gradient argument.
x = torch.randn(3, requires_grad=True)
y = torch.add(x, 2)
z = 2 * (y * y)
z_mean = torch.mean(z)
print(z_mean)
z_mean.backward()
print(x.grad)

tensor(1.5317, grad_fn=<MeanBackward0>)
tensor([0.7139, 1.3067, 1.3665])


### Prevent tracking of gradients:

1. x.requires_grad_(False) -> Modifies x in-place (note the trailing underscore)

2. x.detach()-> Creates a new tensor that doesn't require the gradient

3. with torch.no_grad():

In [None]:
x = torch.rand(3, requires_grad=True)
print(x)

# The trailing underscore marks an in-place call: the flag is switched off
# on x itself rather than on a copy.
x.requires_grad_(requires_grad=False)
print(x)

tensor([0.9804, 0.7026, 0.8790], requires_grad=True)
tensor([0.9804, 0.7026, 0.8790])


In [None]:
x = torch.rand(3, requires_grad=True)
print(x)

# detach() hands back a tensor with the same values that does not require
# gradients; x itself keeps requires_grad=True.
y = x.detach()
print(y)

tensor([4.0610e-01, 1.6381e-01, 2.5433e-04], requires_grad=True)
tensor([4.0610e-01, 1.6381e-01, 2.5433e-04])


In [None]:
x = torch.rand(3, requires_grad=True)
print(x)

# Inside the no_grad() context the addition is not recorded, so the printed
# result shows no grad_fn.
with torch.no_grad():
    y = torch.add(x, 2)
    print(y)

tensor([0.9053, 0.7492, 0.9062], requires_grad=True)
tensor([2.9053, 2.7492, 2.9062])


**backward()** keeps accumulating gradients, i.e. it sums them up, until they are explicitly reset

In [None]:
weights = torch.ones(4, requires_grad=True)

# Dummy operation standing in for w*x+b.
# One backward pass: each element of weights.grad becomes 3.
for epoch in range(1):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

print("-" * 50)

# Two more passes on the same tensor without clearing .grad in between:
# every backward() adds another 3 on top of what is already stored.
for epoch in range(2):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
--------------------------------------------------
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


To prevent this, zero the `.grad` of the variable with respect to which the gradient is being calculated

In [None]:
weights = torch.ones(4, requires_grad=True)

# Dummy operation standing in for w*x+b.
for epoch in range(1):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    # Reset the stored gradient in place so the next pass starts from zero.
    weights.grad.zero_()

print("-" * 50)

for epoch in range(2):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    # Reset again after every pass; without this the values would add up.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
--------------------------------------------------
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


### Using optimizers

In [None]:
weights = torch.rand(size=(3, 3), requires_grad=True)

optimizer = torch.optim.SGD(params=[weights], lr=0.01)

# Snapshot of the parameters so the effect of the update can be inspected.
weights_before = weights.detach().clone()

# step() only has something to apply once a backward pass has filled
# weights.grad; calling it before any backward() leaves the weights untouched.
# Dummy operation which simulates w*x+b (its gradient is 3 for every element).
model_output = (weights*3).sum()
model_output.backward()

# SGD update: weights <- weights - lr * weights.grad
optimizer.step()
# Clear the gradients so the next backward() does not accumulate onto them.
optimizer.zero_grad()

# Every element moved by lr * grad = 0.01 * 3 = 0.03.
print(weights_before - weights.detach())