Gradient checking with PyTorch's internal testing utilities

- https://pytorch.org/docs/stable/notes/extending.html
- https://pytorch.org/docs/stable/generated/torch.autograd.gradcheck.html?highlight=gradcheck#torch.autograd.gradcheck

In [4]:
import torch

from torch.testing._internal.common_utils import gradcheck

In [7]:
from torch.autograd import Function

# Inherit from Function
class LinearFunction(Function):
    """Custom autograd op computing ``input.mm(weight.t())`` plus an optional bias.

    ``forward`` and ``backward`` are both static methods; per-call state is
    carried on ``ctx`` instead of on an instance.
    """

    @staticmethod
    def forward(ctx, input, weight, bias=None):
        """Return ``input.mm(weight.t())``, with ``bias`` added to each row if given.

        All three arguments (including a ``None`` bias) are stored on ``ctx``
        so that ``backward`` can unpack them again.
        """
        ctx.save_for_backward(input, weight, bias)
        product = input.mm(weight.t())
        if bias is None:
            return product
        # Add the bias to every row, in place, through an expanded view.
        product += bias.unsqueeze(0).expand_as(product)
        return product

    @staticmethod
    def backward(ctx, grad_output):
        """Return the gradients for ``(input, weight, bias)``.

        The op has a single output, so exactly one incoming gradient is
        received. Each returned entry is ``None`` when the matching input is
        flagged as not needing a gradient (or, for the bias, was not given).
        """
        saved_input, saved_weight, saved_bias = ctx.saved_tensors
        needs = ctx.needs_input_grad

        # The needs-grad checks only avoid unnecessary work; every gradient
        # defaults to None when its check fails.
        grad_input = grad_output.mm(saved_weight) if needs[0] else None
        grad_weight = grad_output.t().mm(saved_input) if needs[1] else None
        # Test the bias first so that needs[2] is only read when a bias was
        # actually passed to forward.
        if saved_bias is not None and needs[2]:
            grad_bias = grad_output.sum(0)
        else:
            grad_bias = None

        return grad_input, grad_weight, grad_bias

In [9]:
# Bind the Function's `apply` entry point to a short name so the op can be
# called like a plain function.
linear = LinearFunction.apply

In [11]:
# gradcheck receives a tuple of input tensors, compares the analytical
# gradients evaluated at those tensors against numerical approximations,
# and returns True when all of them are close enough.
input = tuple(
    torch.randn(rows, 20, dtype=torch.double, requires_grad=True)
    for rows in (20, 30)
)
test = gradcheck(linear, input, eps=1e-6, atol=1e-4)
print(test)

True


Compute grad on 1d data

In [31]:
import torch
from torch.nn.functional import interpolate


# 1-D float tensor holding 0..15 that records gradients.
t = torch.arange(16, dtype=torch.float, requires_grad=True)
# Inspect the gradient before any backward pass has been run in this cell.
t.grad

In [32]:
# Add two leading singleton axes to t, then resample it to 5 points with
# mode="linear".
out = interpolate(t[None, None, :], size=(5, ), mode="linear")
out

tensor([[[ 1.1000,  4.3000,  7.5000, 10.7000, 13.9000]]],
       grad_fn=<UpsampleLinear1DBackward1>)

In [33]:
# Reduce the interpolated values to a scalar and backpropagate from it.
out.sum().backward()

In [34]:
# Show the gradient accumulated on t by the backward pass.
t.grad

tensor([0.0000, 0.9000, 0.1000, 0.0000, 0.7000, 0.3000, 0.0000, 0.5000, 0.5000,
        0.0000, 0.3000, 0.7000, 0.0000, 0.1000, 0.9000, 0.0000])

In [41]:
# Recompute output sample 0 of the 16 -> 5 linear interpolation by hand.
scale = 16 / 5  # input length divided by output length

# Map the centre of output sample 0 back to a fractional input coordinate.
ri0 = scale * (0.0 + 0.5) - 0.5
# Left neighbour index. This must be computed before ri1, which is derived
# from it; otherwise a fresh run fails with NameError on i0.
i0 = int(ri0)
ri1 = i0 + 1.0
i1 = int(ri1)
print(ri0, ri1)
print(i0, i1)

# Linear weights: w1 is the fractional distance from the left neighbour,
# w0 is its complement.
w1 = ri0 - i0
w0 = 1.0 - w1
print(w0, w1)

# Weighted blend of the two neighbouring input samples.
out_p0 = w0 * t[i0] + w1 * t[i1]
out_p0

1.1 2.0
1 2
0.8999999999999999 0.10000000000000009


tensor(1.1000, grad_fn=<AddBackward0>)