In [None]:
import numpy as np
from tidygrad.tensor import Tensor
from tidygrad.utils.grad_check import grad_check

In [None]:
def run_test_binary_elementwise(func, shape1, shape2=None, pos_only=False, offset=1e-8):
    """Gradient-check a binary op such as add or mul on random operands.

    Two tensors named "a" and "b" are created with ``requires_grad=True``,
    ``func`` is evaluated on them, ``backward()`` is called on the result, and
    ``grad_check`` is then invoked with the same function and parameters.

    Args:
        func: Callable accepting ``inputs`` and ``params`` keywords and
            returning an object that has a ``backward()`` method.
        shape1: Shape of the first operand.
        shape2: Shape of the second operand; defaults to ``shape1``.
        pos_only: If true, sample ``abs(randn) + offset`` instead of ``randn``,
            so every value is at least ``offset``.
        offset: Constant added to the absolute values when ``pos_only`` is set.
            The default keeps the previously hard-coded ``1e-8``.
    """
    shape2 = shape1 if shape2 is None else shape2

    def _sample(shape):
        # One standard-normal draw per operand; folded to positive values on request.
        values = np.random.randn(*shape)
        return np.abs(values) + offset if pos_only else values

    # "a" is sampled before "b" so the random stream is consumed in a fixed order.
    a = Tensor(_sample(shape1), name="a", requires_grad=True)
    b = Tensor(_sample(shape2), name="b", requires_grad=True)

    t = func(inputs=None, params=(a, b))
    t.backward()
    grad_check(func=func, inputs=None, params=(a, b))

In [None]:
def run_test_unary_elementwise(func, shape, pos_only=False, offset=1e-3):
    """Gradient-check a single-operand function (exp, log, ...) on random data.

    A tensor named "a" with ``requires_grad=True`` is created, ``func`` is
    evaluated on it, ``backward()`` is called on the result, and ``grad_check``
    is then run with the same function and parameter.

    Args:
        func: Callable accepting ``inputs`` and ``params`` keywords and
            returning an object that has a ``backward()`` method.
        shape: Shape of the random operand.
        pos_only: If true, sample ``abs(randn) + offset`` instead of ``randn``.
        offset: Constant added to the absolute values when ``pos_only`` is set.
    """
    values = np.random.randn(*shape)
    if pos_only:
        # Mostly for log(a): it is defined for positive values only and is
        # unstable too close to zero, hence the offset.
        values = np.abs(values) + offset
    a = Tensor(values, name="a", requires_grad=True)

    params = (a,)
    loss = func(inputs=None, params=params)
    loss.backward()
    grad_check(func=func, inputs=None, params=params)

### Binary elementwise ops


In [None]:
def add_func(inputs, params: tuple = ()):
    """Loss for the addition check: ``params[0].add(params[1])`` reduced with ``sum``.

    ``inputs`` is ignored. ``params`` must hold exactly two operands. The string
    arguments "t" and "loss" are passed through to ``add`` and ``sum``
    (presumably as node names).
    """
    lhs, rhs = params
    total = lhs.add(rhs, "t")
    return total.sum("loss")


# Gradient-check addition on two random operands of shape (100, 100).
run_test_binary_elementwise(add_func, (100, 100))

Max fractional gradient difference for b: 0.0000%
Max fractional gradient difference for a: 0.0000%


In [None]:
def sub_func(inputs, params: tuple = ()):
    """Loss for the subtraction check: ``params[0].sub(params[1])`` reduced with ``sum``.

    ``inputs`` is ignored. ``params`` must hold exactly two operands. The string
    arguments "t" and "loss" are passed through to ``sub`` and ``sum``
    (presumably as node names).
    """
    minuend, subtrahend = params
    difference = minuend.sub(subtrahend, "t")
    return difference.sum("loss")


# Gradient-check subtraction on two random operands of shape (100, 100).
run_test_binary_elementwise(sub_func, (100, 100))

Max fractional gradient difference for b: 0.0000%
Max fractional gradient difference for a: 0.0000%


In [None]:
def mul_func(inputs, params: tuple = ()):
    """Loss for the multiplication check: ``params[0].mul(params[1])`` reduced with ``sum``.

    ``inputs`` is ignored. ``params`` must hold exactly two operands. The string
    arguments "t" and "loss" are passed through to ``mul`` and ``sum``
    (presumably as node names).
    """
    left, right = params
    product = left.mul(right, "t")
    return product.sum("loss")


# Gradient-check multiplication on two random operands of shape (100, 100).
run_test_binary_elementwise(mul_func, (100, 100))

Max fractional gradient difference for b: 0.0048%
Max fractional gradient difference for a: 0.0000%


In [None]:
def pow_func(inputs, params: tuple = ()):
    """Loss for the power check: ``params[0].pow(2, "t")`` reduced with ``sum``.

    ``inputs`` is ignored and only the first entry of ``params`` is used.
    """
    base = params[0]
    squared = base.pow(2, "t")
    return squared.sum("loss")


def run_test_pow(shape):
    """Gradient-check ``pow_func`` on a random tensor of the given shape.

    Entries whose magnitude is below 1e-5 are replaced by 1e-5 before the
    check, because pow is unstable for values close to zero.
    """
    a = Tensor(np.random.randn(*shape), name="a", requires_grad=True)

    # Push near-zero entries away from zero (they all become +1e-5).
    too_small = np.abs(a.data) < 1e-5
    a.data = np.where(too_small, 1e-5, a.data)

    params = (a,)
    loss = pow_func(inputs=None, params=params)
    loss.backward()
    grad_check(func=pow_func, inputs=None, params=params)

# XXX pow is unstable for values close to zero
# run_test_pow((100, 100))

### Unary elementwise functions


In [None]:
def log_func(inputs, params: tuple = ()):
    """Loss for the log check: ``a.log("t")`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly one operand.
    """
    [operand] = params
    logged = operand.log("t")
    return logged.sum("loss")


# Gradient-check log on a (100, 100) operand sampled as abs(randn) + 1e-3 (the helper's default offset).
run_test_unary_elementwise(log_func, (100, 100), pos_only=True)

Max fractional gradient difference for a: 0.3642%


In [None]:
def exp_func(inputs, params: tuple = ()):
    """Loss for the exp check: ``a.exp("t")`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly one operand.
    """
    [operand] = params
    exponentiated = operand.exp("t")
    return exponentiated.sum("loss")


# Gradient-check exp on a random (100, 100) operand.
run_test_unary_elementwise(exp_func, (100, 100))

Max fractional gradient difference for a: 0.0013%


In [None]:
from tidygrad.functional import relu, sigmoid, tanh, softmax, gelu, new_gelu

In [None]:
def sigmoid_func(inputs, params: tuple = ()):
    """Loss for the sigmoid check: ``sigmoid(a)`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly one operand.
    """
    [operand] = params
    return sigmoid(operand).sum("loss")


# Gradient-check sigmoid on a random (100, 100) operand.
run_test_unary_elementwise(sigmoid_func, (100, 100))

Max fractional gradient difference for a: 0.0007%


In [None]:
def tanh_func(inputs, params: tuple = ()):
    """Loss for the tanh check: ``tanh(a)`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly one operand.
    """
    [operand] = params
    return tanh(operand).sum("loss")

# Gradient-check tanh on a random (100, 100) operand.
run_test_unary_elementwise(tanh_func, (100, 100))

Max fractional gradient difference for a: 0.0010%


In [None]:
def relu_func(inputs, params: tuple = ()):
    """Loss for the relu check: ``relu(a, "t")`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly one operand.
    """
    [operand] = params
    return relu(operand, "t").sum("loss")

# Gradient-check relu on a random (100, 100) operand.
run_test_unary_elementwise(relu_func, (100, 100))

Max fractional gradient difference for a: 0.0000%


In [None]:
def gelu_func(inputs, params: tuple = ()):
    """Loss for the gelu check: ``gelu(a)`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly one operand.
    """
    [operand] = params
    return gelu(operand).sum("loss")

# XXX Stability issues
# run_test_unary_elementwise(gelu_func, (100, 100))

In [None]:
def new_gelu_func(inputs, params: tuple = ()):
    """Loss for the new_gelu check: ``new_gelu(a)`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly one operand.
    """
    [operand] = params
    return new_gelu(operand).sum("loss")
# XXX Stability issues
# XXX It's also slow!
# run_test_unary_elementwise(new_gelu_func, (100, 100))

Max fractional gradient difference for a: 0.2677%


In [None]:
def softmax_func(inputs, params: tuple = ()):
    """Loss for the softmax check: a binary log-likelihood against fixed labels.

    One class per row is picked with a fixed seed and one-hot encoded into
    ``y``. The loss is ``sum(y * log(sm) + (1 - y) * log(1 - sm))`` with
    ``sm = softmax(a, "t")``. This is the negative of the usual binary
    cross-entropy; the sign is irrelevant for a gradient check.

    ``inputs`` is ignored. ``params`` must hold exactly one operand whose
    ``shape`` unpacks into ``(n_batch, n_classes)``.
    """
    (a,) = params
    n_batch, n_classes = a.shape

    # A private, fixed-seed generator yields the same labels on every call
    # (the function must be deterministic across repeated evaluations) without
    # reseeding NumPy's global RNG, which would pin every later cell to one stream.
    rng = np.random.RandomState(42)
    labels = rng.randint(0, n_classes, n_batch)

    y = np.zeros(a.shape)
    y[np.arange(n_batch), labels] = 1
    y = Tensor(y, name="y")
    sm = softmax(a, "t")

    cross_entropy = y * sm.log() + (1 - y) * (1 - sm).log()
    return cross_entropy.sum("loss")


# Gradient-check the softmax-based loss on a random (1, 5) operand: one row, five classes.
run_test_unary_elementwise(softmax_func, (1, 5))

Max fractional gradient difference for a: 0.0005%


In [None]:
def matmul_func(inputs, params: tuple[Tensor, ...] = ()):
    """Loss for the matmul check: ``a.mmul(b, "t")`` reduced with ``sum("loss")``.

    ``inputs`` is ignored. ``params`` must hold exactly two operands, which are
    unpacked as the left and right factor. The annotation is a variable-length
    tuple so that it agrees with the ``()`` default.
    """
    a, b = params
    t = a.mmul(b, "t")
    return t.sum("loss")


def run_test_matmul(shape1, shape2):
    """Gradient-check ``matmul_func`` on two random operands.

    Tensors "a" (shape ``shape1``) and "b" (shape ``shape2``) are drawn from
    ``randn`` with ``requires_grad=True``. The loss is evaluated, ``backward()``
    is called on it, and ``grad_check`` is run on the same parameters.
    """
    # "a" is sampled before "b" so the random stream is consumed in a fixed order.
    a = Tensor(np.random.randn(*shape1), name="a", requires_grad=True)
    b = Tensor(np.random.randn(*shape2), name="b", requires_grad=True)

    params = (a, b)
    loss = matmul_func(inputs=None, params=params)
    loss.backward()
    grad_check(func=matmul_func, inputs=None, params=params)


# Gradient-check a plain 2-D product: (10, 100) times (100, 50).
run_test_matmul((10, 100), (100, 50))

Max fractional gradient difference for b: 0.0000%
Max fractional gradient difference for a: 0.0000%


### Broadcasting


In [None]:
# Addition with mismatched shapes (2, 10, 1) and (10, 100); under NumPy-style
# broadcasting rules these combine to (2, 10, 100).
run_test_binary_elementwise(add_func, (2, 10, 1), (10, 100))

Max fractional gradient difference for b: 0.0000%
Max fractional gradient difference for a: 0.0000%


In [None]:
# Matmul with a 3-D left operand (2, 10, 100) and a 2-D right operand (100, 10).
run_test_matmul((2, 10, 100), (100, 10))

Max fractional gradient difference for b: 0.0001%
Max fractional gradient difference for a: 0.0000%


### Test loss functions


In [None]:
# def lt_func(inputs, params: tuple = ()):
#     a, b = params
#     loss = (a < b).sum("loss")
#     return loss

# run_test_binary_elementwise(lt_func, (100, 100), (100, 100))

# a = Tensor(np.random.randn(100, 100), name="a")
# b = Tensor(np.random.randn(100, 100), name="b")

# t = lt_func(inputs=None, params=(a, b))
# t.backward()

In [None]:
from tidygrad.functional import BCE_loss

In [None]:
def bceloss_func(inputs, params: tuple = ()):
    """Loss for the BCE check: ``BCE_loss(x, y)`` reduced with ``sum("loss")``.

    ``y`` is taken from ``inputs[0]`` and ``x`` from ``params[0]``; any further
    entries are ignored.
    """
    y_in = inputs[0]
    x_param = params[0]
    return BCE_loss(x_param, y_in).sum("loss")


# Gradient-check BCE_loss: x is handed over as the parameter, y goes in through `inputs`.
x = Tensor(np.random.randn(100), name="x", requires_grad=True)
# NOTE(review): y is drawn from a standard normal and flagged requires_grad=True although
# it is only supplied via `inputs`; if BCE_loss expects targets in [0, 1], sampling labels
# would be more representative. Confirm against BCE_loss.
y = Tensor(np.random.randn(100), name="y", requires_grad=True)

# Forward and backward pass first, then the check on the same function and arguments.
t = bceloss_func(inputs=(y,), params=(x,))
t.backward()

grad_check(func=bceloss_func, inputs=(y,), params=(x,))

Max fractional gradient difference for x: 0.0028%


### Test Dropout


In [None]:
from tidygrad.functional import dropout

In [None]:
def dropout_func(inputs, params: tuple = ()):
    """Loss for the dropout check: ``dropout(p, 0.3, training=True)`` reduced with ``sum``.

    NumPy's global RNG is seeded with 1337 right before the dropout call so that
    every evaluation starts from the same random stream (presumably dropout
    draws its mask from that generator; confirm against ``dropout``). The
    caller's global RNG state is saved beforehand and restored afterwards, so
    running this function does not pin later cells to the seeded stream.

    ``inputs`` is ignored and only the first entry of ``params`` is used.
    """
    p = params[0]

    saved_state = np.random.get_state()
    np.random.seed(1337)
    try:
        t = dropout(p, 0.3, training=True)
    finally:
        # Restore even if dropout raises, so a failure cannot leak the fixed seed.
        np.random.set_state(saved_state)
    return t.sum("loss")


# 100-element random parameter vector fed through dropout_func.
p = Tensor(np.random.randn(100), name="p", requires_grad=True)

# Forward and backward pass first, then the check on the same function and parameter.
t = dropout_func(inputs=None, params=(p,))
t.backward()

# dropout_func seeds NumPy on every call, so each evaluation starts from the same random stream.
grad_check(func=dropout_func, inputs=None, params=(p,))

Max fractional gradient difference for p: 0.0000%


In [None]:
from tidygrad.functional import embedding

### Test Embedding


In [None]:
def embedding_func(inputs, params: tuple = ()):
    """Loss for the embedding check: ``embedding(w, idxs, "t")`` reduced with ``sum("loss")``.

    ``idxs`` is taken from ``inputs[0]`` and ``w`` from ``params[0]``; any
    further entries are ignored.
    """
    indices = inputs[0]
    table = params[0]
    looked_up = embedding(table, indices, "t")
    return looked_up.sum("loss")


# Indices 1..9 into a weight of shape (10, 100); index 0 is never requested.
idxs = [1, 2, 3, 4, 5, 6, 7, 8, 9]
w = Tensor(np.random.randn(10, 100), name="w", requires_grad=True)

# Forward and backward pass first, then the check with w as the only parameter.
t = embedding_func(inputs=(idxs,), params=(w,))
t.backward()

grad_check(func=embedding_func, inputs=(idxs,), params=(w,))

Max fractional gradient difference for w: 0.0000%


### Test sum, mean, and std


In [None]:
def sum_test(inputs, params: tuple = ()):
    """Loss for the sum check: ``a.sum("t")`` followed by ``sum("loss")``.

    ``inputs`` is ignored and only the first entry of ``params`` is used.
    """
    operand = params[0]
    return operand.sum("t").sum("loss")


# Gradient-check sum on a random (100, 100) operand, reusing the unary helper.
run_test_unary_elementwise(sum_test, (100, 100))

Max fractional gradient difference for a: 0.0000%


In [None]:
def mean_test(inputs, params: tuple = ()):
    """Loss for the mean check: ``a.mean("t")`` followed by ``sum("loss")``.

    ``inputs`` is ignored and only the first entry of ``params`` is used.
    """
    operand = params[0]
    return operand.mean("t").sum("loss")


# Gradient-check mean on a random (100, 100) operand, reusing the unary helper.
run_test_unary_elementwise(mean_test, (100, 100))

Max fractional gradient difference for a: 0.0000%


In [None]:
def std_test(inputs, params: tuple = ()):
    """Loss for the std check: ``a.std("t")`` followed by ``sum("loss")``.

    ``inputs`` is ignored and only the first entry of ``params`` is used.
    """
    operand = params[0]
    return operand.std("t").sum("loss")

# Gradient-check std on a random (100, 100) operand, reusing the unary helper.
run_test_unary_elementwise(std_test, (100, 100))

Max fractional gradient difference for a: 0.0049%


In [None]:
# Fresh random (100, 100) tensor, used only to show what a power expression looks like.
a = Tensor(np.random.randn(100, 100), name="a", requires_grad=True)

# Bare last expression: the notebook displays the resulting tensor's repr.
a ** 3

Tensor[100, 100](name="pow(a,3)" op=Pow parents=[a]):
    v=array[100, 100] n=10000 (78Kb) x∈[-41.412, 47.474] μ=0.066 σ=3.739
    ∇=array[100, 100] f32 n=10000 (39Kb) [38;2;127;127;127mall_zeros[0m