In [None]:
# | hide
import nbdev
from nbdev.showdoc import *

# Run nbdev's export step for this project.
# NOTE(review): this call sits at the top of the notebook, before any of the
# cells below have executed; presumably nbdev_export works from the notebook
# files saved on disk rather than from kernel state - confirm, and save the
# notebook before running this cell.
nbdev.nbdev_export()

# Tidy Utils


In [None]:
# | default_exp utils

In [None]:
# | export
from tidygrad.tensor import Tensor
import numpy as np

In [None]:
# | export


def grad_check(nn, x, y, params: tuple, eps=1e-7, n_samples=1000, tol=1e-4):
    """Check the stored gradients of `params` against finite-difference estimates.

    For each parameter (visited in reverse order) a random subset of its
    elements is nudged by `eps` one at a time, the loss is re-evaluated, and the
    forward difference `(loss(p + eps) - loss(p)) / eps` is compared with the
    matching entry of `p.grad`.

    Parameters
    ----------
    nn : callable
        Called as `nn(x, y, params)`; must return an object whose `.data` holds
        a single loss value.
    x, y :
        Passed through to `nn` untouched.
    params : tuple
        Objects exposing `.data` and `.grad` (numpy arrays) and `.name`.
        `.grad` must already be populated before this function is called.
    eps : float
        Size of the finite-difference step.
    n_samples : int
        Maximum number of elements checked per parameter. Elements are drawn
        without replacement, so a parameter with fewer elements is checked
        exhaustively.
    tol : float
        Largest absolute difference tolerated between the numeric and the
        stored gradient.

    Raises
    ------
    ValueError
        If a parameter has no stored gradient, or if the largest absolute
        difference for a parameter exceeds `tol` or is NaN.
    """
    base_loss = None

    for p in reversed(params):
        if p.grad is None:
            raise ValueError(
                f"Gradient check failed for {p.name}: no gradient is stored"
            )

        n_checked = min(n_samples, p.data.size)
        if n_checked <= 0:
            # Empty parameter (or no samples requested): nothing to compare.
            continue

        # Every perturbation is undone before the next one, so the unperturbed
        # loss is identical for all samples and is evaluated only once.
        if base_loss is None:
            base_loss = np.asarray(nn(x, y, params).data)

        # Sampling without replacement avoids re-evaluating the same element
        # several times when the parameter is small.
        flat_indices = np.random.choice(p.data.size, size=n_checked, replace=False)

        # The gradient is only read, so a flattened copy is fine here.
        analytic = np.asarray(p.grad).reshape(-1)[flat_indices]
        numeric = np.empty(n_checked, dtype=float)

        for k, flat_idx in enumerate(flat_indices):
            # Write through p.data itself: reshape(-1) returns a copy for
            # non-contiguous arrays, which would leave the parameter unperturbed.
            pos = np.unravel_index(flat_idx, p.data.shape)
            old_val = p.data[pos]
            p.data[pos] = old_val + eps
            try:
                loss_plus_h = nn(x, y, params)
            finally:
                # Restore the parameter even if the forward pass fails.
                p.data[pos] = old_val

            numeric[k] = ((np.asarray(loss_plus_h.data) - base_loss) / eps).item()

        max_grad_diff = np.max(np.abs(numeric - analytic))

        # `not (a <= b)` is also true when the difference is NaN, so NaN
        # gradients fail the check instead of slipping through.
        if not max_grad_diff <= tol:
            raise ValueError(
                f"Gradient check failed for {p.name}: Max error: {max_grad_diff}"
            )

In [None]:
from tidygrad.tensor import Tensor

# Random input batch: 32 rows of 28 * 28 features each.
x = Tensor(np.random.randn(32, 28 * 28), "X")

# One-hot targets: draw one class per row, then compare each drawn class
# against the 10 class ids to get a (32, 10) array of 0.0 / 1.0.
y = Tensor(
    (np.random.choice(10, 32)[:, None] == np.arange(10)).astype(float),
    "y",
)

# Randomly initialised parameters for the two layers.
w1 = Tensor(np.random.randn(28 * 28, 100), "w1")
b1 = Tensor(np.random.randn(100), "b1")
w2 = Tensor(np.random.randn(100, 10), "w2")


def NN(x, y, params: tuple, debug=None):
    """Forward pass of a small two-layer network with a summed squared-error loss.

    Computes `x.mmul(w1).add(b1)`, applies `sigmoid`, multiplies by `w2` with
    `mmul`, subtracts `y`, squares the difference element-wise and sums it.

    Parameters
    ----------
    x, y :
        Input and target tensors.
    params : tuple
        Exactly three tensors, unpacked as `(w1, b1, w2)`.
    debug : list, optional
        If given, a tuple `(z1, a1, z2, diff, l, loss)` of the intermediate
        tensors is appended to it. Nothing is recorded when omitted, so
        repeated calls (e.g. from a gradient check) do not accumulate
        references to intermediate tensors in a shared default list.

    Returns
    -------
    The tensor produced by the final `sum("loss")` call.
    """
    w1, b1, w2 = params
    z1 = x.mmul(w1, "tmp").add(b1, "z1")
    a1 = z1.sigmoid()

    z2 = a1.mmul(w2)

    diff = z2.sub(y, "diff")
    sq_err = diff.mul(diff, "l")
    loss = sq_err.sum("loss")

    # `is not None` (rather than truthiness) so an empty list passed by the
    # caller still receives the record.
    if debug is not None:
        debug.append((z1, a1, z2, diff, sq_err, loss))

    return loss


# Collect the intermediate tensors of this one forward pass for inspection.
debug = list()
loss = NN(x, y, (w1, b1, w2), debug)

# Run the backward pass before the check: grad_check reads each parameter's
# `.grad` and compares it with a finite-difference estimate.
loss.backward()

grad_check(NN, x, y, params=(w1, b1, w2))