In [1]:
class Scalar:
    """A value node in a dynamically built computation graph.

    Every arithmetic operation returns a new ``Scalar`` that records its
    operands in ``children`` and stores a ``_backward`` closure which adds the
    result's gradient contribution into those operands. ``backward()`` runs
    the closures in reverse topological order (reverse-mode differentiation).
    """

    def __init__(self, data, children=(), op="", requires_grad=True):
        """Create a node.

        Args:
            data: The wrapped value.
            children: Operand nodes this value was computed from.
            op: Label of the operation that produced this node (defaults to "").
            requires_grad: When False, the operator closures never add to this
                node's ``grad``.
        """
        self.data = data
        self.grad = 0  # accumulated with += by the operator closures
        self.requires_grad = requires_grad
        self._backward = lambda: None  # replaced by the operator that creates the node
        self.children = set(children)
        self.op = op

    def __add__(self, other):
        """Return ``self + other``; a non-Scalar ``other`` is wrapped in a Scalar."""
        other = other if isinstance(other, Scalar) else Scalar(other)
        out = Scalar(self.data + other.data, (self, other), "+")

        def _backward():
            # d(out)/d(self) = d(out)/d(other) = 1, so the gradient passes through.
            # float() coerces it, so gradients routed through "+" become floats.
            # NOTE(review): float() rejects multi-element array gradients; confirm
            # whether "+" is meant to work with the array-like data dot() expects.
            if self.requires_grad:
                self.grad += float(out.grad)
            if other.requires_grad:
                other.grad += float(out.grad)

        out._backward = _backward
        return out

    def __mul__(self, other):
        """Return ``self * other``; a non-Scalar ``other`` is wrapped in a Scalar."""
        other = other if isinstance(other, Scalar) else Scalar(other)
        out = Scalar(self.data * other.data, (self, other), "*")

        def _backward():
            # Product rule: each operand receives the other operand's value.
            if self.requires_grad:
                self.grad += other.data * out.grad
            if other.requires_grad:
                other.grad += self.data * out.grad

        out._backward = _backward
        return out

    def dot(self, other):
        """Return a node holding ``self.data.dot(other.data)``.

        Assumes both ``data`` values (and the incoming gradient) expose
        ``.dot()`` and ``.transpose()``, e.g. numpy arrays -- TODO confirm.
        """
        other = other if isinstance(other, Scalar) else Scalar(other)
        out = Scalar(self.data.dot(other.data), (self, other), "dot")

        def _backward():
            # NOTE(review): backward() seeds the root gradient with the int 1,
            # which has no .dot(); calling backward() directly on a dot()
            # result therefore looks unsupported -- verify.
            if self.requires_grad:
                self.grad += out.grad.dot(other.data.transpose())
            if other.requires_grad:
                other.grad += self.data.transpose().dot(out.grad)

        out._backward = _backward
        return out

    def __pow__(self, other):
        """Return ``self ** other``.

        ``other`` is used as a constant exponent: it is not wrapped in a
        Scalar, is not recorded as a child, and receives no gradient.
        """
        out = Scalar(self.data ** other, (self,), f"**{other}")

        def _backward():
            # Power rule: d(x**n)/dx = n * x**(n - 1).
            if self.requires_grad:
                self.grad += other * (self.data ** (other - 1)) * out.grad

        out._backward = _backward
        return out

    def __radd__(self, other):
        """Return ``other + self`` by delegating to ``__add__``."""
        return self + other

    def __sub__(self, other):
        """Return ``self - other``, computed as ``self + (-other)``."""
        return self + (-other)

    def __rsub__(self, other):
        """Return ``other - self``, computed as ``other + (-self)``."""
        return other + (-self)

    def __rmul__(self, other):
        """Return ``other * self`` by delegating to ``__mul__``."""
        return self * other

    def __neg__(self):
        """Return ``-self``, computed as ``self * -1``."""
        return self * -1

    def __truediv__(self, other):
        """Return ``self / other``, computed as ``self * other ** -1``."""
        return self * (other ** -1)

    def __rtruediv__(self, other):
        """Return ``other / self``, computed as ``other * self ** -1``."""
        return other * (self ** -1)

    def backward(self):
        """Back-propagate from this node through everything it depends on.

        Sets ``self.grad`` to 1, then runs every reachable node's ``_backward``
        closure in reverse topological order. Gradients are accumulated with
        ``+=`` and are not cleared first, so calling this more than once on
        overlapping graphs adds to the values already stored.

        The ordering is built with an explicit stack instead of recursion so
        that long operation chains (e.g. a sum built in a loop) do not raise
        ``RecursionError``.
        """
        graph = []  # post-order listing: operands always precede their result
        visited = {self}
        # Each entry pairs a node with an iterator over its not-yet-examined
        # children, which is exactly the state a recursive DFS keeps per frame.
        stack = [(self, iter(self.children))]
        while stack:
            node, pending = stack[-1]
            for child in pending:
                if child not in visited:
                    visited.add(child)
                    stack.append((child, iter(child.children)))
                    break  # descend into this child before the remaining siblings
            else:
                # All children handled: the node itself can now be emitted.
                graph.append(node)
                stack.pop()

        self.grad = 1
        for v in reversed(graph):
            v._backward()

    def __repr__(self):
        """Show the node as ``Scalar(<data>, <grad>)``."""
        return f"Scalar({self.data}, {self.grad})"

In [2]:
# Sanity check on a single product: for c = a * b, dc/da = b and dc/db = a.
a, b = Scalar(1), Scalar(2)
c = a * b
print(a, b, c)  # gradients before back-propagation
c.backward()
print(a, b, c)  # gradients after back-propagation

Scalar(1, 0) Scalar(2, 0) Scalar(2, 0)
Scalar(1, 2) Scalar(2, 1) Scalar(2, 1)


In [3]:
# Larger expression: y = q*w - (e + r/t).
# q is created with requires_grad=False, so it never accumulates a gradient.
q = Scalar(1020, requires_grad=False)
w, e, r, t = Scalar(212), Scalar(123123), Scalar(546), Scalar(42)
y = q * w - (e + r / t)
print(q, w, e, r, t, y)  # gradients before back-propagation
y.backward()
print(q, w, e, r, t, y)  # gradients after back-propagation

Scalar(1020, 0) Scalar(212, 0) Scalar(123123, 0) Scalar(546, 0) Scalar(42, 0) Scalar(93104.0, 0)
Scalar(1020, 0) Scalar(212, 1020.0) Scalar(123123, -1.0) Scalar(546, -0.023809523809523808) Scalar(42, 0.30952380952380953) Scalar(93104.0, 1)
