In [None]:
import math
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

**New Arithmetic Operations**

In [None]:
from __future__ import annotations
import typing
from micrograd.util.graph import topological_sort


class Value:
    """A scalar node in a computation graph with reverse-mode autodiff.

    Every arithmetic operation returns a new ``Value`` that records its
    operands and a closure which pushes the result's gradient back to them.
    Plain ``int``/``float`` operands are accepted on either side of
    ``+``, ``-``, ``*`` and ``/``.
    """

    def __init__(
        self,
        data: float | int,
        _children: tuple[Value, ...] = (),
        _op="",
        label="",
    ):
        # the data maintained by this object
        self.data = data
        # the gradient of the output of the graph w.r.t this node
        self.grad = 0.0
        # a human-readable label for this node
        self.label = label

        # the function for computing the local gradient
        self._backward = lambda: None
        # the ancestors of this node in the graph
        self._prev = set(_children)
        # the operation used to compute this node
        self._op = _op

    def __repr__(self) -> str:
        return f"Value(data={self.data})"

    def __add__(self, other: float | int | Value) -> Value:
        """Return ``self + other``; the gradient flows unchanged to both operands."""
        # wrap other in a Value if not already
        other = other if isinstance(other, Value) else Value(other)

        out = Value(self.data + other.data, (self, other), "+")

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad

        out._backward = _backward

        return out

    def __radd__(self, other: float | int | Value) -> Value:
        """Reflected addition (``other + self``); addition is commutative."""
        return self + other

    def __mul__(self, other: float | int | Value) -> Value:
        """Return ``self * other``; each operand's gradient is scaled by the other's data."""
        # wrap other in a Value if not already
        other = other if isinstance(other, Value) else Value(other)

        out = Value(self.data * other.data, (self, other), "*")

        def _backward():
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward

        return out

    def __rmul__(self, other: float | int | Value) -> Value:
        """Reflected multiplication (``other * self``); multiplication is commutative."""
        return self * other

    def __neg__(self) -> Value:
        """Return ``-self``, expressed as a multiplication by -1."""
        return self * -1

    def __sub__(self, other: float | int | Value) -> Value:
        """Return ``self - other``, expressed as ``self + (-other)``."""
        return self + (-other)

    def __rsub__(self, other: float | int) -> Value:
        """Reflected subtraction (``other - self``) for a plain-number left operand."""
        # subtraction is not commutative, so negate self rather than delegating to __sub__
        return other + (-self)

    def __pow__(self, other: float | int) -> Value:
        """Return ``self ** other`` for a constant (non-``Value``) exponent.

        Raises:
            AssertionError: if ``other`` is not an ``int`` or ``float``.
        """
        assert isinstance(other, (int, float)), "broken precondition"

        out = Value(self.data**other, (self,), f"**{other}")

        def _backward():
            # power rule: d(x**n)/dx = n * x**(n - 1)
            self.grad += other * (self.data ** (other - 1.0)) * out.grad

        out._backward = _backward

        return out

    def __truediv__(self, other: float | int | Value) -> Value:
        """Return ``self / other``, expressed as ``self * other**-1``."""
        return self * other**-1

    def __rtruediv__(self, other: float | int) -> Value:
        """Reflected division (``other / self``) for a plain-number left operand."""
        return other * self**-1

    def backward(self) -> None:
        """Backpropagate from this node through the graph it was computed from.

        Seeds this node's gradient with 1.0, then invokes every node's
        ``_backward`` closure in the reverse of the order returned by
        ``topological_sort(self)``. Gradients are accumulated with ``+=``
        and are not reset here.
        """
        self.grad = 1.0
        # NOTE(review): assumes topological_sort lists operands before the
        # nodes computed from them -- confirm against micrograd.util.graph
        for node in reversed(topological_sort(self)):
            node = typing.cast(Value, node)
            node._backward()

In [None]:
# divide two Values; __truediv__ is built from __mul__ and __pow__
a = Value(2.0, label="a")
b = Value(4.0, label="b")
c = a / b
# display the quotient (2.0 * 4.0**-1, i.e. data=0.5)
c

**Nonlinearities**

In [None]:
import math


def tanh(x: float) -> float:
    """Return the hyperbolic tangent of ``x``.

    Mathematically this is (e^x - e^-x) / (e^x + e^-x). It is computed with
    ``math.tanh`` because evaluating that formula literally overflows in
    ``math.exp`` for large ``|x|``, whereas tanh itself just saturates at +/-1.
    """
    return math.tanh(x)


# element-wise version of tanh for numpy arrays
tanh_vec = np.vectorize(tanh)

In [None]:
import numpy as np

# sample tanh on an evenly spaced grid over [-5, 5]
x = np.linspace(-5, 5)
y = tanh_vec(x)

fig, ax = plt.subplots()
ax.plot(x, y)
# dashed guide lines through the origin
ax.axhline(y=0, linestyle="--", color="black")
ax.axvline(x=0, linestyle="--", color="black")
# label the figure so it stands alone; the trailing ';' hides the cell's repr output
ax.set(title="tanh", xlabel="x", ylabel="tanh(x)");

In [None]:
def relu(x: float) -> float:
    """Rectified linear unit: 0.0 for non-positive input, otherwise ``x`` itself."""
    if x <= 0.0:
        return 0.0
    return x


# element-wise version of relu for numpy arrays
relu_vec = np.vectorize(relu)

In [None]:
# sample relu on an evenly spaced grid over [-5, 5]
x = np.linspace(-5, 5)
y = relu_vec(x)

fig, ax = plt.subplots()
ax.plot(x, y)
# dashed guide lines through the origin
ax.axhline(y=0, linestyle="--", color="black")
ax.axvline(x=0, linestyle="--", color="black")
# label the figure so it stands alone; the trailing ';' hides the cell's repr output
ax.set(title="ReLU", xlabel="x", ylabel="relu(x)");

In [None]:
from __future__ import annotations
import typing
from micrograd.util.graph import topological_sort


class Value:
    """A scalar node in a computation graph with reverse-mode autodiff.

    Every operation returns a new ``Value`` that records its operands and a
    closure which pushes the result's gradient back to them. Besides
    arithmetic (with plain ``int``/``float`` operands accepted on either
    side), the ``tanh`` and ``relu`` nonlinearities are supported.
    """

    def __init__(
        self,
        data: float | int,
        _children: tuple[Value, ...] = (),
        _op="",
        label="",
    ):
        # the data maintained by this object
        self.data = data
        # the gradient of the output of the graph w.r.t this node
        self.grad = 0.0
        # a human-readable label for this node
        self.label = label

        # the function for computing the local gradient
        self._backward = lambda: None
        # the ancestors of this node in the graph
        self._prev = set(_children)
        # the operation used to compute this node
        self._op = _op

    def __repr__(self) -> str:
        return f"Value(data={self.data})"

    def __add__(self, other: float | int | Value) -> Value:
        """Return ``self + other``; the gradient flows unchanged to both operands."""
        # wrap other in a Value if not already
        other = other if isinstance(other, Value) else Value(other)

        out = Value(self.data + other.data, (self, other), "+")

        def _backward():
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad

        out._backward = _backward

        return out

    def __radd__(self, other: float | int | Value) -> Value:
        """Reflected addition (``other + self``); addition is commutative."""
        return self + other

    def __mul__(self, other: float | int | Value) -> Value:
        """Return ``self * other``; each operand's gradient is scaled by the other's data."""
        # wrap other in a Value if not already
        other = other if isinstance(other, Value) else Value(other)

        out = Value(self.data * other.data, (self, other), "*")

        def _backward():
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad

        out._backward = _backward

        return out

    def __rmul__(self, other: float | int | Value) -> Value:
        """Reflected multiplication (``other * self``); multiplication is commutative."""
        return self * other

    def __neg__(self) -> Value:
        """Return ``-self``, expressed as a multiplication by -1."""
        return self * -1

    def __sub__(self, other: float | int | Value) -> Value:
        """Return ``self - other``, expressed as ``self + (-other)``."""
        return self + (-other)

    def __rsub__(self, other: float | int) -> Value:
        """Reflected subtraction (``other - self``) for a plain-number left operand."""
        # subtraction is not commutative, so negate self rather than delegating to __sub__
        return other + (-self)

    def __pow__(self, other: float | int) -> Value:
        """Return ``self ** other`` for a constant (non-``Value``) exponent.

        Raises:
            AssertionError: if ``other`` is not an ``int`` or ``float``.
        """
        assert isinstance(other, (int, float)), "broken precondition"

        out = Value(self.data**other, (self,), f"**{other}")

        def _backward():
            # power rule: d(x**n)/dx = n * x**(n - 1)
            self.grad += other * (self.data ** (other - 1.0)) * out.grad

        out._backward = _backward

        return out

    def __truediv__(self, other: float | int | Value) -> Value:
        """Return ``self / other``, expressed as ``self * other**-1``."""
        return self * other**-1

    def __rtruediv__(self, other: float | int) -> Value:
        """Reflected division (``other / self``) for a plain-number left operand."""
        return other * self**-1

    def tanh(self) -> Value:
        """Return ``tanh(self)`` as a new node.

        Uses ``math.tanh`` rather than the literal
        (e^x - e^-x) / (e^x + e^-x) formula, which overflows in ``math.exp``
        for large ``|x|`` even though tanh itself saturates at +/-1.
        """
        t = math.tanh(self.data)
        out = Value(t, (self,), "tanh")

        def _backward():
            # d tanh(x)/dx = 1 - tanh(x)**2
            self.grad += (1 - t**2) * out.grad

        out._backward = _backward
        return out

    def relu(self) -> Value:
        """Return ``relu(self)`` as a new node: 0.0 for non-positive data, else the data."""
        x = self.data
        t = 0.0 if x <= 0.0 else x
        out = Value(t, (self,), "relu")

        def _backward():
            # use the same x <= 0 boundary as the forward pass, so an input of
            # exactly 0 (which was clamped) receives no gradient
            self.grad += (0.0 if x <= 0.0 else 1.0) * out.grad

        out._backward = _backward

        return out

    def backward(self) -> None:
        """Backpropagate from this node through the graph it was computed from.

        Seeds this node's gradient with 1.0, then invokes every node's
        ``_backward`` closure in the reverse of the order returned by
        ``topological_sort(self)``. Gradients are accumulated with ``+=``
        and are not reset here.
        """
        self.grad = 1.0
        # NOTE(review): assumes topological_sort lists operands before the
        # nodes computed from them -- confirm against micrograd.util.graph
        for node in reversed(topological_sort(self)):
            node = typing.cast(Value, node)
            node._backward()

In [None]:
# relu of a positive input passes the value through unchanged
a = Value(2.0, label="a")
b = a.relu()

# backpropagate from b, then display a's gradient (the relu slope at 2.0)
b.backward()
a.grad

**A Neural Network Library**

In [None]:
class Module:
    """Base class for anything that owns trainable parameters."""

    def __init__(self) -> None:
        pass

    def parameters(self) -> list[Value]:
        """Return this module's trainable parameters; the base class has none."""
        return []

    def zero_grad(self) -> None:
        """Reset the gradient of every parameter to zero."""
        for p in self.parameters():
            # float zero, matching the type Value.grad is initialised with
            p.grad = 0.0

In [None]:
import random


class Neuron(Module):
    def __init__(self, nin: int, activation: str = "relu") -> None:
        self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1, 1))

        match activation:
            case "relu":
                self.activation = lambda v: v.relu()
                self.activation_type = "ReLU"
            case "tanh":
                self.activation = lambda v: v.tanh()
                self.activation_type = "Tanh"
            case "none":
                self.activation = lambda v: v
                self.activation_type = "Linear"
            case _:
                raise ValueError(f"unknown activation '{activation}'")

    def __call__(self, x: list[float] | list[int] | list[Value]) -> Value:
        assert len(x) == len(self.w), "broken precondition"

        # input may be wrapped in Value already, or cast here
        _x = typing.cast(
            list[Value], x if isinstance(x[0], Value) else [Value(v) for v in x]  # type: ignore
        )
        # w * x + b
        weighted_sum = sum((wi * xi for wi, xi in zip(self.w, _x)), self.b)
        # apply the activation
        return self.activation(weighted_sum)

    def parameters(self) -> list[Value]:
        return self.w + [self.b]

    def __repr__(self):
        return f"{self.activation_type}Neuron({len(self.w)})"

In [None]:
# a 4-input neuron with the default relu activation and random weights
n = Neuron(4)
y = n([1.0, 2.0, 3.0, 4.0])

# backpropagate, then show the gradients of the 4 weights followed by the bias
y.backward()
print([p.grad for p in n.parameters()])

In [None]:
class Layer(Module):
    """A group of neurons that all receive the same input."""

    def __init__(self, nin: int, nout: int, **kwargs) -> None:
        """Build ``nout`` neurons with ``nin`` inputs each; ``kwargs`` go to ``Neuron``."""
        neurons = []
        for _ in range(nout):
            neurons.append(Neuron(nin, **kwargs))
        self.neurons = neurons

    def __call__(self, x: list[float] | list[Value]) -> list[Value] | Value:
        """Evaluate every neuron on ``x``.

        Returns the single output directly when the layer has exactly one
        neuron, otherwise the list of outputs.
        """
        results = []
        for neuron in self.neurons:
            results.append(neuron(x))

        if len(results) == 1:
            return results[0]
        return results

    def parameters(self) -> list[Value]:
        """Return the parameters of all neurons, concatenated in neuron order."""
        collected = []
        for neuron in self.neurons:
            collected.extend(neuron.parameters())
        return collected

    def __repr__(self):
        listing = ", ".join(map(str, self.neurons))
        return f"Layer of [{listing}]"

In [None]:
# a layer with a single neuron returns a bare Value rather than a list
l = Layer(4, 1)
y = l([1.0, 2.0, 3.0, 4.0])
assert isinstance(y, Value)

# backpropagate, then show the gradient of every parameter in the layer
y.backward()
print([p.grad for p in l.parameters()])

In [None]:
class MLP(Module):
    """A multi-layer perceptron: relu hidden layers followed by a linear output layer."""

    def __init__(self, nin: int, nouts: list[int]) -> None:
        """Build the network.

        Args:
            nin: number of network inputs.
            nouts: size of each layer in order; the last entry is the output
                layer, which uses no activation.
        """
        # unpacking accepts any sequence of sizes, not only a list
        sz = [nin, *nouts]
        self.layers = [
            Layer(
                sz[i],
                sz[i + 1],
                activation="none" if i == len(nouts) - 1 else "relu",
            )
            for i in range(len(nouts))
        ]

    def __call__(self, x: list[float] | list[Value]) -> Value:
        """Run a forward pass and return the single network output.

        Raises:
            AssertionError: if the final layer does not produce exactly one value.
        """
        # wrap element by element: inspecting only x[0] would fail on an empty
        # input and would re-wrap Values in a list that mixes numbers and Values
        _x: list[Value] = [v if isinstance(v, Value) else Value(v) for v in x]

        for layer in self.layers:
            r = layer(_x)
            # a one-neuron layer returns a bare Value; re-wrap it for the next layer
            _x = [r] if isinstance(r, Value) else r

        assert len(_x) == 1, "broken invariant"
        return _x[0]

    def parameters(self) -> list[Value]:
        """Return the parameters of all layers, concatenated in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

In [None]:
# 2 inputs -> 16 -> 16 -> 1 output
m = MLP(2, [16, 16, 1])
# parameter count: (2+1)*16 + (16+1)*16 + (16+1)*1 = 337
print(len(m.parameters()))

# forward pass
y = m([1.0, 2.0])

# backward pass
y.backward()