In [2]:
# dl-notes | wad

In [None]:
# micrograd, implemented from scratch: a scalar autograd engine plus a small neural-net library

In [59]:
# setup
import math
import random
from abc import ABC, abstractmethod

In [60]:
# node: holds scalar & gradient
class Node:
    """Scalar value that remembers how it was computed so gradients can flow back.

    Every arithmetic operation returns a new ``Node`` whose ``_prev`` holds the
    operands and whose ``_backward`` closure applies the local chain rule.
    Calling ``backward()`` on a result node fills in ``grad`` on every node
    that contributed to it.
    """

    def __init__(self, data, _prev=(), _op=''):
        self.data = data               # the scalar held by this node
        self.grad = 0.0                # d(result)/d(self); accumulated by backward()
        self._prev = set(_prev)        # operand nodes this node was computed from
        self._op = _op                 # label of the operation that produced this node
        self._backward = lambda: None  # local chain-rule step; no-op for leaf nodes

    # addition
    def __add__(self, other):
        """Return ``self + other``; a plain number is wrapped in a ``Node``."""
        other = other if isinstance(other, Node) else Node(other)
        out = Node(self.data + other.data, (self, other), '+')

        def _backward():
            # d(a+b)/da = d(a+b)/db = 1, so the gradient passes through unchanged
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward

        return out

    def __radd__(self, other):
        """Return ``other + self`` for a plain-number left operand (e.g. ``1 + node``)."""
        return self + other

    # multiplication
    def __mul__(self, other):
        """Return ``self * other``; a plain number is wrapped in a ``Node``."""
        other = other if isinstance(other, Node) else Node(other)
        out = Node(self.data * other.data, (self, other), '*')

        def _backward():
            # d(a*b)/da = b and d(a*b)/db = a
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward

        return out

    def __rmul__(self, other):
        """Return ``other * self`` for a plain-number left operand (e.g. ``2 * node``)."""
        return self * other

    # power
    def __pow__(self, other):
        """Return ``self ** other`` where ``other`` is a constant int or float.

        Raises:
            AssertionError: if ``other`` is not an int or float.
        """
        assert isinstance(other, (int, float)), "Only supporting int, float powers"
        # `other` is a plain number here (see the assert), so it is used directly
        out = Node(self.data ** other, (self,), f'**{other}')

        def _backward():
            # d(x**n)/dx = n * x**(n-1)
            self.grad += out.grad * (other * self.data**(other-1))
        out._backward = _backward

        return out

    # activation: ReLU
    def relu(self):
        """Return a node holding ``0`` when ``self.data`` is negative, else ``self.data``."""
        out = Node(0 if self.data < 0 else self.data, (self,), 'ReLU')

        def _backward():
            # gradient passes only where the output is strictly positive
            self.grad += (out.data>0) * out.grad
        out._backward = _backward

        return out

    # main backprop
    def backward(self):
        """Back-propagate from this node, accumulating ``grad`` on every ancestor.

        Gradients are added to whatever ``grad`` already holds, so reset them
        before calling this a second time on the same graph.
        """
        topo = []  # nodes in topological order: operands before their results
        visited = set()

        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)  # appended only after all of its operands
        build_topo(self)

        self.grad = 1  # d(self)/d(self)
        for v in reversed(topo):  # results first, so out.grad is final before it is used
            v._backward()

In [61]:
# nn building blocks: Module (base class) / Neurone / Layer / MLP

In [62]:
# base class
class Module(ABC):
    """Abstract base for anything that owns trainable parameters."""

    @abstractmethod
    def params(self):
        """Return the parameters of this module; subclasses must implement this."""
        return None

    def zero_grad(self):
        """Set ``grad`` to 0 on every parameter returned by ``params()``."""
        for param in self.params():
            param.grad = 0

In [63]:
# neurone
class Neurone(Module):
    """Single neurone: a weighted sum of its inputs plus a bias, optionally through ReLU."""

    def __init__(self, nin, nonlin=True):
        """Create a neurone with ``nin`` inputs.

        Args:
            nin: number of inputs (one weight is created per input).
            nonlin: when True the output goes through ReLU, otherwise it stays linear.
        """
        self.w = [Node(random.uniform(-1,1)) for _ in range(nin)]  # weights drawn from [-1, 1]
        self.b = Node(0)  # bias starts at zero
        self.nonlin = nonlin

    def __call__(self, x):
        """Compute the neurone's output for the inputs ``x``.

        ``x`` is paired with the weights by ``zip``, so extra inputs or extra
        weights are silently ignored.
        """
        # w1*x1 + ... + wn*xn + b; the bias is the start value so the sum is a Node
        z = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
        self.z = z  # keep the pre-activation on the instance, as the original assignment did
        return z.relu() if self.nonlin else z

    def params(self):
        """Return the weights followed by the bias as one list."""
        return self.w + [self.b]

    def __repr__(self):
        return f"{'ReLU' if self.nonlin else 'Linear'}Neurone({len(self.w)})" # e.g. LinearNeurone(3)

In [64]:
# layer
class Layer(Module):
    """A group of neurones that all receive the same input."""

    def __init__(self, nin, nout, **kwargs):
        """Create ``nout`` neurones with ``nin`` inputs each; ``kwargs`` go to ``Neurone``."""
        neurones = []
        for _ in range(nout):
            neurones.append(Neurone(nin, **kwargs))
        self.neurones = neurones

    def __call__(self, x):
        """Feed ``x`` to every neurone; a single-neurone layer returns the bare value, not a list."""
        activations = [neurone(x) for neurone in self.neurones]
        if len(activations) == 1:
            return activations[0]
        return activations

    def params(self):
        """Return the parameters of all neurones as one flat list."""
        collected = []
        for neurone in self.neurones:
            collected.extend(neurone.params())
        return collected

    def __repr__(self):
        listing = ', '.join(map(str, self.neurones))
        return f"Layer of [{listing}]"

In [69]:
# mlp
class MLP(Module):
    """Multi-layer perceptron: layers applied one after another."""

    def __init__(self, nin, nouts):
        """Build the network.

        Args:
            nin: number of inputs to the first layer.
            nouts: list with the number of neurones in each layer, in order.
        """
        sizes = [nin] + nouts  # sizes[i] -> sizes[i+1] describes layer i
        last = len(nouts) - 1
        layers = []
        for i, (fan_in, fan_out) in enumerate(zip(sizes, sizes[1:])):
            # every layer is non-linear except the last one
            layers.append(Layer(fan_in, fan_out, nonlin=(i != last)))
        self.layers = layers

    def __call__(self, x):
        """Pass ``x`` through each layer in turn and return the final output."""
        signal = x
        for layer in self.layers:
            signal = layer(signal)
        return signal

    def params(self):
        """Return the parameters of all layers as one flat list."""
        collected = []
        for layer in self.layers:
            collected.extend(layer.params())
        return collected

    def __repr__(self):
        separator = ",\n\t"  # one layer per line, tab-indented
        body = separator.join(map(str, self.layers))
        return f"MLP of [\n\t{body}\n]"

In [71]:
# 2 inputs -> two layers of 3 neurones -> 1 output neurone
mlp = MLP(2, [3,3,1])
print(mlp) # printed via the __repr__ methods of MLP / Layer / Neurone

MLP of [
	Layer of [ReLUNeurone(2), ReLUNeurone(2), ReLUNeurone(2)],
	Layer of [ReLUNeurone(3), ReLUNeurone(3), ReLUNeurone(3)],
	Layer of [LinearNeurone(3)]
]
