In [18]:
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [19]:
from typing import * # type: ignore

In [20]:
# One seed shared by both RNGs so the value cannot drift between the two calls.
SEED = 1337
np.random.seed(SEED)  # NumPy's legacy global generator
random.seed(SEED)     # Python's built-in `random` module

In [21]:
from micrograd.engine import Value
from micrograd.nn import Neuron, Layer, MLP

In [22]:
# Two 3-input networks to compare side by side: the first layer has one neuron
# in `n` and two neurons in `m`; both end in a single-output layer.
n, m = MLP(3, [1, 1]), MLP(3, [2, 1])  # `n` is still constructed before `m`
(n, m)

(MLP of [Layer of [ReLUNeuron(3)], Layer of [LinearNeuron(1)]],
 MLP of [Layer of [ReLUNeuron(3), ReLUNeuron(3)], Layer of [LinearNeuron(2)]])

In [23]:
class _: # MLP
    """Scratch copy of the MLP constructor, kept only to study how layers are sized.

    Only ``__init__`` is reproduced here; it populates ``self.layers``.
    """

    # `List` comes from the notebook's top-level `typing` import; importing it
    # inside the class body would leave a stray `List` attribute on the class.
    def __init__(self, nin: int, nouts: List[int]):
        """Build one ``Layer`` per entry of ``nouts``.

        nin:   number of inputs fed to the first layer.
        nouts: number of outputs of each layer, in order.
        """
        # Layer i maps sz[i] inputs to sz[i + 1] outputs.
        sz = [nin] + nouts
        last = len(nouts) - 1
        self.layers = [
            # every layer except the last one is built with nonlin=True
            Layer(sz[i], sz[i + 1], nonlin=(i != last))
            for i in range(len(nouts))
        ]
# MLP CONSTRUCTOR
# This is straightforward: we build one layer per entry in nouts,
# where layer i maps sz[i] inputs to sz[i + 1] outputs.
# The first layer takes nin inputs; every later layer takes
# as many inputs as the previous layer has outputs (nouts[i - 1]).

# LINEAR OUTPUT
# the last layer is linear so we can predict any real number, not just values in [0, inf)

# RELU LAYERS
# ReLU (Rectified Linear Unit) is the activation that introduces
# non-linearity into the network. The ReLU function is defined as:
#
# f(x) = max(0, x)
#
# without ReLUs (or any other non-linear activation) NNs would be linear functions

# By definition, ReLU is max(0, x). If we split the domain into
# (−∞, 0] and [0, ∞), the function is linear on each piece. However, a linear
# function must satisfy f(a) + f(b) = f(a + b), and f(−1) + f(1) = 1 ≠ 0 = f(0).
# Hence, by definition, ReLU is not linear.
# https://datascience.stackexchange.com/a/26481


In [24]:
def internals(n: MLP) -> None:
    """Print the structure of ``n`` one object per line.

    Each layer in ``n.layers`` is printed followed by '---', each neuron in
    ``layer.neurons`` followed by '*', and each value returned by
    ``neuron.parameters()`` followed by '.'.
    """
    def walk():
        # Lazily yield (object, marker) pairs in depth-first order so each
        # object is printed before its children are looked up.
        for current_layer in n.layers:
            yield current_layer, '---'
            for unit in current_layer.neurons:
                yield unit, '*'
                yield from ((param, '.') for param in unit.parameters())  # .w and [.b]

    for obj, marker in walk():
        print(obj, marker)

In [25]:
internals(n)
# MLP Diagram (weights rounded from the output printed by this cell):
#
#   Input (3 features)
#       ↓
# Layer 1 (ReLUNeuron) # w: [0.24, 0.07, -0.27], b: 0
#       ↓
# Layer 2 (LinearNeuron) # w: [0.17], b: 0
#       ↓
#  Output (1 output)

Layer of [ReLUNeuron(3)] ---
ReLUNeuron(3) *
Value(data=0.23550571390294128, grad=0) .
Value(data=0.06653114721000164, grad=0) .
Value(data=-0.26830328150124894, grad=0) .
Value(data=0, grad=0) .
Layer of [LinearNeuron(1)] ---
LinearNeuron(1) *
Value(data=0.1715747078045431, grad=0) .
Value(data=0, grad=0) .


In [26]:
internals(m)
# here's why this makes sense:
# the network takes three inputs, so every first-layer neuron needs 3 weights plus a bias
# but the number of outputs of the first layer is two,
# so you need two neurons in the first layer
# the linear layer receives those two outputs as its inputs and thus has two weights (plus a bias)

Layer of [ReLUNeuron(3), ReLUNeuron(3)] ---
ReLUNeuron(3) *
Value(data=-0.6686254326224383, grad=0) .
Value(data=0.6487474938152629, grad=0) .
Value(data=-0.23259038277158273, grad=0) .
Value(data=0, grad=0) .
ReLUNeuron(3) *
Value(data=0.5792256498313748, grad=0) .
Value(data=0.8434530197925192, grad=0) .
Value(data=-0.3847332240409951, grad=0) .
Value(data=0, grad=0) .
Layer of [LinearNeuron(2)] ---
LinearNeuron(2) *
Value(data=0.9844941451716409, grad=0) .
Value(data=-0.5901079958448365, grad=0) .
Value(data=0, grad=0) .


In [27]:
# Toy dataset: four samples with three features each (matches the 3-input MLPs above).
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0]
]
# One target value per row of xs, in the same order.
ys = [1.0, -1.0, -1.0, 1.0]

In [29]:
# Fit a 3 -> 4 -> 4 -> 1 MLP to (xs, ys) with 50 steps of plain gradient descent
# on the summed squared error.
# NOTE(review): an earlier note here guessed that the random seed is ignored.
# Whether the initial weights are reproducible depends on which RNG micrograd
# draws from (not visible in this notebook) and on how many draws happened before
# this cell ran (n and m are built first) -- confirm with Restart & Run All.
nn = MLP(3, [4, 4, 1])
learning_rate = 0.05
# Use a named step counter rather than `_`: the counter is read in the print
# below, and `_` is both the scratch class defined earlier and IPython's
# last-output variable.
for step in range(1, 51):
    # forward pass: one prediction per sample, then the summed squared error
    ypred = [nn(x) for x in xs]
    loss = sum((a - b)**2 for a, b in zip(ypred, ys))
    # clear gradients before the backward pass
    # (presumably backward() accumulates into .grad -- confirm in micrograd.engine)
    for p in nn.parameters():
        p.grad = 0.0
    loss.backward()
    # gradient-descent update: move each parameter against its gradient
    for p in nn.parameters():
        p.data -= learning_rate * p.grad
    print(step, loss.data, [f"{x.data:1.2f}" for x in ypred])


1 7.256586208060731 ['1.71', '0.14', '1.19', '0.19']
2 4.663089955756583 ['-0.55', '-0.48', '-0.58', '-0.35']
3 3.604799237460308 ['-0.06', '-0.28', '-0.20', '-0.15']
4 3.3813552469202715 ['0.02', '-0.26', '-0.14', '-0.07']
5 3.1877095502118094 ['0.05', '-0.29', '-0.14', '-0.02']
6 3.058174697261023 ['-0.01', '-0.33', '-0.19', '0.03']
7 2.589567291131887 ['0.17', '-0.47', '-0.14', '0.06']
8 2.1974764529417348 ['0.26', '-0.58', '-0.16', '0.12']
9 2.262004527664124 ['-0.02', '-0.75', '-0.34', '0.15']
10 1.545379542101279 ['0.45', '-0.83', '-0.18', '0.27']
11 1.1205726782305505 ['0.61', '-0.90', '-0.29', '0.33']
12 0.7617626100877495 ['0.63', '-0.86', '-0.53', '0.38']
13 0.43591145058177 ['0.89', '-0.94', '-0.58', '0.50']
14 0.24574837373975458 ['0.86', '-0.95', '-0.88', '0.54']
15 0.18104420395966012 ['1.19', '-0.95', '-0.83', '0.66']
16 0.14074749247945073 ['1.07', '-1.04', '-1.02', '0.63']
17 0.09826102960666201 ['1.05', '-0.93', '-0.99', '0.70']
18 0.069977062653079 ['1.01', '-1.01', 