In [1]:
import numpy as np

class Value:
  """A node in a reverse-mode automatic-differentiation graph backed by NumPy.

  Each instance stores a value (``data``), the gradient accumulated for it
  (``grad``), the operand nodes it was computed from (``_prev``), and a
  closure (``_backward``) that pushes ``grad`` back onto those operands.
  Arithmetic operators build the graph; ``backward()`` runs the chain rule
  over it.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    """Wrap ``data`` as a graph node.

    Args:
      data: A scalar, nested sequence, or ``np.ndarray``. An ndarray is
        stored without copying unless its dtype has to be promoted.
      _children: Operand nodes this value was computed from.
      _op: Short name of the operation that produced this node.
      label: Optional human-readable label.
    """
    arr = data if isinstance(data, np.ndarray) else np.array(data)
    # Integer/bool storage makes negative powers raise inside np.power and
    # makes in-place float updates of `data` fail, so promote to float64.
    if arr.dtype.kind in 'biu':
      arr = arr.astype(np.float64)
    self.data = arr

    # Gradient accumulator, same shape and dtype as the data
    self.grad = np.zeros_like(self.data)

    # Leaf nodes have nothing to propagate
    self._backward = lambda: None

    # Operand nodes, operation name and optional label
    self._prev = set(_children)
    self._op = _op
    self.label = label

  @staticmethod
  def _unbroadcast(grad, shape):
    """Reduce ``grad`` back to ``shape`` by summing over broadcast axes.

    When an operand was broadcast in the forward pass, every copy of it
    contributes to the output, so its gradient is the sum over the axes that
    broadcasting added or stretched.
    """
    grad = np.asarray(grad)
    shape = tuple(shape)
    # Nothing to reduce; a lower-rank gradient is left to NumPy broadcasting.
    if grad.shape == shape or grad.ndim < len(shape):
      return grad
    lead = grad.ndim - len(shape)
    if lead:
      # Axes that broadcasting prepended to the operand
      grad = np.asarray(grad.sum(axis=tuple(range(lead))))
    stretched = tuple(
      axis for axis, size in enumerate(shape)
      if size == 1 and grad.shape[axis] != 1
    )
    if stretched:
      # Size-1 axes of the operand that were stretched to the output size
      grad = grad.sum(axis=stretched, keepdims=True)
    return grad

  def __add__(self, other):
    """Return ``self + other`` (element-wise, with broadcasting)."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # d(a + b)/da = d(a + b)/db = 1
      self.grad = self.grad + Value._unbroadcast(out.grad, np.shape(self.data))
      other.grad = other.grad + Value._unbroadcast(out.grad, np.shape(other.data))

    out._backward = _backward
    return out

  def __mul__(self, other):
    """Return ``self * other`` (element-wise, with broadcasting)."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # d(a * b)/da = b and d(a * b)/db = a
      self.grad = self.grad + Value._unbroadcast(
        np.multiply(other.data, out.grad), np.shape(self.data))
      other.grad = other.grad + Value._unbroadcast(
        np.multiply(self.data, out.grad), np.shape(other.data))

    out._backward = _backward
    return out

  def __pow__(self, other):
    """Return ``self ** other`` for an ``int``/``float`` or ``Value`` exponent.

    Raises:
      TypeError: If ``other`` is neither a Python number nor a ``Value``.
    """
    if isinstance(other, (int, float)):
      # Constant exponent: only the base receives a gradient
      out = Value(np.power(self.data, other), (self,), f'**{other}')

      def _backward():
        # d(x**n)/dx = n * x**(n - 1)
        local = np.multiply(other, np.power(self.data, other - 1))
        self.grad = self.grad + np.multiply(local, out.grad)

      out._backward = _backward
      return out
    elif isinstance(other, Value):
      out = Value(np.power(self.data, other.data), (self, other), f'**')

      def _backward():
        # d(x**y)/dx = y * x**(y - 1)
        base_grad = np.multiply(
          np.multiply(other.data, np.power(self.data, other.data - 1)), out.grad)
        # d(x**y)/dy = x**y * ln(x); only finite for a positive base
        exp_grad = np.multiply(np.multiply(out.data, np.log(self.data)), out.grad)
        self.grad = self.grad + Value._unbroadcast(base_grad, np.shape(self.data))
        other.grad = other.grad + Value._unbroadcast(exp_grad, np.shape(other.data))

      out._backward = _backward
      return out
    else:
      raise TypeError("Unsupported operand type(s) for **: 'Value' and '{}'".format(type(other).__name__))

  def __radd__(self, other):
    """Return ``other + self`` (addition is commutative)."""
    return self + other

  def __rmul__(self, other):
    """Return ``other * self`` (multiplication is commutative)."""
    return self * other

  def __truediv__(self, other):
    """Return ``self / other``, computed as ``self * other**-1``."""
    # Wrapping first promotes integer divisors to float before the power.
    other = other if isinstance(other, Value) else Value(other)
    return self * other**-1

  def __rtruediv__(self, other):
    """Return ``other / self``."""
    return Value(other) * self**-1

  def __neg__(self):
    """Return ``-self``."""
    return self * -1

  def __sub__(self, other):
    """Return ``self - other``, computed as ``self + (-other)``."""
    return self + (-other)

  def __rsub__(self, other):
    """Return ``other - self``."""
    return (-self) + other

  def exp(self):
    """Return the element-wise exponential of this value."""
    out = Value(np.exp(self.data), (self, ), 'exp')

    def _backward():
      # d(e**x)/dx = e**x, which is the forward result itself
      self.grad = self.grad + np.multiply(out.data, out.grad)

    out._backward = _backward
    return out

  def tanh(self):
    """Return the element-wise hyperbolic tangent of this value."""
    t = np.tanh(self.data)
    out = Value(t, (self, ), 'tanh')

    def _backward():
      # d(tanh x)/dx = 1 - tanh(x)**2
      self.grad = self.grad + np.multiply((1 - t**2), out.grad)

    out._backward = _backward
    return out

  def backward(self):
    """Back-propagate from this node, accumulating ``grad`` on every ancestor.

    This node's gradient is seeded with ones. Gradients of other nodes are
    accumulated, not reset, so callers must zero them between passes.
    """
    # Iterative post-order DFS: operands are emitted before the node that
    # uses them, and deep graphs cannot exhaust Python's recursion limit.
    topo = []
    visited = set()
    stack = [(self, False)]
    while stack:
      node, expanded = stack.pop()
      if expanded:
        topo.append(node)
        continue
      if node in visited:
        continue
      visited.add(node)
      stack.append((node, True))
      for child in node._prev:
        if child not in visited:
          stack.append((child, False))

    # Seed the output gradient with ones (assuming scalar loss)
    self.grad = np.ones_like(self.data)

    # Apply the chain rule from the output back towards the leaves
    for node in reversed(topo):
      node._backward()

  def __repr__(self):
    """Return a short string showing the wrapped data."""
    return f"Value(data={self.data})"

In [2]:
import random

class Neuron:
  """A single unit computing ``tanh(b + sum(w_i * x_i))``."""

  def __init__(self, nin):
    """Create ``nin`` weights and then one bias, each drawn with ``random.uniform(-1, 1)``."""
    weights = []
    for _ in range(nin):
      weights.append(Value(random.uniform(-1, 1)))
    self.w = weights
    self.b = Value(random.uniform(-1,1))

  def __call__(self, x):
    """Return the neuron's activation for the inputs ``x``.

    Weights and inputs are paired with ``zip``, so any surplus on either
    side is ignored.
    """
    # Start from the bias and fold in one weighted input at a time
    total = self.b
    for weight, feature in zip(self.w, x):
      total = total + weight * feature
    # Squash the pre-activation with tanh
    return total.tanh()

  def parameters(self):
    """Return a new list holding the weights followed by the bias."""
    return [*self.w, self.b]
  
class Layer:
  """A group of ``nout`` neurons that all receive the same ``nin`` inputs."""

  def __init__(self, nin, nout):
    """Build ``nout`` independent neurons, each taking ``nin`` inputs."""
    neurons = []
    for _ in range(nout):
      neurons.append(Neuron(nin))
    self.neurons = neurons

  def __call__(self, x):
    """Evaluate every neuron on ``x``.

    Returns the lone output directly when the layer has exactly one neuron,
    otherwise a list with one output per neuron.
    """
    outs = []
    for neuron in self.neurons:
      outs.append(neuron(x))
    if len(outs) == 1:
      return outs[0]
    return outs

  def parameters(self):
    """Return the parameters of all neurons as one flat list, in neuron order."""
    params = []
    for neuron in self.neurons:
      params.extend(neuron.parameters())
    return params
  
class MLP:
  """A multi-layer perceptron: a chain of fully connected ``Layer`` objects."""

  def __init__(self, nin, nouts):
    """Build the network.

    Args:
      nin: Number of inputs to the first layer.
      nouts: Sequence with the number of neurons in each successive layer;
        the last entry is the size of the output layer.
    """
    # list() lets callers pass a tuple (or any sequence) as well as a list
    sz = [nin] + list(nouts)
    self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(sz) - 1)]

  def __call__(self, x):
    """Forward-propagate ``x`` through every layer and return the last layer's output."""
    for layer in self.layers:
      # A single-neuron layer returns its output bare rather than in a list;
      # wrap any non-iterable so the next layer can still zip over it.
      try:
        iter(x)
      except TypeError:
        x = [x]
      x = layer(x)
    # Return the final output of the MLP
    return x

  def parameters(self):
    """Return the parameters of all layers as one flat list, in layer order."""
    return [p for layer in self.layers for p in layer.parameters()]

In [3]:
# Seed Python's RNG so the randomly initialised weights, and therefore every
# output below, are reproducible after Restart & Run All
SEED = 1337
random.seed(SEED)

# Set the input values
x = [2.0, 3.0, -1.0]

# Create an MLP with 3 inputs, two hidden layers of 4 neurons each, and 1 output neuron
n = MLP(3, [4, 4, 1])

# Call the MLP object with the input values
# This triggers the __call__ method of the MLP class
n(x)

Value(data=-0.04539273245213295)

In [4]:
# Training inputs: four examples with three features each
xs = [
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
]

# Desired target for each training example, one scalar per row of xs
ys = [1.0, -1.0, -1.0, 1.0]

# Training data is paired by position:
# xs[0] corresponds to ys[0]
# xs[1] corresponds to ys[1]
# xs[2] corresponds to ys[2]
# xs[3] corresponds to ys[3]

In [5]:
# Hyperparameters for plain gradient descent
EPOCHS = 20
LEARNING_RATE = 0.5

for k in range(EPOCHS):

    # Forward pass: predict every example and sum the squared errors
    ypred = [n(x) for x in xs]
    loss = sum((yout - ygt) ** 2 for ygt, yout in zip(ys, ypred))

    # Reset parameter gradients; backward() accumulates into them.
    # Keep them as arrays shaped like the data, as Value.__init__ creates them.
    for p in n.parameters():
        p.grad = np.zeros_like(p.data)

    # Compute gradients using the backward method
    loss.backward()

    # Gradient-descent step: move each parameter against its gradient
    for p in n.parameters():
        p.data += -LEARNING_RATE * p.grad

    print(k, loss.data)

0 6.153565696651676
1 1.7064585525057832
2 3.825257302112228
3 0.006969793533564184
4 0.003602679610900941
5 0.002871364024622368
6 0.00247892186413578
7 0.002224436470499429
8 0.00204228133097694
9 0.0019035138719979985
10 0.0017931104222341872
11 0.0017024056001941146
12 0.001626013950690499
13 0.0015603955852783792
14 0.0015031195695307475
15 0.0014524565771716698
16 0.001407140067626
17 0.0013662194600172694
18 0.0013289662394512785
19 0.001294811882827344


In [6]:
# Show the predictions from the last forward pass of the training loop.
# NOTE: these were computed before the final weight update; evaluate
# [n(x) for x in xs] again to see predictions from the updated weights.
ypred

[Value(data=0.9828565573037482),
 Value(data=-0.9955379560235583),
 Value(data=-0.9736010658055839),
 Value(data=0.9831447132240693)]