In [1]:
import numpy as np

In [62]:
class Parameter:
    def __init__(self, value: float, name: str) -> None:
        self._value = value
        self._name = name

        self._grad = 0.0
        self._backward = lambda: None

    def __repr__(self) -> str:
        return f"Parameter {self._name} = {self._value}; dL/d[{self._name}] = {self._grad}"
    
    def backward(self):
        self._grad = 1
        self._backward()
        
    def zero_grad(self):
        self._grad = 0
        self._backward()

    def __add__(self, other: 'Parameter') -> 'Parameter':
        result = Parameter(
            self._value + other._value,
            f'[{self._name} + {other._name}]'
        )

        def _backward():
            self._grad += 1.0 * result._grad  #dL / dself
            other._grad += 1.0 * result._grad # dL / dother

        result._backward = _backward

        return result
    
    def __mul__(self, other: 'Parameter') -> 'Parameter':
        result = Parameter(
            self._value * other._value,
            f'{self._name} * {other._name}'
        )

        def _backward():
            self._grad += other._value * result._grad #dL / dself
            other._grad += self._value * result._grad # dL / dother

        result._backward = _backward

        return result

    def sigmoid(self) -> 'Parameter':
        # f(x) = 1 / (1 + exp(self._value))
        # f'(x) = f(x) * (1 - f(x))

        val = 1.0 / (1.0 + np.exp(-self._value))

        result = Parameter(
            val,
            f"σ({self._name})"
        )

        def _backward():
            self._grad += result._grad * val * (1 - val)

        result._backward = _backward

        return result

    def relu(self) -> 'Parameter':
        result = Parameter(max(0, self._value), f'ReLU({self._name})')
        
        def _backward():
            self._grad += result._grad * 1 if self._value > 0 else 0 
            
        self._backward = _backward
        
        return result

    def softplus(self):
        result = Parameter(np.log1p(np.exp(self._value), f'Softplus({self._name})'))
        
        def _backward():
            self._grad += 1.0 / (1.0 + np.exp(-self._value))
            
        self._backward = _backward
        
        return result
    
        
def sgd(f, x: list[Parameter], params: list[Parameter]=None, lr=1e-2, steps=100):
    for s in range(steps):
        result = f(*x)
        result.backward()
        for p in params:
            print(p._grad)
            print(p)
            p._value -= lr * p._grad
            p._grad = 0

In [63]:
x = Parameter(6, 'x')
sgd(lambda x: (x+Parameter(1, '1'))*(x+Parameter(2, '2')), [x], [x], lr=1e-1, steps=10000)

0.0
Parameter x = 6; dL/d[x] = 0.0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parameter x = 6.0; dL/d[x] = 0
0
Parame

In [50]:
x

Parameter x = -244.0250000000001; dL/d[x] = 50000.0