In [1]:
# Comments in this notebook that mention "the video" refer to:
# https://youtu.be/VMj-3S1tku0

In [2]:
import numpy as np

In [3]:
def topo_sort_v1(a):
    """Return the nodes reachable from ``a`` in topological order, root first.

    Every node appears exactly once and before all of its children, so the
    list can be walked front-to-back for backpropagation (or back-to-front
    for a forward pass).

    Args:
        a: root node; every node must expose an iterable ``children``
            attribute (an empty or falsy value marks a leaf).

    Returns:
        list: the reachable nodes, with ``a`` first.
    """
    done = []     # nodes in post-order (children before parents)
    seen = set()  # ids of nodes already entered; nodes are tracked by identity

    def visit(val):
        # Skip a node *before* descending into it: a sub-graph shared by
        # several parents is then walked once instead of once per path.
        if id(val) in seen:
            return
        seen.add(id(val))
        for child in val.children or ():
            visit(child)
        done.append(val)

    visit(a)
    # Reverse once at the end rather than building a copy at every return.
    return done[::-1]

In [4]:
def topo_sort_v2(a):
    """Return the nodes reachable from ``a`` in topological order, root first.

    Every node appears exactly once and before all of its children. The
    traversal is an explicit-stack depth-first search, so it is not limited
    by Python's recursion depth on long chains, and it produces the same
    order as the equivalent recursive post-order walk.

    Args:
        a: root node; every node must expose an iterable ``children``
            attribute (an empty or falsy value marks a leaf).

    Returns:
        list: the reachable nodes, with ``a`` first.
    """
    done = []         # nodes in post-order (children before parents)
    seen = {id(a)}    # ids of nodes already pushed; nodes are tracked by identity
    # Each stack entry pairs a node with a live iterator over its children so
    # that iteration resumes where it stopped once a child has been processed.
    stack = [(a, iter(a.children or ()))]

    while stack:
        node, pending = stack[-1]
        for child in pending:
            if id(child) not in seen:
                seen.add(id(child))
                stack.append((child, iter(child.children or ())))
                break  # descend into this child first
        else:
            # All children handled: the node itself is finished.
            done.append(node)
            stack.pop()

    return done[::-1]

In [5]:
class Value:
    """Scalar node of a computation graph with reverse-mode differentiation.

    Arithmetic on ``Value`` objects (``+ - * / **``, ``relu``, ``softplus``)
    creates new nodes. Each result remembers its operands in ``children`` and
    carries two closures: ``_forward`` recomputes the node's ``value`` from
    its operands, and ``_backward`` adds the node's gradient contribution to
    its operands' ``grad``.

    Plain numbers used as operands are wrapped in constant nodes
    (``isconstant = True``).

    Attributes:
        value: the node's numeric value.
        label: text of the expression that produced the node.
        grad: accumulated derivative of the output w.r.t. this node.
        children: set of operand nodes (empty for leaves).
        op: symbol of the operation that produced the node ('' for leaves).
        name: optional display name, shown in quotes by ``repr``.
        isconstant: True for nodes created from plain numbers.
    """

    def __init__(self, value, label='', children=(), op='', name=''):
        self.value = value
        self.label = label
        self.grad = 0.0
        self.children = set(children)
        self.op = op
        # Leaves have nothing to propagate; both hooks default to no-ops.
        self._backward = lambda verbose=None: None
        self._forward = lambda verbose=None: None
        self.name = name
        self.isconstant = False

    def __repr__(self):
        # Built from separate pieces instead of nested same-quote f-strings,
        # which only parse on Python 3.12+.
        name_part = f'"{self.name}" = ' if self.name else ''
        label_part = '' if self.isconstant else f'{self.label} = '
        return f'{name_part}{label_part}{round(self.value, 4)}, grad = {round(self.grad, 8)}'


    # INTERNAL HELPERS
    @staticmethod
    def _constant(number):
        """Wrap a plain number in a constant node."""
        const = Value(number, f'{number}', name=f'const {number}')
        const.isconstant = True
        return const

    @staticmethod
    def _operand(other):
        """Return ``other`` as a node: numbers are wrapped, anything else passes through."""
        if isinstance(other, (int, float, np.integer, np.floating)):
            return Value._constant(other)
        return other

    @staticmethod
    def _number(other):
        """Return the raw number behind ``other`` (a Value or a plain number)."""
        return other.value if isinstance(other, Value) else other

    @staticmethod
    def _report(verbose, *nodes):
        """Print operands after a backward step.

        ``verbose == 2`` prints only parameters (non-constant leaves); any
        other truthy value prints every operand; falsy prints nothing.
        """
        if verbose == 2:
            for node in nodes:
                if not node.children and not node.isconstant:
                    print(node)
        elif verbose:
            for node in nodes:
                print(node)

    @staticmethod
    def _sigmoid(x):
        """Logistic function, evaluated without overflowing for large ``|x|``."""
        if x >= 0:
            return 1 / (1 + np.exp(-x))
        e = np.exp(x)
        return e / (1 + e)


    # UNARY OPERATORS
    def __pos__(self):
        return self

    def __neg__(self):
        return (-1) * self


    # ADDITION
    def __add__(self, other):
        """Return the node ``self + other``."""
        other = Value._operand(other)
        res = Value(self.value + other.value, label=f'[{self.label} + {other.label}]', children=(self, other), op='+')

        def _backward(verbose=True):
            # d(a + b)/da = d(a + b)/db = 1
            self.grad += res.grad
            other.grad += res.grad
            Value._report(verbose, self, other)

        def _forward():
            res.value = self.value + other.value

        res._forward = _forward
        res._backward = _backward
        return res

    def __radd__(self, other):
        return self + other


    # SUBTRACTION
    def __sub__(self, other):
        """Return ``self - other``, built as ``self + (-1 * other)``."""
        other = Value._operand(other)
        res = self + -other
        res.label = f'[{self.label} - {other.label}]'
        res.op = '-'
        return res

    def __rsub__(self, other):
        """Return ``other - self`` for a plain number ``other``."""
        other = Value._constant(other)
        res = -self + other
        res.label = f'[{other.value} - {self.label}]'
        res.op = '-'  # same marking as __sub__
        return res


    # MULTIPLICATION
    def __mul__(self, other):
        """Return the node ``self * other``."""
        other = Value._operand(other)
        res = Value(self.value * other.value, label=f'({self.label} * {other.label})', children=(self, other), op='*')

        def _backward(verbose=True):
            # d(a * b)/da = b, d(a * b)/db = a
            self.grad += other.value * res.grad
            other.grad += self.value * res.grad
            Value._report(verbose, self, other)

        def _forward():
            res.value = self.value * other.value

        res._forward = _forward
        res._backward = _backward
        return res

    def __rmul__(self, other):
        return self * other


    # EXPONENTIAL OPERATOR
    def __pow__(self, other):  # for x**y and x**2
        """Return the node ``self ** other``.

        Raises:
            KeyboardInterrupt: if the base is negative and the exponent is not
                a constant (e.g. ``(-2) ** x``). A ValueError would be the
                conventional choice; KeyboardInterrupt is kept because the
                demo cell in this notebook catches exactly that type.
        """
        other = Value._operand(other)
        # bans expressions like -2**x, but allows 2**x, x**-3, x**3, 2**3 and 2**-3
        if self.value < 0 and not other.isconstant:
            print('why did you do that. you killed the program. you should be ashamed of yourself.')
            print(f'{self.label} = {self.value} < 0 and {other.label} is not a constant!!! never do this again.')
            print('sentenced to KeyboardInterrupt')
            raise KeyboardInterrupt

        res = Value(self.value ** other.value, label=f'{self.label} ^ {other.label}', children=(self, other), op='^')

        def _backward(verbose=True):
            # d(a ** b)/da = b * a ** (b - 1). With b == 0 the result is the
            # constant 1, so the contribution is 0; skipping it also avoids
            # evaluating 0 ** -1 when a == 0.
            if other.value != 0:
                self.grad += self.value ** (other.value - 1) * (other.value) * res.grad
            # d(a ** b)/db = ln(a) * a ** b, only defined for a > 0
            if self.value > 0:
                other.grad += np.log(self.value) * res.value * res.grad
            Value._report(verbose, self, other)

        def _forward():
            res.value = self.value ** other.value

        res._forward = _forward
        res._backward = _backward
        return res

    def __rpow__(self, other):  # for 2**x with: self = x, other = 2
        return Value._constant(other) ** self


    # DIVISION
    def __truediv__(self, other):
        """Return ``self / other``, built as ``self * other ** -1``."""
        other = Value._operand(other)
        res = self * other**(-1)
        res.label = f'({self.label} / {other.label})'
        res.op = '/'
        return res

    def __rtruediv__(self, other):
        """Return ``other / self`` for a plain number ``other``."""
        other = Value._constant(other)
        res = self**(-1) * other
        res.label = f'({other.label} / {self.label})'
        res.op = '/'
        return res


    # COMPARISON OPERATORS
    # These compare the stored numbers and return plain bools.
    def __lt__(self, other):
        return self.value < Value._number(other)

    def __gt__(self, other):
        return self.value > Value._number(other)

    def __le__(self, other):
        return self.value <= Value._number(other)

    def __ge__(self, other):
        return self.value >= Value._number(other)

    # == and != are deliberately NOT overloaded. Defining __eq__ without
    # __hash__ makes instances unhashable, but nodes are stored in sets
    # (`children`), and graph traversal must tell nodes apart by identity,
    # not by numeric value. So `x == z` is an identity check.


    # FORWARD PROPAGATION
    def forward(self):
        """Recompute every node's value from the leaves up to this node.

        Only ``value`` fields change; gradients are left untouched.
        """
        # topo_sort_v2 lists parents first; reversed, operands come first.
        order = topo_sort_v2(self)[::-1]
        for i in order:
            i._forward()


    # BACKWARD PROPAGATION
    def backward(self, verbose=True):
        """Compute the derivative of this node w.r.t. every node below it.

        All gradients in the graph are reset first, so repeated calls do not
        accumulate.

        Args:
            verbose: falsy for silent operation; 2 to print only parameters
                (non-constant leaves) as they receive gradient; any other
                truthy value to print every operand after every step.
        """
        order = topo_sort_v2(self)
        for i in order:
            i.grad = 0
        self.grad=1
        if verbose:
            print(self)
            print()
        # Parents come before their operands, so each node's gradient is
        # complete before it is pushed further down.
        for i in order:
            i._backward(verbose)


    # ACTIVATION FUNCTIONS
    def relu(self):
        """Return the node ``max(0, self)``."""
        # The only operand is `self`; deeper nodes are reached through it.
        res = Value(max(0, self.value), f'RELU({self.name if self.name else self.label})', children=(self,), op='relu')

        def _forward():
            res.value = max(0, self.value)

        def _backward(verbose=True):
            # Derivative is 1 where the input is positive, 0 elsewhere.
            self.grad += (self > 0) * res.grad
            Value._report(verbose, self)

        res._backward = _backward
        res._forward = _forward
        return res

    def softplus(self):
        """Return the node ``log(1 + exp(self))``.

        Evaluated with ``np.logaddexp(0, x)`` so that large inputs give ``x``
        instead of overflowing to ``inf``.
        """
        # The only operand is `self`; deeper nodes are reached through it.
        res = Value(np.logaddexp(0.0, self.value), f'Softplus({self.name if self.name else self.label})', children=(self,), op='softplus')

        def _forward():
            res.value = np.logaddexp(0.0, self.value)

        def _backward(verbose=True):
            # d softplus(x)/dx = sigmoid(x)
            self.grad += Value._sigmoid(self.value) * res.grad
            Value._report(verbose, self)

        res._backward = _backward
        res._forward = _forward
        return res


    # VISUALIZATION HELP
    def print_tree(self):
        """Print every node reachable from this one, this node first."""
        for i in topo_sort_v2(self): print(i)

In [6]:
def gradient_descent(node, rate, iterations, verbose=True):
    """Minimise ``node`` by plain gradient descent on its parameters.

    Parameters are the leaves of the graph (no children) that are not
    constants; their ``value`` is updated in place.

    Args:
        node: output node to minimise; must provide ``backward(verbose)`` and
            ``forward()``.
        rate: step size multiplied with each gradient.
        iterations: number of update steps.
        verbose: passed on to ``node.backward``; when truthy the iteration
            index is printed as well.

    Returns:
        ``node`` itself, with values and gradients from the final step.
    """
    node.backward(verbose)
    # Descent changes values only, never the graph's structure, so the set of
    # parameters is collected once instead of re-sorting on every iteration.
    parameters = [n for n in topo_sort_v2(node) if not n.children and not n.isconstant]
    for i in range(iterations):
        for param in parameters:
            param.value -= rate*param.grad
        if verbose:
            print(f'\ni = {i}')
        # Refresh values with the new parameters, then the gradients.
        node.forward()
        node.backward(verbose)
    return node

In [7]:
# TEST ALL THE DIFFERENT OPERATIONS AND COMBINATIONS

# x and z hold the same number but are distinct objects (used by the == demo below).
x = Value(4, 'x')
y = Value(-3, 'y')
z = Value(4, 'z')
print(x)
print(y)
print(z)
print()
print()
# Number-on-the-left forms go through __radd__ / __rmul__, which call
# self + other / self * other; that is why 3 + x is labelled [x + 3].
print('3 + x', end=': '); print(3 + x)
print('x + 3', end=': '); print(x + 3)
print('x + y', end=': '); print(x + y)
print()
print('3 - x', end=': '); print(3 - x)
print('x - 3', end=': '); print(x - 3)
print('x - y', end=': '); print(x - y)
print()
print('3 * x', end=': '); print(3 * x)
print('x * 3', end=': '); print(x * 3)
print('x * y', end=': '); print(x * y)
print()
# Negation is implemented as (-1) * x, hence the label (x * -1).
print('-x', end=': '); print(-x)
print()
# Comparisons look at the stored numbers and return plain bools.
print('4 > x', end=': '); print(4 > x)
print('x > 4', end=': '); print(x > 4)
print('4 <= x', end=': '); print(4 <= x)
print()
# Value defines no __eq__, so == and != fall back to object identity.
print('using == and != does not yield anything useful.')
print('it doesnt look at the equality of the object values')
print('but rather the equality of the objects themselves')
print()
print('x == 4', end=': '); print(x == 4)
print('x == z', end=': '); print(x == z)
print('z != x', end=': '); print(z != x)
print()
print('3 / x', end=': '); print(3 / x)
print('x / 3', end=': '); print(x / 3)
print()
# From here on z is rebound to each result (the Value(4, 'z') above is no
# longer reachable under this name). backward(False) runs silently; the
# gradients are then read from x and y.
print(); print('x / y'); z = x / y; z.backward(False); print(z); print(f'{x}\n{y}')
print(); print('y / x'); z = y / x; z.backward(False); print(z); print(f'{x}\n{y}')
print(); print('x ^ y'); z = x ** y; z.backward(False); print(z); print(f'{x}\n{y}')
print()
# y is negative and x is not a constant, so __pow__ refuses the expression by
# raising KeyboardInterrupt; it is caught here because the failure is the demo.
try: print('y ^ x'); z = y ** x; z.backward(False); print(z); print(f'{x}\n{y}') 
except KeyboardInterrupt: print('fine, you get to live because this was a test')
print(); print('3 ^ x'); z = 3 ** x; z.backward(False); print(z); print(x)

x = 4, grad = 0.0
y = -3, grad = 0.0
z = 4, grad = 0.0


3 + x: [x + 3] = 7, grad = 0.0
x + 3: [x + 3] = 7, grad = 0.0
x + y: [x + y] = 1, grad = 0.0

3 - x: [3 - x] = -1, grad = 0.0
x - 3: [x - 3] = 1, grad = 0.0
x - y: [x - y] = 7, grad = 0.0

3 * x: (x * 3) = 12, grad = 0.0
x * 3: (x * 3) = 12, grad = 0.0
x * y: (x * y) = -12, grad = 0.0

-x: (x * -1) = -4, grad = 0.0

4 > x: False
x > 4: False
4 <= x: True

using == and != does not yield anything useful.
it doesnt look at the equality of the object values
but rather the equality of the objects themselves

x == 4: False
x == z: False
z != x: True

3 / x: (3 / x) = 0.75, grad = 0.0
x / 3: (x / 3) = 1.3333, grad = 0.0


x / y
(x / y) = -1.3333, grad = 1
x = 4, grad = -0.33333333
y = -3, grad = -0.44444444

y / x
(y / x) = -0.75, grad = 1
x = 4, grad = 0.1875
y = -3, grad = 0.25

x ^ y
x ^ y = 0.0156, grad = 1
x = 4, grad = -0.01171875
y = -3, grad = 0.02166085

y ^ x
why did you do that. you killed the program. you should be ashamed o

In [8]:
# TRY BACKPROPAGATION ON EXAMPLE FROM VIDEO (manually and automatically)

# Build L = (a * b + c) * f one operation per line so that every intermediate
# node (e, d) is bound to a variable and can be inspected later.
a = Value(2, 'a', name='a')
b = Value(-3, 'b', name='b')
e = a * b
c = Value(10, 'c', name='c')
d = e + c
f = Value(-2, 'f', name='f')
L = d * f

# Nodes created by operators start with an empty name, so names are assigned afterwards.
e.name = 'e'
d.name = 'd'
L.name = 'L'

L.print_tree()  # make sure all values are correct (gradients set to 0)

"L" = ([(a * b) + c] * f) = -8, grad = 0.0
"f" = f = -2, grad = 0.0
"d" = [(a * b) + c] = 4, grad = 0.0
"c" = c = 10, grad = 0.0
"e" = (a * b) = -6, grad = 0.0
"a" = a = 2, grad = 0.0
"b" = b = -3, grad = 0.0


In [9]:
# MANUAL BACKPROPAGATION

# Seed dL/dL = 1, then run each node's local backward step by hand, parents
# before operands (L, then d, then e), so that a node's gradient is complete
# before it is pushed further down.
# NOTE: the operator backward steps accumulate with +=, so re-running this cell
# without resetting the gradients adds to the previous values.
L.grad = 1
L._backward(verbose=False)
f._backward(verbose=False)  # f is a leaf: its _backward is the default no-op
d._backward(verbose=False)
e._backward(verbose=False)

L.print_tree()

"L" = ([(a * b) + c] * f) = -8, grad = 1
"f" = f = -2, grad = 4.0
"d" = [(a * b) + c] = 4, grad = -2.0
"c" = c = 10, grad = -2.0
"e" = (a * b) = -6, grad = -2.0
"a" = a = 2, grad = 6.0
"b" = b = -3, grad = -4.0


In [10]:
# AUTOMATED BACKPROPAGATION

# backward() first resets every gradient in the graph to 0, so the gradients
# accumulated manually above are replaced rather than doubled.
L.backward()  # (we get the same results as before)

"L" = ([(a * b) + c] * f) = -8, grad = 1

"d" = [(a * b) + c] = 4, grad = -2
"f" = f = -2, grad = 4
"e" = (a * b) = -6, grad = -2
"c" = c = 10, grad = -2
"a" = a = 2, grad = 6
"b" = b = -3, grad = -4


In [11]:
# FORWARD PROPAGATION TEST

a.value = 5  # set a = 5 and check how that affects L
L.forward()  # recomputes node values only; gradients keep their previous values
L

# the value indeed changed, forward is working correctly

"L" = ([(a * b) + c] * f) = 10, grad = 1

In [12]:
# GRADIENT DESCENT TEST 1

# NOTE: this cell rebinds x, y, z and f, which earlier cells also used.
x = Value(5.59, 'x')
y = Value(5.59, 'y')
z = Value(5.59, 'z')
# Python precedence: ** binds tighter than /, so this is (4 + ((x * y) ** 2) / z) + 4.
f = 4 + (x * y) ** 2 / z + 4; f.name = 'f'

print(f'f = {f.value}')

f = 182.67687899999999


In [13]:
# 1000 steps with learning rate 0.01. x, y and z are updated in place; the call
# returns f itself, whose repr is the cell output.
gradient_descent(f, 0.01, 1000, verbose=False)  # step towards a local minimum of f

"f" = [[((x * y) ^ 2 / z) + 4] + 4] = 8.0042, grad = 1

In [14]:
# Show the value reached and the parameter values that produce it. The printed
# gradients are the ones from the last backward() inside gradient_descent.
print(f'f_min = {f.value}\n')
print(x)
print(y)
print(z)

f_min = 8.004189266940877

x = 0.4121, grad = 0.0203304
y = 0.4121, grad = 0.0203304
z = 6.8857, grad = -0.0006084


In [15]:
# GRADIENT DESCENT TEST 2

x1 = Value(2, 'x1')
x2 = Value(3, 'x2')
x3 = Value(4, 'x3')
# Sum of squares with positive weights: the minimum is f = 0 at x1 = x2 = x3 = 0.
f = x1**2 + 17*x2**2 + 35*x3**2; f.name='f'

print(f'f = {f.value}')

f = 717


In [16]:
# 600 steps with learning rate 0.01; x1, x2 and x3 are updated in place.
gradient_descent(f, 0.01, 600, verbose=False)

"f" = [[x1 ^ 2 + (x2 ^ 2 * 17)] + (x3 ^ 2 * 35)] = 0.0, grad = 1

In [17]:
# f and every parameter should now be close to 0 (the minimum of the quadratic).
print(f'f_min = {f.value}\n')
print(x1)
print(x2)
print(x3)

f_min = 1.1839976007154615e-10

x1 = 0.0, grad = 2.176e-05
x2 = 0.0, grad = 0.0
x3 = 0.0, grad = 0.0


In [18]:
# ACTIVATION FUNCTION TEST: RELU (1)

x1 = Value(2, 'x1')
x2 = Value(3, 'x2')
x3 = Value(4, 'x3')
x4 = Value(1, 'x4')
# f is positive at the starting point (4 + 153 + 400 - 1 = 556), so the ReLU
# passes it through unchanged.
f = x1**2 + 17*x2**2 + 25*x3**2 - x4; f.name='f'
L = f.relu(); L.name = 'L'

print(f'L = {L.value}')

L = 556


In [19]:
L.backward(verbose=2)  # verbose=2 prints only the parameters (non-constant leaf nodes)

"L" = RELU(f) = 556, grad = 1

x3 = 4, grad = 200
x2 = 3, grad = 102
x1 = 2, grad = 4
x4 = 1, grad = -1


In [20]:
# relu's backward step multiplies by (f > 0), so once f is no longer positive
# every parameter gradient is 0 and the parameters stop moving.
gradient_descent(L, 0.001, 1000, verbose=False)

"L" = RELU(f) = 0, grad = 1

In [21]:
# L is 0 wherever f <= 0, so the parameters shown are where descent stalled,
# not a minimum of f itself.
print(f'L_min = {L.value}\n')
print(x1)
print(x2)
print(x3)
print(x4)

L_min = 0

x1 = 1.1327, grad = 0.0
x2 = 0.0002, grad = 0.0
x3 = 0.0, grad = 0.0
x4 = 1.284, grad = 0


In [22]:
# ACTIVATION FUNCTION TEST: RELU (2)

x1 = Value(2, 'x1')
x2 = Value(3, 'x2')
x3 = Value(4, 'x3')
x4 = Value(800, 'x4')
# Same expression as RELU (1), but x4 = 800 makes f negative
# (4 + 153 + 400 - 800 = -243), so the ReLU outputs 0 from the start.
f = x1**2 + 17*x2**2 + 25*x3**2 - x4; f.name='f'
L = f.relu(); L.name = 'L'

print(f'L = {L.value}')

L = 0


In [23]:
# f is not positive here, so relu passes no gradient and every parameter gets 0.
L.backward(verbose=2)

"L" = RELU(f) = 0, grad = 1

x1 = 2, grad = 0
x2 = 3, grad = 0
x3 = 4, grad = 0
x4 = 800, grad = 0


In [24]:
gradient_descent(L, 0.01, 1000, verbose=False)  # this does nothing though since all gradients are = 0, L is already at its minimum

"L" = RELU(f) = 0, grad = 1

In [25]:
# With all gradients at 0 the updates subtract 0, so the parameters keep their
# starting values.
print(f'L_min = {L.value}\n')
print(x1)
print(x2)
print(x3)
print(x4)

L_min = 0

x1 = 2.0, grad = 0.0
x2 = 3.0, grad = 0.0
x3 = 4.0, grad = 0.0
x4 = 800.0, grad = 0


In [26]:
# ACTIVATION FUNCTION TEST: Softplus (1)

x1 = Value(2, 'x1')
x2 = Value(3, 'x2')
x3 = Value(4, 'x3')
x4 = Value(1, 'x4')
# f = 556 at the starting point; for an input this large log(1 + exp(f)) is
# numerically equal to f.
f = x1**2 + 17*x2**2 + 25*x3**2 - x4; f.name='f'
L = f.softplus(); L.name = 'L'

print(f'L = {L.value}')

L = 556.0


In [27]:
# verbose=2 prints only the parameters (non-constant leaf nodes).
L.backward(verbose=2)

"L" = Softplus(f) = 556.0, grad = 1

x4 = 1, grad = -1.0
x2 = 3, grad = 102.0
x1 = 2, grad = 4.0
x3 = 4, grad = 200.0


In [28]:
# 10000 steps with learning rate 0.01. Softplus is positive for every input,
# so L can approach 0 but never reach it.
gradient_descent(L, 0.01, 10000, verbose=False)

"L" = Softplus(f) = 0.0098, grad = 1

In [29]:
# Value reached after descent and the parameters that produce it.
print(f'L_min = {L.value}\n')
print(x1)
print(x2)
print(x3)
print(x4)

L_min = 0.009841417248237961

x1 = 0.0014, grad = 2.802e-05
x2 = 0.0, grad = 0.0
x3 = 0.0, grad = 0.0
x4 = 4.6162, grad = -0.00979315


In [42]:
# ACTIVATION FUNCTION TEST: Softplus (2)

x1 = Value(2, 'x1')
x2 = Value(3, 'x2')
x3 = Value(4, 'x3')
x4 = Value(400, 'x4')
# Same expression as Softplus (1), but with x4 = 400, so f starts at
# 4 + 153 + 400 - 400 = 157.
f = x1**2 + 17*x2**2 + 25*x3**2 - x4; f.name='f'
L = f.softplus(); L.name = 'L'

print(f'L = {L.value}')

L = 157.0


In [43]:
# verbose=2 prints only the parameters (non-constant leaf nodes).
L.backward(verbose=2)

"L" = Softplus(f) = 157.0, grad = 1

x4 = 400, grad = -1.0
x2 = 3, grad = 102.0
x1 = 2, grad = 4.0
x3 = 4, grad = 200.0


In [44]:
# 10000 steps with learning rate 0.001; the parameters are updated in place.
gradient_descent(L, 0.001, 10000, verbose=False)

"L" = Softplus(f) = 0.0, grad = 1

In [45]:
# Value reached after descent and the parameters that produce it.
print(f'L_min = {L.value}\n')
print(x1)
print(x2)
print(x3)
print(x4)

L_min = 3.660218484573215e-07

x1 = 1.984, grad = 1.45e-06
x2 = 2.612, grad = 3.251e-05
x3 = 3.2574, grad = 5.961e-05
x4 = 400.004, grad = -3.7e-07
