In [3]:
import numpy as np
from collections import OrderedDict

Multilayer Perceptron

In [4]:
class Module:
    """Minimal PyTorch-style base class for layers, losses and containers.

    Child modules are kept in the ordered registry ``_modules`` (also exposed
    under the public name ``modules``) and NumPy arrays in ``_parameters``.
    Attribute assignment is routed automatically: assigning a ``Module``
    registers a child, assigning an ``np.ndarray`` registers a parameter, and
    anything else becomes a regular instance attribute.
    """

    def __init__(self):
        # Bypass the custom __setattr__: both registries must exist before
        # any attribute assignment can be routed into them.
        object.__setattr__(self, '_modules', OrderedDict())
        object.__setattr__(self, '_parameters', OrderedDict())

    @property
    def modules(self):
        """Ordered mapping ``name -> child module`` (same object as ``_modules``)."""
        return self._modules

    @modules.setter
    def modules(self, value):
        # Kept so that code rebinding ``self.modules`` continues to work and
        # stays in sync with the registry used by the rest of the class.
        object.__setattr__(self, '_modules', value)

    def add_module(self, module, name: str):
        """Register ``module`` as a child under ``name``.

        Raises:
            KeyError: if ``name`` is already taken by something that is not a
                registered child module, contains ``"."`` or is empty.
        """
        if hasattr(self, name) and name not in self.modules:
            raise KeyError("attribute '{}' already exists".format(name))
        elif '.' in name:
            raise KeyError("module name can't contain \".\"")
        elif name == '':
            raise KeyError("module name can't be empty string \"\"")
        self.modules[name] = module

    def register_parameter(self, name, param):
        """Register ``param`` under ``name`` in the parameter registry.

        Raises:
            KeyError: if ``name`` contains ``"."``, is empty, or is already
                taken by something that is not a registered parameter.
        """
        if '.' in name:
            raise KeyError("parameter name can't contain \".\"")
        elif name == '':
            raise KeyError("parameter name can't be empty string \"\"")
        elif hasattr(self, name) and name not in self._parameters:
            raise KeyError("attribute '{}' already exists".format(name))
        else:
            self._parameters[name] = param

    def parameters(self, recurse=True):
        """Yield ``(name, parameter)`` pairs of this module.

        With ``recurse=True`` the parameters of all child modules follow,
        their names prefixed with ``"<child name>."`` so that entries coming
        from different children cannot be confused.

        A parameter is skipped only when it carries a falsy ``requires_grad``
        attribute; plain NumPy arrays have no such attribute and are always
        yielded.
        """
        for name, param in self._parameters.items():
            if getattr(param, 'requires_grad', True):
                yield name, param
        if recurse:
            for module_name, module in self._modules.items():
                # The child generator has already applied the requires_grad filter.
                for param_name, param in module.parameters(recurse):
                    yield module_name + '.' + param_name, param

    def __dir__(self):
        """List class attributes, instance attributes, child modules and parameters."""
        module_attrs = dir(self.__class__)
        attrs = list(self.__dict__.keys())
        modules = list(self._modules.keys())
        parameters = list(self._parameters.keys())
        keys = module_attrs + attrs + modules + parameters

        # Eliminate attrs that are not legal Python variable names
        keys = [key for key in keys if not key[0].isdigit()]

        return sorted(keys)

    def __getattr__(self, name: str):
        """Resolve ``name`` from the registries when normal lookup fails.

        Works on ``self.__dict__`` directly so that a missing registry (for
        example before ``__init__`` ran) ends in ``AttributeError`` instead of
        infinite recursion.
        """
        if '_modules' in self.__dict__:
            modules = self.__dict__['_modules']
            if name in modules:
                return modules[name]
        if '_parameters' in self.__dict__:
            parameters = self.__dict__['_parameters']
            if name in parameters:
                return parameters[name]
        raise AttributeError("'{}' object has no attribute '{}'".format(
            type(self).__name__, name))

    def __setattr__(self, name, value):
        """Route the assignment to the matching registry (see class docstring)."""
        if isinstance(value, Module):
            # Same validation path as parameters get through register_parameter.
            self.add_module(value, name)
        elif isinstance(value, np.ndarray):
            self.register_parameter(name, value)
        else:
            object.__setattr__(self, name, value)

    def forward(self, *args, **kwargs) -> np.ndarray:
        """Forward pass; the base implementation does nothing and returns ``None``."""
        pass

    def backward(self, *args, **kwargs) -> np.ndarray:
        """Backward pass; the base implementation does nothing and returns ``None``."""
        pass

    def __call__(self, *args, **kwargs):
        """Calling a module is shorthand for calling ``forward``."""
        return self.forward(*args, **kwargs)

Simplest implementation of MLP supporting forward and backward pass for gradient descent

In [5]:


class Model(Module):
    """Sequential container that chains its registered modules.

    ``forward`` feeds the input through the modules in registration order;
    ``backward`` passes a gradient through them in the opposite order.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input) -> np.ndarray:
        """Return the output of the last module after applying all of them in order."""
        activation = input
        for layer in self.modules.values():
            activation = layer(activation)
        return activation

    def backward(self, z: np.ndarray):
        """Propagate ``z`` through every module's ``backward``, last module first."""
        grad = z
        for layer in reversed(self.modules.values()):
            grad = layer.backward(grad)
        return grad

Loss functions supporting forward and backward pass

In [6]:
class MSELoss(Module):
    """Mean squared error loss."""

    def __init__(self):
        super().__init__()

    def forward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the mean over all elements of ``(target - input) ** 2``."""
        residual = np.subtract(target, input)
        squared_error = np.square(residual)
        return squared_error.mean()

    def backward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the element-wise gradient ``-2 * (target - input)``.

        No averaging is applied here, so the result has the shape of the
        element-wise difference.
        """
        residual = target - input
        return -2 * residual


class BCELoss(Module):
    """Element-wise binary cross-entropy loss.

    Predictions are clamped to ``[eps, 1 - eps]`` before use. Without the
    clamp, a prediction that saturates to exactly 0 or 1 produces ``log(0)``
    in ``forward`` and a division by zero in ``backward``, which turn the
    loss and its gradient into ``inf``/``nan``.

    Args:
        eps: clamp margin applied to the predictions (default ``1e-12``).
    """

    def __init__(self, eps: float = 1e-12):
        super(BCELoss, self).__init__()
        self.eps = eps

    def _clamp(self, input: np.ndarray) -> np.ndarray:
        """Return ``input`` limited to the interval ``[eps, 1 - eps]``."""
        return np.clip(input, self.eps, 1.0 - self.eps)

    def forward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return ``-(t * log(p) + (1 - t) * log(1 - p))`` per element (not reduced)."""
        p = self._clamp(input)
        return -(np.multiply(target, np.log(p)) + np.multiply((1 - target), np.log(1 - p)))

    def backward(self, input: np.ndarray, target: np.ndarray) -> np.ndarray:
        """Return the per-element derivative ``-t / p + (1 - t) / (1 - p)``."""
        p = self._clamp(input)
        return -target / p + (1 - target) / (1 - p)
    

Three of the most commonly used activation functions in ANN implementations: sigmoid, tanh and ReLU

In [7]:
class Sigmoid(Module):
    """Logistic sigmoid activation, ``1 / (1 + exp(-x))``."""

    def __init__(self):
        super().__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Apply the sigmoid element-wise and remember ``input`` for ``backward``."""
        self.fw_input = input
        denominator = 1.0 + np.exp(-input)
        return 1.0 / denominator

    def backward(self, da) -> np.ndarray:
        """Return ``da * s * (1 - s)`` where ``s`` is the sigmoid of the stored input."""
        # The activation is recomputed from the cached forward input.
        s = self.forward(self.fw_input)
        local_grad = np.multiply(s, 1 - s)
        return np.multiply(da, local_grad)


class Tanh(Module):
    """Hyperbolic tangent activation."""

    def __init__(self):
        super(Tanh, self).__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Apply ``tanh`` element-wise and remember ``input`` for ``backward``."""
        self.fw_input = input
        # np.tanh instead of (exp(2x) - 1) / (exp(2x) + 1): the explicit
        # formula overflows to inf / inf = nan for large positive inputs.
        return np.tanh(input)

    def backward(self, da) -> np.ndarray:
        """Return ``da * (1 - tanh(x) ** 2)`` for the stored forward input ``x``."""
        return np.multiply(da, 1 - np.square(self.forward(self.fw_input)))

class ReLU(Module):
    """Rectified linear unit activation, ``max(x, 0)``."""

    def __init__(self):
        super().__init__()

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Clamp negative entries to zero and remember ``input`` for ``backward``."""
        self.fw_input = input
        rectified = np.maximum(input, 0.0)
        return rectified

    def backward(self, da) -> np.ndarray:
        """Let ``da`` through where the stored input was strictly positive, else 0."""
        pass_through = np.where(self.fw_input > 0, 1, 0)
        return np.multiply(da, pass_through)

Fully connected (linear) layer, used for the input, hidden and output layers of the MLP

In [8]:
class Linear(Module):
    """Affine layer computing ``W @ input + b``.

    ``W`` has shape ``(out_features, in_features)`` and is drawn from a
    standard normal distribution; ``b`` has shape ``(out_features, 1)`` and
    starts at zero. ``dW`` and ``db`` hold the gradients written by the most
    recent ``backward`` call.
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.W = np.random.randn(out_features, in_features)
        self.dW = np.zeros_like(self.W)
        # Column vector, so it broadcasts across the columns of W @ input.
        self.b = np.zeros((out_features, 1))
        self.db = np.zeros_like(self.b)

    def forward(self, input: np.ndarray) -> np.ndarray:
        """Return ``W @ input + b``; caches ``input`` and its column count ``m``."""
        self.fw_inputs = input
        self.m = self.fw_inputs.shape[1]
        return np.matmul(self.W, input) + self.b

    def backward(self, dz: np.ndarray) -> np.ndarray:
        """Store ``dW``/``db`` (scaled by ``1 / m``) and return ``W.T @ dz``."""
        da = np.matmul(self.W.T, dz)
        scale = 1 / self.m
        self.dW = scale * np.matmul(dz, self.fw_inputs.T)
        self.db = scale * np.sum(dz, axis=1, keepdims=True)
        return da
        

Utility function to create sample dataset to play with

In [9]:
def dataset_Circles(m=10, radius=0.7, noise=0.0, verbose=False):
    """Generate a random 2-D "inside / outside the circle" dataset.

    Args:
        m: number of points to draw.
        radius: distance from the origin above which a point is labelled 1.0.
        noise: width of the uniform perturbation added to the points before
            labelling (each coordinate is shifted by a value in
            ``[-noise / 2, noise / 2)``).
        verbose: when true, print every intermediate array.

    Returns:
        ``(X, Y)`` where ``X`` has shape ``(2, m)`` with coordinates in
        ``[-1, 1)`` and ``Y`` has shape ``(1, m)`` with 1.0 for points whose
        perturbed position lies farther than ``radius`` from the origin and
        0.0 otherwise. The returned ``X`` is the unperturbed sample.
    """

    def report(label, value):
        # Debug helper: dump an intermediate array when verbose output is on.
        if verbose:
            print(label, value, '\n')

    X = np.random.rand(2, m) * 2.0 - 1.0
    report('X: \n', X)

    N = (np.random.rand(2, m) - 0.5) * noise
    report('N: \n', N)

    Xnoise = X + N
    report('Xnoise: \n', Xnoise)

    XSquare = np.square(Xnoise)
    report('XSquare: \n', XSquare)

    RSquare = XSquare.sum(axis=0, keepdims=True)
    report('RSquare: \n', RSquare)

    R = np.sqrt(RSquare)
    report('R: \n', R)

    Y = (R > radius).astype(float)
    report('Y: \n', Y)

    return X, Y

Utility function for MLP learning process evaluation

In [10]:
def gradient_check(network: "Module", loss_function: "Module", X: np.ndarray, Y: np.ndarray, epsilon=1e-7):
    """Compare the stored ``dW`` gradients with central finite differences.

    For every module in ``network.modules`` that has both ``W`` and ``dW``,
    each entry of ``W`` is shifted by ``+epsilon`` and ``-epsilon``, the mean
    loss is re-evaluated, and ``(J_plus - J_minus) / (2 * epsilon)`` is
    compared with the matching entry of ``dW``. ``dW`` must already have been
    filled by a backward pass over the same ``X`` and ``Y``.

    The relative difference
    ``||grad - approx|| / (||grad|| + ||approx||)`` is printed with a
    success message when it is at most ``2e-7`` and with a failure message
    otherwise. A non-finite difference (no layer to check, all gradients
    zero, NaN loss) and a difference of exactly zero are reported as
    failures too.

    Args:
        network: model exposing ``modules`` (mapping of name to layer) and
            callable on ``X``.
        loss_function: callable ``(prediction, Y)`` returning the loss values.
        X: input passed to ``network``.
        Y: target passed to ``loss_function``.
        epsilon: finite-difference step.

    Returns:
        None; the verdict is printed.
    """
    gradapprox = []
    grad_backward = []

    for layer in network.modules.values():
        # Only layers carrying both weights and a weight gradient are checked.
        if not (hasattr(layer, "W") and hasattr(layer, "dW")):
            continue
        rows, cols = layer.W.shape
        for i in range(rows):
            for j in range(cols):
                origin_W = np.copy(layer.W[i, j])
                try:
                    layer.W[i, j] = origin_W + epsilon
                    J_plus = np.mean(loss_function(network(X), Y))

                    layer.W[i, j] = origin_W - epsilon
                    J_minus = np.mean(loss_function(network(X), Y))
                finally:
                    # Never leave a perturbed weight behind, even if the
                    # forward pass or the loss raises.
                    layer.W[i, j] = origin_W

                gradapprox.append((J_plus - J_minus) / (2 * epsilon))
                grad_backward.append(layer.dW[i, j])

    # Compare gradapprox to backward propagation gradients by computing difference.
    gradapprox = np.reshape(gradapprox, (-1, 1))
    grad_backward = np.reshape(grad_backward, (-1, 1))

    numerator = np.linalg.norm(grad_backward - gradapprox)
    denominator = np.linalg.norm(grad_backward) + np.linalg.norm(gradapprox)
    # A zero (or NaN) denominator means there is nothing meaningful to compare.
    difference = numerator / denominator if denominator > 0 else float("nan")

    # NaN compares false against any threshold, so it has to be tested
    # explicitly or it would be reported as a success.
    failed = (not np.isfinite(difference)) or difference > 2e-7 or not difference

    if failed:
        print(
            "\033[91m" + "There is a mistake in the backward propagation! difference = " + str(difference) + "\033[0m")
    else:
        print(
            "\033[92m" + "Your backward propagation works perfectly fine! difference = " + str(difference) + "\033[0m")

Perform and evaluate forward and backward pass through MLP

In [11]:
def build_mlp() -> Model:
    """Build the six-layer MLP (2 -> 20 -> 20 -> 20 -> 20 -> 20 -> 1) used below.

    Every call creates new ``Linear`` layers and therefore draws new random
    weights, so each experiment starts from its own initialisation.
    """
    layers = [
        ('first-hidden', Linear(2, 20)),
        ('activation-1', Sigmoid()),
        ('second-hidden', Linear(20, 20)),
        ('activation-2', Sigmoid()),
        ('third-hidden', Linear(20, 20)),
        ('activation-3', Tanh()),
        ('fourth-hidden', Linear(20, 20)),
        ('activation-4', ReLU()),
        ('fifth-hidden', Linear(20, 20)),
        ('activation-5', Sigmoid()),
        ('sixth-hidden', Linear(20, 1)),
        ('activation-6', Sigmoid()),
    ]
    network = Model()
    for layer_name, layer in layers:
        network.add_module(layer, layer_name)
    return network


dataset_features_X, dataset_labels_Y = dataset_Circles(m=128, radius=0.7, noise=0.0)

# Experiment 1: binary cross-entropy loss.
mlp = build_mlp()

y_hat = mlp.forward(dataset_features_X)

bce = BCELoss()
loss = bce.forward(y_hat, dataset_labels_Y)

back = bce.backward(y_hat, dataset_labels_Y)

# The backward pass fills dW/db of every Linear layer; gradient_check reads dW.
output = mlp.backward(back)

gradient_check(mlp, bce, dataset_features_X, dataset_labels_Y)


# Experiment 2: mean squared error loss on a freshly initialised network.
mlp = build_mlp()

y_hat = mlp.forward(dataset_features_X)

mse = MSELoss()
loss = mse.forward(y_hat, dataset_labels_Y)

back = mse.backward(y_hat, dataset_labels_Y)

output = mlp.backward(back)

gradient_check(mlp, mse, dataset_features_X, dataset_labels_Y)

[92mYour backward propagation works perfectly fine! difference = 1.2071803817734068e-08[0m
[92mYour backward propagation works perfectly fine! difference = 2.8512093632985954e-09[0m
