In [3]:
import numpy as np

In [85]:
class CreateDataset:
    """ create the 2 XOR datasets, X & Y, for the DNN model """
    
    # create the entry dataset X
    def create_X(self, X_size):
        self.X = np.random.randint(2, size=(2, X_size))
        return self.X
    
    # create the label dataset Y
    def create_Y(self, X, X_size):
        self.Y = np.sum(X, axis=0).reshape((1,X_size))
        self.Y[self.Y != 1] = 0
        return self.Y

In [86]:
class Activation:
    """ the abstract class for all activation funtion classes"""
    
    # the basic formula of the activation function for the forward pass
    def formula(self, Z):
        raise NotImplementdError
    
    # to calculate the derivative of the activation function for the backward pass
    def derivative(self, input):
        raise NotImplementdError
    
    # to be used to finetune the initialized weight according to the activation function set for the first layer
    def heuristic(self, layer_dims):
        raise NotImplementdError
    

class Sigmoid(Activation):
    """ all the functions related to the sigmoid activation function """
    
    # the basic formula of the sigmoid function for the forward pass
    def formula(self, Z):
        return 1 / (1 + np.exp(-Z))
    
    # to calculate the derivative of the sigmoid function for the backward pass
    def derivative(self, A):
        return A * (1 - A)
    
    # to be used to finetune the initialized weight if sigmoid function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    

class Tanh(Activation):
    """ all the functions related to the tanh activation function """
    
    # the basic formula of the tanh function for the forward pass
    def formula(self, Z):
        return (np.exp(Z) - np.exp(-Z)) / (np.exp(Z) + np.exp(-Z))
    
    # to calculate the derivative of the tanh function for the backward pass
    def derivative(self, A):
        return 1 - A**2
    
    # to be used to finetune the initialized weight if tanh function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    
    
class Relu(Activation):
    """ all the functions related to the relu activation function """
    
    # the basic formula of the relu function for the forward pass
    def formula(self, Z):
        return (Z > 0) * Z
    
    # to calculate the derivative of the relu function for the backward pass
    def derivative(self, Z):
        return (Z > 0) * 1
    
    # to be used to finetune the initialized weight if relu function is set for the first layer
    def heuristic(self, layer_b4):
        return np.sqrt(2 / layer_b4) 

In [87]:
class Cost:
    """ the abstract class for all the cost functions """
    
    # calculate the cost function
    def formula(self, A, Y):
        raise NotImplementedError
    
    # calculate the derivative of the cost function (dA[L]) for the last layer
    def derivative(self, A, Y):
        raise NotImplementedError

class LossEntropy(Cost):
    """ Use Loss Entropy to calculate the cost """
    
    # calculate the Lose Entropy cost
    def formula(self, A, Y):
        self.m = Y.shape[1]
#         print(self.m)
#         print(f'Y: {Y}')
#         print(f'A: {A}')
        return - np.sum((Y * np.log(A) + (1-Y) * np.log(1-A)), axis=1) / self.m
    
    # calculate the derivative of the Lost Entropy cost
    def derivative(self, A, Y):
        return - ((np.divide(Y, A)) - (np.divide(1-Y, 1-A)))

In [93]:
class Layer:
    """ the abstract class for all layer classes """
    
    def __init__(self):
        self.input = None
        self.output = None 
    
    # implement forward pass
    def forward_pass(self, input):
        raise NotImplementedError
        
    # implement backward pass
    def backward_pass(self, input):
        raise NotImplementedError

        
class FCLayer(Layer):
    
    # initialize parameters
    def __init__(self, layer_b4, layer_after, activation):

        self.activation = activation
        self.W = np.random.randn(layer_after, layer_b4) * getattr(self.activation, 'heuristic')(self, layer_b4)
        self.b = np.zeros((layer_after, 1))
#         print(f'initialized W: {self.W}')
#         print(f'initialized b: {self.b}')
    
    # calculate forward pass: linear fn (Z = WX + b) and non-linear (A = g(Z))
    def forward_pass(self, X):#, activation):
        self.A_prev = X
        self.Z = np.dot(self.W, X) + self.b
        self.A = getattr(self.activation, 'formula')(self, self.Z)
        return self.A
    
    # calculate backward pass: 
    # dZ = dA * g'(Z))
    # dA[l-1] = W.T * dZ
    def backward_pass(self, dA, learning_rate):
        self.m = dA.shape[1]
        
#         print(f'shape of W: {np.shape(self.W)}')
        self.dZ = dA * getattr(self.activation, 'derivative')(self, self.A)
        pre = np.dot(self.W.T, self.dZ)
        
#         print(f'shape of dZ: {np.shape(self.dZ)}')
        self.dW = np.dot(self.dZ, self.A_prev.T) / self.m
#         print(f'shape of dW: {np.shape(self.dW)}')
        
        
        self.W -= learning_rate * self.dW
        self.b -= learning_rate * (np.sum(self.dZ) / self.m)
#         print(f'updated W: {self.W}')
#         print(f'updated b: {self.b}')
        
        return np.dot(self.W.T, self.dZ) # dA[l-1]        

In [134]:
class Network:
    """ build the whole L-layer DNN """
    
    def __init__(self):
        self.layers = []
    
    # combine individual layer to form the whole DNN
    def combine(self, layer):
        self.layers.append(layer)
        
    # train the DNN model
    def fit(self, X, Y, iteration, loss_fn, learning_rate):
        
        for i in range(iteration):
            
            A = X
            for layer in self.layers:
                A = layer.forward_pass(A)
#                 print(f'A: {A}')
                
            cost = getattr(loss_fn, 'formula')(A, Y)
            if i % 1000 == 0:
                print(f'cost de {i}: {cost}')
#             print(f'cost de {i}: {cost}')
            
            dA = getattr(loss_fn, 'derivative')(A, Y)
            
            for layer in reversed(self.layers):
                dA = layer.backward_pass(dA, learning_rate)
#                 print(f'dA: {dA}')
        return A

    # predict test dataset using the trained DNN model
    def predict(self, X_size, A, Y, loss_fn):
        
        self.A_train = A
        self.Y_train = Y
        
        # genreate test dataset
        self.X_test = ds.create_X(X_size)
        self.Y_test = ds.create_Y(self.X_test, X_size)
        
        self.A_test = self.X_test
        for layer in self.layers:
            self.A_test = layer.forward_pass(self.A_test)
        
        cost = getattr(loss_fn, 'formula')(self.A_test, self.Y_test)
        print(f'cost of test dataset: {cost}')
        
        self.A_train = (self.A_train > 0.5) * 1
        self.accuracy_train = (self.A_train == self.Y_train) * 1
        print(f'Accuracy of the train dataset: {np.average(self.accuracy_train) * 100}%')
        
        self.A_test = (self.A_test > 0.5) * 1
        self.accuracy_test = (self.A_test == self.Y_test) * 1
        print(f'Accuracy of the train dataset: {np.average(self.accuracy_test) * 100}%')        

In [141]:
# define variables
X_size = 4000
learning_rate = 0.1
loss_fn = LossEntropy()
iteration = 10000

# generate train dataset
ds = CreateDataset()
X = ds.create_X(X_size)
Y = ds.create_Y(X, X_size)

# define each layer and combine them to build the whole DNN
net = Network()
net.combine(FCLayer(2, 3, Relu))
net.combine(FCLayer(3, 1, Sigmoid))

# train the DNN model
A = net.fit(X, Y, iteration, loss_fn, learning_rate)

# predict a result with a test dataset using the trained DNN model
net.predict(X_size, A, Y, loss_fn)

cost de 0: [0.69310324]
cost de 1000: [0.09215966]
cost de 2000: [0.02284007]
cost de 3000: [0.01184969]
cost de 4000: [0.00782838]
cost de 5000: [0.00572317]
cost de 6000: [0.00449707]
cost de 7000: [0.00368809]
cost de 8000: [0.00312169]
cost de 9000: [0.00270093]
cost of test dataset: [0.00248328]
Accuracy of the train dataset: 100.0%
Accuracy of the train dataset: 100.0%
