In [274]:
import numpy as np

In [275]:
class CreateDataset:
    """ create the 2 XOR datasets, X & Y, for the DNN model """
    
    # create the entry dataset X
    def create_X(self, X_size):
        self.X = np.random.randint(2, size=(2, X_size))
        return self.X
    
    # create the label dataset Y
    def create_Y(self, X):
        self.Y = np.sum(X, axis=0).reshape((1,4))
        self.Y[self.Y != 1] = 0
        return self.Y

In [276]:
class Activation:
    """ the abstract class for all activation funtion classes"""
    
    # the basic formula of the activation function for the forward pass
    def formula(self, Z):
        raise NotImplementdError
    
    # to calculate the derivative of the activation function for the backward pass
    def derivative(self, input):
        raise NotImplementdError
    
    # to be used to finetune the initialized weight according to the activation function set for the first layer
    def heuristic(self, layer_dims):
        raise NotImplementdError
    

class Sigmoid(Activation):
    """ all the functions related to the sigmoid activation function """
    
    # the basic formula of the sigmoid function for the forward pass
    def formula(self, Z):
        return 1 / (1 + np.exp(-Z))
    
    # to calculate the derivative of the sigmoid function for the backward pass
    def derivative(self, A):
        return A * (1 - A)
    
    # to be used to finetune the initialized weight if sigmoid function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    

class Tanh(Activation):
    """ all the functions related to the tanh activation function """
    
    # the basic formula of the tanh function for the forward pass
    def formula(self, Z):
        return (np.exp(Z) - np.exp(-Z)) / (np.exp(Z) + np.exp(-Z))
    
    # to calculate the derivative of the tanh function for the backward pass
    def derivative(self, A):
        return 1 - A**2
    
    # to be used to finetune the initialized weight if tanh function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    
    
class Relu(Activation):
    """ all the functions related to the relu activation function """
    
    # the basic formula of the relu function for the forward pass
    def formula(self, Z):
        return (Z > 0) * Z
    
    # to calculate the derivative of the relu function for the backward pass
    def derivative(self, Z):
        return (Z > 0) * 1
    
    # to be used to finetune the initialized weight if relu function is set for the first layer
    def heuristic(self, layer_b4):
        return np.sqrt(2 / layer_b4) 

In [277]:
class Cost:
    """ the abstract class for all the cost functions """
    
    # calculate the cost function
    def formula(self, A, Y):
        raise NotImplementedError
    
    # calculate the derivative of the cost function (dA[L]) for the last layer
    def derivative(self, A, Y):
        raise NotImplementedError

class LossEntropy(Cost):
    """ Use Loss Entropy to calculate the cost """
    
    # calculate the Lose Entropy cost
    def formula(self, A, Y):
        self.m = Y.shape[1]
        print(self.m)
        print(f'Y: {Y}')
        print(f'A: {A}')
        return - np.sum((Y * np.log(A) + (1-Y) * np.log(1-A)), axis=1) / self.m
    
    # calculate the derivative of the Lost Entropy cost
    def derivative(self, A, Y):
        return - ((np.divide(Y, A)) - (np.divide(1-Y, 1-A)))

In [278]:
class Layer:
    """ the abstract class for all layer classes """
    
    def __init__(self):
        self.input = None
        self.output = None 
    
    # implement forward pass
    def forward_pass(self, input):
        raise NotImplementedError
        
    # implement backward pass
    def backward_pass(self, input):
        raise NotImplementedError

        
class FCLayer(Layer):
    
    # initialize parameters
    def __init__(self, layer_b4, layer_after, activation):

        self.W = np.random.randn(layer_after, layer_b4) * getattr(activation, 'heuristic')(self, layer_b4)
        self.b = np.zeros((layer_after, 1))
        print(f'initialized W: {self.W}')
        print(f'initialized b: {self.b}')
    
    # calculate forward pass: linear fn (Z = WX + b) and non-linear (A = g(Z))
    def forward_pass(self, X):#, activation):
        self.A_prev = X
        self.Z = np.dot(self.W, X) + self.b
        self.A = getattr(activation, 'formula')(self, self.Z)
        return self.A
    
    # calculate backward pass: 
    # dZ = dA * g'(Z))
    # dA[l-1] = W.T * dZ
    def backward_pass(self, dA, learning_rate):    
        print(f'shape of W: {np.shape(self.W)}')
        self.dZ = dA * getattr(activation, 'derivative')(self, self.A)
        pre = np.dot(self.W.T, self.dZ)
        
        print(f'shape of dZ: {np.shape(self.dZ)}')
        self.dW = np.dot(self.dZ, self.A_prev.T)
        print(f'shape of dW: {np.shape(self.dW)}')
        
        
        self.W -= learning_rate * self.dW
        self.b -= learning_rate * np.sum(self.dZ)
        print(f'updated W: {self.W}')
        print(f'updated b: {self.b}')
        
        return np.dot(self.W.T, self.dZ) # dA[l-1]        

In [279]:
class Network:
    """ build the whole L-layer DNN """
    
    def __init__(self):
        self.layers = []
    
    # combine individual layer to form the whole DNN network
    def combine(self, layer):
        self.layers.append(layer)
        
    # train the DNN network model
    def fit(self, X, iteration, loss_fn):
        
        for i in range(iteration):
            
            A = X
            for layer in self.layers:
                A = layer.forward_pass(A)
                print(f'A: {A}')
                
            cost = getattr(loss_fn, 'formula')(A, Y)
            print(f'cost: {cost}')
            
            dA = getattr(loss_fn, 'derivative')(A, Y)
            
            for layer in reversed(self.layers):
                dA = layer.backward_pass(dA, learning_rate)
                print(f'dA: {dA}')

In [280]:
# define variables
X_size = 4
learning_rate = 0.1
loss_fn = LossEntropy()
iteration = 1

# generate training dataset
ds = CreateDataset()
X = ds.create_X(X_size)
Y = ds.create_Y(X)

# define each layer and combine them to build the whole DNN network
net = Network()
net.combine(FCLayer(2, 3, Relu))
net.combine(FCLayer(3, 1, Sigmoid))

# train the DNN network model
net.fit(X, iteration, loss_fn)

initialized W: [[ 0.33045622  1.56135363]
 [ 0.00253582 -0.49874634]
 [ 2.14722607 -0.056105  ]]
initialized b: [[0.]
 [0.]
 [0.]]
initialized W: [[ 0.62100739 -0.57822457  0.36866233]]
initialized b: [[0.]]
A: [[0.86896175 0.8265475  0.5        0.5       ]
 [0.37843162 0.37783533 0.5        0.5       ]
 [0.89003719 0.48597743 0.5        0.5       ]]
A: [[0.65677074 0.61632534 0.55125002 0.55125002]]
4
Y: [[0 1 0 0]]
A: [[0.65677074 0.61632534 0.55125002 0.55125002]]
cost: [0.78897894]
shape of W: (1, 3)
shape of dZ: (1, 4)
shape of dW: (1, 3)
updated W: [[ 0.5429284  -0.64074293  0.27662113]]
updated b: [[-0.13193975]]
dA: [[ 0.35569975 -0.20057038  0.28060449  0.28060449]
 [-0.41978298  0.23670534 -0.33115848 -0.33115848]
 [ 0.18122844 -0.10219028  0.14296753  0.14296753]]
shape of W: (3, 2)
shape of dZ: (3, 4)
shape of dW: (3, 2)
updated W: [[ 0.3230517   1.55819581]
 [ 0.01266345 -0.49433007]
 [ 2.14348621 -0.0574352 ]]
updated b: [[-0.00441525]
 [-0.00441525]
 [-0.00441525]]
dA: [

In [281]:
cost = loss_fn.formula(A, Y)
print(f'cost: {cost}')

dA = loss_fn.derivative(A, Y)
print(f'dA: {dA}')
dA = layer.backward_pass(dA, learning_rate)
print(dA)

4
Y: [[0 1 0 0]]
A: [[0.75906421 0.58439413 0.58439413 0.58439413]]
cost: [0.92911006]
dA: [[ 4.15048342 -1.71117393  2.40612578  2.40612578]]
[[0.67487298 0.03095926]]
[[0.01965673]]
[[ 0.53819613 -0.22188914  0.40078088  0.40078088]
 [ 0.02468932 -0.01017899  0.0183855   0.0183855 ]]


In [282]:
actS = Sigmoid()
print(actS.formula(3))
print(actS.derivative(2))
print(actS.heuristic(2))

actT = Tanh()
print(actT.formula(3))
print(actT.derivative(2))
print(actT.heuristic(2))

actR = Relu()
print(actR.formula(3))
print(actR.derivative(2))
print(actR.heuristic(2))

0.9525741268224334
-2
0.7071067811865476
0.9950547536867306
-3
0.7071067811865476
3
1
1.0


In [283]:
np.random.rand(4,3)

array([[0.50833031, 0.84784872, 0.2268602 ],
       [0.50219847, 0.05825357, 0.37045537],
       [0.98577938, 0.00154966, 0.89227618],
       [0.22882473, 0.01854588, 0.51847913]])

In [284]:
np.shape(Y[0])

(4,)

In [285]:
a = np.arange(6).reshape((3, 2))
print(np.shape(a))

(3, 2)


In [286]:
a = a.reshape((2, 3))
print(np.shape(a))

(2, 3)


In [287]:
ds = CreateDataset()
X_size = 4
X = ds.create_X(X_size)
print(X)
Y = ds.create_Y(X)
print(Y)
print(np.shape(Y))
print(Y.shape[1])

[[1 0 0 0]
 [1 1 0 0]]
[[0 1 0 0]]
(1, 4)
4


In [288]:
X_size = 4
X = np.random.randint(2, size=(2,X_size))
print(X)
print(type(X))
Y = np.sum(X, axis=0)
print(Y)
print((Y != 1))
Y[Y != 1] = 0
print(Y)
print(np.shape(Y))

[[0 0 0 0]
 [0 1 1 0]]
<class 'numpy.ndarray'>
[0 1 1 0]
[ True False False  True]
[0 1 1 0]
(4,)
