In [1]:
import numpy as np

In [2]:
class CreateDataset:
    """ create the 2 XOR datasets, X & Y, for the DNN model """
    
    # create the entry dataset X
    def create_X(self, X_size):
        self.X = np.random.randint(2, size=(2, X_size))
        return self.X
    
    # create the label dataset Y
    def create_Y(self, X):
        self.Y = np.sum(X, axis=0)
        self.Y[self.Y != 1] = 0
        return self.Y

In [82]:
class Activation:
    """ the abstract class for all activation funtion classes"""
    
    # the basic formula of the activation function for the forward pass
    def formula(self, Z):
        raise NotImplementdError
    
    # to calculate the derivative of the activation function for the backward pass
    def derivative(self, input):
        raise NotImplementdError
    
    # to be used to finetune the initialized weight according to the activation function set for the first layer
    def heuristic(self, layer_dims):
        raise NotImplementdError
    

class Sigmoid(Activation):
    """ all the functions related to the sigmoid activation function """
    
    # the basic formula of the sigmoid function for the forward pass
    def formula(self, Z):
        return 1 / (1 + np.exp(-Z))
    
    # to calculate the derivative of the sigmoid function for the backward pass
    def derivative(self, Z):
        self.A = Sigmoid.formula(self, Z)
        return self.A * (1 - self.A)
    
    # to be used to finetune the initialized weight if sigmoid function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    

class Tanh(Activation):
    """ all the functions related to the tanh activation function """
    
    # the basic formula of the tanh function for the forward pass
    def formula(self, Z):
        return (np.exp(Z) - np.exp(-Z)) / (np.exp(Z) + np.exp(-Z))
    
    # to calculate the derivative of the tanh function for the backward pass
    def derivative(self, Z):
        self.A = Tanh.formula(self, Z)
        return 1 - self.A**2
    
    # to be used to finetune the initialized weight if tanh function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    
    
class Relu(Activation):
    """ all the functions related to the relu activation function """
    
    # the basic formula of the relu function for the forward pass
    def formula(self, Z):
        return (Z > 0) * Z
    
    # to calculate the derivative of the relu function for the backward pass
    def derivative(self, Z):
        return (Z > 0) * 1
    
    # to be used to finetune the initialized weight if relu function is set for the first layer
    def heuristic(self, layer_b4):
        return np.sqrt(2 / layer_b4) 

In [92]:
class Layer:
    """ the abstract class for all layer classes """
    
    def __init__(self):
        self.input = None
        self.output = None
    
    # implement forward pass
    def forward_pass(self, input):
        raise NotImplementedError
        
    # implement backward pass
    def backward_pass(self, input):
        raise NotImplementedError

        
class FCLayer(Layer):
    
    # initialize parameters
    def __init__(self, layer_b4, layer_after, activation):

        self.W = np.random.randn(layer_after, layer_b4) * getattr(activation, 'heuristic')(self, layer_b4)
        self.b = np.zeros((layer_after, 1))
        print(self.W)
        print(self.b)
    
    # calculate forward pass: linear fn (Z = WX + b) and non-linear (A = g(Z))
    def forward_pass(self, X, activation):
        
        self.Z = np.dot(self.W, X) + self.b
        self.A = getattr(activation, 'formula')(self, self.Z)
        return self.Z, self.A
    
    # calculate backward pass: 
    # dA[L] = -(Y/A) + ((1-Y)/(1-A))
    # dZ = dA * g'(Z))
    # dA[l-1] = W.T * dZ
    def backward_pass(self, dA):    
        self.dZ = dA * getattr(activation, 'derivative')(self, self.Z)
        return np.dot(self.W.T, self.dZ) # dA[l-1]        

In [93]:
activation = Sigmoid
layer = FCLayer(2, 2, activation)
Z, A = layer.forward_pass(X, activation)
print(Z, A)
# dA = layer.backward_pass(Y=Y)
dA = layer.backward_pass(dA)
print(dA)

[[0.54491188 0.49700392]
 [0.36887662 1.25592451]]
[[0.]
 [0.]]
[[0.54491188 0.49700392 1.0419158  0.54491188]
 [0.36887662 1.25592451 1.62480114 0.36887662]] [[0.63295431 0.62175498 0.73921949 0.63295431]
 [0.5911875  0.77832374 0.8354562  0.5911875 ]]
[[ 0.01169974  0.01389171 -0.00368094  0.01169974]
 [ 0.02182446  0.02317318 -0.00613412  0.02182446]]


In [16]:
ds = CreateDataset()
X_size = 4
X = ds.create_X(X_size)
print(X)
Y = ds.create_Y(X)
print(Y)

[[1 0 1 1]
 [0 1 1 0]]
[1 1 0 1]


In [150]:
X_size = 4
X = np.random.randint(2, size=(2,X_size))
print(X)
print(type(X))
Y = np.sum(X, axis=0)
print(Y)
print((Y != 1))
Y[Y != 1] = 0
print(Y)

[[0 1 1 0]
 [1 0 1 1]]
<class 'numpy.ndarray'>
[1 1 2 1]
[False False  True False]
[1 1 0 1]


In [54]:
actS = Sigmoid()
print(actS.formula(3))
print(actS.derivative(2))
print(actS.heuristic(2))

actT = Tanh()
print(actT.formula(3))
print(actT.derivative(2))
print(actT.heuristic(2))

actR = Relu()
print(actR.formula(3))
print(actR.derivative(2))
print(actR.heuristic(2))

0.9525741268224334
0.10499358540350662
0.7071067811865476
0.9950547536867306
0.0706508248531642
0.7071067811865476
3
1
1.0


In [153]:
np.random.rand(4,3)

array([[0.87558993, 0.93684569, 0.94847332],
       [0.96739116, 0.23928585, 0.85061525],
       [0.06319912, 0.46525894, 0.21063519],
       [0.9365597 , 0.49253958, 0.06764355]])

In [None]:
self.dA = - (np.divide(Y, self.A)) + (np.divide(1-Y, 1-self.A))