In [1]:
import numpy as np

In [129]:
class CreateDataset:
    """ create the 2 XOR datasets, X & Y, for the DNN model """
    
    # create the entry dataset X
    def create_X(self, X_size):
        self.X = np.random.randint(2, size=(2, X_size))
        return self.X
    
    # create the label dataset Y
    def create_Y(self, X):
        self.Y = np.sum(X, axis=0)
        self.Y[self.Y != 1] = 0
        return self.Y.reshape((1,4))

In [82]:
class Activation:
    """ the abstract class for all activation funtion classes"""
    
    # the basic formula of the activation function for the forward pass
    def formula(self, Z):
        raise NotImplementdError
    
    # to calculate the derivative of the activation function for the backward pass
    def derivative(self, input):
        raise NotImplementdError
    
    # to be used to finetune the initialized weight according to the activation function set for the first layer
    def heuristic(self, layer_dims):
        raise NotImplementdError
    

class Sigmoid(Activation):
    """ all the functions related to the sigmoid activation function """
    
    # the basic formula of the sigmoid function for the forward pass
    def formula(self, Z):
        return 1 / (1 + np.exp(-Z))
    
    # to calculate the derivative of the sigmoid function for the backward pass
    def derivative(self, Z):
        self.A = Sigmoid.formula(self, Z)
        return self.A * (1 - self.A)
    
    # to be used to finetune the initialized weight if sigmoid function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    

class Tanh(Activation):
    """ all the functions related to the tanh activation function """
    
    # the basic formula of the tanh function for the forward pass
    def formula(self, Z):
        return (np.exp(Z) - np.exp(-Z)) / (np.exp(Z) + np.exp(-Z))
    
    # to calculate the derivative of the tanh function for the backward pass
    def derivative(self, Z):
        self.A = Tanh.formula(self, Z)
        return 1 - self.A**2
    
    # to be used to finetune the initialized weight if tanh function is set for the first layer
    def heuristic(self, layer_dims):
        return np.sqrt(1 / layer_dims)
    
    
class Relu(Activation):
    """ all the functions related to the relu activation function """
    
    # the basic formula of the relu function for the forward pass
    def formula(self, Z):
        return (Z > 0) * Z
    
    # to calculate the derivative of the relu function for the backward pass
    def derivative(self, Z):
        return (Z > 0) * 1
    
    # to be used to finetune the initialized weight if relu function is set for the first layer
    def heuristic(self, layer_b4):
        return np.sqrt(2 / layer_b4) 

In [152]:
class Layer:
    """ the abstract class for all layer classes """
    
    def __init__(self):
        self.input = None
        self.output = None
    
    # implement forward pass
    def forward_pass(self, input):
        raise NotImplementedError
        
    # implement backward pass
    def backward_pass(self, input):
        raise NotImplementedError

        
class FCLayer(Layer):
    
    # initialize parameters
    def __init__(self, layer_b4, layer_after, activation):

        self.W = np.random.randn(layer_after, layer_b4) * getattr(activation, 'heuristic')(self, layer_b4)
        self.b = np.zeros((layer_after, 1))
        print(self.W)
        print(self.b)
    
    # calculate forward pass: linear fn (Z = WX + b) and non-linear (A = g(Z))
    def forward_pass(self, X):#, activation):
        
        self.Z = np.dot(self.W, X) + self.b
        self.A = getattr(activation, 'formula')(self, self.Z)
        return self.Z, self.A
    
    # calculate backward pass: 
    # dA[L] = -(Y/A) + ((1-Y)/(1-A))
    # dZ = dA * g'(Z))
    # dA[l-1] = W.T * dZ
    def backward_pass(self, dA, learning_rate):    
        self.dZ = dA * getattr(activation, 'derivative')(self, self.Z)
        self.dW = np.dot(self.dZ, self.A.T)
        
        self.W -= learning_rate * self.dW
        self.b -= learning_rate * np.sum(self.dZ)
        print(self.W)
        print(self.b)
        
        return np.dot(self.W.T, self.dZ) # dA[l-1]        

In [173]:
class Cost:
    """ the abstract class for all the cost functions """
    
    # calculate the cost function
    def formula(self, A, Y):
        raise NotImplementedError
    
    # calculate the derivative of the cost function (dA[L]) for the last layer
    def derivative(self, A, Y):
        raise NotImplementedError

class LossEntropy(Cost):
    """ Use Loss Entropy to calculate the cost """
    
    # calculate the Lose Entropy cost
    def formula(self, A, Y):
        self.m = Y.shape[1]
        print(self.m)
        print(f'Y: {Y}')
        print(f'A: {A}')
        return - np.sum((Y * np.log(A) + (1-Y) * np.log(1-A)), axis=1) / self.m
    
    # calculate the derivative of the Lost Entropy cost
    def derivative(self, A, Y):
        return - (np.divide(Y, A)) + (np.divide(1-Y, 1-A))

In [177]:
activation = Sigmoid
learning_rate = 0.1
loss_fn = LossEntropy()

layer = FCLayer(2, 1, activation)

Z, A = layer.forward_pass(X)
print(Z, A)

cost = loss_fn.formula(A, Y)
print(f'cost: {cost}')

dA = loss_fn.derivative(A, Y)
print(f'dA: {dA}')
dA = layer.backward_pass(dA, learning_rate)
print(dA)

[[0.70970015 0.06578643]]
[[0.]]
[[0.06578643 0.06578643 0.70970015 0.06578643]] [[0.51644068 0.51644068 0.6703349  0.51644068]]
4
Y: [[1 1 1 1]]
A: [[0.51644068 0.51644068 0.6703349  0.51644068]]
cost: [0.5955906]
dA: [[-1.93633082 -1.93633082 -1.49179164 -1.93633082]]
[[0.80671766 0.16280394]]
[[0.17803431]]
[[-0.39009584 -0.39009584 -0.26594666 -0.39009584]
 [-0.07872536 -0.07872536 -0.05367078 -0.07872536]]


In [145]:
actS = Sigmoid()
print(actS.formula(3))
print(actS.derivative(2))
print(actS.heuristic(2))

actT = Tanh()
print(actT.formula(3))
print(actT.derivative(2))
print(actT.heuristic(2))

actR = Relu()
print(actR.formula(3))
print(actR.derivative(2))
print(actR.heuristic(2))

0.9525741268224334
0.10499358540350662
0.7071067811865476
0.9950547536867306
0.0706508248531642
0.7071067811865476
3
1
1.0


In [153]:
np.random.rand(4,3)

array([[0.87558993, 0.93684569, 0.94847332],
       [0.96739116, 0.23928585, 0.85061525],
       [0.06319912, 0.46525894, 0.21063519],
       [0.9365597 , 0.49253958, 0.06764355]])

In [110]:
np.shape(Y[0])

()

In [124]:
a = np.arange(6).reshape((3, 2))
print(np.shape(a))

(3, 2)


In [125]:
a = a.reshape((2, 3))
print(np.shape(a))

(2, 3)


In [135]:
ds = CreateDataset()
X_size = 4
X = ds.create_X(X_size)
print(X)
Y = ds.create_Y(X)
print(Y)
print(np.shape(Y))
print(Y.shape[1])

[[0 0 1 0]
 [1 1 0 1]]
[[1 1 1 1]]
(1, 4)
4


In [123]:
X_size = 4
X = np.random.randint(2, size=(2,X_size))
print(X)
print(type(X))
Y = np.sum(X, axis=0)
print(Y)
print((Y != 1))
Y[Y != 1] = 0
print(Y)
print(np.shape(Y))

[[0 1 1 1]
 [0 0 1 1]]
<class 'numpy.ndarray'>
[0 1 2 2]
[ True False  True  True]
[0 1 0 0]
(4,)
