In [5]:
import numpy as np

In [108]:
''' create dataset '''
def create_dataset(col):
    """Generate a noisy two-input XOR dataset with `col` examples.

    Args:
        col: number of examples (columns) to generate.

    Returns:
        A tuple ``(X_output, Y)`` where ``X_output`` is a ``(2, col)`` float
        array holding random 0/1 inputs perturbed by Gaussian noise, and
        ``Y`` is a ``(1, col)`` integer array that is 1 where exactly one of
        the two clean inputs is 1 and 0 otherwise.
    """
    # clean binary inputs, one example per column
    bits = np.random.randint(2, size=(2, col))

    # label dataset: 1 only when exactly one of the two bits is set
    ones_per_example = bits.sum(axis=0, keepdims=True)
    labels = np.where(ones_per_example == 1, ones_per_example, 0)

    # perturb the inputs with standard-normal noise scaled down by 10
    noise = np.random.randn(2, col)

    return bits + noise / 10, labels

''' initialize parameters - W & b'''
def initialize_parameters(n_x, n_h, n_y):
    """Create the weights and biases of a network with one hidden layer.

    Args:
        n_x: number of input features.
        n_h: number of hidden units.
        n_y: number of output units.

    Returns:
        dict with keys ``'W1'`` (n_h, n_x), ``'b1'`` (n_h, 1),
        ``'W2'`` (n_y, n_h) and ``'b2'`` (n_y, 1). Weights are unscaled
        standard-normal draws; biases start at zero.
    """
    # The dict literal is evaluated top to bottom, so W1 is drawn before W2.
    return {
        'W1': np.random.randn(n_h, n_x),
        'b1': np.zeros((n_h, 1)),
        'W2': np.random.randn(n_y, n_h),
        'b2': np.zeros((n_y, 1)),
    }

''' activation function and its derivative '''
def activation_fn(Z):
    """Apply the logistic sigmoid ``1 / (1 + exp(-Z))`` element-wise."""
    denominator = 1 + np.exp(-Z)
    return 1 / denominator

def activation_fn_back(A):
    """Return ``A * (1 - A)``, the sigmoid derivative in terms of its output A."""
    complement = 1 - A
    return A * complement

''' forward propagation '''
def forward_linear(X, W, b):
    """Compute the affine transform ``W . X + b`` for one layer.

    Args:
        X: layer input.
        W: weight matrix applied on the left of ``X``.
        b: bias added (with broadcasting) to the product.

    Returns:
        The pre-activation values ``np.dot(W, X) + b``.
    """
    weighted_input = np.dot(W, X)
    return weighted_input + b

def forward_activation(X, parameters):
    """Run the forward pass through the hidden layer and the output layer.

    Args:
        X: network input, passed to the first linear step.
        parameters: dict providing ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.

    Returns:
        dict with the pre-activations ``'Z1'``, ``'Z2'`` and the activations
        ``'A1'``, ``'A2'`` of the two layers.
    """
    # hidden layer
    hidden_pre = forward_linear(X, parameters['W1'], parameters['b1'])
    hidden_out = activation_fn(hidden_pre)

    # output layer, fed by the hidden activations
    output_pre = forward_linear(hidden_out, parameters['W2'], parameters['b2'])
    output_out = activation_fn(output_pre)

    return {
        'Z1': hidden_pre,
        'A1': hidden_out,
        'Z2': output_pre,
        'A2': output_out,
    }

''' cost function '''
def cost(Y, A):
    """Mean binary cross-entropy between labels ``Y`` and predictions ``A``.

    Args:
        Y: label array of shape ``(1, m)``; ``m`` is read from ``Y.shape[1]``.
        A: predicted probabilities, broadcast-compatible with ``Y``.

    Returns:
        The scalar cost ``-sum(Y*log(A) + (1-Y)*log(1-A)) / m``.
    """
    m = Y.shape[1]

    # A sigmoid output can saturate to exactly 0.0 or 1.0 in floating point.
    # log(0) is -inf and 0 * -inf is nan, so keep the predictions strictly
    # inside (0, 1) before taking logarithms.
    eps = 1e-15
    A_safe = np.clip(np.asarray(A, dtype=np.float64), eps, 1 - eps)

    J = -np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe)) / m
    return J

''' backward propagation - calculate the gradients '''
def backward_pass(X, Y, cache, parameters=None):
    """Compute the gradients of the cost w.r.t. the weights and biases.

    Args:
        X: network input used in the forward pass.
        Y: label array; the number of examples ``m`` is ``Y.shape[1]``.
        cache: dict holding the forward-pass activations ``'A1'`` and ``'A2'``.
        parameters: dict holding ``'W2'``. Optional for backward
            compatibility: when omitted, the module-level ``parameters``
            variable is used, which is what this function relied on before.

    Returns:
        dict with ``'dW1'``, ``'db1'``, ``'dW2'`` and ``'db2'``. Each bias
        gradient is a column vector with one entry per unit of its layer.

    Raises:
        NameError: if ``parameters`` is not passed and no module-level
            ``parameters`` exists.
    """
    if parameters is None:
        # Older call sites pass only (X, Y, cache) and expect the global dict.
        parameters = globals().get('parameters')
        if parameters is None:
            raise NameError(
                "backward_pass needs `parameters`: pass it explicitly or "
                "define a module-level `parameters` dict"
            )

    grads = {}
    m = Y.shape[1]

    # last layer
    dZ2 = cache['A2'] - Y
    grads['dW2'] = np.dot(dZ2, cache['A1'].T) / m
    # Sum over examples only (axis=1) so each unit keeps its own bias gradient.
    grads['db2'] = np.sum(dZ2, axis=1, keepdims=True) / m

    # hidden layer
    dA1 = np.dot(parameters['W2'].T, dZ2)
    dZ1 = dA1 * activation_fn_back(cache['A1'])
    grads['dW1'] = np.dot(dZ1, X.T) / m
    # A plain np.sum(dZ1) would collapse all hidden units into one scalar and
    # push the same update into every hidden bias.
    grads['db1'] = np.sum(dZ1, axis=1, keepdims=True) / m

    return grads

''' update parameters - W & b'''
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias in place.

    Args:
        parameters: dict with ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``; its
            arrays are modified in place.
        grads: dict with the matching gradients ``'dW1'``, ``'db1'``,
            ``'dW2'``, ``'db2'``.
        learning_rate: step size multiplying each gradient.

    Returns:
        The same ``parameters`` dict that was passed in.
    """
    update_plan = (
        ('W2', 'dW2'),
        ('b2', 'db2'),
        ('W1', 'dW1'),
        ('b1', 'db1'),
    )
    for param_key, grad_key in update_plan:
        parameters[param_key] -= learning_rate * grads[grad_key]

    return parameters

In [136]:
''' train the model '''
# dataset size, layer sizes (input, hidden, output) and number of training steps
col = 5000
n_x = 2
n_h = 5
n_y = 1
iteration = 5000

# build the training set and the initial weights
X, Y = create_dataset(col)
parameters = initialize_parameters(n_x, n_h, n_y)

for i in range(iteration):
    # forward pass, cost, gradients, then one gradient-descent step
    cache = forward_activation(X, parameters)
    J = cost(Y, cache['A2'])
    grads = backward_pass(X, Y, cache)
    parameters = update_parameters(parameters, grads, learning_rate=0.5)

    # report the cost only on every 1000th iteration
    if i % 1000 != 0:
        continue
    print ('J: ', J)

J:  0.7051140445197301
J:  0.03648036782032345
J:  0.011931206778248805
J:  0.007112688035382337
J:  0.005087569483216505


In [139]:
''' predict the test dataset with the trained model '''
def predict(col, X, Y, parameters):
    """Score the trained network on the training set and a fresh test set.

    A new dataset of ``col`` examples is generated with ``create_dataset``
    and used as the test set. Network outputs above 0.5 are taken as class 1.

    Args:
        col: number of test examples to generate.
        X: training inputs.
        Y: training labels.
        parameters: trained weights and biases.

    Returns:
        A tuple ``(accuracy_train, accuracy_test)`` of 0/1 arrays marking,
        per example, whether the thresholded prediction equals the label.
    """
    X_test, Y_test = create_dataset(col)

    # thresholded predictions on the freshly generated test set
    test_probs = forward_activation(X_test, parameters)['A2']
    test_labels = (test_probs > 0.5) * 1

    # thresholded predictions on the training set
    train_probs = forward_activation(X, parameters)['A2']
    train_labels = (train_probs > 0.5) * 1

    return (train_labels == Y) * 1, (test_labels == Y_test) * 1

In [154]:
# per-example 0/1 hit arrays for the training set and a fresh test set
accuracy_train, accuracy_test = predict(col, X, Y, parameters)

# the mean of the hit arrays is the fraction classified correctly
print('Training Accuracy: ', accuracy_train.mean() * 100, '%')
print('Testing Accuracy: ', accuracy_test.mean() * 100, '%')

Training Accuracy:  100.0 %
Testing Accuracy:  100.0 %
