In [1]:
import numpy as np

## Calculating by "hand" to make the next section easier to understand 

In [2]:
X = np.array([[0, 0, 1], [1, 1, 1], [1, 0, 1], [0, 1, 1]])  ## x1 AND (x2 OR x3)
Y = np.array([[0, 1, 1, 0]]).reshape(-1, 1)

n_records, n_features = X.shape
n_hidden = 2
lr = 0.01

In [3]:
model = dict()

model['W1'] = np.random.normal(scale= 1 / n_features ** .5, size = (n_features, n_hidden)) ## Weights for inp_layer to hidden_layer
model['W2'] = np.random.normal(scale = 1 / n_features ** .5 , size = (n_hidden,1)) ## Weights for hidden_to_output layer
model['b1'] = np.zeros(shape=[1, n_hidden], dtype=np.float32)
model['b2'] = np.array([0.]).reshape(1,1)

In [4]:
def sigmoid(x):
    return 1. / (1 + np.exp(-x))

def sse(y, y_hat):
    return np.mean(np.square(y - y_hat))

### Forward Pass

In [5]:
def forward(X, model):
    W1, W2, b1, b2 = model['W1'], model['W2'], model['b1'], model['b2']
    z1 = np.dot(X, W1) + b1 ## hidden layer input
    a1 = sigmoid(z1)        ## hidden layer input
    z2 = np.dot(a1, W2) + b2 ## output layer input
    a2 = sigmoid(z2) ## output layer ... uhm, output
    
    output = (a2 > .5).astype(np.int)
    return a1, a2, output

In [6]:
a1, a2, output = forward(X, model)

In [7]:
print(sse(Y, output))

0.5


### Backprop now

In [8]:
def backprop(Y, a1, a2, output, model):
    W1, W2, b1, b2 = model['W1'], model['W2'], model['b1'], model['b2']
    error = Y - output
    out_error_term = error * a2 * (1-a2)
    hidden_error = np.dot(out_error_term, W2.T)
    hidden_error_term = hidden_error * a1 * (1-a1)
    dW2 = 0.01 * np.dot(a1.T, out_error_term)
    db2 = np.sum(out_error_term, axis=0, keepdims=True)
    dW1 = 0.01 * np.dot(X.T, hidden_error_term)
    db1 = np.sum(hidden_error_term, axis=0, keepdims=True)
    
    return dW1, db1, dW2, db2

In [9]:
dW1, db1, dW2, db2 = backprop(Y, a1, a2, output, model)

In [10]:
print(model['W1'])
print('-' * 13)
print(model['b1'])
print('-' * 27)
print(model['W2'])
print('-' * 13)
print(model['b2'])
print('=' * 13)
model['W1'] += dW1
model['b1'] += db1
model['W2'] += dW2
model['b2'] += db2
print(model['W1'])
print('-' * 13)
print(model['b1'])
print('-' * 27)
print(model['W2'])
print('-' * 13)
print(model['b2'])

[[-1.34969494 -0.55405819]
 [ 0.39736477 -0.04288576]
 [ 0.03181855 -0.16498714]]
-------------
[[0. 0.]]
---------------------------
[[-0.61562031]
 [ 0.85916161]]
-------------
[[0.]]
[[-1.34969494 -0.55405819]
 [ 0.39773233 -0.04341696]
 [ 0.03257014 -0.16605079]]
-------------
[[ 0.07515879 -0.10636501]]
---------------------------
[[-0.61840223]
 [ 0.85689589]]
-------------
[[-0.49957581]]


In [11]:
for epoch in range(2000):
    a1, a2, output = forward(X, model)
    dW1, db1, dW2, db2 = backprop(Y, a1, a2, output, model)
    
    model['W1'] += dW1
    model['b1'] += db1
    model['W2'] += dW2
    model['b2'] += db2
    
    if epoch%100 == 0:
        print("Loss : {}".format(sse(Y, output)))

Loss : 0.5
Loss : 0.5
Loss : 0.25
Loss : 0.25
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0
Loss : 0.0


In [12]:
print(model['W1'])
print('-' * 13)
print(model['b1'])
print('-' * 27)
print(model['W2'])
print('-' * 13)
print(model['b2'])

[[-1.46073205 -0.49124061]
 [ 0.34423218 -0.03633192]
 [ 0.03659119 -0.18896203]]
-------------
[[ 0.4772635 -2.397489 ]]
---------------------------
[[-0.81111695]
 [ 0.80665429]]
-------------
[[0.45032151]]


In [13]:
test_point = [1,1,0]
y = [1]
a1, a2, output = forward(test_point, model)

In [14]:
print(y)
print(output)

[1]
[[1]]
