In [1]:
import numpy as np

def generate_linear(n: int = 100):
    pts = np.random.uniform(0, 1, (n, 2))
    inputs, labels = [], []
    for pt in pts:
        inputs.append([pt[0], pt[1]])
        if pt[0] > pt[1]:
            labels.append(0)
        else:
            labels.append(1)
    return np.array(inputs), np.array(labels).reshape(n, 1)

def generate_xor_easy(n: int = 11):
    inputs, labels = [], []
    for i in range(n):
        inputs.append([0.1 * i, 0.1 * i])
        labels.append(0)
        if 0.1 * i == 0.5:
            continue
        inputs.append([0.1 * i, 1 - 0.1 * i])
        labels.append(1)
    return np.array(inputs), np.array(labels).reshape(21, 1)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivative_sigmoid(x):
    return np.multiply(x, 1.0 - x)

def leaky_ReLU(x):
    return np.where(x > 0, x, 0.01 * x)

def derivative_leaky_ReLU(x):
    return np.where(x > 0, 1, 0.01)

def show_result(x, y, pred_y):
    import matplotlib.pyplot as plt
    plt.subplot(1,2,1)
    plt.title('Ground truth', fontsize=18)
    for i in range(x.shape[0]): # x.shape = (100, 2)
        if y[i] == 0:
            plt.plot(x[i][0], x[i][1], 'ro')
        else:
            plt.plot(x[i][0], x[i][1], 'bo')    
    plt.subplot(1,2,2)
    plt.title('Predict result', fontsize=18)
    for i in range(x.shape[0]):
        if pred_y[i] == 0:
            plt.plot(x[i][0], x[i][1], 'ro')
        else:
            plt.plot(x[i][0], x[i][1], 'bo')     
    plt.show()

def show_loss(loss):
    import matplotlib.pyplot as plt
    plt.xlabel("epoch")
    plt.ylabel("Loss")
    plt.plot(loss)

In [3]:
# sigmoid activation

np.random.seed(5)
# x_train, y_train = generate_linear(n = 100) 
x_train, y_train = generate_xor_easy()

layer_num = [2, 10, 10, 1]
epoch = 2000
learning_rate = 0.05

W1 = np.random.randn(layer_num[0], layer_num[1])
W2 = np.random.randn(layer_num[1], layer_num[2])
W3 = np.random.randn(layer_num[2], layer_num[3])
loss = []

# train

for i in range(1,epoch+1):

    # forward
    H1 = np.dot(x_train, W1)
    Z1 = sigmoid(H1)
    H2 = np.dot(Z1, W2)
    Z2 = sigmoid(H2)
    H3 = np.dot(Z2, W3)
    y_pred = sigmoid(H3)

    # calculate loss
    loss.append(np.mean((y_pred - y_train) ** 2))
    if i % 100 == 0:
        print('epoch ' + str(i) + ' loss : ' + str(round(loss[i-1], 8)))

    # backward
    cache_y = derivative_sigmoid(y_pred) * 2 * (y_pred - y_train)
    dW3 = np.dot(Z2.T, cache_y)
    temp1 = np.dot(cache_y, W3.T)
    cache_layer2 = derivative_sigmoid(Z2) * temp1
    dW2 = np.dot(Z1.T, cache_layer2)
    temp2 = np.dot(cache_layer2, W2.T)
    cache_layer1 = derivative_sigmoid(Z1) * temp2
    dW1 = np.dot(x_train.T, cache_layer1)

    # update weight
    W1 -= learning_rate * dW1
    W2 -= learning_rate * dW2
    W3 -= learning_rate * dW3

# test

H1 = np.dot(x_train, W1)
Z1 = sigmoid(H1)
H2 = np.dot(Z1, W2)
Z2 = sigmoid(H2)
H3 = np.dot(Z2, W3)
y_pred = sigmoid(H3)
print()
for i in range(y_train.shape[0]):
    print('Iter '+str(i)+'| Ground truth: ' + str(y_train[i][0]) + ' | prdiction: ' + str(round(y_pred[i][0], 8)))

test_loss = np.mean((y_pred - y_train) ** 2)
y_pred = np.round(y_pred)
acc = np.sum((y_train == y_pred) / y_train.shape[0]) * 100
print('loss = ' + str(test_loss) + ' , accuracy = ' + str(round(acc)) + '%')

# show_result(x_train, y_train, y_pred)
# show_loss(loss)

epoch 100 loss : 0.25042799
epoch 200 loss : 0.24618281
epoch 300 loss : 0.24159749
epoch 400 loss : 0.23351941
epoch 500 loss : 0.21586073
epoch 600 loss : 0.18397518
epoch 700 loss : 0.14309598
epoch 800 loss : 0.10716997
epoch 900 loss : 0.08249298
epoch 1000 loss : 0.06626331
epoch 1100 loss : 0.05510333
epoch 1200 loss : 0.04690807
epoch 1300 loss : 0.04047374
epoch 1400 loss : 0.03513331
epoch 1500 loss : 0.03052624
epoch 1600 loss : 0.0264618
epoch 1700 loss : 0.02284576
epoch 1800 loss : 0.01963733
epoch 1900 loss : 0.0168192
epoch 2000 loss : 0.01437722

Iter 0| Ground truth: 0 | prdiction: 0.03631923
Iter 1| Ground truth: 1 | prdiction: 0.97786805
Iter 2| Ground truth: 0 | prdiction: 0.03342104
Iter 3| Ground truth: 1 | prdiction: 0.99090016
Iter 4| Ground truth: 0 | prdiction: 0.04540537
Iter 5| Ground truth: 1 | prdiction: 0.99441914
Iter 6| Ground truth: 0 | prdiction: 0.1048517
Iter 7| Ground truth: 1 | prdiction: 0.98784688
Iter 8| Ground truth: 0 | prdiction: 0.2063212


In [2]:
# without activation

np.random.seed(5)
x_train, y_train = generate_linear(n = 100) 
# x_train, y_train = generate_xor_easy()

layer_num = [2, 10, 10, 1]
epoch = 2000
learning_rate = 0.00025

W1 = np.random.randn(layer_num[0], layer_num[1])
W2 = np.random.randn(layer_num[1], layer_num[2])
W3 = np.random.randn(layer_num[2], layer_num[3])
loss = []

# train

for i in range(1,epoch+1):

    # forward
    H1 = np.dot(x_train, W1)
    H2 = np.dot(H1, W2)
    y_pred = np.dot(H2, W3)

    # calculate loss
    loss.append(np.mean((y_pred - y_train) ** 2))
    if i % 100 == 0 :
        print('epoch ' + str(i) + ' loss : ' + str(loss[i-1]))

    # backward
    dW3 = np.dot(H2.T, (y_pred - y_train))
    cache_layer2 = np.dot((y_pred - y_train), W3.T)
    dW2 = np.dot(H1.T, cache_layer2)
    cache_layer1 = np.dot(cache_layer2, W2.T)
    dW1 = np.dot(x_train.T, cache_layer1)

    # update weight
    W1 -= learning_rate * dW1
    W2 -= learning_rate * dW2
    W3 -= learning_rate * dW3

# test

H1 = np.dot(x_train, W1)
H2 = np.dot(H1, W2)
y_pred = np.dot(H2, W3)
print()
for i in range(y_train.shape[0]) :
    print('Iter '+str(i)+'| Ground truth: ' + str(y_train[i][0]) + ' | prediction: ' + str(y_pred[i][0]))

test_loss = np.mean((y_pred - y_train) ** 2)
y_pred = np.round(y_pred)
acc = np.sum((y_train == y_pred) / y_train.shape[0]) * 100
print('loss = ' + str(test_loss) + ' , accuracy = ' + str(round(acc)) + '%')

# show_result(x_train, y_train, y_pred)
# show_loss(loss)

epoch 100 loss : 0.12887728392156628
epoch 200 loss : 0.12887728392156414
epoch 300 loss : 0.1288772839215642
epoch 400 loss : 0.1288772839215642
epoch 500 loss : 0.1288772839215642
epoch 600 loss : 0.1288772839215642
epoch 700 loss : 0.1288772839215642
epoch 800 loss : 0.1288772839215642
epoch 900 loss : 0.1288772839215642
epoch 1000 loss : 0.1288772839215642
epoch 1100 loss : 0.1288772839215642
epoch 1200 loss : 0.1288772839215642
epoch 1300 loss : 0.1288772839215642
epoch 1400 loss : 0.1288772839215642
epoch 1500 loss : 0.1288772839215642
epoch 1600 loss : 0.1288772839215642
epoch 1700 loss : 0.1288772839215642
epoch 1800 loss : 0.1288772839215642
epoch 1900 loss : 0.1288772839215642
epoch 2000 loss : 0.1288772839215642

Iter 0| Ground truth: 1 | prediction: 1.1605058740170306
Iter 1| Ground truth: 1 | prediction: 0.8640129379820698
Iter 2| Ground truth: 0 | prediction: 0.26643786160828226
Iter 3| Ground truth: 0 | prediction: -0.1783674808585788
Iter 4| Ground truth: 1 | prediction

In [3]:
# momentum optimizer

np.random.seed(5)
x_train, y_train = generate_linear(n = 100) 
# x_train, y_train = generate_xor_easy()

layer_num = [2, 10, 10, 1]
epoch = 2000
learning_rate = 0.05

W1 = np.random.randn(layer_num[0], layer_num[1])
W2 = np.random.randn(layer_num[1], layer_num[2])
W3 = np.random.randn(layer_num[2], layer_num[3])
v1 = 0
v2 = 0
v3 = 0
beta = 0.9
loss = []

# train

for i in range(1,epoch+1):

    # forward
    H1 = np.dot(x_train, W1)
    Z1 = sigmoid(H1)
    H2 = np.dot(Z1, W2)
    Z2 = sigmoid(H2)
    H3 = np.dot(Z2, W3)
    y_pred = sigmoid(H3)

    # calculate loss
    loss.append(np.mean((y_pred - y_train) ** 2))
    if i % 100 == 0:
        print('epoch ' + str(i) + ' loss : ' + str(round(loss[i-1], 8)))

    # backward
    cache_y = derivative_sigmoid(y_pred) * 2 * (y_pred - y_train)
    dW3 = np.dot(Z2.T, cache_y)
    temp1 = np.dot(cache_y, W3.T)
    cache_layer2 = derivative_sigmoid(Z2) * temp1
    dW2 = np.dot(Z1.T, cache_layer2)
    temp2 = np.dot(cache_layer2, W2.T)
    cache_layer1 = derivative_sigmoid(Z1) * temp2
    dW1 = np.dot(x_train.T, cache_layer1)

    # update weight
    v1 = beta * v1 - learning_rate * dW1
    v2 = beta * v2 - learning_rate * dW2
    v3 = beta * v3 - learning_rate * dW3
    W1 += v1
    W2 += v2
    W3 += v3

# test

H1 = np.dot(x_train, W1)
Z1 = sigmoid(H1)
H2 = np.dot(Z1, W2)
Z2 = sigmoid(H2)
H3 = np.dot(Z2, W3)
y_pred = sigmoid(H3)
print()
for i in range(y_train.shape[0]):
    print('Iter '+str(i)+'| Ground truth: ' + str(y_train[i][0]) + ' | prdiction: ' + str(round(y_pred[i][0], 8)))

test_loss = np.mean((y_pred - y_train) ** 2)
y_pred = np.round(y_pred)
acc = np.sum((y_train == y_pred) / y_train.shape[0]) * 100
print('loss = ' + str(test_loss) + ' , accuracy = ' + str(round(acc)) + '%')

# show_result(x_train, y_train, y_pred)
# show_loss(loss)

epoch 100 loss : 0.00144906
epoch 200 loss : 0.00062289
epoch 300 loss : 0.00037615
epoch 400 loss : 0.00025906
epoch 500 loss : 0.00019284
epoch 600 loss : 0.00015116
epoch 700 loss : 0.00012293
epoch 800 loss : 0.00010275
epoch 900 loss : 8.772e-05
epoch 1000 loss : 7.616e-05
epoch 1100 loss : 6.704e-05
epoch 1200 loss : 5.968e-05
epoch 1300 loss : 5.364e-05
epoch 1400 loss : 4.861e-05
epoch 1500 loss : 4.436e-05
epoch 1600 loss : 4.072e-05
epoch 1700 loss : 3.759e-05
epoch 1800 loss : 3.486e-05
epoch 1900 loss : 3.246e-05
epoch 2000 loss : 3.035e-05

Iter 0| Ground truth: 1 | prdiction: 1.0
Iter 1| Ground truth: 1 | prdiction: 1.0
Iter 2| Ground truth: 1 | prdiction: 1.0
Iter 3| Ground truth: 0 | prdiction: 0.0
Iter 4| Ground truth: 0 | prdiction: 1e-08
Iter 5| Ground truth: 1 | prdiction: 1.0
Iter 6| Ground truth: 0 | prdiction: 0.0
Iter 7| Ground truth: 0 | prdiction: 0.0
Iter 8| Ground truth: 0 | prdiction: 0.0
Iter 9| Ground truth: 0 | prdiction: 4.56e-06
Iter 10| Ground truth: 

In [37]:
# leaky_ReLU activation

np.random.seed(5)
x_train, y_train = generate_linear(n = 100) 
# x_train, y_train = generate_xor_easy()

layer_num = [2, 10, 10, 1]
epoch = 2000
learning_rate = 0.01

W1 = np.random.randn(layer_num[0], layer_num[1])
W2 = np.random.randn(layer_num[1], layer_num[2])
W3 = np.random.randn(layer_num[2], layer_num[3])
loss = []

# train

for i in range(1,epoch+1):

    # forward
    H1 = np.dot(x_train, W1)
    Z1 = leaky_ReLU(H1)
    H2 = np.dot(Z1, W2)
    Z2 = leaky_ReLU(H2)
    H3 = np.dot(Z2, W3)
    y_pred = leaky_ReLU(H3)

    # calculate loss
    loss.append(np.mean((y_pred - y_train) ** 2))
    if i % 100 == 0:
        print('epoch ' + str(i) + ' loss : ' + str(loss[i-1]))

    # backward
    temp1 = derivative_leaky_ReLU(y_pred) * (y_pred - y_train)
    dW3 = np.dot(Z2.T, temp1)
    cache_layer2 = np.dot(temp1, W3.T)
    temp2 = derivative_leaky_ReLU(Z2) * cache_layer2
    dW2 = np.dot(Z1.T, temp2)
    cache_layer1 = np.dot(temp2, W2.T)
    dW1 = np.dot(x_train.T, cache_layer1)

    # update weight
    W1 -= learning_rate * dW1
    W2 -= learning_rate * dW2
    W3 -= learning_rate * dW3
# test

H1 = np.dot(x_train, W1)
Z1 = leaky_ReLU(H1)
H2 = np.dot(Z1, W2)
Z2 = leaky_ReLU(H2)
H3 = np.dot(Z2, W3)
y_pred = leaky_ReLU(H3)
print()
for i in range(y_train.shape[0]):
    print('Iter '+str(i)+'| Ground truth: ' + str(y_train[i][0]) + ' | prdiction: ' + str(round(y_pred[i][0], 8)))

test_loss = np.mean((y_pred - y_train) ** 2)
y_pred = np.round(y_pred)
acc = np.sum((y_train == y_pred) / y_train.shape[0]) * 100
print('loss = ' + str(test_loss) + ' , accuracy = ' + str(round(acc)) + '%')

# show_result(x_train, y_train, y_pred)
# show_loss(loss)

epoch 100 loss : 0.09104941890115141
epoch 200 loss : 0.08721681743385858
epoch 300 loss : 0.08534520280407179
epoch 400 loss : 0.08607306280302195
epoch 500 loss : 0.09141405262615404
epoch 600 loss : 0.08721693692219007
epoch 700 loss : 0.08517860759370142
epoch 800 loss : 0.08391944342643778
epoch 900 loss : 0.08374030612935826
epoch 1000 loss : 0.08278563425321935
epoch 1100 loss : 0.08321132497914555
epoch 1200 loss : 0.08362352998249384
epoch 1300 loss : 0.08403783172337834
epoch 1400 loss : 0.08838347073859207
epoch 1500 loss : 0.08776495371868022
epoch 1600 loss : 0.09670664651718726
epoch 1700 loss : 0.09485184099483865
epoch 1800 loss : 0.09055051163136861
epoch 1900 loss : 0.09154469186811287
epoch 2000 loss : 0.08778532557749141
[[ 2.78097077e-05 -6.53816744e-06 -2.47127106e-03 -8.56067717e-05
   3.53443594e-03  2.66629394e-05 -1.25253352e-04  3.52447694e-03
  -2.75397240e-05  1.41701471e-05]
 [-9.65997450e-03 -9.53263996e-05 -3.66046004e-02 -3.90965340e-04
   3.94494930e-0