In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# BP neural network - numpy

## BP structure

In [2]:
# n: sample size 
# d_in: input layer size
# d_out: output layer size
# h: hidden layer size
n, d_in, h, d_out = 64, 1000, 100, 10

## Dataset

In [3]:
# dataset generation by random
x = np.random.randn(n, d_in)
y = np.random.randn(n, d_out)

## Set parameters

In [4]:
# initialize weights
# weights from input to hidden layer（1000，100）
w1 = np.random.randn(d_in, h)

# weights from hidden to output layer（100，10）
w2 = np.random.randn(h, d_out)

# learning rate
learning_rate = 1e-6

## BPNN model training

In [5]:
# 100 epoches
for t in range(100):
    # forwards
    temp = x.dot(w1)
    temp_relu = np.maximum(temp, 0)  # relu activation, non-linearization
    y_pred = temp_relu.dot(w2)

    # loss function - MSE
    loss = np.square(y_pred - y).sum() / n
    print(t, loss)

    # backwards, calculate gradient of w1 and w2 based on loss
    # gradient w2
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = temp_relu.T.dot(grad_y_pred)
    
    # gradient w1
    grad_temp_relu = grad_y_pred.dot(w2.T)
    grad_temp = grad_temp_relu.copy()
    grad_temp[temp<0] = 0
    grad_w1 = x.T.dot(grad_temp)

    # update weights based on w2, w1
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
print(w1, w2)

0 612473.0464094316
1 527570.3293214465
2 463622.9818347979
3 365090.20080773684
4 244727.7496867843
5 145342.5682949387
6 81952.55453747572
7 47877.69488775183
8 30326.601688909104
9 21071.081121765295
10 15785.19695869549
11 12459.273671767947
12 10170.032587660286
13 8480.463055376806
14 7170.3334272756765
15 6122.084452755678
16 5266.316886872414
17 4558.734667863551
18 3968.852395812167
19 3471.9388919676285
20 3050.9306593331235
21 2691.9453510842104
22 2383.768657787772
23 2117.9163233806466
24 1887.57775192046
25 1687.4761262672919
26 1512.6947579283242
27 1359.492898368278
28 1224.7849983201286
29 1105.882144472496
30 1000.7139829700245
31 907.470637942827
32 824.49461296252
33 750.3994382323934
34 684.1429481282348
35 624.750871290227
36 571.3536249166192
37 523.249842883954
38 479.8431716118615
39 440.5946965258881
40 405.03535155608307
41 372.78092899229534
42 343.4638140566237
43 316.7817855947699
44 292.4531906873727
45 270.24229664092195
46 249.94636303768357
47 231.3575

# Boston house price prediction - BPNN

In [6]:
from sklearn.datasets import load_boston
# load dataset
data = load_boston()

In [7]:
X_ = data['data']
y = data['target']

In [8]:
y = y.reshape(y.shape[0], 1)  # matrix transform for calulating

In [9]:
# data regularization
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

## NN model parameter initialization

In [10]:
# parameter initialization
n_features = X_.shape[1]  # feature dimensionality
n_hidden = 10

# from input to hidden layer
w1 = np.random.randn(n_features, n_hidden)
b1 = np.zeros(n_hidden)

# from hidden to output layer
w2 = np.random.randn(n_hidden, 1)
b2 = np.zeros(1)

In [11]:
learning_rate = 1e-6

## loss function

In [12]:
def Relu(x):
    result = np.where(x < 0, 0, x)
    return result

def MSE_loss(y, y_hat):
    return np.mean(np.square(y_hat - y))

def Linear(X, w1, b1):
    y = X.dot(w1) + b1
    return y

## BP model training (3 layers)

In [13]:
# 200 epoches
for t in range(200):
    # forwards
    l1 = Linear(X_, w1, b1)
    s1 = Relu(l1)
    y_pred = Linear(s1, w2, b2)

    # calculate loss
    loss = MSE_loss(y, y_pred)
    print(t, loss)

    # backwords
    # gradient of w2
    grad_y_pred = 2.0 * (y_pred - y)  # gradient of MSE
    grad_w2 = s1.T.dot(grad_y_pred)
    # gradient of w1  
    grad_temp_relu = grad_y_pred.dot(w2.T)
    grad_temp_relu[l1 < 0] = 0
    grad_w1 = X_.T.dot(grad_temp_relu)

    # update weight w1, w2
    w1 = w1 - learning_rate * grad_w1
    w2 = w2 - learning_rate * grad_w2

print('w1={}\n w2 ={}'.format(w1, w2))

0 427.5431428238139
1 418.33981250886643
2 409.42910832675955
3 400.7639632512896
4 392.3167379055646
5 384.0535827095942
6 375.94481974483335
7 367.97530247926954
8 360.1208908437593
9 352.35937864147724
10 344.67943852176955
11 337.05922115162906
12 329.46489798058684
13 321.91251382817075
14 314.41921771963615
15 306.987308017617
16 299.6018834023191
17 292.27101645037504
18 284.97987023504567
19 277.72644611524044
20 270.5080566744426
21 263.32791634497823
22 256.21118506077113
23 249.16259787135303
24 242.19835019698607
25 235.32195790351022
26 228.52529917452622
27 221.7970012389598
28 215.18990980112972
29 208.7156843775026
30 202.38012448529173
31 196.18491344691026
32 190.1294402688165
33 184.21870484408635
34 178.4453987318382
35 172.81593885825126
36 167.3554864549168
37 162.06769849757873
38 156.97166081418368
39 152.06777681962933
40 147.3486206019486
41 142.80895482820128
42 138.45583959629843
43 134.2905378536847
44 130.31091335436741
45 126.50980241766416
46 122.8818438