In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# BP neural network modelling

## BP structure

In [2]:
# n: sample size 
# d_in: input layer size
# d_out: output layer size
# h: hidden layer size
n, d_in, h, d_out = 64, 1000, 100, 10

## Dataset

In [3]:
# dataset generation by random
x = np.random.randn(n, d_in)
y = np.random.randn(n, d_out)

## Set parameters

In [4]:
# initialize weights
# weights from input to hidden layer（1000，100）
w1 = np.random.randn(d_in, h)

# weights from hidden to output layer（100，10）
w2 = np.random.randn(h, d_out)

# learning rate
learning_rate = 1e-6

## BPNN model training

In [5]:
# 100 epoches
for t in range(100):
    # forwards
    temp = x.dot(w1)
    temp_relu = np.maximum(temp, 0)  # relu activation, non-linearization
    y_pred = temp_relu.dot(w2)

    # loss function- MSE
    loss = np.square(y_pred - y).sum() / n
    print(t, loss)

    # backwards, calculate gradient of w1 and w2 based on loss
    # gradient w2
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = temp_relu.T.dot(grad_y_pred)
    
    # gradient w1
    grad_temp_relu = grad_y_pred.dot(w2.T)
    grad_temp = grad_temp_relu.copy()
    grad_temp[temp<0] = 0
    grad_w1 = x.T.dot(grad_temp)

    # update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
print(w1, w2)
print(w1.shape)

0 545324.8243799373
1 546238.5522806306
2 613728.5977766882
3 625711.5956054057
4 504998.94364692195
5 305638.876233408
6 147220.8538468314
7 66788.82640243528
8 34211.979843394154
9 21306.913694050018
10 15499.957808534049
11 12299.872959685908
12 10180.80501712724
13 8604.484117088052
14 7358.1353133551975
15 6342.662070953324
16 5501.313297226758
17 4796.2059788253555
18 4200.552560833291
19 3694.314679354323
20 3261.235347639964
21 2888.80226717174
22 2566.91309901303
23 2287.4168565520067
24 2043.815852335003
25 1830.7456902666959
26 1643.6247785230819
27 1478.8525128949586
28 1333.332154489652
29 1204.358312357848
30 1089.742369393236
31 987.6137777964844
32 896.3954075959108
33 814.8016213475667
34 741.6041510754267
35 675.8616435772528
36 616.7028944786165
37 563.3581430310145
38 515.1817460973828
39 471.6241303082496
40 432.2446305322959
41 396.52615677980174
42 364.0874087316904
43 334.5925626559276
44 307.72261324682285
45 283.23665830903246
46 260.8982738787537
47 240.47715

# Boston house price prediction - BPNN

In [6]:
from sklearn.datasets import load_boston
# load dataset
data = load_boston()

In [7]:
X_ = data['data']
y = data['target']

In [8]:
y = y.reshape(y.shape[0], 1)  # increase dimensionality

In [9]:
# data regularization
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

## NN model parameter initialization

In [10]:
# parameter initialization
n_features = X_.shape[1]
# print(n_features)
n_hidden = 10

w1 = np.random.randn(n_features, n_hidden)
b1 = np.zeros(n_hidden)

w2 = np.random.randn(n_hidden, 1)
b2 = np.zeros(1)

In [11]:
learning_rate = 1e-6

## loss function

In [12]:
def Relu(x):
    result = np.where(x < 0, 0, x)
    return result

def MSE_loss(y, y_hat):
    return np.mean(np.square(y_hat - y))

def Linear(X, w1, b1):
    y = X.dot(w1) + b1
    return y

## BP model training (simple 3 layers)

In [13]:
# 200 epoches
for t in range(200):
    # forwards
    l1 = Linear(X_, w1, b1)
    s1 = Relu(l1)
    y_pred = Linear(s1, w2, b2)

    # calculate loss
    loss = MSE_loss(y, y_pred)
    print(t, loss)

    # backwords
    grad_y_pred = 2.0 * (y_pred - y)
    # weight w2
    grad_w2 = s1.T.dot(grad_y_pred)
    grad_temp_relu = grad_y_pred.dot(w2.T)
    grad_temp_relu[l1 < 0] = 0
    # weight w1
    grad_w1 = X_.T.dot(grad_temp_relu)

    # update weight
    w1 = w1 - learning_rate * grad_w1
    w2 = w2 - learning_rate * grad_w2

print('w1={}\n w2 ={}'.format(w1, w2))

0 443.1215261204288
1 429.7307010759248
2 416.7739580105401
3 404.26530589403256
4 392.17930265767177
5 380.4742874668256
6 369.1043390616647
7 358.0934724252768
8 347.41071299254764
9 337.0411877609077
10 327.0007552457235
11 317.2902056874125
12 307.8778569826649
13 298.7571044909153
14 289.9498183766416
15 281.4383053925285
16 273.2335970371405
17 265.31542429582254
18 257.65769676470956
19 250.26603740878045
20 243.14316014042365
21 236.2944815162948
22 229.72804897851202
23 223.43811481291598
24 217.41888538574887
25 211.64937849897277
26 206.1245867159191
27 200.8507833376332
28 195.81852566256057
29 191.0007743109316
30 186.4114529929014
31 182.0424941654117
32 177.88026164159214
33 173.9199510871657
34 170.15216544220993
35 166.5517232634403
36 163.1136769599985
37 159.83374233260707
38 156.71194549142677
39 153.73922654467736
40 150.90028047644893
41 148.18389690032956
42 145.59062346105816
43 143.1139222430553
44 140.74572444516951
45 138.473632948921
46 136.29529460623684
47