In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# BP neural network - numpy

## BP structure

In [2]:
# n: sample size 
# d_in: input layer size
# d_out: output layer size
# h: hidden layer size
n, d_in, h, d_out = 64, 1000, 100, 10

## Dataset

In [3]:
# dataset generation by random
x = np.random.randn(n, d_in)
y = np.random.randn(n, d_out)

## Set parameters

In [4]:
# initialize weights
# weights from input to hidden layer（1000，100）
w1 = np.random.randn(d_in, h)

# weights from hidden to output layer（100，10）
w2 = np.random.randn(h, d_out)

# learning rate
learning_rate = 1e-6

## BPNN model training

In [5]:
# 100 epoches
for t in range(100):
    # forwards
    temp = x.dot(w1)
    temp_relu = np.maximum(temp, 0)  # relu activation, non-linearization
    y_pred = temp_relu.dot(w2)

    # loss function - MSE
    loss = np.square(y_pred - y).sum() / n
    print(t, loss)

    # backwards, calculate gradient of w1 and w2 based on loss
    # gradient w2
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = temp_relu.T.dot(grad_y_pred)
    
    # gradient w1
    grad_temp_relu = grad_y_pred.dot(w2.T)
    grad_temp = grad_temp_relu.copy()
    grad_temp[temp<0] = 0
    grad_w1 = x.T.dot(grad_temp)

    # update weights based on w2, w1
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
print(w1, w2)

0 397933.87595343933
1 338051.0456832171
2 344556.3390533223
3 368743.17120696
4 372235.99647407065
5 331870.3751145212
6 252914.75065101997
7 166287.1968761673
8 98515.28102259341
9 55958.70045992467
10 32381.78607170984
11 19928.164123192575
12 13319.780886964727
13 9648.825633281727
14 7456.852144414801
15 6033.2114125364515
16 5028.84607468582
17 4272.208967705802
18 3674.158336692523
19 3186.65473336981
20 2780.287538773452
21 2436.5976070958927
22 2142.926652812669
23 1890.579253158777
24 1672.3945792393513
25 1482.718955409651
26 1317.365941391883
27 1172.6356665257563
28 1045.6791343868388
29 934.023725778566
30 835.5971160498214
31 748.6880814294391
32 671.7724271799398
33 603.6197092929694
34 543.067046543691
35 489.2241001581833
36 441.2358037689901
37 398.37603105937177
38 360.04040737515106
39 325.7199499033096
40 294.93651433095806
41 267.3070364878941
42 242.47633731275477
43 220.1320458245737
44 200.01044261695796
45 181.87301724055186
46 165.50454933763154
47 150.71217

# Boston house price prediction - BPNN

In [6]:
from sklearn.datasets import load_boston
# load dataset
data = load_boston()

In [7]:
X_ = data['data']
y = data['target']

In [8]:
y = y.reshape(y.shape[0], 1)  # matrix transform for calulating

In [9]:
# data regularization
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

## NN model parameter initialization

In [10]:
# parameter initialization
n_features = X_.shape[1]  # feature dimensionality
n_hidden = 10

# from input to hidden layer
w1 = np.random.randn(n_features, n_hidden)
b1 = np.zeros(n_hidden)

# from hidden to output layer
w2 = np.random.randn(n_hidden, 1)
b2 = np.zeros(1)

In [11]:
learning_rate = 1e-6

## loss function

In [12]:
def Relu(x):
    result = np.where(x < 0, 0, x)
    return result

def MSE_loss(y, y_hat):
    return np.mean(np.square(y_hat - y))

def Linear(X, w1, b1):
    y = X.dot(w1) + b1
    return y

## BP model training (3 layers)

In [13]:
# 200 epoches
for t in range(200):
    # forwards
    l1 = Linear(X_, w1, b1)
    s1 = Relu(l1)
    y_pred = Linear(s1, w2, b2)

    # calculate loss
    loss = MSE_loss(y, y_pred)
    print(t, loss)

    # backwords
    # gradient of w2
    grad_y_pred = 2.0 * (y_pred - y)  # gradient of MSE
    grad_w2 = s1.T.dot(grad_y_pred)
    # gradient of w1  
    grad_temp_relu = grad_y_pred.dot(w2.T)
    grad_temp_relu[l1 < 0] = 0
    grad_w1 = X_.T.dot(grad_temp_relu)

    # update weight w1, w2
    w1 = w1 - learning_rate * grad_w1
    w2 = w2 - learning_rate * grad_w2

print('w1={}\n w2 ={}'.format(w1, w2))

0 907.095527465675
1 871.5712141619886
2 838.7130243672201
3 808.052360297677
4 779.3419473489746
5 752.4439941508729
6 727.0404405620022
7 703.0695030477084
8 680.3443324760391
9 658.7936276935884
10 638.2397560150079
11 618.6023323541679
12 599.7865122434388
13 581.7410140318002
14 564.3846884016152
15 547.6495735550179
16 531.5003263032869
17 515.8562841722722
18 500.70143900466184
19 486.01455928645754
20 471.7302459379821
21 457.7660875082715
22 444.16104710273714
23 430.82197888783617
24 417.7272682272986
25 404.9364833035004
26 392.3837639978987
27 380.0506242372035
28 367.9587510229312
29 356.170328229946
30 344.6643407769872
31 333.4702371914083
32 322.5876609032884
33 312.01345327604696
34 301.71198775692767
35 291.6436750855724
36 281.890108911379
37 272.4154107225624
38 263.253809095432
39 254.421670984884
40 245.912858654256
41 237.72365849398997
42 229.8163236785779
43 222.21669412044815
44 214.96251657571855
45 208.02402540500375
46 201.37843209116096
47 194.994413787620