In [1]:
"""
    使用numpy实现Boston房价预测
    Step1 数据加载，来源sklearn中的load_boston
    Step2 数据规范化，将X 采用正态分布规范化
    Step3 初始化网络
    Step4 定义激活函数，损失函数，学习率 epoch
    Step5 循环执行：前向传播，计算损失函数，反向传播，参数更新
    Step6 输出训练好的model参数，即w1, w2, b1, b2
""" 
import numpy as np
from sklearn.datasets import load_boston
from sklearn.utils import shuffle, resample

In [2]:
# 数据加载
data = load_boston()
X_ = data['data']
y = data['target']

In [3]:
X_

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [4]:
y.shape

(506,)

In [5]:
# 将y转化为矩阵的形式
y = y.reshape(y.shape[0],1)

In [6]:
# 数据规范化 正态分布 
X_ = (X_ - np.mean(X_, axis=0)) / np.std(X_, axis=0)

In [7]:
"""
    初始化网络参数
    定义隐藏层维度，w1,b1,w2,b2
""" 
n_features = X_.shape[1]
n_hidden = 10
w1 = np.random.randn(n_features, n_hidden)
b1 = np.zeros(n_hidden)
w2 = np.random.randn(n_hidden, 1)
b2 = np.zeros(1)

In [8]:
X_.shape

(506, 13)

In [9]:
1e-6

1e-06

In [14]:
# relu函数
def Relu(x):
    result = np.where(x<0, 0, x)
    return result


# 定义损失函数
def MSE_loss(y, y_hat):
    return np.mean(np.square(y_hat - y))

# 设置学习率
learning_rate = 1e-6

# 定义线性回归函数
def Linear(X, w1, b1):
    y = X.dot(w1) + b1
    return y

# 5000次迭代
for t in range(5000):
    # 前向传播，计算预测值y (Linear->Relu->Linear)  bias = l
    l1 = Linear(X_, w1, b1) 
    
    s1 = Relu(l1)
    
    y_pred = Linear(s1, w2, b2)

    # 计算损失函数, 并输出每次epoch的loss
    loss = MSE_loss(y, y_pred)
    #print(t, loss)


    # 反向传播，基于loss 计算w1和w2的梯度
    grad_y_pred = 2.0 * (y_pred - y)   #MSE的梯度
    grad_w2 = s1.T.dot(grad_y_pred)
    grad_temp_relu = grad_y_pred.dot(w2.T)
    grad_temp_relu[l1<0] = 0
    grad_w1 = X_.T.dot(grad_temp_relu)
    
    # 更新权重, 对w1, w2, b1, b2进行更新
    w2 = w2 - learning_rate * grad_w2
    w1 = w1 - learning_rate * grad_w1



# 得到最终的w1, w2
#print('w1={} \n w2={}'.format(w1, w2))

0 10.411393901567473
1 10.411231179162872
2 10.411016315616708
3 10.410848724498
4 10.41062738313578
5 10.410485295495455
6 10.410238094035654
7 10.410087690450522
8 10.409877421720424
9 10.40968270570803
10 10.409487476366568
11 10.409298325539885
12 10.409084780960594
13 10.408909908035309
14 10.408716332907739
15 10.408488066117815
16 10.408342423301399
17 10.408117534871485
18 10.407931604659673
19 10.407717829302294
20 10.407565813185462
21 10.407310860311018
22 10.407180365591278
23 10.406942858093734
24 10.406762536696595
25 10.406571815366034
26 10.406382196664602
27 10.406156131634885
28 10.406004747620036
29 10.40579053441618
30 10.405580297797174
31 10.405417688189031
32 10.405195272626226
33 10.405011443101424
34 10.404814131750575
35 10.404631723348578
36 10.404407285605332
37 10.404272105534751
38 10.404012528712402
39 10.40387136416092
40 10.403649060427592
41 10.403470549402147
42 10.40325866266685
43 10.403088085202802
44 10.402861047272902
45 10.40269862742833
46 10.4

Thinking1:为什么我们需要在神经网络中使用非线性激活函数
$$ 为了拟合非线性数据 $$

In [None]:
Thinking2:逻辑回归Logistic Loss是什么，解决分类问题 or 回归问题？
cross entropy
分类问题