# 线性回归

In [2]:
import tensorflow as tf
import timeit
import numpy as np
# Ask NumPy to print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
# Print the TensorFlow version this notebook runs against.
print(tf.__version__)

2.0.0


In [3]:
# 1. Sample the data set
samples = []  # collects one [x, y] pair per sampled point
for _ in range(100):  # draw 100 points
    # np.random.uniform(low, high) returns a single value drawn from the
    # uniform distribution over [low, high).
    x = np.random.uniform(-10., 10.)  # random input x
    # Gaussian observation noise (mean 0, standard deviation 0.01).
    eps = np.random.normal(0., 0.01)
    # Output of the true model y = 1.477 * x + 0.089, perturbed by the noise.
    y = 1.477 * x + 0.089 + eps
    samples.append([x, y])
# Stack the pairs into a 2-D array: column 0 holds x, column 1 holds y.
data = np.array(samples)
print(data)

# 2. Compute the error
def mse(b, w, points):
    """Return the mean squared error of the line ``y = w * x + b`` over ``points``.

    Args:
        b: Intercept of the current model.
        w: Slope of the current model.
        points: 2-D array-like indexed as ``points[i][0]`` (input x) and
            ``points[i][1]`` (target y).

    Returns:
        The sum of squared residuals divided by the number of points.
    """
    count = len(points)
    squared_error_sum = 0
    for sample in points:  # visit every point in order
        x, y = sample[0], sample[1]
        residual = y - (w * x + b)
        # Accumulate the squared difference between target and prediction.
        squared_error_sum += residual ** 2
    # Average the accumulated error to obtain the mean squared error.
    return squared_error_sum / float(count)

# 3. Compute the gradient
def step_gradient(b_current, w_current, points, lr):
    """Run one gradient-descent step of the MSE loss for ``y = w * x + b``.

    The gradient is accumulated over every point and the parameters are then
    updated once, after the accumulation has finished.

    Args:
        b_current: Current intercept.
        w_current: Current slope.
        points: 2-D array indexed as ``points[i, 0]`` (input x) and
            ``points[i, 1]`` (target y).
        lr: Learning rate that scales the gradient step.

    Returns:
        A two-element list ``[new_b, new_w]`` with the updated parameters.

    Raises:
        ValueError: If ``points`` is empty, since the mean gradient is
            undefined without samples.
    """
    num_points = len(points)
    if num_points == 0:
        raise ValueError("points must contain at least one sample")

    b_gradient = 0
    w_gradient = 0
    M = float(num_points)  # total number of samples
    for i in range(num_points):
        x = points[i, 0]
        y = points[i, 1]
        # Derivative of the loss w.r.t. b: grad_b = 2(wx+b-y), see Eq. (2.3)
        b_gradient += (2/M) * ((w_current * x + b_current) - y)
        # Derivative of the loss w.r.t. w: grad_w = 2(wx+b-y)*x, see Eq. (2.2)
        w_gradient += (2/M) * x * ((w_current * x + b_current) - y)

    # Apply the gradient-descent update once, using the fully accumulated
    # gradients; lr is the learning rate.
    new_b = b_current - (lr * b_gradient)
    new_w = w_current - (lr * w_gradient)
    return [new_b, new_w]

# 4. Update the parameters
def gradient_descent(points, starting_b, starting_w, lr, num_iterations):
    """Fit ``y = w * x + b`` by repeating gradient-descent steps.

    Args:
        points: 2-D array-like of samples; column 0 is x, column 1 is y.
        starting_b: Initial intercept.
        starting_w: Initial slope.
        lr: Learning rate forwarded to ``step_gradient``.
        num_iterations: Number of update steps to run.

    Returns:
        A two-element list ``[b, w]`` holding the parameters after the last
        step (the starting values if ``num_iterations`` is 0).
    """
    # Convert once up front: the conversion is loop-invariant, and both
    # helpers index with ``points[i, 0]``, which needs an ndarray.
    samples = np.asarray(points)
    b = starting_b  # initial value of b
    w = starting_w  # initial value of w
    for step in range(num_iterations):
        # Compute the gradient and update the parameters once.
        b, w = step_gradient(b, w, samples, lr)
        if step%50 == 0:
            # The loss is only used for progress reporting, so it is
            # evaluated only on the steps that are actually printed.
            loss = mse(b, w, samples)
            print(f"iteration:{step}, loss:{loss}, w:{w}, b:{b}")
    return [b, w]  # parameters after the final update

def main():
    """Train the linear model on the module-level ``data`` and report the result.

    Runs ``gradient_descent`` from zero-initialised parameters, then prints
    the final mean squared error together with the fitted ``w`` and ``b``.
    """
    # Hyperparameters for the optimisation run.
    learning_rate = 0.01
    start_b = 0  # b starts at 0
    start_w = 0  # w starts at 0
    total_steps = 1000

    # ``data`` holds samples drawn from the true model plus observation noise.
    # Run 1000 update steps and keep the resulting parameters.
    b, w = gradient_descent(data, start_b, start_w, learning_rate, total_steps)

    # Mean squared error at the fitted parameters.
    loss = mse(b, w, data)
    print(f'Final loss:{loss}, w:{w}, b:{b}')


main()


[[  3.61731124   5.44229298]
 [  0.4469666    0.74651127]
 [ -3.05369619  -4.41306661]
 [  6.85336066  10.21644765]
 [  2.97286717   4.47826094]
 [  9.27060176  13.78770528]
 [  2.25811094   3.42140334]
 [ -7.13477033 -10.44970788]
 [ -1.13567212  -1.58090054]
 [ -4.7572576   -6.92150381]
 [ -9.13084524 -13.3921784 ]
 [ -5.28663554  -7.7071681 ]
 [ -8.36381767 -12.27126613]
 [ -8.51067749 -12.48356053]
 [  9.90437603  14.71592396]
 [  5.15896611   7.70926706]
 [ -2.77932503  -4.01915655]
 [  0.1082475    0.25421451]
 [  8.93338075  13.27321399]
 [  9.7564652   14.50931665]
 [ -1.93093733  -2.76250119]
 [  8.54642985  12.71080197]
 [ -3.19480533  -4.63557468]
 [  5.77920543   8.63080314]
 [  7.77236736  11.56485695]
 [ -4.98328218  -7.2712047 ]
 [ -0.52142056  -0.68950218]
 [  3.45735179   5.20381574]
 [ -9.31265421 -13.66748602]
 [  1.93228131   2.93255584]
 [  5.65602114   8.4433465 ]
 [  5.90531805   8.79133383]
 [ -4.71959775  -6.89278203]
 [ -1.47895967  -2.08922034]
 [ -7.49469881