# 线性回归

## 预处理

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
%matplotlib widget

In [2]:
# Notebook-wide display configuration.
pd.set_option("display.width", 100)  # let pandas use up to 100 characters per printed row
os.getcwd()  # return value is not used; it is not the last expression, so nothing is displayed
# Font settings for matplotlib: presumably SimHei is selected so the Chinese
# titles and axis labels used in the plots below can be rendered, with
# "axes.unicode_minus" switched off alongside it.
plt.rcParams.update({
    "font.sans-serif": "SimHei",
    "axes.unicode_minus": False,
})

In [3]:
# NOTE(review): load_boston was deprecated and later removed from scikit-learn;
# confirm the pinned scikit-learn version still provides it.
X, Y = load_boston(return_X_y=True)
# Append a column of ones so the last coefficient of theta acts as the intercept.
X = np.column_stack((X, np.ones(X.shape[0])))
# Fixed random_state keeps the train/test split reproducible between runs.
train_x, test_x, train_y, test_y = train_test_split(X, Y, random_state=5)

## 损失函数
$$J_\theta = \frac{1}{2m} \sum_{i = 1}^{m} (x_i \theta - Y_i)^2$$

In [4]:
def costFunction(x, y, theta, h):
    """Return the half mean squared error of hypothesis ``h`` on ``(x, y)``.

    Args:
        x: Inputs passed to ``h`` unchanged.
        y: Target values; ``len(y)`` is used as the sample count.
        theta: Parameters passed to ``h`` unchanged.
        h: Hypothesis function, called as ``h(x, theta)``.

    Returns:
        The sum of squared residuals divided by ``2 * len(y)``.
    """
    residuals = h(x, theta) - y
    squared_errors = np.power(residuals, 2)
    sample_count = len(y)
    return np.sum(squared_errors) / (2 * sample_count)

## 梯度下降
$$\theta = \theta - \alpha \frac{1}{m} x^T(x \theta - Y)$$

In [5]:
def gradientDescent(x, y, theta, h, alpha, iters):
    """Run batch gradient descent for the half-mean-squared-error cost.

    Every iteration first records the cost at the current parameters and
    then applies ``theta <- theta - alpha / m * x.T @ (h(x, theta) - y)``
    with ``m = len(y)``.

    Args:
        x: Feature matrix; must support ``.T`` and ``@`` (presumably one
            row per sample, matching ``len(y)``).
        y: Target values.
        theta: Initial parameters. The caller's array is not modified.
        h: Hypothesis function, called as ``h(x, theta)``.
        alpha: Learning rate.
        iters: Number of update steps to perform.

    Returns:
        A tuple ``(costs, theta)``. ``costs`` is a list holding the cost
        measured *before* each update (so it has ``iters`` entries and does
        not include the cost of the returned parameters); ``theta`` is the
        parameter vector after the last update.
    """
    m = len(y)
    x_t = x.T  # loop-invariant, looked up once
    costs = []
    for _ in range(iters):
        # Evaluate the hypothesis once and reuse the residual for both the
        # cost and the gradient instead of recomputing h(x, theta).
        residual = h(x, theta) - y
        # Same formula as costFunction: sum of squared residuals / (2m).
        costs.append(np.sum(np.power(residual, 2)) / (2 * m))
        # Parenthesised so that only the gradient vector is scaled by alpha;
        # `alpha * x.T @ r` would scale the whole matrix on every iteration.
        theta = theta - alpha * (x_t @ residual) / m
    return costs, theta

## 模型训练

### 初始化参数

In [6]:
def h(x, theta):
    """Linear hypothesis: the matrix product of ``x`` and ``theta``."""
    return x @ theta


# One coefficient per column of train_x, all starting from the same constant.
theta = np.full(train_x.shape[1], 0.0238)
iters = 1_000_000  # number of gradient-descent iterations
alpha = 6.35e-6  # learning rate
# Evaluate the cost at the initial parameters; as the last expression of the
# cell, its value is what the notebook displays.
costFunction(train_x, train_y, theta, h)

66.80923156980715

### 调用函数并绘图

In [7]:
# Train with the hand-written gradient descent: `costs` receives one value per
# iteration and `thetas` the parameters after the final update.
costs, thetas = gradientDescent(
    x=train_x,
    y=train_y,
    theta=theta,
    h=h,
    alpha=alpha,
    iters=iters,
)

In [None]:
# Plot the cost recorded at each gradient-descent iteration.
plt.figure()  # open a new figure; the pyplot calls below act on it
plt.title("损失函数")
# x: iteration index 0..iters-1, y: the per-iteration values collected in `costs`.
plt.plot(range(iters), costs)
plt.xlabel("迭代次数", labelpad=10)
plt.ylabel("偏差", labelpad=15)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0, 0.5, '偏差')

In [None]:
# Fit scikit-learn's LinearRegression on the same training split as a reference.
# NOTE(review): train_x already carries an appended column of ones; if the
# estimator also fits its own intercept that column is redundant — confirm.
model = LinearRegression().fit(train_x, train_y)
# NOTE(review): for regressors score() is presumably R^2 rather than an
# accuracy, so the printed label may be misleading — confirm against the docs.
score = model.score(test_x, test_y)
print(f"泛化精度:{score:.2%}")
# Predictions of the hand-written model: linear hypothesis with the trained thetas.
predict_y = test_x @ thetas
plt.figure()  # new figure for the comparison plot
plt.title("房价预测")
# Three curves over the test samples (x-axis is the sample position):
#   yellow - actual target values,
#   green  - predictions from gradient descent (labelled "A"),
#   red    - predictions from the scikit-learn model (labelled "B").
plt.plot(range(test_y.shape[0]), test_y, color='y', label='房价曲线', zorder=0)
plt.plot(range(test_x.shape[0]), predict_y, color='g', label='房价预测曲线A', zorder=1)
plt.plot(range(test_x.shape[0]), model.predict(test_x), color='r', label='房价预测曲线B', zorder=2)
plt.legend(frameon=False, fontsize=6, loc="upper left")

泛化精度:70.59%


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x21625239a60>