In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib widget

In [2]:
# Matplotlib: use the SimHei font for sans-serif text (the figures below carry
# Chinese labels) and turn off the Unicode minus sign on axes.
plt.rcParams.update({
    'font.sans-serif': 'SimHei',
    "axes.unicode_minus": False,
})

# pandas: display width of 20 characters, with East Asian wide and
# ambiguous-width characters taken into account when aligning printed frames.
pd.set_option('display.width', 20)
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)

In [3]:
# Read the data: the CSV's first row is the header and its 'Id' column becomes the index.
iris_csv_path = 'data/archive/Iris.csv'
iris = pd.read_csv(iris_csv_path, header=0, index_col='Id')

# Short aliases for the five remaining columns.
iris.columns = ['sl', 'sw', 'pl', 'pw', 'sp']

# Replace the 'sp' values with integer codes; sp_code keeps the full
# (codes, uniques) pair returned by factorize.
sp_code = pd.factorize(iris['sp'])
species_codes = sp_code[0]
iris['sp'] = species_codes

iris.head(100)

Unnamed: 0_level_0,sl,sw,pl,pw,sp
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,5.1,3.5,1.4,0.2,0
2,4.9,3.0,1.4,0.2,0
3,4.7,3.2,1.3,0.2,0
4,4.6,3.1,1.5,0.2,0
5,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
96,5.7,3.0,4.2,1.2,1
97,5.7,2.9,4.2,1.3,1
98,6.2,2.9,4.3,1.3,1
99,5.1,2.5,3.0,1.1,1


### 损失函数
$$J_\theta = \frac{1}{2m} \sum_{i=1}^{m} (x_i \theta - y_i)^2$$

In [4]:
def costFunction(x, y, theta, h):
    """Return the sum of squared prediction errors divided by ``2 * len(x)``.

    Parameters
    ----------
    x : inputs handed to ``h``; ``len(x)`` is used as the sample count.
    y : target values subtracted from the predictions.
    theta : parameters handed to ``h``.
    h : callable ``h(x, theta)`` producing the predictions.

    Returns
    -------
    The scalar cost ``sum((h(x, theta) - y) ** 2) / (2 * len(x))``.
    """
    residual = h(x, theta) - y
    sample_count = len(x)
    return np.sum(residual ** 2) / (sample_count * 2)

### 梯度下降
$$
\theta = \theta - \frac{\alpha}{m} x^T(x \theta - y)
$$

In [5]:
def gradientDescent(x, y, theta, h, alpha, iters):
    """Run ``iters`` batch gradient-descent updates on ``theta``.

    Parameters
    ----------
    x : inputs handed to ``h``; ``x.T`` and ``len(x)`` are used in the update.
    y : target values subtracted from the predictions.
    theta : starting parameters; the caller's object is not modified because
        each step rebinds ``theta`` to a new value.
    h : callable ``h(x, theta)`` producing the predictions.
    alpha : learning rate.
    iters : number of update steps.

    Returns
    -------
    (loss, theta) : the list of costs recorded *before* each update (so it has
    ``iters`` entries and does not include the cost of the returned
    parameters), and the parameters after the last update.
    """
    history = []
    for _ in range(iters):
        # Record the cost of the current parameters, then take one step.
        history.append(costFunction(x, y, theta, h))
        step = alpha / len(x)
        gradient = x.T @ (h(x, theta) - y)
        theta = theta - gradient * step
    return history, theta

### 初始化参数

In [6]:
# Gradient-descent settings.
alpha = 0.001   # learning rate
iters = 50000   # number of update steps


def h(x, theta):
    """Linear hypothesis: the matrix product of the inputs and the parameters."""
    return x @ theta


# Regress column 'pw' on column 'pl'; a column of ones is appended as the
# last column of x so the final entry of theta acts as the intercept.
y = iris["pw"].values
x = np.c_[iris["pl"].values, np.ones(len(iris))]

# Start from all-zero parameters, one per column of x.
theta = np.zeros(x.shape[1])

# Cost at the starting point.
costFunction(x, y, theta, h)

1.0076666666666665

In [7]:
# Fit by gradient descent, then plot the cost recorded before each update.
loss, thetas = gradientDescent(x, y, theta, h, alpha, iters)

plt.figure(num="损失函数")
plt.plot(range(len(loss)), loss, label=f"学习率:{alpha}")
plt.xlabel("迭代次数")
plt.ylabel("损失")
# The label passed to plot() is only rendered once a legend is created.
# The trailing semicolon keeps the Legend repr out of the cell output.
plt.legend();

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
# Closed-form least-squares fit from the normal equation (x^T x) theta = x^T y.
# The linear system is solved directly instead of forming the explicit inverse
# of x^T x, which is the numerically preferred way to evaluate it.
thetas0 = np.linalg.solve(x.T @ x, x.T @ y)

In [None]:
plt.figure()
plt.title('pl-pw散点图', color='r', fontsize=14)
# Scatter of 'pl' against 'pw', coloured by the integer code in 'sp'.
plt.scatter(iris['pl'], iris['pw'], c=iris['sp'], cmap=plt.cm.flag_r)
plt.xlabel("pl", fontsize=13)
plt.ylabel("pw", fontsize=13)

# Overlay the fitted line of each solver; points are ordered by their first
# column of x so each line is drawn from left to right.
label = ["梯度下降", "正规方程"]
order = np.argsort(x[:, 0])
for line_label, t in zip(label, [thetas, thetas0]):
    y_p = h(x, t)
    plt.plot(x[order, 0], y_p[order], label=line_label)
plt.legend()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.legend.Legend at 0x1e2bca63ee0>

In [None]:
def height(x, y):
    """Evaluate ``(1 - x/2 + x**5 + y**3) * exp(-x**2 - y**2)``.

    ``x`` and ``y`` may be scalars or NumPy arrays; the arithmetic is applied
    element-wise and the result follows NumPy broadcasting of the two inputs.
    """
    polynomial = 1 - x / 2 + x ** 5 + y ** 3
    envelope = np.exp(-x ** 2 - y ** 2)
    return polynomial * envelope

In [None]:
# Sample the surface on a 300 x 300 grid over [-3, 3] x [-3, 3].
# NOTE: this rebinds x and y, which an earlier cell bound to the regression
# inputs and targets; cells that need those must be re-run from the top.
x = np.linspace(-3, 3, 300)
y = np.linspace(-3, 3, 300)
X, Y = np.meshgrid(x, y)
Z = height(X, Y)

plt.figure()
# Fill the contours with colour; 10 asks for the heights to be split into 10 levels.
plt.contourf(X, Y, Z, 10, alpha=0.75, cmap=plt.cm.hot)
C = plt.contour(X, Y, Z, 10, colors='black')
# Draw the contour labels inline on the black contour lines.
plt.clabel(C, inline=True, fontsize=10)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<a list of 9 text.Text objects>