In [1]:
# Turn on greedy Tab completion for code in this session.
%config IPCompleter.greedy=True

# Let a single cell display the value of every top-level expression,
# not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" 

In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [6]:
# Simple linear regression (one input variable, first degree): sample data.
# The targets stay 1-D. The inputs are turned into a single column, so every
# observation becomes its own one-element row and x is a 2-D array.
y = np.array([5, 20, 14, 32, 22, 38])
x = np.array([5, 15, 25, 35, 45, 55])[:, np.newaxis]

# Display x to show the "array of one-element arrays" layout.
x

array([[ 5],
       [15],
       [25],
       [35],
       [45],
       [55]])

In [8]:
# Create the estimator and fit it to the data in one step; after fitting,
# the learned parameters are available on `model`.
model = LinearRegression().fit(x, y)

# Display the fitted estimator.
model

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [14]:
# Displayed in this order: R² (coefficient of determination) computed on the
# same x, y used for fitting, then the intercept, then the coefficient array.
# All three values are shown because ast_node_interactivity is set to "all".
model.score(x, y)
model.intercept_
model.coef_

0.7158756137479542

5.633333333333329

array([0.54])

In [17]:
# Predict the response for new inputs.
# Note: the input must again be 2-D (one row per sample), just like the x
# the model was fitted on.
x_new = np.arange(5).reshape(-1, 1)
x_new

model.predict(x_new)

array([[0],
       [1],
       [2],
       [3],
       [4]])

array([5.63333333, 6.17333333, 6.71333333, 7.25333333, 7.79333333])

In [18]:
# Multiple linear regression (two input variables).
# Same workflow as the single-variable case; the only difference is that each
# row of x now holds two numbers, one per input variable.
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

# Build the model and fit it.
model = LinearRegression().fit(x, y)

# Inspect the fitted model; R² is computed on the data used for fitting.
print('coefficient of determination:', model.score(x, y))
print('intercept:', model.intercept_)
print('slope:', model.coef_)

# Predictions for the training inputs, for comparison with y.
# With two input variables the fitted function is a plane in 3-D space
# (not a line); multiple regressions are usually not plotted.
y_pred = model.predict(x)
print('predicted response:', y_pred, sep='\n')

# Predictions for new data: five rows with two values each.
x_new = np.arange(10).reshape((-1, 2))
print(x_new)
y_new = model.predict(x_new)
print(y_new)

coefficient of determination: 0.8615939258756776
intercept: 5.52257927519819
slope: [0.44706965 0.25502548]
predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
[ 5.77760476  7.18179502  8.58598528  9.99017554 11.3943658 ]


In [20]:
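# Polynomial regression
# Only the input variables need a polynomial transformation; the model is still fitted with LinearRegression.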
from sklearn.preprocessing import PolynomialFeatures

In [21]:
# Step 2a: Provide data (two input variables per observation)
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

# Step 2b: Transform input data
# degree=2 adds the second-order terms. x has two columns, so the transformed
# matrix has five: x1, x2, x1^2, x1*x2, x2^2. include_bias=False leaves out the
# constant column, because LinearRegression fits the intercept itself.
# The fitted transformer is kept in `poly` so that new inputs can be expanded
# the same way, e.g. model.predict(poly.transform(x_new)).
poly = PolynomialFeatures(degree=2, include_bias=False)
x_ = poly.fit_transform(x)

# Step 3: Create a model and fit it
model = LinearRegression().fit(x_, y)

# Step 4: Get results (R² is evaluated on the data used for fitting)
r_sq = model.score(x_, y)
intercept, coefficients = model.intercept_, model.coef_

# Step 5: Predict
y_pred = model.predict(x_)

print('coefficient of determination:', r_sq)
print('intercept:', intercept)

# The fitted function has six parameters in total:
#   f(x1, x2) = b0 + b1*x1 + b2*x2 + b3*x1^2 + b4*x1*x2 + b5*x2^2
# b0 is the `intercept` printed above. `coefficients` holds only the other
# five values (b1..b5), because the bias column was excluded from the features.
print('coefficients:', coefficients, sep='\n')

print('predicted response:', y_pred, sep='\n')

coefficient of determination: 0.9453701449127822
intercept: 0.8430556452395876
coefficients:
[ 2.44828275  0.16160353 -0.15259677  0.47928683 -0.4641851 ]
predicted response:
[ 0.54047408 11.36340283 16.07809622 15.79139    29.73858619 23.50834636
 39.05631386 41.92339046]
