# 线性回归API
- sklearn.linear_model.LinearRegression()
    - LinearRegression.coef_：回归系数

- 导入模块

In [9]:
from sklearn.linear_model import LinearRegression

- 构造数据集

In [2]:
x = [[80, 86],
[82, 80],
[85, 78],
[90, 90],
[86, 82],
[82, 90],
[78, 80],
[92, 94]]
y = [84.2, 80.6, 80.1, 90, 83.2, 87.6, 79.4, 93.4]

- 机器学习-- 模型训练

In [5]:
# 实例化API
estimator = LinearRegression()
# 使用fit方法进行训练
estimator.fit(x,y)

# 即是权重参数 w
estimator.coef_
 
estimator.predict([[100, 80]])

array([86.])

# sklearn.linear_model.LinearRegression(fit_intercept=True)
- 通过正规方程优化
- fit_intercept：是否计算偏置-
- LinearRegression.coef_：回归系数
- LinearRegression.intercept_：偏置

# sklearn.linear_model.SGDRegressor(loss="squared_loss", fit_intercept=True, learning_rate ='invscaling', eta0=0.01)
- SGDRegressor类实现了随机梯度下降学习，它支持不同的loss函数和正则化惩罚项来拟合线性回归模型。
- loss:损失类型
- loss=”squared_loss”: 普通最小二乘法
- fit_intercept：是否计算偏置
- learning_rate : string, optional
    - 学习率填充
- 'constant': eta = eta0
- 'optimal': eta = 1.0 / (alpha * (t + t0)) [default]
- 'invscaling': eta = eta0 / pow(t, power_t)
- power_t=0.25:存在父类当中
- 对于一个常数值的学习率来说，可以使用learning_rate=’constant’ ，并使用eta0来指定学习率。


- SGDRegressor.coef_：回归系数
- SGDRegressor.intercept_：偏置

# 回归性能评估
均方误差(Mean Squared Error)MSE)评价机制：

sklearn.metrics.mean_squared_error(y_true, y_pred)
- 均方误差回归损失
- y_true:真实值
- y_pred:预测值
- return:浮点数结果

In [14]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [3]:
def linear_model1():
    """
    线性回归:正规方程
    :return:None
    """
    # 1.获取数据
    data = load_boston()
 
    # 2.数据集划分
    x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, random_state=22)
 
    # 3.特征工程-标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.fit_transform(x_test)
 
    # 4.机器学习-线性回归(特征方程)
    estimator = LinearRegression()
    estimator.fit(x_train, y_train)
 
    # 5.模型评估
    # 5.1 获取系数等值
    y_predict = estimator.predict(x_test)
    print("预测值为:\n", y_predict)
    print("模型中的系数为:\n", estimator.coef_)
    print("模型中的偏置为:\n", estimator.intercept_)
 
    # 5.2 评价
    # 均方误差
    error = mean_squared_error(y_test, y_predict)
    print("误差为:\n", error)
 
 
def linear_model2():
    """
    线性回归:梯度下降法
    :return:None
    """
    # 1.获取数据
    data = load_boston()
 
    # 2.数据集划分
    x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, random_state=22)
 
    # 3.特征工程-标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.fit_transform(x_test)
 
    # 4.机器学习-线性回归(特征方程)
    estimator = SGDRegressor(max_iter=1000)
    # estimator = SGDRegressor(max_iter=1000,learning_rate="constant",eta0=0.1)
    estimator.fit(x_train, y_train)
 
    # 5.模型评估
    # 5.1 获取系数等值
    y_predict = estimator.predict(x_test)
    print("预测值为:\n", y_predict)
    print("模型中的系数为:\n", estimator.coef_)
    print("模型中的偏置为:\n", estimator.intercept_)
 
    # 5.2 评价
    # 均方误差
    error = mean_squared_error(y_test, y_predict)
    print("误差为:\n", error)

In [16]:
linear_model1()
linear_model2()

预测值为:
 [28.14790667 31.30481159 20.5173895  31.4803076  19.01576648 18.26058425
 20.57439825 18.45232382 18.46065155 32.93661269 20.3603692  27.24886071
 14.81691426 19.20872297 37.01503458 18.32036009  7.71389628 17.56196944
 30.18543811 23.60655873 18.14917545 33.84385342 28.48976083 16.9967041
 34.76065063 26.22246312 34.83857168 26.62310118 18.64402278 13.21154037
 30.37364532 14.70785748 37.18173708  8.88049446 15.06699441 16.14502168
  7.19990762 19.17049423 39.56848262 28.23663    24.62411509 16.75182833
 37.84465582  5.71770376 21.21547924 24.63882018 18.8561516  19.93416672
 15.19839712 26.29892968  7.4274177  27.14300763 29.18745146 16.27895854
  7.99799673 35.46394958 32.38905222 20.83161049 16.41464618 20.87141783
 22.92150844 23.60828508 19.32245804 38.33751529 23.87463642 18.98494066
 12.63480997  6.12915396 41.44675745 21.08894595 16.27561572 21.48546861
 40.74502107 20.4839158  36.82098808 27.0452329  19.79437176 19.64484428
 24.58763105 21.08454269 30.91968983 19.33266

# 正则化线性模型
- Ridge Regression 岭回归
- Lasso 回归
- Elastic Net 弹性网络
- Early stopping


- 常用：岭回归

- 假设只有少部分特征是有用的：

    - 弹性网络
    - Lasso
    - 一般来说，弹性网络的使用更为广泛。因为在特征维度高于训练样本数，或者特征是强相关的情况下，Lasso回归的表现不太稳定。


- api:

    - from sklearn.linear_model import Ridge, ElasticNet, Lasso

# 线性回归的改进-岭回归
- API
- sklearn.linear_model.Ridge(alpha=1.0, fit_intercept=True,solver="auto", normalize=False)
    - 具有l2正则化的线性回归
    - alpha:正则化力度，也叫 λ
    - λ取值：0~1 1~10
    - solver:会根据数据自动选择优化方法
    - sag:如果数据集、特征都比较大，选择该随机梯度下降优化
    - normalize:数据是否进行标准化
    - normalize=False:可以在fit之前调用preprocessing.StandardScaler标准化数据
    - Ridge.coef_:回归权重
    - Ridge.intercept_:回归偏置
- Ridge方法相当于SGDRegressor(penalty='l2', loss="squared_loss"),只不过SGDRegressor实现了一个普通的随机梯度下降学习，推荐使用Ridge(实现了SAG)

- sklearn.linear_model.RidgeCV(_BaseRidgeCV, RegressorMixin)
- 具有l2正则化的线性回归，可以进行交叉验证
- coef_:回归系数

In [None]:
class _BaseRidgeCV(LinearModel):
    def __init__(self, alphas=(0.1, 1.0, 10.0),
                 fit_intercept=True, normalize=False,scoring=None,
                 cv=None, gcv_mode=None,
                 store_cv_values=False):

In [1]:
from sklearn.linear_model import Ridge, ElasticNet, Lasso

def linear_model3():
    """
    线性回归:岭回归
    :return:
    """
    # 1.获取数据
    data = load_boston()
 
    # 2.数据集划分
    x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, random_state=22)
 
    # 3.特征工程-标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.fit_transform(x_test)
 
    # 4.机器学习-线性回归(岭回归)
    estimator = Ridge(alpha=1)
    # estimator = RidgeCV(alphas=(0.1, 1, 10))
    estimator.fit(x_train, y_train)
 
    # 5.模型评估
    # 5.1 获取系数等值
    y_predict = estimator.predict(x_test)
    print("预测值为:\n", y_predict)
    print("模型中的系数为:\n", estimator.coef_)
    print("模型中的偏置为:\n", estimator.intercept_)
 
    # 5.2 评价
    # 均方误差
    error = mean_squared_error(y_test, y_predict)
    print("误差为:\n", error)

In [17]:
linear_model3()

预测值为:
 [28.13514381 31.28742806 20.54637256 31.45779505 19.05568933 18.26035004
 20.59277879 18.46395399 18.49310689 32.89149735 20.38916336 27.19539571
 14.82641534 19.22385973 36.98699955 18.29852297  7.78481347 17.58930015
 30.19228148 23.61186682 18.14688039 33.81334203 28.44588593 16.97492092
 34.72357533 26.19400705 34.77212916 26.62689656 18.63066492 13.34246426
 30.35128911 14.59472585 37.18259957  8.93178571 15.10673508 16.1072542
  7.22299512 19.14535184 39.53308652 28.26937936 24.62676357 16.76310494
 37.85719041  5.71249289 21.17777272 24.60640023 18.90197753 19.95020929
 15.1922374  26.27853095  7.55102357 27.10160025 29.17947182 16.275476
  8.02888564 35.42165713 32.28262473 20.9525814  16.43494393 20.88177884
 22.92764493 23.58271167 19.35870763 38.27704421 23.98459232 18.96691367
 12.66552625  6.122414   41.44033214 21.09214394 16.23412117 21.51649375
 40.72274345 20.53192898 36.78646575 27.01972904 19.91315009 19.66906691
 24.59629369 21.2589005  30.93402996 19.3338604

# 模型的保存和加载
- sklearn模型的保存和加载API
- from sklearn.externals import joblib
- 保存：joblib.dump(estimator, 'test.pkl')
- 加载：estimator = joblib.load('test.pkl')

In [None]:
def load_dump_demo():
    """
    线性回归:岭回归
    :return:
    """
    # 1.获取数据
    data = load_boston()
 
    # 2.数据集划分
    x_train, x_test, y_train, y_test = train_test_split(data.data, data.target, random_state=22)
 
    # 3.特征工程-标准化
    transfer = StandardScaler()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.fit_transform(x_test)
 
    # 4.机器学习-线性回归(岭回归)
    # # 4.1 模型训练
    # estimator = Ridge(alpha=1)
    # estimator.fit(x_train, y_train)
    #
    # # 4.2 模型保存
    # joblib.dump(estimator, "./data/test.pkl")
 
    # 4.3 模型加载
    estimator = joblib.load("./data/test.pkl")
 
    # 5.模型评估
    # 5.1 获取系数等值
    y_predict = estimator.predict(x_test)
    print("预测值为:\n", y_predict)
    print("模型中的系数为:\n", estimator.coef_)
    print("模型中的偏置为:\n", estimator.intercept_)
 
    # 5.2 评价
    # 均方误差
    error = mean_squared_error(y_test, y_predict)
    print("误差为:\n", error)