In [8]:
import numpy as np 
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [3]:
# Simulated dataset: 100 samples with x drawn uniformly from [-1, 3) and
# y = 0.5 * x + 3 plus standard-normal noise.
np.random.seed(42)
x = np.random.uniform(low=-1.0, high=3.0, size=100)
X = np.reshape(x, (-1, 1))  # single-feature column matrix for scikit-learn
y = 0.5 * x + 3 + np.random.normal(loc=0, scale=1, size=100)

# Re-seed the global NumPy RNG right before splitting: train_test_split is
# called without random_state, so it draws from that global generator.
np.random.seed(666)
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [4]:
# Polynomial regression
def PolynomialRegression(degree):
  """Build an unfitted polynomial-regression pipeline.

  The pipeline chains three stages: polynomial feature expansion,
  standardisation of the expanded features, and a plain linear regression.

  Args:
    degree: Polynomial degree forwarded to `PolynomialFeatures`.

  Returns:
    A `Pipeline` whose steps are named 'poly', 'std_scaler' and 'lin_reg'.
  """
  steps = [
    ('poly', PolynomialFeatures(degree=degree)),
    ('std_scaler', StandardScaler()),
    ('lin_reg', LinearRegression()),
  ]
  return Pipeline(steps)

In [5]:
# Fit a degree-20 polynomial on the training split and score it on the test split.
poly_reg = PolynomialRegression(degree=20)
y_poly_predict = poly_reg.fit(X_train, y_train).predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_poly_predict)

# Recorded output: 167.94121073582812. The previous inline note quoted
# 167.94061213110385, so the trailing digits appear to vary between runs/environments.
print(mse)

167.94121073582812


In [6]:
# Ridge regression
def RidgeRegression(degree,alpha):
  """Build an unfitted polynomial pipeline that ends in a Ridge regressor.

  Args:
    degree: Polynomial degree forwarded to `PolynomialFeatures`.
    alpha: Regularisation strength forwarded to `Ridge`.

  Returns:
    A `Pipeline` whose steps are named 'poly', 'std_scaler' and 'lin_reg'
    (the final step holds the `Ridge` estimator).
  """
  steps = [
    ('poly', PolynomialFeatures(degree=degree)),
    ('std_scaler', StandardScaler()),
    ('lin_reg', Ridge(alpha=alpha)),
  ]
  return Pipeline(steps)

# Same degree-20 features, now with Ridge regularisation (alpha=0.01).
ridge_reg = RidgeRegression(20,0.01)
ridge_reg.fit(X_train,y_train)
y_ridge_predict = ridge_reg.predict(X_test)
# mean_squared_error is declared as (y_true, y_pred); pass the arguments in that
# order. MSE is symmetric, so the printed value is unchanged.
mse2 = mean_squared_error(y_test,y_ridge_predict)
# Recorded output: 1.1959639082957148. Compared with `mse` from the
# unregularised degree-20 fit, ridge regression does much better on the test set.
print(mse2)

1.1959639082957148


In [9]:
# LASSO regression
def LassoRegression(degree,alpha):
  """Build an unfitted polynomial pipeline that ends in a Lasso regressor.

  Args:
    degree: Polynomial degree forwarded to `PolynomialFeatures`.
    alpha: Regularisation strength forwarded to `Lasso`.

  Returns:
    A `Pipeline` whose steps are named 'poly', 'std_scaler' and 'lin_reg'
    (the final step holds the `Lasso` estimator).
  """
  steps = [
    ('poly', PolynomialFeatures(degree=degree)),
    ('std_scaler', StandardScaler()),
    ('lin_reg', Lasso(alpha=alpha)),
  ]
  return Pipeline(steps)

# Same degree-20 features, now with LASSO regularisation (alpha=0.01).
lasso_reg = LassoRegression(20,0.01)
lasso_reg.fit(X_train,y_train)
y_lasso_predict = lasso_reg.predict(X_test)
# mean_squared_error is declared as (y_true, y_pred); pass the arguments in that
# order. MSE is symmetric, so the printed value is unchanged.
# NOTE(review): `mse2` is also the name used for the ridge score, so this
# assignment overwrites it. The name is kept so any later reader of `mse2`
# still sees the LASSO value; consider a distinct name.
mse2 = mean_squared_error(y_test,y_lasso_predict)
print(mse2)  # Recorded output: 1.1048334401791602

1.1048334401791602
