In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

house = load_boston()
df = pd.DataFrame(data=house.data, 
                  columns=house.feature_names)
df['MEDV'] = house.target

X = df.iloc[:, :-1].values
y = df['MEDV'].values

In [2]:
# 加入二次與三次的多項式特徵
X_poly = np.hstack((X**(i+1) for i in range(0, 3)))

# 特徵標準化
from sklearn.preprocessing import StandardScaler

scalar = StandardScaler()
X_std = scalar.fit_transform(X_poly)
X_std[:10, 0]

array([-0.41978194, -0.41733926, -0.41734159, -0.41675042, -0.41248185,
       -0.41704368, -0.41024282, -0.40369567, -0.39593474, -0.40072931])

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_std, y,
                                                    test_size=0.2, 
                                                    random_state=0)
print(X_train.shape)
print(X_test.shape)

(404, 39)
(102, 39)


In [4]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV, LassoCV

lst_model = [LinearRegression().fit(X_train, y_train), 
             LassoCV().fit(X_train, y_train), 
             RidgeCV().fit(X_train, y_train)]

In [5]:
from sklearn.metrics import r2_score
# 計算 Adjusted R-squared
def adj_R2(r2, n, k):
    return r2-(k-1)/(n-k)*(1-r2)

lst_r2 = []
for reg in lst_model:
    y_pred = reg.predict(X_test)
    r2 = r2_score(y_pred, y_test)
    lst_r2.append(adj_R2(r2, X_test.shape[0], X_test.shape[1]))
    
print('多項式迴歸：Adj. R^2 =', lst_r2[0])
print('多項式迴歸+L1正規化：Adj. R^2 =', lst_r2[1])
print('多項式迴歸+L2正規化：Adj. R^2 =', lst_r2[2])

多項式迴歸：Adj. R^2 = 0.25573394671443955
多項式迴歸+L1正規化：Adj. R^2 = 0.32820715948665974
多項式迴歸+L2正規化：Adj. R^2 = 0.33069213668499164
