In [1]:
# Load the Boston housing data and assemble it into a single DataFrame.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run.
from sklearn.datasets import load_boston
import pandas as pd

boston = load_boston()
features, target = boston.data, boston.target

# Feature columns are labelled with the dataset's own names; the target is
# appended as the "MEDV" column.
df_boston = pd.DataFrame(features, columns=boston.feature_names).assign(MEDV=target)
df_boston.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [2]:
print(boston.DESCR) # show the dataset's built-in description text

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [9]:
# Standardize the features and the target with separate scalers.
from sklearn.preprocessing import StandardScaler

ss_x = StandardScaler()
ss_y = StandardScaler()

# Read both inputs from df_boston instead of the loose `features` / `target`
# names: after the loading cell `target` is a NumPy array (no `.values`
# attribute), and a later cell rebinds both names to pandas objects, so the
# previous code only ran when cells were executed out of order.
s_features = ss_x.fit_transform(df_boston.drop(columns="MEDV").values)
# StandardScaler works on 2-D input, so the target is reshaped to one column.
s_target = ss_y.fit_transform(df_boston["MEDV"].values.reshape(-1, 1))

# NOTE(review): both scalers are fitted on the full dataset before the
# train/test split, so test-set statistics influence the scaling.


In [10]:
# Split the data into training and test sets.
from sklearn.model_selection import train_test_split

# Rebind `target` / `features` to the pandas views of the frame.
# The split below does not use them; it works on the standardized arrays.
target = df_boston['MEDV']
features = df_boston.loc[:, df_boston.columns != 'MEDV']

x_train, x_test, y_train, y_test = train_test_split(
    s_features,
    s_target,
    test_size=0.3,
    random_state=0,
)

In [11]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

# Linear regression: fit on the training split, predict the test split.
lr_model = LinearRegression().fit(x_train, y_train)
lr_y_pred = lr_model.predict(x_test)

# MSE and MAE are computed after undoing the target scaling with ss_y;
# R2 is computed directly on the standardized values.
lr_MSE, lr_MAE = (
    metric(ss_y.inverse_transform(y_test), ss_y.inverse_transform(lr_y_pred))
    for metric in (mean_squared_error, mean_absolute_error)
)
lr_R2 = r2_score(y_test, lr_y_pred)

print("lr_MSE:\t",lr_MSE)
print("lr_MAE:\t",lr_MAE)
print("lr_R2:\t",lr_R2)

lr_MSE:	 27.195965766883198
lr_MAE:	 3.6099040603818127
lr_R2:	 0.6733825506400195


In [45]:
# Train an SVR with a linear kernel, predict the test split and evaluate it.
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

l_svr = SVR(kernel='linear')
# y_train is an (n, 1) column (the target was reshaped for StandardScaler).
# Flatten it explicitly so fit() does not emit a DataConversionWarning.
l_svr.fit(x_train, y_train.ravel())
l_svr_y_pred = l_svr.predict(x_test)

# ss_y was fitted on a single-column 2-D array, so the predictions are
# reshaped into a column before the scaling is undone.
svr_linear_MSE = mean_squared_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(l_svr_y_pred.reshape(-1, 1)),
)
svr_linear_MAE = mean_absolute_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(l_svr_y_pred.reshape(-1, 1)),
)
# R2 is computed on the standardized values.
svr_linear_R2 = r2_score(y_test,l_svr_y_pred)

print("svr_linear_MSE:\t",svr_linear_MSE)
print("svr_linear_MAE:\t",svr_linear_MAE)
print("svr_linear_R2:\t",svr_linear_R2)

svr_linear_MSE:	 31.5155456826
svr_linear_MAE:	 3.54110394231
svr_linear_R2:	 0.621505364646


  y = column_or_1d(y, warn=True)


In [46]:
# Train an SVR with an RBF kernel, predict the test split and evaluate it.
r_svr = SVR(kernel='rbf')
# y_train is an (n, 1) column (the target was reshaped for StandardScaler).
# Flatten it explicitly so fit() does not emit a DataConversionWarning.
r_svr.fit(x_train, y_train.ravel())
r_svr_y_pred = r_svr.predict(x_test)

# ss_y was fitted on a single-column 2-D array, so the predictions are
# reshaped into a column before the scaling is undone.
svr_rbf_MSE = mean_squared_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(r_svr_y_pred.reshape(-1, 1)),
)
svr_rbf_MAE = mean_absolute_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(r_svr_y_pred.reshape(-1, 1)),
)
# R2 is computed on the standardized values.
svr_rbf_R2 = r2_score(y_test,r_svr_y_pred)

print("svr_rbf_MSE:\t",svr_rbf_MSE)
print("svr_rbf_MAE:\t",svr_rbf_MAE)
print("svr_rbf_R2:\t",svr_rbf_R2)

svr_rbf_MSE:	 20.969826622
svr_rbf_MAE:	 2.69814096739
svr_rbf_R2:	 0.748157085374


  y = column_or_1d(y, warn=True)


In [47]:
# Train an SVR with a polynomial kernel, predict the test split and evaluate it.
p_svr = SVR(kernel='poly')
# y_train is an (n, 1) column (the target was reshaped for StandardScaler).
# Flatten it explicitly so fit() does not emit a DataConversionWarning.
p_svr.fit(x_train, y_train.ravel())
p_svr_y_pred = p_svr.predict(x_test)

# ss_y was fitted on a single-column 2-D array, so the predictions are
# reshaped into a column before the scaling is undone.
svr_poly_MSE = mean_squared_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(p_svr_y_pred.reshape(-1, 1)),
)
svr_poly_MAE = mean_absolute_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(p_svr_y_pred.reshape(-1, 1)),
)
# R2 is computed on the standardized values.
svr_poly_R2 = r2_score(y_test,p_svr_y_pred)

print("svr_poly_MSE:\t",svr_poly_MSE)
print("svr_poly_MAE:\t",svr_poly_MAE)
print("svr_poly_R2:\t",svr_poly_R2)

svr_poly_MSE:	 22.8817510876
svr_poly_MAE:	 3.07137528017
svr_poly_R2:	 0.725195301348


  y = column_or_1d(y, warn=True)


In [48]:
# Decision-tree regression: train, predict the test split and evaluate.
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

# Fix the seed so the fitted tree, and therefore the reported metrics, are
# reproducible across runs (same seed as the train/test split).
dtr = DecisionTreeRegressor(random_state=0)
dtr.fit(x_train, y_train)
dtr_y_pred = dtr.predict(x_test)

# ss_y was fitted on a single-column 2-D array. reshape(-1, 1) guarantees the
# predictions have that layout whether they come back 1-D or as a column.
dtr_MSE = mean_squared_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(dtr_y_pred.reshape(-1, 1)),
)
dtr_MAE = mean_absolute_error(
    ss_y.inverse_transform(y_test),
    ss_y.inverse_transform(dtr_y_pred.reshape(-1, 1)),
)
# R2 is computed on the standardized values.
dtr_R2 = r2_score(y_test,dtr_y_pred)

print("dtr_MSE:\t",dtr_MSE)
print("dtr_MAE:\t",dtr_MAE)
print("dtr_R2:\t",dtr_R2)

dtr_MSE:	 26.8713815789
dtr_MAE:	 3.14407894737
dtr_R2:	 0.67728073394


In [49]:
# K-nearest-neighbours regression: train, predict the test split and evaluate.
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

knr = KNeighborsRegressor().fit(x_train, y_train)
knr_y_pred = knr.predict(x_test)

# MSE and MAE are computed after undoing the target scaling with ss_y;
# R2 is computed directly on the standardized values.
knr_MSE = mean_squared_error(
    y_true=ss_y.inverse_transform(y_test),
    y_pred=ss_y.inverse_transform(knr_y_pred),
)
knr_MAE = mean_absolute_error(
    y_true=ss_y.inverse_transform(y_test),
    y_pred=ss_y.inverse_transform(knr_y_pred),
)
knr_R2 = r2_score(y_true=y_test, y_pred=knr_y_pred)

print("knr_MSE:\t",knr_MSE)
print("knr_MAE:\t",knr_MAE)
print("knr_R2:\t",knr_R2)

knr_MSE:	 27.8072921053
knr_MAE:	 3.26828947368
knr_R2:	 0.666040658424


In [53]:
# Gather the metrics of every fitted model into one comparison table.
models = pd.DataFrame(dict(
    Model=['LinearRegression', 'SVR(linear)', 'SVR(rbf)', 'SVR(poly)', 'DecisionTreeRegressor', 'KNeighborsRegressor'],
    MAE=[lr_MAE, svr_linear_MAE, svr_rbf_MAE, svr_poly_MAE, dtr_MAE, knr_MAE],
    MSE=[lr_MSE, svr_linear_MSE, svr_rbf_MSE, svr_poly_MSE, dtr_MSE, knr_MSE],
    R2=[lr_R2, svr_linear_R2, svr_rbf_R2, svr_poly_R2, dtr_R2, knr_R2],
))

# Display the table ordered by MAE, lowest first.
models.sort_values('MAE')

Unnamed: 0,MAE,MSE,Model,R2
2,2.698141,20.969827,SVR(rbf),0.748157
3,3.071375,22.881751,SVR(poly),0.725195
4,3.144079,26.871382,DecisionTreeRegressor,0.677281
5,3.268289,27.807292,KNeighborsRegressor,0.666041
1,3.541104,31.515546,SVR(linear),0.621505
0,3.609904,27.195966,LinearRegression,0.673383
