In [1]:
# 线性回归、SGD、SVM、KNN、DTR、集成算法RFR、ExtraRFR、GDBTR
# 从sklearn.datasets导入波士顿房价数据读取器。
from sklearn.datasets import load_boston
# 从读取房价数据存储在变量boston中。
boston = load_boston()
# 输出数据描述。
print (boston.DESCR)



Boston House Prices dataset

Notes
------
Data Set Characteristics:  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive
    
    :Median Value (attribute 14) is usually the target

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pupil-teacher ratio by town
      

In [2]:
# 从sklearn.cross_validation导入数据分割器。
from sklearn.model_selection import train_test_split

# 导入numpy并重命名为np。
import numpy as np

X = boston.data
y = boston.target


# 随机采样25%的数据构建测试样本，其余作为训练样本。
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33, test_size=0.25)

# 分析回归目标值的差异。
print ("The max target value is", np.max(boston.target))
print ("The min target value is", np.min(boston.target))
print ("The average target value is", np.mean(boston.target))


The max target value is 50.0
The min target value is 5.0
The average target value is 22.5328063241


In [3]:
# 从sklearn.preprocessing导入数据标准化模块。
from sklearn.preprocessing import StandardScaler

# 分别初始化对特征和目标值的标准化器。
ss_X = StandardScaler()
ss_y = StandardScaler()

# 分别对训练和测试数据的特征以及目标值进行标准化处理。
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)

y_train = ss_y.fit_transform(y_train)
y_test = ss_y.transform(y_test)




In [4]:
# 从sklearn.linear_model导入LinearRegression。
from sklearn.linear_model import LinearRegression

# 使用默认配置初始化线性回归器LinearRegression。
lr = LinearRegression()
# 使用训练数据进行参数估计。
lr.fit(X_train, y_train)
# 对测试数据进行回归预测。
lr_y_predict = lr.predict(X_test)



In [27]:
# 从sklearn.linear_model导入SGDRegressor。
from sklearn.linear_model import SGDRegressor

# 使用默认配置初始化线性回归器SGDRegressor。
sgdr = SGDRegressor()
# 使用训练数据进行参数估计。
sgdr.fit(X_train, y_train)
# 对测试数据进行回归预测。
sgdr_y_predict = sgdr.predict(X_test)

estimators = []
estimators.append(('LinearReg',  lr))
estimators.append(('SGD'      ,  sgdr))
print(estimators)   # 各个模型的参数

[('LinearReg', LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)), ('SGD', SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='squared_loss', n_iter=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False))]


In [52]:
# 从sklearn.linear_model导入LinearRegression。
from sklearn.linear_model import LinearRegression

# 在初始化参数基础上 调优线性回归器LinearRegression。
lr = LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
# 使用训练数据进行参数估计。
lr.fit(X_train, y_train)
# 对测试数据进行回归预测。
lr_y_predict = lr.predict(X_test)

# 从sklearn.linear_model导入SGDRegressor。
from sklearn.linear_model import SGDRegressor

# 在初始化参数基础上 调优线性回归器SGDRegressor。
sgdr = SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='squared_loss', n_iter=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False)
# 使用训练数据进行参数估计。
sgdr.fit(X_train, y_train)
# 对测试数据进行回归预测。
sgdr_y_predict = sgdr.predict(X_test)

estimators = []
estimators.append(('LinearReg',  lr))
estimators.append(('SGD'      ,  sgdr))
print(estimators)   # 各个模型的参数

lr.set_params(copy_X=True, fit_intercept=False, n_jobs=1, normalize=False)
lr.get_params(deep = True)

[('LinearReg', LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)), ('SGD', SGDRegressor(alpha=0.0001, average=False, epsilon=0.1, eta0=0.01,
       fit_intercept=True, l1_ratio=0.15, learning_rate='invscaling',
       loss='squared_loss', n_iter=5, penalty='l2', power_t=0.25,
       random_state=None, shuffle=True, verbose=0, warm_start=False))]


{'copy_X': True, 'fit_intercept': False, 'n_jobs': 1, 'normalize': False}

In [53]:
# 使用LinearRegression模型自带的评估模块，并输出评估结果。
print ('The value of default measurement of LinearRegression is',  "%.2f%%"%(100*lr.score(X_test, y_test)))

# 从sklearn.metrics依次导入r2_score、mean_squared_error以及mean_absoluate_error用于回归性能的评估。
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# 使用r2_score模块，并输出评估结果。
print ('The value of R-squared of LinearRegression is',  "%.2f%%"%(100*r2_score(y_test, lr_y_predict)))

# 使用mean_squared_error模块，并输出评估结果。
print ('The mean squared error of LinearRegression is',  "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(lr_y_predict))))  

# 使用mean_absolute_error模块，并输出评估结果。
print ('The mean absoluate error of LinearRegression is', "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(lr_y_predict)))) 

LinearReg_R2  = "%.2f%%"%(100*lr.score(X_test, y_test))
LinearReg_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(lr_y_predict)))
LinearReg_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(lr_y_predict))) 
print(LinearReg_R2 )
print(LinearReg_MSE)
print(LinearReg_MAE)

The value of default measurement of LinearRegression is 67.63%
The value of R-squared of LinearRegression is 67.63%
The mean squared error of LinearRegression is 25.10
The mean absoluate error of LinearRegression is 3.53
67.63%
25.10
3.53


In [51]:
# 使用SGDRegressor模型自带的评估模块，并输出评估结果。
print ('The value of default measurement of SGDRegressor is', "%.2f%%"%(100*sgdr.score(X_test, y_test)))

# 使用r2_score模块，并输出评估结果。
# print ('R^2 of SGDRegressor', "%.2f%%"%(100*r2_score(y_test, sgdr_y_predict)))

# 使用mean_squared_error模块，并输出评估结果。
print ('MSE of SGDRegressor', "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_y_predict))))

# 使用mean_absolute_error模块，并输出评估结果。
print ('MAE of SGDRegressor', "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_y_predict))))    

SGD_R2  = "%.2f%%"%(100*sgdr.score(X_test, y_test))
SGD_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_y_predict)))
SGD_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(sgdr_y_predict))) 
print(SGD_R2 )
print(SGD_MSE)
print(SGD_MAE)

The value of default measurement of SGDRegressor is 65.78%
MSE of SGDRegressor 26.53
MAE of SGDRegressor 3.51
65.78%
26.53
3.51


In [77]:
# 从sklearn.svm中导入支持向量机（回归）模型。
from sklearn.svm import SVR

# 使用线性核函数配置的支持向量机进行回归训练，并且对测试样本进行预测。
linear_svr = SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)
linear_svr.fit(X_train, y_train)
linear_svr_y_predict = linear_svr.predict(X_test)

# 使用多项式核函数配置的支持向量机进行回归训练，并且对测试样本进行预测。
poly_svr = SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',kernel='poly',   max_iter=-1, shrinking=True, tol=0.001, verbose=False)
poly_svr.fit(X_train, y_train)
poly_svr_y_predict = poly_svr.predict(X_test)

# 使用径向基核函数配置的支持向量机进行回归训练，并且对测试样本进行预测。
rbf_svr = SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',kernel='rbf',    max_iter=-1, shrinking=True, tol=0.001, verbose=False)
rbf_svr.fit(X_train, y_train)
rbf_svr_y_predict = rbf_svr.predict(X_test)

estimators = []
estimators.append(('linear_svr',  linear_svr))
estimators.append(('Poly_svr'  ,  poly_svr))
estimators.append(('RBF_svr'   ,  rbf_svr))
print(estimators)   # 各个模型的参数

[('linear_svr', SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)), ('Poly_svr', SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='poly', max_iter=-1, shrinking=True, tol=0.001, verbose=False)), ('RBF_svr', SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False))]


In [78]:
# 使用R-squared、MSE和MAE指标对三种配置的支持向量机（回归）模型在相同测试集上进行性能评估。
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
print ('R^2 of linear SVR', linear_svr.score(X_test, y_test))
print ('MSE of linear SVR', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict)))
print ('MAE of linear SVR', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict)))   
B11 = ("%.2f%%"%(100*linear_svr.score(X_test, y_test)),"%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict)))  
, "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict))))
print (B11)

linear_svr_R2  = "%.2f%%"%(100*linear_svr.score(X_test, y_test))
linear_svr_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict)))
linear_svr_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(linear_svr_y_predict))) 
print(linear_svr_R2 )
print(linear_svr_MSE)
print(linear_svr_MAE)

R^2 of linear SVR 0.65171709743
MSE of linear SVR 27.0063071393
MAE of linear SVR 3.42667291687
('65.17%', '27.01', '3.43')
65.17%
27.01
3.43


In [79]:
print ('R^2 of Poly SVR', poly_svr.score(X_test, y_test))
print ('MSE of Poly SVR', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict)))
print ('MAEof Poly SVR', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict)))

Poly_svr_R2  = "%.2f%%"%(100*poly_svr.score(X_test, y_test))
Poly_svr_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict)))
Poly_svr_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(poly_svr_y_predict))) 
print(Poly_svr_R2 )
print(Poly_svr_MSE)
print(Poly_svr_MAE)

R^2 of Poly SVR 0.404454058003
MSE of Poly SVR 46.179403314
MAEof Poly SVR 3.75205926674
40.45%
46.18
3.75


In [80]:
print ('R^2 of RBF SVR', rbf_svr.score(X_test, y_test))
print ('MSE of RBF SVR', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict)))
print ('MAE of RBF SVR', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict)))

RBF_svr_R2  = "%.2f%%"%(100*rbf_svr.score(X_test, y_test))
RBF_svr_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict)))
RBF_svr_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rbf_svr_y_predict))) 
print(RBF_svr_R2 )
print(RBF_svr_MSE)
print(RBF_svr_MAE)

R^2 of RBF SVR 0.756406891227
MSE of RBF SVR 18.8885250008
MAE of RBF SVR 2.60756329798
75.64%
18.89
2.61


In [12]:
# 从sklearn.neighbors导入KNeighborRegressor（K近邻回归器）。
from sklearn.neighbors import KNeighborsRegressor

# 初始化K近邻回归器，并且调整配置，使得预测的方式为平均回归：weights='uniform'。
uni_knr = KNeighborsRegressor(weights='uniform')
uni_knr.fit(X_train, y_train)
uni_knr_y_predict = uni_knr.predict(X_test)

# 初始化K近邻回归器，并且调整配置，使得预测的方式为根据距离加权回归：weights='distance'。
dis_knr = KNeighborsRegressor(weights='distance')
dis_knr.fit(X_train, y_train)
dis_knr_y_predict = dis_knr.predict(X_test)


In [13]:
# 使用R-squared、MSE以及MAE三种指标对平均回归配置的K近邻模型在测试集上进行性能评估。
print ('R^2 of uniform-weighted KNeighorRegression:', uni_knr.score(X_test, y_test))
print ('MSE of uniform-weighted KNeighorRegression:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict)))
print ('MAE of uniform-weighted KNeighorRegression', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict)))

KNR_unW_R2  = "%.2f%%"%(100*uni_knr.score(X_test, y_test))
KNR_unW_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict)))
KNR_unW_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(uni_knr_y_predict))) 
print(KNR_unW_R2 )
print(KNR_unW_MSE)
print(KNR_unW_MAE)

R^2 of uniform-weighted KNeighorRegression: 0.690345456461
MSE of uniform-weighted KNeighorRegression: 24.0110141732
MAE of uniform-weighted KNeighorRegression 2.96803149606
69.03%
24.01
2.97


In [14]:
# 使用R-squared、MSE以及MAE三种指标对根据距离加权回归配置的K近邻模型在测试集上进行性能评估。
print ('R^2 of distance-weighted KNeighorRegression:', dis_knr.score(X_test, y_test))
print ('MSE of distance-weighted KNeighorRegression:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict)))
print ('MAE of distance-weighted KNeighorRegression:', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict))	)

KNR_diW_R2  = "%.2f%%"%(100*dis_knr.score(X_test, y_test))
KNR_diW_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict)))
KNR_diW_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dis_knr_y_predict))) 
print(KNR_diW_R2 )
print(KNR_diW_MSE)
print(KNR_diW_MAE)

R^2 of distance-weighted KNeighorRegression: 0.719758997016
MSE of distance-weighted KNeighorRegression: 21.7302501609
MAE of distance-weighted KNeighorRegression: 2.80505687851
71.98%
21.73
2.81


In [15]:
# 从sklearn.tree中导入DecisionTreeRegressor。
from sklearn.tree import DecisionTreeRegressor
# 使用默认配置初始化DecisionTreeRegressor。
dtr = DecisionTreeRegressor()
# 用波士顿房价的训练数据构建回归树。
dtr.fit(X_train, y_train)
# 使用默认配置的单一回归树对测试数据进行预测，并将预测值存储在变量dtr_y_predict中。
dtr_y_predict = dtr.predict(X_test)


In [16]:
# 使用R-squared、MSE以及MAE指标对默认配置的回归树在测试集上进行性能评估。
print ('R^2 of DecisionTreeRegressor:', dtr.score(X_test, y_test))
print ('MSE of DecisionTreeRegressor:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict)))
print ('MAE of DecisionTreeRegressor:', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict)))

DTR_R2  = "%.2f%%"%(100*dtr.score(X_test, y_test))
DTR_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict)))
DTR_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y_predict))) 
print(DTR_R2 )
print(DTR_MSE)
print(DTR_MAE)

R^2 of DecisionTreeRegressor: 0.531796337487
MSE of DecisionTreeRegressor: 36.3051181102
MAE of DecisionTreeRegressor: 3.45433070866
53.18%
36.31
3.45


In [17]:
# 从sklearn.ensemble中导入RandomForestRegressor、ExtraTreesGressor以及GradientBoostingRegressor。
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, ExtraTreesRegressor, AdaBoostRegressor,GradientBoostingRegressor

# 使用BaggingRegressor训练模型，并对测试数据做出预测，结果存储在变量bagr_y_predict中。
bagr = BaggingRegressor()
bagr.fit(X_train, y_train)
bagr_y_predict = bagr.predict(X_test)

# 使用RandomForestRegressor训练模型，并对测试数据做出预测，结果存储在变量rfr_y_predict中。
rfr = RandomForestRegressor()
rfr.fit(X_train, y_train)
rfr_y_predict = rfr.predict(X_test)

# 使用ExtraTreesRegressor训练模型，并对测试数据做出预测，结果存储在变量etr_y_predict中。
etr = ExtraTreesRegressor()
etr.fit(X_train, y_train)
etr_y_predict = etr.predict(X_test)

# 使用AdaBoostRegressor训练模型，并对测试数据做出预测，结果存储在变量gbr_y_predict中。
AdaBoostr = AdaBoostRegressor()
AdaBoostr.fit(X_train, y_train)
AdaBoostr_y_predict = AdaBoostr.predict(X_test)

# 使用GradientBoostingRegressor训练模型，并对测试数据做出预测，结果存储在变量gbr_y_predict中。
gbr = GradientBoostingRegressor()
gbr.fit(X_train, y_train)
gbr_y_predict = gbr.predict(X_test)


In [18]:
# 使用R-squared、MSE以及MAE指标对默认配置的Bagging装袋回归在测试集上进行性能评估。
print ('R^2 of BaggingRegressor:', bagr.score(X_test, y_test))
print ('MSE of BaggingRegressor:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(bagr_y_predict)))
print ('MAE of BaggingRegressor:', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(bagr_y_predict)))

BagR_R2  = "%.2f%%"%(100*bagr.score(X_test, y_test))
BagR_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(bagr_y_predict)))
BagR_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(bagr_y_predict))) 
print(BagR_R2 )
print(BagR_MSE)
print(BagR_MAE)

R^2 of BaggingRegressor: 0.850978768438
MSE of BaggingRegressor: 11.5552992126
MAE of BaggingRegressor: 2.28708661417
85.10%
11.56
2.29


In [19]:
# 使用R-squared、MSE以及MAE指标对默认配置的随机回归森林在测试集上进行性能评估。
print ('R^2 of RandomForestRegressor:', rfr.score(X_test, y_test))
print ('MSE of RandomForestRegressor:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y_predict)))
print ('MAE of RandomForestRegressor:', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y_predict)))

RFR_R2  = "%.2f%%"%(100*rfr.score(X_test, y_test))
RFR_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y_predict)))
RFR_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y_predict))) 
print(RFR_R2 )
print(RFR_MSE)
print(RFR_MAE)

R^2 of RandomForestRegressor: 0.800446800984
MSE of RandomForestRegressor: 15.4736133858
MAE of RandomForestRegressor: 2.4605511811
80.04%
15.47
2.46


In [20]:
# 使用R-squared、MSE以及MAE指标对默认配置的极端回归森林在测试集上进行性能评估。
print ('R^2 of ExtraTreesRegessor:', etr.score(X_test, y_test))
print ('MSE of  ExtraTreesRegessor:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y_predict)))
print ('MAE of ExtraTreesRegessor:', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y_predict)))

# 利用训练好的极端回归森林模型，输出每种特征对预测目标的贡献度。
# print (np.sort(zip(etr.feature_importances_, boston.feature_names), axis=0))

ETR_R2  = "%.2f%%"%(100*etr.score(X_test, y_test))
ETR_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y_predict)))
ETR_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y_predict))) 
print(ETR_R2 )
print(ETR_MSE)
print(ETR_MAE)

R^2 of ExtraTreesRegessor: 0.792313344563
MSE of  ExtraTreesRegessor: 16.104292126
MAE of ExtraTreesRegessor: 2.48622047244
79.23%
16.10
2.49


In [21]:
# 使用R-squared、MSE以及MAE指标对默认配置的AdaBoost提升回归在测试集上进行性能评估。
print ('R^2 of AdaBoostRegressor:', AdaBoostr.score(X_test, y_test))
print ('MSE of AdaBoostRegressor:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(AdaBoostr_y_predict)))
print ('MAE of AdaBoostRegressor:', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(AdaBoostr_y_predict)))

AdaBoostr_R2  = "%.2f%%"%(100*AdaBoostr.score(X_test, y_test))
AdaBoostr_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(AdaBoostr_y_predict)))
AdaBoostr_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(AdaBoostr_y_predict))) 
print(AdaBoostr_R2 )
print(AdaBoostr_MSE)
print(AdaBoostr_MAE)

R^2 of AdaBoostRegressor: 0.781390995082
MSE of AdaBoostRegressor: 16.9512252444
MAE of AdaBoostRegressor: 2.91340023548
78.14%
16.95
2.91


In [22]:
# 使用R-squared、MSE以及MAE指标对默认配置的梯度提升回归树在测试集上进行性能评估。
print ('R^2 of GradientBoostingRegressor:', gbr.score(X_test, y_test))
print ('MSE of GradientBoostingRegressor:', mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y_predict)))
print ('MAE of GradientBoostingRegressor:', mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y_predict)))

GBR_R2  = "%.2f%%"%(100*gbr.score(X_test, y_test))
GBR_MSE = "%.2f"%(mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y_predict)))
GBR_MAE = "%.2f"%(mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y_predict))) 
print(GBR_R2 )
print(GBR_MSE)
print(GBR_MAE)

R^2 of GradientBoostingRegressor: 0.841947632652
MSE of GradientBoostingRegressor: 12.2555851729
MAE of GradientBoostingRegressor: 2.28001916821
84.19%
12.26
2.28


In [23]:
# 综合打印输出比较、评估结果。
# print ('Index','线性回归','随机梯度','SVM(线性核)','SVM(多项核)','SVM(高斯核)') 
# print ('R^2  ', "%.2f%%  "%(100*lr.score(X_test, y_test)), "%.2f%%  "%(100*sgdr.score(X_test, y_test)),
#        "%.2f%%     "%(100*linear_svr.score(X_test, y_test)),"%.2f%%     "%(100*poly_svr.score(X_test, y_test)),"%.2f%%     "%(100*rbf_svr.score(X_test, y_test)))
print ('模型和参数  ','R^2   ' , 'MSE  ','MAE  ')
print ('LinearReg   ',LinearReg_R2 ,LinearReg_MSE ,LinearReg_MAE )   
print ('SGD         ',SGD_R2       ,SGD_MSE       ,SGD_MAE       ) 
print ('linear_svr  ',linear_svr_R2,linear_svr_MSE,linear_svr_MAE)
print ('Poly_svr    ',Poly_svr_R2  ,Poly_svr_MSE  ,Poly_svr_MAE  )
print ('RBF_svr     ',RBF_svr_R2   ,RBF_svr_MSE   ,RBF_svr_MAE   )
print ('KNR_unW     ',KNR_unW_R2   ,KNR_unW_MSE   ,KNR_unW_MAE   )
print ('KNR_diW     ',KNR_diW_R2   ,KNR_diW_MSE   ,KNR_diW_MAE   )
print ('DTR         ',DTR_R2       ,DTR_MSE       ,DTR_MAE       )
print ('BagR        ',BagR_R2      ,BagR_MSE      ,BagR_MAE      )
print ('RFR         ',RFR_R2       ,RFR_MSE       ,RFR_MAE       )
print ('ETR         ',ETR_R2       ,ETR_MSE       ,ETR_MAE       )
print ('AdaBoostr   ',AdaBoostr_R2 ,AdaBoostr_MSE ,AdaBoostr_MAE )
print ('GBR         ',GBR_R2       ,GBR_MSE       ,GBR_MAE       )

模型和参数   R^2    MSE   MAE  
LinearReg    67.63% 25.10 3.53
SGD          65.67% 26.62 3.50
linear_svr   65.17% 27.01 3.43
Poly_svr     40.45% 46.18 3.75
RBF_svr      75.64% 18.89 2.61
KNR_unW      69.03% 24.01 2.97
KNR_diW      71.98% 21.73 2.81
DTR          53.18% 36.31 3.45
BagR         85.10% 11.56 2.29
RFR          80.04% 15.47 2.46
ETR          79.23% 16.10 2.49
AdaBoostr    78.14% 16.95 2.91
GBR          84.19% 12.26 2.28


In [24]:
import sklearn
help(sklearn.ensemble.AdaBoostRegressor)

Help on class AdaBoostRegressor in module sklearn.ensemble.weight_boosting:

class AdaBoostRegressor(BaseWeightBoosting, sklearn.base.RegressorMixin)
 |  An AdaBoost regressor.
 |  
 |  An AdaBoost [1] regressor is a meta-estimator that begins by fitting a
 |  regressor on the original dataset and then fits additional copies of the
 |  regressor on the same dataset but where the weights of instances are
 |  adjusted according to the error of the current prediction. As such,
 |  subsequent regressors focus more on difficult cases.
 |  
 |  This class implements the algorithm known as AdaBoost.R2 [2].
 |  
 |  Read more in the :ref:`User Guide <adaboost>`.
 |  
 |  Parameters
 |  ----------
 |  base_estimator : object, optional (default=DecisionTreeRegressor)
 |      The base estimator from which the boosted ensemble is built.
 |      Support for sample weighting is required.
 |  
 |  n_estimators : integer, optional (default=50)
 |      The maximum number of estimators at which boosting

In [36]:
# import sklearn
help(sklearn.linear_model.LinearRegression)

Help on class LinearRegression in module sklearn.linear_model.base:

class LinearRegression(LinearModel, sklearn.base.RegressorMixin)
 |  Ordinary least squares Linear Regression.
 |  
 |  Parameters
 |  ----------
 |  fit_intercept : boolean, optional
 |      whether to calculate the intercept for this model. If set
 |      to false, no intercept will be used in calculations
 |      (e.g. data is expected to be already centered).
 |  
 |  normalize : boolean, optional, default False
 |      If True, the regressors X will be normalized before regression.
 |      This parameter is ignored when `fit_intercept` is set to False.
 |      When the regressors are normalized, note that this makes the
 |      hyperparameters learnt more robust and almost independent of the number
 |      of samples. The same property is not valid for standardized data.
 |      However, if you wish to standardize, please use
 |      `preprocessing.StandardScaler` before calling `fit` on an estimator
 |      with