# stacking集成算法
## 1 底层算法

In [1]:
from mlxtend.regressor import StackingCVRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import KFold
import xgboost as xgb
from sklearn.model_selection import train_test_split as tts
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
kfolds = KFold(n_splits=10, shuffle=True, random_state=123)
alphas_alt = np.logspace(-10, 2.8, 150)
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=alphas_alt, cv=kfolds))
lasso = make_pipeline(RobustScaler(), LassoCV(alphas=alphas_alt, cv=kfolds))
elasticnet = make_pipeline(RobustScaler(), ElasticNetCV(alphas=alphas_alt, cv=kfolds))
xgboost = make_pipeline(RobustScaler(), xgb.XGBRegressor(
                            objective='reg:linear',
                            colsample_bytree=0.7,
                            learning_rate=0.01,
                            max_depth=3,
                            n_estimators=3000,
                            subsample=0.7,
                            reg_alpha=0.0006,
                            nthread=6,
                            gamma=0,
                            scale_pos_weight=1,
                            seed=123))

## 2 上层算法

In [8]:
stack_alg = StackingCVRegressor(regressors=(ridge,lasso,elasticnet,xgboost),meta_regressor=xgboost)

## 3 训练

In [9]:
train = pd.read_csv("./data/train_1.csv")

In [10]:
if 'Unnamed: 0' in train.columns: 
    train = train.drop(['Unnamed: 0'], axis=1)
y = train["SalePrice"]
train1 = train.drop(["Id", "SalePrice"], axis=1)
X = pd.get_dummies(train1).reset_index(drop=True)
X_train, X_test, y_train, y_test = tts(X,y,test_size=0.2,random_state = 123)  

In [11]:
stackX = np.array(X_train)
stacky = np.array(y_train)

In [13]:
stack_alg.fit(stackX, stacky)

StackingCVRegressor(cv=5,
          meta_regressor=Pipeline(memory=None,
     steps=[('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
       with_scaling=True)), ('xgbregressor', XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.7, gamma=0, learning_rate=0.01, max_delta_step=0,
       max_de...  reg_alpha=0.0006, reg_lambda=1, scale_pos_weight=1, seed=123,
       silent=True, subsample=0.7))]),
          refit=True,
          regressors=(Pipeline(memory=None,
     steps=[('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
       with_scaling=True)), ('ridgecv', RidgeCV(alphas=array([1.00000e-10, 1.21873e-10, ..., 5.17719e+02, 6.30957e+02]),
    cv=KFold(n_splits=10, random_state=123...reg_alpha=0.0006, reg_lambda=1, scale_pos_weight=1, seed=123,
       silent=True, subsample=0.7))])),
          shuffle=True, store_train_meta_features=False,
          use_features

In [14]:
from sklearn.metrics import mean_squared_error

In [15]:
def benchmark(model,testset,label):
    pred=model.predict(testset)
    if pred[pred<0].shape[0]>0:
        print("Neg Value")
        return -1
    rmse=np.sqrt(mean_squared_error(label,pred))
    lrmse=np.sqrt(mean_squared_error(np.log(label),np.log(abs(pred))))
    print("RMSE:",rmse)
    print("LRMSE:",lrmse)
    return lrmse

In [16]:
benchmark(stack_alg, X_test, y_test)

RMSE: 21530.542931559998
LRMSE: 0.1011383813043952


0.1011383813043952

In [17]:
X_train.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
318,60,90.0,9900,7,5,1993,1993,256.0,987,0,...,0,0,0,1,0,0,0,0,1,0
580,20,72.461024,14585,6,6,1960,1987,85.0,594,219,...,0,0,0,1,0,0,0,0,1,0
961,60,66.34546,12227,6,7,1977,1995,424.0,896,0,...,0,0,0,1,0,0,0,0,1,0
78,90,72.0,10778,4,5,1968,1968,0.0,0,0,...,0,0,0,1,0,0,0,0,1,0
5,50,85.0,14115,5,5,1993,1995,0.0,732,0,...,0,0,0,1,0,0,0,0,1,0
