## XGBoost

In [610]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV

#modeling
import xgboost as xgb

In [513]:
train = pd.read_csv('data/train_preProcess.csv')
test = pd.read_csv('data/test_preProcess.csv')

In [515]:
train_y = train.SalePrice
train.drop(['SalePrice'], axis=1, inplace=True)

train.drop(['Id'], axis=1, inplace=True)

test_id = test['Id']
test.drop(['Id'], axis=1, inplace=True)

df = pd.concat([train, test], axis= 0)

In [398]:
#data dimension
df.shape

(2916, 79)

### Randomized Grid Search

In [577]:
gbm = xgb.XGBRegressor(nthread=4, random_state=123)

gbm_param_grid = {
    'learning_rate': np.arange(0, 0.15, 0.01),
    'gamma': np.arange(0.001, 0.025, 0.001),
    'max_depth': np.arange(2, 10, 1),
    'n_estimators': np.arange(15, 2000, 50),
    "colsample_bytree": np.arange(0.05, 0.15, 0.01)
}

randomized_reg = RandomizedSearchCV(estimator=gbm, param_distributions= gbm_param_grid, n_iter=100, cv=10,
                                    scoring="neg_mean_squared_error", verbose = 20)

In [578]:
randomized_reg.fit(train, train_y)

Fitting 10 folds for each of 100 candidates, totalling 1000 fits
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 
[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.015099886651343321, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.5s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.012681332990380888, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.2s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.015320000037149948, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    1.8s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.019147653421890905, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    2.5s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.021294436584724154, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    3.1s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.013764452421297686, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    3.7s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.016433805953595358, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    4.4s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.01130387043979422, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    5.0s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.009776599498510181, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001 


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    5.6s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.05, n_estimators=1165, max_depth=9, gamma=0.001, score=-0.016466766072270583, total=   0.5s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 
[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.016390306925386504, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 


[Parallel(n_jobs=1)]: Done  10 out of  10 | elapsed:    6.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  11 out of  11 | elapsed:    6.4s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.010579121789369195, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 
[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.012987666884847343, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 


[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:    6.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    6.7s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.017441760523543303, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 
[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.020103864604606756, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 


[Parallel(n_jobs=1)]: Done  14 out of  14 | elapsed:    6.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  15 out of  15 | elapsed:    7.0s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.012745917626682285, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 
[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.014634898669091203, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 


[Parallel(n_jobs=1)]: Done  16 out of  16 | elapsed:    7.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  17 out of  17 | elapsed:    7.3s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.009756211332775093, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 
[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.008525309593854885, total=   0.0s
[CV] colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014 


[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed:    7.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  19 out of  19 | elapsed:    7.5s remaining:    0.0s


[CV]  colsample_bytree=0.07, learning_rate=0.1, n_estimators=665, max_depth=2, gamma=0.014, score=-0.018495931087038918, total=   0.0s
[CV] colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, gamma=0.001 
[CV]  colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, gamma=0.001, score=-0.017352687403651604, total=   0.2s
[CV] colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, gamma=0.001 
[CV]  colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, gamma=0.001, score=-0.011123777580610528, total=   0.2s
[CV] colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, gamma=0.001 
[CV]  colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, gamma=0.001, score=-0.01409005254751267, total=   0.2s
[CV] colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, gamma=0.001 
[CV]  colsample_bytree=0.06, learning_rate=0.06, n_estimators=1115, max_depth=4, ga

[CV]  colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022, score=-0.011619201283556426, total=   0.4s
[CV] colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022, score=-0.015137834410944085, total=   0.4s
[CV] colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022, score=-0.012070518521691843, total=   0.4s
[CV] colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022, score=-0.009948928156563718, total=   0.4s
[CV] colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.12, n_estimators=1565, max_depth=5,

[CV]  colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011, score=-0.012238232801917388, total=   0.3s
[CV] colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011, score=-0.013982048764065464, total=   0.3s
[CV] colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011, score=-0.018046817189495874, total=   0.4s
[CV] colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011, score=-0.021179970288067704, total=   0.4s
[CV] colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.1, learning_rate=0.01, n_estimators=1115, max_depth=6, gamma=0.

[CV]  colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016, score=-0.016544777067272334, total=   0.3s
[CV] colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016 
[CV]  colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016, score=-0.009944698743320353, total=   0.3s
[CV] colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016 
[CV]  colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016, score=-0.013261793260620361, total=   0.3s
[CV] colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016 
[CV]  colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016, score=-0.018078037248978412, total=   0.3s
[CV] colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4, gamma=0.016 
[CV]  colsample_bytree=0.05, learning_rate=0.11, n_estimators=1415, max_depth=4,

[CV]  colsample_bytree=0.06, learning_rate=0.1, n_estimators=1415, max_depth=7, gamma=0.002, score=-0.016461614704977937, total=   0.5s
[CV] colsample_bytree=0.06, learning_rate=0.1, n_estimators=1415, max_depth=7, gamma=0.002 
[CV]  colsample_bytree=0.06, learning_rate=0.1, n_estimators=1415, max_depth=7, gamma=0.002, score=-0.012174968894740533, total=   0.5s
[CV] colsample_bytree=0.06, learning_rate=0.1, n_estimators=1415, max_depth=7, gamma=0.002 
[CV]  colsample_bytree=0.06, learning_rate=0.1, n_estimators=1415, max_depth=7, gamma=0.002, score=-0.010961655512713163, total=   0.5s
[CV] colsample_bytree=0.06, learning_rate=0.1, n_estimators=1415, max_depth=7, gamma=0.002 
[CV]  colsample_bytree=0.06, learning_rate=0.1, n_estimators=1415, max_depth=7, gamma=0.002, score=-0.01813101298179923, total=   0.5s
[CV] colsample_bytree=0.05, learning_rate=0.04, n_estimators=215, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.05, learning_rate=0.04, n_estimators=215, max_depth=3, gamma=0.0

[CV]  colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023, score=-0.016642643912713806, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023 
[CV]  colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023, score=-0.020915941131499083, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023 
[CV]  colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023, score=-0.011794959714586453, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023 
[CV]  colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023, score=-0.013841363941848638, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.023 
[CV]  colsample_bytree=0.07, learning_rate=0.14, n_estimators=715, max_depth=3, gamma=0.

[CV]  colsample_bytree=0.06, learning_rate=0.07, n_estimators=1265, max_depth=3, gamma=0.019, score=-0.016511304095247245, total=   0.2s
[CV] colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4, gamma=0.022, score=-0.014502959526675479, total=   0.3s
[CV] colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4, gamma=0.022, score=-0.012025145496385023, total=   0.3s
[CV] colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4, gamma=0.022, score=-0.013713035068816798, total=   0.3s
[CV] colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4, gamma=0.022 
[CV]  colsample_bytree=0.06, learning_rate=0.01, n_estimators=1615, max_depth=4,

[CV]  colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007, score=-0.012450784952958696, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007 
[CV]  colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007, score=-0.015431086136113195, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007 
[CV]  colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007, score=-0.010830567043773021, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007 
[CV]  colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007, score=-0.009556256696478319, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.007 
[CV]  colsample_bytree=0.06, learning_rate=0.11, n_estimators=715, max_depth=4, gamma=0.

[CV]  colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007, score=-0.011807310624365549, total=   0.1s
[CV] colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007 
[CV]  colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007, score=-0.013103528202617799, total=   0.1s
[CV] colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007 
[CV]  colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007, score=-0.017348095265536028, total=   0.1s
[CV] colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007 
[CV]  colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007, score=-0.021598670491763658, total=   0.1s
[CV] colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.007 
[CV]  colsample_bytree=0.08, learning_rate=0.14, n_estimators=665, max_depth=5, gamma=0.

[CV]  colsample_bytree=0.08, learning_rate=0.09, n_estimators=1715, max_depth=3, gamma=0.007, score=-0.010076788811497962, total=   0.3s
[CV] colsample_bytree=0.08, learning_rate=0.09, n_estimators=1715, max_depth=3, gamma=0.007 
[CV]  colsample_bytree=0.08, learning_rate=0.09, n_estimators=1715, max_depth=3, gamma=0.007, score=-0.00836330356822996, total=   0.3s
[CV] colsample_bytree=0.08, learning_rate=0.09, n_estimators=1715, max_depth=3, gamma=0.007 
[CV]  colsample_bytree=0.08, learning_rate=0.09, n_estimators=1715, max_depth=3, gamma=0.007, score=-0.017153366983904586, total=   0.3s
[CV] colsample_bytree=0.08, learning_rate=0.02, n_estimators=215, max_depth=4, gamma=0.012 
[CV]  colsample_bytree=0.08, learning_rate=0.02, n_estimators=215, max_depth=4, gamma=0.012, score=-0.041039847536362035, total=   0.0s
[CV] colsample_bytree=0.08, learning_rate=0.02, n_estimators=215, max_depth=4, gamma=0.012 
[CV]  colsample_bytree=0.08, learning_rate=0.02, n_estimators=215, max_depth=4, gamm

[CV]  colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018, score=-0.017790061347297603, total=   0.3s
[CV] colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018 
[CV]  colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018, score=-0.019835002475555495, total=   0.3s
[CV] colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018 
[CV]  colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018, score=-0.01223118593375712, total=   0.3s
[CV] colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018 
[CV]  colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018, score=-0.01479678237311692, total=   0.4s
[CV] colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, gamma=0.018 
[CV]  colsample_bytree=0.05, learning_rate=0.07, n_estimators=1565, max_depth=5, g

[CV]  colsample_bytree=0.1, learning_rate=0.12, n_estimators=915, max_depth=3, gamma=0.023, score=-0.016203171544675835, total=   0.1s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019, score=-132.3653391673584, total=   0.0s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019, score=-134.02984583135927, total=   0.0s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019, score=-132.91188026036113, total=   0.0s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=765, max_depth=9, gamma=0.019, score=-133.

[CV]  colsample_bytree=0.13, learning_rate=0.06, n_estimators=965, max_depth=7, gamma=0.019, score=-0.015365979758031788, total=   0.4s
[CV] colsample_bytree=0.13, learning_rate=0.06, n_estimators=965, max_depth=7, gamma=0.019 
[CV]  colsample_bytree=0.13, learning_rate=0.06, n_estimators=965, max_depth=7, gamma=0.019, score=-0.010401322105220864, total=   0.4s
[CV] colsample_bytree=0.13, learning_rate=0.06, n_estimators=965, max_depth=7, gamma=0.019 
[CV]  colsample_bytree=0.13, learning_rate=0.06, n_estimators=965, max_depth=7, gamma=0.019, score=-0.011067120926186594, total=   0.4s
[CV] colsample_bytree=0.13, learning_rate=0.06, n_estimators=965, max_depth=7, gamma=0.019 
[CV]  colsample_bytree=0.13, learning_rate=0.06, n_estimators=965, max_depth=7, gamma=0.019, score=-0.01666756495832708, total=   0.4s
[CV] colsample_bytree=0.11, learning_rate=0.13, n_estimators=315, max_depth=9, gamma=0.014 
[CV]  colsample_bytree=0.11, learning_rate=0.13, n_estimators=315, max_depth=9, gamma=0.0

[CV]  colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024, score=-132.91188026036113, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024 
[CV]  colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024, score=-133.2200831987446, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024 
[CV]  colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024, score=-133.04943610921782, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024 
[CV]  colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024, score=-133.4964230802154, total=   0.1s
[CV] colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024 
[CV]  colsample_bytree=0.06, learning_rate=0.0, n_estimators=1115, max_depth=3, gamma=0.024, score

[CV]  colsample_bytree=0.14, learning_rate=0.12, n_estimators=1915, max_depth=8, gamma=0.003, score=-0.010399744649543572, total=   1.1s
[CV] colsample_bytree=0.14, learning_rate=0.12, n_estimators=1915, max_depth=8, gamma=0.003 
[CV]  colsample_bytree=0.14, learning_rate=0.12, n_estimators=1915, max_depth=8, gamma=0.003, score=-0.01989458319284069, total=   1.1s
[CV] colsample_bytree=0.11, learning_rate=0.08, n_estimators=1765, max_depth=5, gamma=0.015 
[CV]  colsample_bytree=0.11, learning_rate=0.08, n_estimators=1765, max_depth=5, gamma=0.015, score=-0.016216314611431096, total=   0.6s
[CV] colsample_bytree=0.11, learning_rate=0.08, n_estimators=1765, max_depth=5, gamma=0.015 
[CV]  colsample_bytree=0.11, learning_rate=0.08, n_estimators=1765, max_depth=5, gamma=0.015, score=-0.010766470433071076, total=   0.6s
[CV] colsample_bytree=0.11, learning_rate=0.08, n_estimators=1765, max_depth=5, gamma=0.015 
[CV]  colsample_bytree=0.11, learning_rate=0.08, n_estimators=1765, max_depth=5, 

[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005, score=-133.4964230802154, total=   0.0s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005, score=-131.98820182610686, total=   0.1s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005, score=-132.23722064108512, total=   0.0s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005, score=-133.89038480904122, total=   0.1s
[CV] colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.09, learning_rate=0.0, n_estimators=815, max_depth=3, gamma=0.005, score=-132.37

[CV]  colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011, score=-0.013141850085603765, total=   0.3s
[CV] colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011, score=-0.018088898471994228, total=   0.4s
[CV] colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011, score=-0.020657656456706683, total=   0.3s
[CV] colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011, score=-0.011928256203880567, total=   0.4s
[CV] colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.011 
[CV]  colsample_bytree=0.11, learning_rate=0.06, n_estimators=965, max_depth=6, gamma=0.

[CV]  colsample_bytree=0.1, learning_rate=0.02, n_estimators=815, max_depth=9, gamma=0.005, score=-0.01696732142225296, total=   0.4s
[CV] colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.005 
[CV]  colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.005, score=-0.014387547209248085, total=   0.4s
[CV] colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.005 
[CV]  colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.005, score=-0.011151690620760758, total=   0.4s
[CV] colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.005 
[CV]  colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.005, score=-0.013627460402070999, total=   0.4s
[CV] colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.005 
[CV]  colsample_bytree=0.13, learning_rate=0.03, n_estimators=965, max_depth=7, gamma=0.00

[CV]  colsample_bytree=0.12, learning_rate=0.06, n_estimators=665, max_depth=9, gamma=0.018, score=-0.015485823974133379, total=   0.4s
[CV] colsample_bytree=0.12, learning_rate=0.06, n_estimators=665, max_depth=9, gamma=0.018 
[CV]  colsample_bytree=0.12, learning_rate=0.06, n_estimators=665, max_depth=9, gamma=0.018, score=-0.01085876079763641, total=   0.4s
[CV] colsample_bytree=0.12, learning_rate=0.06, n_estimators=665, max_depth=9, gamma=0.018 
[CV]  colsample_bytree=0.12, learning_rate=0.06, n_estimators=665, max_depth=9, gamma=0.018, score=-0.011104781084181184, total=   0.4s
[CV] colsample_bytree=0.12, learning_rate=0.06, n_estimators=665, max_depth=9, gamma=0.018 
[CV]  colsample_bytree=0.12, learning_rate=0.06, n_estimators=665, max_depth=9, gamma=0.018, score=-0.01666742639107298, total=   0.4s
[CV] colsample_bytree=0.11, learning_rate=0.13, n_estimators=1865, max_depth=6, gamma=0.017 
[CV]  colsample_bytree=0.11, learning_rate=0.13, n_estimators=1865, max_depth=6, gamma=0.

[CV]  colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008, score=-0.014851913069552698, total=   0.1s
[CV] colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008 
[CV]  colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008, score=-0.019854210431670287, total=   0.1s
[CV] colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008 
[CV]  colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008, score=-0.023016019862532502, total=   0.1s
[CV] colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008 
[CV]  colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008, score=-0.013438632285204928, total=   0.1s
[CV] colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.008 
[CV]  colsample_bytree=0.05, learning_rate=0.13, n_estimators=615, max_depth=7, gamma=0.

[CV]  colsample_bytree=0.14, learning_rate=0.04, n_estimators=865, max_depth=8, gamma=0.008, score=-0.01665000809294545, total=   0.4s
[CV] colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, gamma=0.012 
[CV]  colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, gamma=0.012, score=-0.014632540747658504, total=   0.3s
[CV] colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, gamma=0.012 
[CV]  colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, gamma=0.012, score=-0.011805404177646153, total=   0.3s
[CV] colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, gamma=0.012 
[CV]  colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, gamma=0.012, score=-0.014000311108441021, total=   0.3s
[CV] colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, gamma=0.012 
[CV]  colsample_bytree=0.09, learning_rate=0.01, n_estimators=1315, max_depth=4, g

[CV]  colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023, score=-0.013130533620791278, total=   0.4s
[CV] colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023 
[CV]  colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023, score=-0.015150249799216855, total=   0.4s
[CV] colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023 
[CV]  colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023, score=-0.010001731174116598, total=   0.4s
[CV] colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023 
[CV]  colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023, score=-0.009265018452666977, total=   0.4s
[CV] colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.023 
[CV]  colsample_bytree=0.1, learning_rate=0.07, n_estimators=1415, max_depth=5, gamma=0.

[CV]  colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008, score=-0.011420264774825802, total=   0.1s
[CV] colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008 
[CV]  colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008, score=-0.013740748003813795, total=   0.1s
[CV] colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008 
[CV]  colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008, score=-0.01767138018293123, total=   0.1s
[CV] colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008 
[CV]  colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008, score=-0.020309515126335168, total=   0.1s
[CV] colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.008 
[CV]  colsample_bytree=0.09, learning_rate=0.03, n_estimators=615, max_depth=3, gamma=0.0

[CV]  colsample_bytree=0.05, learning_rate=0.01, n_estimators=115, max_depth=5, gamma=0.022, score=-13.075435170667289, total=   0.0s
[CV] colsample_bytree=0.05, learning_rate=0.01, n_estimators=115, max_depth=5, gamma=0.022 
[CV]  colsample_bytree=0.05, learning_rate=0.01, n_estimators=115, max_depth=5, gamma=0.022, score=-13.620784324683381, total=   0.0s
[CV] colsample_bytree=0.05, learning_rate=0.01, n_estimators=115, max_depth=5, gamma=0.022 
[CV]  colsample_bytree=0.05, learning_rate=0.01, n_estimators=115, max_depth=5, gamma=0.022, score=-13.12174492087679, total=   0.0s
[CV] colsample_bytree=0.12, learning_rate=0.07, n_estimators=765, max_depth=3, gamma=0.023 
[CV]  colsample_bytree=0.12, learning_rate=0.07, n_estimators=765, max_depth=3, gamma=0.023, score=-0.015775931149101495, total=   0.1s
[CV] colsample_bytree=0.12, learning_rate=0.07, n_estimators=765, max_depth=3, gamma=0.023 
[CV]  colsample_bytree=0.12, learning_rate=0.07, n_estimators=765, max_depth=3, gamma=0.023, sc

[CV]  colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02, score=-0.017981353928274914, total=   0.4s
[CV] colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02 
[CV]  colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02, score=-0.025335083518297146, total=   0.5s
[CV] colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02 
[CV]  colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02, score=-0.011292679877311392, total=   0.4s
[CV] colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02 
[CV]  colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02, score=-0.0159759912141949, total=   0.4s
[CV] colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.02 
[CV]  colsample_bytree=0.11, learning_rate=0.13, n_estimators=1465, max_depth=5, gamma=0.0

[CV]  colsample_bytree=0.06, learning_rate=0.12, n_estimators=415, max_depth=3, gamma=0.007, score=-0.018655694805192947, total=   0.0s
[CV] colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.015 
[CV]  colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.015, score=-0.014822351742776562, total=   0.0s
[CV] colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.015 
[CV]  colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.015, score=-0.01532251856599784, total=   0.0s
[CV] colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.015 
[CV]  colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.015, score=-0.016173125644963125, total=   0.0s
[CV] colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.015 
[CV]  colsample_bytree=0.14, learning_rate=0.04, n_estimators=165, max_depth=7, gamma=0.0

[CV]  colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005, score=-0.011575419816757544, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005, score=-0.014217988584560238, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005, score=-0.009673278128076881, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005, score=-0.008634002335386203, total=   0.1s
[CV] colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.005 
[CV]  colsample_bytree=0.07, learning_rate=0.04, n_estimators=815, max_depth=3, gamma=0.

[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed:  6.4min finished


RandomizedSearchCV(cv=10, error_score='raise',
          estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=4, objective='reg:linear', random_state=123,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1),
          fit_params=None, iid=True, n_iter=100, n_jobs=1,
          param_distributions={'colsample_bytree': array([ 0.05,  0.06,  0.07,  0.08,  0.09,  0.1 ,  0.11,  0.12,  0.13,  0.14]), 'learning_rate': array([ 0.  ,  0.01,  0.02,  0.03,  0.04,  0.05,  0.06,  0.07,  0.08,
        0.09,  0.1 ,  0.11,  0.12,  0.13,  0.14]), 'n_estimators': array([  15,   65,  115,  ...13,  0.014,  0.015,  0.016,
        0.017,  0.018,  0.019,  0.02 ,  0.021,  0.022,  0.023,  0.024])},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          r

In [579]:
np.sqrt(np.abs(randomized_reg.best_score_))

0.11609401043655639

In [607]:
randomized_reg.best_params_

{'colsample_bytree': 0.060000000000000005,
 'gamma': 0.017000000000000001,
 'learning_rate': 0.029999999999999999,
 'max_depth': 3,
 'n_estimators': 1315}

In [599]:
params = randomized_reg.best_params_

In [541]:
num_train = len(train)
num_val = int(num_train * 0.2)

x_train_all = train
x_train = train[:-num_val]
x_val = train[num_train-num_val:]

ylog_train = train_y[:-num_val]
ylog_val = train_y[num_train-num_val:]

In [542]:
df_columns = train.columns

In [543]:
dtrin_all = xgb.DMatrix(x_train_all, train_y, feature_names= df_columns)
dtrain = xgb.DMatrix(x_train, ylog_train, feature_names= df_columns)
dval = xgb.DMatrix(x_val, ylog_val, feature_names= df_columns)
dtest = xgb.DMatrix(test, feature_names=df_columns)

In [609]:
xgb_model = xgb.train(params, dtrain, num_boost_round= 10000, evals=[(dval, 'val')],
                      early_stopping_rounds = 500, verbose_eval=100)

[0]	val-rmse:11.1944
Will train until val-rmse hasn't improved in 500 rounds.
[100]	val-rmse:0.58409
[200]	val-rmse:0.151682
[300]	val-rmse:0.132253
[400]	val-rmse:0.125653
[500]	val-rmse:0.122912
[600]	val-rmse:0.121325
[700]	val-rmse:0.120151
[800]	val-rmse:0.119531
[900]	val-rmse:0.11866
[1000]	val-rmse:0.118058
[1100]	val-rmse:0.117647
[1200]	val-rmse:0.117253
[1300]	val-rmse:0.117023
[1400]	val-rmse:0.116553
[1500]	val-rmse:0.116405
[1600]	val-rmse:0.116161
[1700]	val-rmse:0.115958
[1800]	val-rmse:0.115726
[1900]	val-rmse:0.115653
[2000]	val-rmse:0.115475
[2100]	val-rmse:0.11518
[2200]	val-rmse:0.115084
[2300]	val-rmse:0.114937
[2400]	val-rmse:0.114887
[2500]	val-rmse:0.114881
[2600]	val-rmse:0.114874
[2700]	val-rmse:0.114812
[2800]	val-rmse:0.114728
[2900]	val-rmse:0.114724
[3000]	val-rmse:0.114649
[3100]	val-rmse:0.114619
[3200]	val-rmse:0.114573
[3300]	val-rmse:0.114528
[3400]	val-rmse:0.114557
[3500]	val-rmse:0.114497
[3600]	val-rmse:0.114477
[3700]	val-rmse:0.114468
[3800]	va

In [603]:
num_boost_round = xgb_model.best_iteration
final_model = xgb.train(dict(params, silent =0), dtrain, num_boost_round=num_boost_round)

In [604]:
ylog_pred = final_model.predict(dtest)
y_pred = np.exp(ylog_pred) - 1

In [606]:
df_sub = pd.DataFrame({'id': test_id, "SalePrice":y_pred})
df_sub.to_csv('final_out.csv', index=False)