In [1]:
import time
import numpy as np
import pandas as pd

from scipy import sparse
from scipy.stats.mstats import gmean
from datetime import datetime
from sklearn import preprocessing
from scipy.stats import skew, boxcox,boxcox_normmax
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from bayes_opt import BayesianOptimization
from sklearn.metrics import log_loss

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import xgboost as xgb

seed = 1234



# Load Data

In [6]:
# https://www.kaggle.com/brandenkmurray/two-sigma-connect-rental-listing-inquiries/it-is-lit/comments
# param <- list(booster="gbtree",
#               objective="multi:softprob",
#               eval_metric="mlogloss",
#               nthread=13,
#               num_class=3,
#               eta = .02,
#               gamma = 1,
#               max_depth = 4,
#               min_child_weight = 1,
#               subsample = .7,
#               colsample_bytree = .5
# )
# xgb2 <- xgb.train(data = dtrain,
#                   params = param,
#                   nrounds = 2710
# )

data_path = "../input/"
train_X = pd.read_csv(data_path + 'train_BrandenMurray_MedianBedroom.csv')
test_X = pd.read_csv(data_path + 'test_BrandenMurray_MedianBedroom.csv')
train_y = np.ravel(pd.read_csv(data_path + 'labels_BrandenMurray.csv'))

# all_features = features_to_use + desc_sparse_cols + feat_sparse_cols
print train_X.shape, test_X.shape, train_y.shape

(49352, 287) (74659, 287) (49352,)


In [7]:
X_train, X_val, y_train, y_val = train_test_split(train_X, train_y, train_size=.80, random_state=1234)
print X_train.shape
print X_val.shape
# xgtrain = xgb.DMatrix(X_train, label=y_train)

(39481, 287)
(9871, 287)


In [9]:
rgr = xgb.XGBClassifier(objective = 'multi:softprob',
                       learning_rate = 0.1,
                       n_estimators = 10000,
                       nthread = -1)

rgr.fit(X_train,y_train,
        eval_set=[(X_val,y_val)],
        eval_metric='mlogloss',
#         num_class = 3,
        early_stopping_rounds=50,
        verbose=25
       )

[0]	validation_0-mlogloss:1.03729
Will train until validation_0-mlogloss hasn't improved in 50 rounds.
[25]	validation_0-mlogloss:0.639315
[50]	validation_0-mlogloss:0.599979
[75]	validation_0-mlogloss:0.585746
[100]	validation_0-mlogloss:0.577073
[125]	validation_0-mlogloss:0.570918
[150]	validation_0-mlogloss:0.566397
[175]	validation_0-mlogloss:0.562536
[200]	validation_0-mlogloss:0.559787
[225]	validation_0-mlogloss:0.557115
[250]	validation_0-mlogloss:0.555306
[275]	validation_0-mlogloss:0.553834
[300]	validation_0-mlogloss:0.552463
[325]	validation_0-mlogloss:0.55131
[350]	validation_0-mlogloss:0.550431
[375]	validation_0-mlogloss:0.549402
[400]	validation_0-mlogloss:0.548734
[425]	validation_0-mlogloss:0.547937
[450]	validation_0-mlogloss:0.547289
[475]	validation_0-mlogloss:0.546859
[500]	validation_0-mlogloss:0.546325
[525]	validation_0-mlogloss:0.545862
[550]	validation_0-mlogloss:0.545363
[575]	validation_0-mlogloss:0.544975
[600]	validation_0-mlogloss:0.544676
[625]	validat

XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=10000, nthread=-1,
       objective='multi:softprob', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [13]:
# pred_y = rgr.predict_proba(test_X, ntree_limit = rgr.best_iteration)

In [14]:
# now = datetime.now()
# sub_name = '../output/sub_xgb_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'

# out_df = pd.DataFrame(pred_y[:,:3])
# out_df.columns = ["high", "medium", "low"]
# out_df["listing_id"] = test_df.listing_id.values
# out_df.to_csv(sub_name, index=False)

# Tune XGBoost

In [10]:
learning_rate = 0.1
best_score = 1000
train_param = 0
for x in [3,4,5,6,7]:
    rgr = xgb.XGBClassifier(
        objective='multi:softprob',
        seed = 1234, # use a fixed seed during tuning so we can reproduce the results
        learning_rate = learning_rate,
        n_estimators = 10000,
        max_depth= x,
        nthread = -1,
        silent = False
    )
    rgr.fit(
        X_train,y_train,
        eval_set=[(X_val,y_val)],
        eval_metric='mlogloss',
        early_stopping_rounds=50,
        verbose=False
    )
    
    if rgr.best_score < best_score:
        best_score = rgr.best_score
        train_param = x

    print x, '\t', rgr.best_score, rgr.best_iteration

3 	0.541145 1079
4 	0.540671 685
5 	0.540202 367
6 	0.541736 347
7 	0.542972 206


In [11]:
max_depth = train_param
print train_param
# 3 	0.542025 1055
# 4 	0.538678 715
# 5 	0.540994 424
# 6 	0.541306 311

5


In [12]:
train_param = 1
for x in [2,4,8,16]:
    rgr = xgb.XGBClassifier(
        objective='multi:softprob',
        seed = 1234, # use a fixed seed during tuning so we can reproduce the results
        learning_rate = learning_rate,
        n_estimators = 10000,
        max_depth= max_depth,
        nthread = -1,
        silent = False,
        min_child_weight = x
    )
    rgr.fit(
        X_train,y_train,
        eval_set=[(X_val,y_val)],
        eval_metric='mlogloss',
        early_stopping_rounds=50,
        verbose=False
    )
    
    if rgr.best_score < best_score:
        best_score = rgr.best_score
        train_param = x
        

    print x, '\t', rgr.best_score, rgr.best_iteration

2 	0.539267 482
4 	0.54015 439
8 	0.538771 446
16 	0.539292 432


In [13]:
min_child_weight = train_param
print min_child_weight

8


In [14]:
train_param = 1
for x in [0.3,0.4,0.5,0.6,0.7,0.8,0.9]:
    rgr = xgb.XGBClassifier(
        objective='multi:softprob',
        seed = 1234, # use a fixed seed during tuning so we can reproduce the results
        learning_rate = learning_rate,
        n_estimators = 10000,
        max_depth= max_depth,
        nthread = -1,
        silent = False,
        min_child_weight = min_child_weight,
        colsample_bytree = x
    )
    rgr.fit(
        X_train,y_train,
        eval_set=[(X_val,y_val)],
        eval_metric='mlogloss',
        early_stopping_rounds=50,
        verbose=False
    )

    if rgr.best_score < best_score:
        best_score = rgr.best_score
        train_param = x
        

    print x, '\t', rgr.best_score, rgr.best_iteration

0.3 	0.536524 510
0.4 	0.536052 538
0.5 	0.53706 453
0.6 	0.536639 429
0.7 	0.537979 577
0.8 	0.538422 492
0.9 	0.538766 437


In [15]:
colsample_bytree = train_param
print train_param

0.4


In [16]:
train_param = 1
for x in [0.5,0.6,0.7,0.8,0.9]:
    rgr = xgb.XGBClassifier(
        objective='multi:softprob',
        seed = 1234, # use a fixed seed during tuning so we can reproduce the results
        learning_rate = learning_rate,
        n_estimators = 10000,
        max_depth= max_depth,
        nthread = -1,
        silent = False,
        min_child_weight = min_child_weight,
        colsample_bytree = colsample_bytree,
        subsample = x
    )
    rgr.fit(
        X_train,y_train,
        eval_set=[(X_val,y_val)],
        eval_metric='mlogloss',
        early_stopping_rounds=50,
        verbose=False
    )
    if rgr.best_score < best_score:
        best_score = rgr.best_score
        train_param = x
        

    print x, '\t', rgr.best_score, rgr.best_iteration

0.5 	0.537281 555
0.6 	0.537457 408
0.7 	0.537962 456
0.8 	0.537343 470
0.9 	0.536484 456


In [17]:
subsample = train_param
print train_param

1


In [18]:
train_param = 0
for x in [0.3, 0.6, 0.9, 1.2, 1.5, 1.8, 2.1, 2.4, 2.7, 3.0]:
    rgr = xgb.XGBClassifier(
        objective='multi:softprob',
        seed = 1234, # use a fixed seed during tuning so we can reproduce the results
        learning_rate = learning_rate,
        n_estimators = 10000,
        max_depth= max_depth,
        nthread = -1,
        silent = False,
        min_child_weight = min_child_weight,
        colsample_bytree = colsample_bytree,
        subsample = subsample,
        gamma = x
    )
    rgr.fit(
        X_train,y_train,
        eval_set=[(X_val,y_val)],
        eval_metric='mlogloss',
        early_stopping_rounds=50,
        verbose=False
    )

    if rgr.best_score < best_score:
        best_score = rgr.best_score
        train_param = x
        

    print x, '\t', rgr.best_score, rgr.best_iteration

0.3 	0.536349 518
0.6 	0.535914 415
0.9 	0.537153 386
1.2 	0.534944 569
1.5 	0.534998 665
1.8 	0.535051 839
2.1 	0.536308 730
2.4 	0.536679 638
2.7 	0.537926 594
3.0 	0.537102 1128


In [19]:
gamma = train_param
print gamma

1.2


In [23]:
xgtrain = xgb.DMatrix(train_X, label=train_y) 

def xgb_evaluate(min_child_weight, colsample_bytree, max_depth, subsample, gamma):
    params = dict()
    params['objective']='multi:softprob'
    params['eval_metric']='mlogloss',
    params['num_class']=3
    params['silent']=1
    params['eta'] = 0.1
    params['verbose_eval'] = True
    params['min_child_weight'] = int(min_child_weight)
    params['colsample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    
    cv_result = xgb.cv(
        params, xgtrain, 
        num_boost_round=10000, nfold=5,
        metrics = 'mlogloss',
        seed=seed,callbacks=[xgb.callback.early_stop(50)]
    )
    
    return -cv_result['test-mlogloss-mean'].values[-1]


xgb_BO = BayesianOptimization(
    xgb_evaluate, 
    {
        'max_depth': (3,6),
        'min_child_weight': (4,24),
        'colsample_bytree': (0.3,0.7),
        'subsample': (0.6,1),
        'gamma': (0.9,1.8)
    }
)

xgb_BO.maximize(init_points=10, n_iter=40)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   colsample_bytree |     gamma |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[1205]	train-mlogloss:0.427454+0.00102843	test-mlogloss:0.532945+0.00617053

    1 | 28m42s | [35m  -0.53295[0m | [32m            0.6829[0m | [32m   1.0781[0m | [32m     3.2980[0m | [32m           16.7086[0m | [32m     0.8516[0m | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[637]	train-mlogloss:0.397506+0.00203644	test-mlogloss:0.531582+0.00554304

    2 | 13m41s | [35m  -0.53158[0m | [32m            0.3249[0m |

  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


[31mBayesian Optimization[0m
[94m---------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   colsample_bytree |     gamma |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[506]	train-mlogloss:0.375411+0.00164837	test-mlogloss:0.531541+0.00524772



  z = (mean - y_max - xi)/std


   11 | 20m41s |   -0.53154 |             0.6875 |    0.9604 |      5.9504 |             8.8471 |      0.8523 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[484]	train-mlogloss:0.385413+0.00135268	test-mlogloss:0.531736+0.00568944



  " state: %s" % convergence_dict)


   12 | 19m30s |   -0.53174 |             0.6803 |    0.9324 |      5.9029 |            12.8308 |      0.7543 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[743]	train-mlogloss:0.393696+0.00157638	test-mlogloss:0.531471+0.00541179



  " state: %s" % convergence_dict)


   13 | 23m38s |   -0.53147 |             0.6990 |    1.0065 |      4.0034 |             4.4420 |      0.8578 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[592]	train-mlogloss:0.378915+0.00110277	test-mlogloss:0.530777+0.00471478

   14 | 14m18s | [35m  -0.53078[0m | [32m            0.3986[0m | [32m   1.0701[0m | [32m     5.7887[0m | [32m            6.7974[0m | [32m     0.9865[0m | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[579]	train-mlogloss:0.399911+0.00102711	test-mlogloss:0.531364+0.00530424

   15 | 11m53s |   -0.53136 |             0.3197 |    1.1727 |      5.5625 |            16.9293 |      0.9963 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train 

  " state: %s" % convergence_dict)


   18 | 17m57s |   -0.53134 |             0.5728 |    0.9333 |      4.6079 |             5.9521 |      0.9118 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[446]	train-mlogloss:0.413639+0.00114097	test-mlogloss:0.532705+0.00540294

   19 | 16m21s |   -0.53270 |             0.6104 |    1.6891 |      5.9865 |            22.8194 |      0.6482 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[592]	train-mlogloss:0.378915+0.00110277	test-mlogloss:0.530777+0.00471478



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   20 | 14m33s |   -0.53078 |             0.3986 |    1.0701 |      5.7887 |             6.7974 |      0.9865 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[593]	train-mlogloss:0.378865+0.00115947	test-mlogloss:0.531202+0.00422823



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   21 | 14m09s |   -0.53120 |             0.3986 |    1.0701 |      5.7887 |             6.7974 |      0.9865 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[541]	train-mlogloss:0.38442+0.00114264	test-mlogloss:0.53118+0.00511172



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   22 | 14m07s |   -0.53118 |             0.4301 |    1.0513 |      5.8121 |             8.7052 |      0.9436 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[599]	train-mlogloss:0.381896+0.000749397	test-mlogloss:0.5316+0.00491681



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   23 | 17m00s |   -0.53160 |             0.4990 |    1.0701 |      5.8591 |            12.5281 |      0.9910 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[923]	train-mlogloss:0.394632+0.00109095	test-mlogloss:0.531387+0.00637279



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   24 | 17m17s |   -0.53139 |             0.3657 |    1.0134 |      4.8595 |            12.5288 |      0.8577 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[596]	train-mlogloss:0.369095+0.00127326	test-mlogloss:0.530776+0.00512298



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   25 | 13m52s | [35m  -0.53078[0m | [32m            0.3657[0m | [32m   1.0134[0m | [32m     5.8591[0m | [32m            5.8650[0m | [32m     0.8577[0m | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[549]	train-mlogloss:0.39139+0.00153374	test-mlogloss:0.532499+0.00573301



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   26 | 12m57s |   -0.53250 |             0.3534 |    1.1478 |      5.6141 |            11.5405 |      0.6067 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[557]	train-mlogloss:0.381549+0.00113504	test-mlogloss:0.531427+0.00474098



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   27 | 13m17s |   -0.53143 |             0.3876 |    1.0512 |      5.4792 |             6.4870 |      0.9436 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[997]	train-mlogloss:0.389656+0.000884437	test-mlogloss:0.530894+0.00551061



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   28 | 22m38s |   -0.53089 |             0.4840 |    1.7005 |      4.2737 |             6.7829 |      0.9077 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[610]	train-mlogloss:0.384275+0.00130714	test-mlogloss:0.532103+0.00476793



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   29 | 22m43s |   -0.53210 |             0.7000 |    1.8000 |      5.7887 |             6.7974 |      0.9865 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[1203]	train-mlogloss:0.444486+0.000907417	test-mlogloss:0.534178+0.00588448



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   30 | 16m22s |   -0.53418 |             0.3189 |    1.5695 |      3.1005 |            13.6053 |      0.8533 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[557]	train-mlogloss:0.3817+0.00128054	test-mlogloss:0.530878+0.00533998



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   31 | 13m19s |   -0.53088 |             0.3859 |    1.0483 |      5.8158 |             6.4381 |      0.9368 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[804]	train-mlogloss:0.41027+0.00158936	test-mlogloss:0.532484+0.00497045



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   32 | 14m36s |   -0.53248 |             0.3657 |    1.0134 |      4.8596 |             5.8654 |      0.9910 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[1420]	train-mlogloss:0.424942+0.000889592	test-mlogloss:0.533676+0.00555366



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   33 | 29m21s |   -0.53368 |             0.5728 |    1.7886 |      3.9195 |            20.4737 |      0.7495 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[529]	train-mlogloss:0.384256+0.00488817	test-mlogloss:0.531452+0.00481936



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   34 | 23m10s |   -0.53145 |             0.7000 |    1.8000 |      6.0000 |            21.5976 |      1.0000 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[553]	train-mlogloss:0.383517+0.00121879	test-mlogloss:0.531165+0.00550243



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   35 | 13m17s |   -0.53116 |             0.3952 |    1.0643 |      5.6928 |             6.7013 |      0.9732 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[1203]	train-mlogloss:0.431344+0.00131937	test-mlogloss:0.532716+0.00570769



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   36 | 21m38s |   -0.53272 |             0.4715 |    1.7255 |      3.8450 |             4.4356 |      0.7615 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[633]	train-mlogloss:0.371561+0.00107631	test-mlogloss:0.530891+0.00565203



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   37 | 15m55s |   -0.53089 |             0.4320 |    1.0513 |      5.4795 |             8.7048 |      0.9880 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[780]	train-mlogloss:0.406915+0.00099274	test-mlogloss:0.53258+0.00483937



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   38 | 17m56s |   -0.53258 |             0.4990 |    1.0135 |      4.8597 |             5.8655 |      0.9910 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[730]	train-mlogloss:0.410346+0.00125086	test-mlogloss:0.5316+0.0053928



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   39 | 13m54s |   -0.53160 |             0.3657 |    1.0135 |      4.8598 |             5.8656 |      0.8577 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[358]	train-mlogloss:0.362431+0.00208714	test-mlogloss:0.532425+0.00469067



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   40 | 16m25s |   -0.53243 |             0.7000 |    0.9000 |      6.0000 |             4.0000 |      1.0000 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[824]	train-mlogloss:0.390764+0.00151752	test-mlogloss:0.531174+0.00552283



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   41 | 19m55s |   -0.53117 |             0.4990 |    1.0135 |      4.8598 |             5.8656 |      0.8577 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[396]	train-mlogloss:0.41507+0.00160692	test-mlogloss:0.53278+0.00562903



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   42 | 09m43s |   -0.53278 |             0.3000 |    1.8000 |      6.0000 |            24.0000 |      0.6000 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[1579]	train-mlogloss:0.450409+0.00830093	test-mlogloss:0.534857+0.00496978



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   43 | 34m36s |   -0.53486 |             0.7000 |    0.9000 |      3.3918 |             4.0000 |      1.0000 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[1257]	train-mlogloss:0.420553+0.00136638	test-mlogloss:0.533485+0.00524077



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   44 | 31m23s |   -0.53348 |             0.6996 |    0.9474 |      3.6823 |             4.2043 |      0.9336 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[1207]	train-mlogloss:0.421971+0.00137357	test-mlogloss:0.53316+0.00542452



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   45 | 28m42s |   -0.53316 |             0.6994 |    0.9737 |      3.8432 |             4.3175 |      0.8967 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[594]	train-mlogloss:0.378159+0.00139861	test-mlogloss:0.531278+0.00497356



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   46 | 16m52s |   -0.53128 |             0.4991 |    1.3134 |      5.8591 |             5.8650 |      0.9910 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[861]	train-mlogloss:0.404911+0.000978821	test-mlogloss:0.532632+0.0048837



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   47 | 19m44s |   -0.53263 |             0.4990 |    1.0135 |      4.8598 |            12.5270 |      0.9910 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[621]	train-mlogloss:0.375789+0.00118615	test-mlogloss:0.531265+0.00507927



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   48 | 16m14s |   -0.53127 |             0.4526 |    1.0451 |      5.2038 |            10.4052 |      0.9891 | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[803]	train-mlogloss:0.37321+0.00317122	test-mlogloss:0.530453+0.00523233



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   49 | 29m53s | [35m  -0.53045[0m | [32m            0.5987[0m | [32m   1.8000[0m | [32m     6.0000[0m | [32m           24.0000[0m | [32m     1.0000[0m | 
Multiple eval metrics have been passed: 'test-mlogloss' will be used for early stopping.

Will train until test-mlogloss hasn't improved in 50 rounds.
Stopping. Best iteration:
[580]	train-mlogloss:0.379122+0.00476401	test-mlogloss:0.531003+0.00466507



  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   50 | 16m00s |   -0.53100 |             0.4063 |    1.5905 |      6.0000 |            24.0000 |      1.0000 | 


In [24]:
xgb_bo_scores = pd.DataFrame([[s[0]['max_depth'],
                               s[0]['min_child_weight'],
                               s[0]['colsample_bytree'],
                               s[0]['subsample'],
                               s[0]['gamma'],
                               s[1]] for s in zip(xgb_BO.res['all']['params'],xgb_BO.res['all']['values'])],
                            columns = ['max_depth',
                                       'min_child_weight',
                                       'colsample_bytree',
                                       'subsample',
                                       'gamma',
                                       'score'])
xgb_bo_scores=xgb_bo_scores.sort_values('score',ascending=False)
xgb_bo_scores.head(10)

Unnamed: 0,max_depth,min_child_weight,colsample_bytree,subsample,gamma,score
38,6.0,24.0,0.5987,1.0,1.8,-0.530453
14,5.859096,5.864988,0.365725,0.857658,1.01342,-0.530776
9,5.788652,6.797388,0.398584,0.986473,1.070124,-0.530777
3,5.788652,6.797386,0.398584,0.986473,1.070125,-0.530777
20,5.815798,6.438087,0.385921,0.936834,1.048273,-0.530878
26,5.479455,8.704757,0.432004,0.987973,1.051262,-0.530891
17,4.273687,6.782921,0.484008,0.90771,1.700541,-0.530894
39,6.0,24.0,0.406348,1.0,1.590456,-0.531003
24,5.692843,6.701279,0.395197,0.973196,1.064279,-0.531165
30,4.859755,5.865581,0.498985,0.85774,1.013456,-0.531174


In [25]:
def xgb_blend(estimators, train_x, train_y, test_x, fold, early_stopping_rounds=0):
    N_params = len(estimators)
    print ("Blend %d estimators for %d folds" % (N_params, fold))
    skf = KFold(n_splits=fold,random_state=seed)
    N_class = len(set(train_y))
        
    train_blend_x = np.zeros((train_x.shape[0], N_class*N_params))
    test_blend_x_mean = np.zeros((test_x.shape[0], N_class*N_params))
    test_blend_x_gmean = np.zeros((test_x.shape[0], N_class*N_params))
    scores = np.zeros ((fold,N_params))
    best_rounds = np.zeros ((fold, N_params))
    
    for j, est in enumerate(estimators):
        est.set_params(objective = 'multi:softprob')
        est.set_params(silent = False)
        est.set_params(learning_rate = 0.02)
        est.set_params(n_estimators=100000)
        
        print ("Model %d: %s" %(j+1, est))

        test_blend_x_j = np.zeros((test_x.shape[0], N_class*fold))
    
        for i, (train_index, val_index) in enumerate(skf.split(train_x)):
            print ("Model %d fold %d" %(j+1,i+1))
            fold_start = time.time() 
            train_x_fold = train_x.iloc[train_index]
            train_y_fold = train_y[train_index]
            val_x_fold = train_x.iloc[val_index]
            val_y_fold = train_y[val_index]      

            est.fit(train_x_fold,train_y_fold,
                    eval_set = [(val_x_fold, val_y_fold)],
                    eval_metric = 'mlogloss',
                    early_stopping_rounds=early_stopping_rounds,
                    verbose=False)
            best_round=est.best_iteration
            best_rounds[i,j]=best_round
            print ("best round %d" % (best_round))
            val_y_predict_fold = est.predict_proba(val_x_fold,ntree_limit=best_round)
            score = log_loss(val_y_fold, val_y_predict_fold)
            print ("Score: ", score)
            scores[i,j]=score
            train_blend_x[val_index, (j*N_class):(j+1)*N_class] = val_y_predict_fold
            
            test_blend_x_j[:,(i*N_class):(i+1)*N_class] = est.predict_proba(test_x,ntree_limit=best_round)
            print ("Model %d fold %d fitting finished in %0.3fs" % (j+1,i+1, time.time() - fold_start))
            
        test_blend_x_mean[:,(j*N_class):(j+1)*N_class] = \
                np.stack([test_blend_x_j[:,range(0,N_class*fold,N_class)].mean(1),
                          test_blend_x_j[:,range(1,N_class*fold,N_class)].mean(1),
                          test_blend_x_j[:,range(2,N_class*fold,N_class)].mean(1)]).T
        
        test_blend_x_gmean[:,(j*N_class):(j+1)*N_class] = \
                np.stack([gmean(test_blend_x_j[:,range(0,N_class*fold,N_class)], axis=1),
                          gmean(test_blend_x_j[:,range(1,N_class*fold,N_class)], axis=1),
                          gmean(test_blend_x_j[:,range(2,N_class*fold,N_class)], axis=1)]).T
            
        print ("Score for model %d is %f" % (j+1,np.mean(scores[:,j])))
    print ("Score for blended models is %f" % (np.mean(scores)))
    return (train_blend_x, test_blend_x_mean, test_blend_x_gmean, scores,best_rounds)


In [26]:
estimators = [xgb.XGBClassifier(max_depth = 6,
                              min_child_weight = 24,
                              colsample_bytree = 0.598700 ,
                              subsample = 1.0 ,
                              gamma = 1.800000),
             xgb.XGBClassifier(max_depth = 5,
                              min_child_weight = 5,
                              colsample_bytree = 0.365725,
                              subsample = 0.857658,
                              gamma = 1.013420),
             xgb.XGBClassifier(max_depth = 5,
                              min_child_weight = 6,
                              colsample_bytree = 0.398584,
                              subsample = 0.986473,
                              gamma = 1.070124),         
             xgb.XGBClassifier(max_depth = 5,
                              min_child_weight = 6,
                              colsample_bytree = 0.385921,
                              subsample = 0.936834,
                              gamma = 1.048273),  
             xgb.XGBClassifier(max_depth = 5,
                              min_child_weight = 8,
                              colsample_bytree = 0.432004,
                              subsample = 0.987973,
                              gamma = 1.051262),                
             ]

#  	max_depth 	min_child_weight 	colsample_bytree 	subsample 	gamma 		score
# 38 	6.000000 	24.000000 			0.598700 			1.000000 	1.800000 	-0.530453
# 14 	5.859096 	5.864988 			0.365725 			0.857658 	1.013420 	-0.530776
# 9 	5.788652 	6.797388 			0.398584 			0.986473 	1.070124 	-0.530777
# 20 	5.815798 	6.438087 			0.385921 			0.936834 	1.048273 	-0.530878
# 26 	5.479455 	8.704757 			0.432004 			0.987973 	1.051262 	-0.530891


(train_blend_x_xgb,
 test_blend_x_xgb_mean,
 test_blend_x_xgb_gmean,
 blend_scores_xgb,
 best_rounds_xgb) = xgb_blend(estimators,
                              train_X,train_y,
                              test_X,
                              10,
                              300)

# print (np.mean(blend_scores_xgb_le,axis=0))
# print (np.mean(best_rounds_xgb_le,axis=0))

Blend 5 estimators for 10 folds
Model 1: XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.5987,
       gamma=1.8, learning_rate=0.02, max_delta_step=0, max_depth=6,
       min_child_weight=24, missing=None, n_estimators=100000, nthread=-1,
       objective='multi:softprob', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=False, subsample=1.0)
Model 1 fold 1
best round 1892
('Score: ', 0.51770716571707309)
Model 1 fold 1 fitting finished in 1047.273s
Model 1 fold 2
best round 4245
('Score: ', 0.5049523217589178)
Model 1 fold 2 fitting finished in 2144.144s
Model 1 fold 3
best round 3386
('Score: ', 0.526998712881667)
Model 1 fold 3 fitting finished in 1756.881s
Model 1 fold 4
best round 7584
('Score: ', 0.50331539304273787)
Model 1 fold 4 fitting finished in 3714.310s
Model 1 fold 5
best round 4466
('Score: ', 0.53408833044042558)
Model 1 fold 5 fitting finished in 2250.308s
Model 1 fold 6
best round 2312
('Score: ', 0.52730921903999495)
Model 1

In [27]:
now = datetime.now()

name_train_blend = '../output/train_blend_xgb_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'
name_test_blend_mean = '../output/test_blend_xgb_mean_BM_MB_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'
name_test_blend_gmean = '../output/test_blend_xgb_gmean_BM_MB_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'


print (np.mean(blend_scores_xgb,axis=0))
print (np.mean(best_rounds_xgb,axis=0))
np.savetxt(name_train_blend,train_blend_x_xgb, delimiter=",")
np.savetxt(name_test_blend_mean,test_blend_x_xgb_mean, delimiter=",")
np.savetxt(name_test_blend_gmean,test_blend_x_xgb_gmean, delimiter=",")

[ 0.52767865  0.52613636  0.52754395  0.52657041  0.52717787]
[ 3408.6  3357.7  3197.6  3389.7  3351.5]


In [29]:
test_blend_x_xgb_gmean[:,3:6]


array([[  4.92677356e-01,   4.30737419e-01,   7.44820025e-02],
       [  9.63733862e-01,   2.47328961e-02,   1.11683269e-02],
       [  9.58007718e-01,   3.77191319e-02,   3.23068360e-03],
       ..., 
       [  9.81620214e-01,   1.76846564e-02,   5.19009776e-04],
       [  9.77932673e-01,   2.14620419e-02,   2.95030659e-04],
       [  6.10030975e-01,   3.52669469e-01,   3.46001805e-02]])

In [30]:
test_blend_x_xgb_gmean[:,:3]

array([[  4.61991373e-01,   4.66239213e-01,   6.66398217e-02],
       [  9.72825251e-01,   1.68346721e-02,   9.97755912e-03],
       [  9.56019249e-01,   3.89987940e-02,   3.83993967e-03],
       ..., 
       [  9.79178680e-01,   1.97692822e-02,   7.48417182e-04],
       [  9.74714138e-01,   2.45168262e-02,   4.44877343e-04],
       [  5.70693260e-01,   3.94875426e-01,   3.18685029e-02]])

In [31]:
# now = datetime.now()
sub_name = '../output/sub_XGB_mean_BM_MB_' + str(now.strftime("%Y-%m-%d-%H-%M")) + '.csv'

out_df = pd.DataFrame(test_blend_x_xgb_mean[:,3:6])
out_df.columns = ["low", "medium", "high"]
out_df["listing_id"] = test_X.listing_id.values
out_df.to_csv(sub_name, index=False)


# ypreds.columns = cols

# df = pd.read_json(open("../input/test.json", "r"))
# ypreds['listing_id'] = df["listing_id"]

# ypreds.to_csv('my_preds.csv', index=None)