In [2]:
from __future__ import division
import pandas as pd
import numpy as np
import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from time import time
from catboost import CatBoostClassifier
import gc
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from scipy.stats import randint as sp_randint
from hyperopt import hp, tpe, STATUS_OK, Trials, fmin

start = time()
train = pd.read_csv('D:/Driver/ohe_train_v2.csv',na_values=-1)
#test = pd.read_csv('D:/Driver/ohe_test_v2.csv',na_values=-1)

unwanted = train.columns[train.columns.str.startswith('ps_calc_')]
train = train.drop(unwanted, axis=1)
#test = test.drop(unwanted, axis=1)

X = train.drop(['id','target'], axis=1)
features = X.columns
X = train[features].values
y = train['target'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify = y)

def objective(space):
    clf = xgb.XGBClassifier(n_estimators=2000,
                            max_depth=space['max_depth'],
                            min_child_weight=space['min_child_weight'],
                            subsample=space['subsample'],
                            learning_rate=space['learning_rate'],
                            gamma=space['gamma'],
                            colsample_bytree=space['colsample_bytree'],
                            max_delta_step=space['max_delta_step'],
                            objective='binary:logistic',
                            random_state= 2017
                            )
    eval_set = [(X_test, y_test)]

    clf.fit(X_train,
            y_train,
            eval_set=eval_set,
            eval_metric='auc',
            early_stopping_rounds=100)

    pred = clf.predict_proba(X_test)[:,1]
    #   mae = mean_absolute_error(np.exp(y_valid), np.exp(pred))
    auc = roc_auc_score(y_test, pred)

    #    print "SCORE:", mae
    return {'loss': 1-auc, 'status': STATUS_OK}
    # skf = StratifiedKFold(n_splits=4, random_state=2017)
    #
    # scores = cross_val_score(clf, X, y,
    #                          cv=skf, scoring='roc_auc',n_jobs=-1)
    #
    # return 1 - scores.mean()

"""

{'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_delta_step': 1.9000000000000001, 
'subsample': 0.7000000000000001, 'min_child': 18.0, 'max_depth': 5, 'gamma': 0.75}
"""
space = {
    'max_depth': hp.choice('max_depth', np.arange(5, 20, dtype=int)),
    'min_child_weight': hp.quniform('min_child', 5, 20, 1),
    'subsample': hp.quniform('subsample', 0.7, 1, 0.05),
    'learning_rate': hp.quniform('learning_rate', 0.025, 0.5, 0.025),
    'gamma': hp.quniform('gamma', 0.5, 1, 0.05),
    'colsample_bytree': hp.quniform('colsample_bytree', 0.4, 1.0, 0.05),
    'max_delta_step': hp.quniform('max_delta_step', 1, 3, 0.1)
}

trials = Trials()
best = fmin(objective,
            space,
            algo=tpe.suggest,
            max_evals=30,  # change
            trials=trials)

print(best)
print('Time: {} mins'.format((time()-start)/60))

[0]	validation_0-auc:0.500985
Will train until validation_0-auc hasn't improved in 100 rounds.
[1]	validation_0-auc:0.500985
[2]	validation_0-auc:0.500985
[3]	validation_0-auc:0.50115
[4]	validation_0-auc:0.502004
[5]	validation_0-auc:0.528709
[6]	validation_0-auc:0.590159
[7]	validation_0-auc:0.611886
[8]	validation_0-auc:0.615683
[9]	validation_0-auc:0.621217
[10]	validation_0-auc:0.620278
[11]	validation_0-auc:0.622689
[12]	validation_0-auc:0.623701
[13]	validation_0-auc:0.624235
[14]	validation_0-auc:0.62659
[15]	validation_0-auc:0.629381
[16]	validation_0-auc:0.630053
[17]	validation_0-auc:0.63051
[18]	validation_0-auc:0.631312
[19]	validation_0-auc:0.632489
[20]	validation_0-auc:0.632545
[21]	validation_0-auc:0.632736
[22]	validation_0-auc:0.633493
[23]	validation_0-auc:0.633853
[24]	validation_0-auc:0.633877
[25]	validation_0-auc:0.634282
[26]	validation_0-auc:0.634689
[27]	validation_0-auc:0.634355
[28]	validation_0-auc:0.634295
[29]	validation_0-auc:0.6352
[30]	validation_0-au

[12]	validation_0-auc:0.63141
[13]	validation_0-auc:0.63197
[14]	validation_0-auc:0.632095
[15]	validation_0-auc:0.631888
[16]	validation_0-auc:0.632412
[17]	validation_0-auc:0.633013
[18]	validation_0-auc:0.633184
[19]	validation_0-auc:0.633343
[20]	validation_0-auc:0.63313
[21]	validation_0-auc:0.633093
[22]	validation_0-auc:0.632169
[23]	validation_0-auc:0.632123
[24]	validation_0-auc:0.632337
[25]	validation_0-auc:0.631489
[26]	validation_0-auc:0.631147
[27]	validation_0-auc:0.630317
[28]	validation_0-auc:0.629359
[29]	validation_0-auc:0.629141
[30]	validation_0-auc:0.62889
[31]	validation_0-auc:0.6287
[32]	validation_0-auc:0.628491
[33]	validation_0-auc:0.628349
[34]	validation_0-auc:0.628307
[35]	validation_0-auc:0.628491
[36]	validation_0-auc:0.628698
[37]	validation_0-auc:0.628407
[38]	validation_0-auc:0.627623
[39]	validation_0-auc:0.626481
[40]	validation_0-auc:0.626124
[41]	validation_0-auc:0.625893
[42]	validation_0-auc:0.625491
[43]	validation_0-auc:0.62531
[44]	validation

[46]	validation_0-auc:0.637077
[47]	validation_0-auc:0.637278
[48]	validation_0-auc:0.63731
[49]	validation_0-auc:0.637104
[50]	validation_0-auc:0.636735
[51]	validation_0-auc:0.636623
[52]	validation_0-auc:0.63653
[53]	validation_0-auc:0.636247
[54]	validation_0-auc:0.636305
[55]	validation_0-auc:0.635909
[56]	validation_0-auc:0.635673
[57]	validation_0-auc:0.635242
[58]	validation_0-auc:0.635653
[59]	validation_0-auc:0.635466
[60]	validation_0-auc:0.635304
[61]	validation_0-auc:0.635474
[62]	validation_0-auc:0.635287
[63]	validation_0-auc:0.635135
[64]	validation_0-auc:0.634875
[65]	validation_0-auc:0.635152
[66]	validation_0-auc:0.635589
[67]	validation_0-auc:0.635399
[68]	validation_0-auc:0.635348
[69]	validation_0-auc:0.634866
[70]	validation_0-auc:0.634301
[71]	validation_0-auc:0.633854
[72]	validation_0-auc:0.63373
[73]	validation_0-auc:0.633613
[74]	validation_0-auc:0.633611
[75]	validation_0-auc:0.633753
[76]	validation_0-auc:0.633686
[77]	validation_0-auc:0.633816
[78]	valida

[7]	validation_0-auc:0.612044
[8]	validation_0-auc:0.61174
[9]	validation_0-auc:0.61139
[10]	validation_0-auc:0.612326
[11]	validation_0-auc:0.61175
[12]	validation_0-auc:0.610421
[13]	validation_0-auc:0.610348
[14]	validation_0-auc:0.609387
[15]	validation_0-auc:0.610067
[16]	validation_0-auc:0.609501
[17]	validation_0-auc:0.610148
[18]	validation_0-auc:0.6093
[19]	validation_0-auc:0.608186
[20]	validation_0-auc:0.607141
[21]	validation_0-auc:0.606006
[22]	validation_0-auc:0.605434
[23]	validation_0-auc:0.604972
[24]	validation_0-auc:0.604296
[25]	validation_0-auc:0.602933
[26]	validation_0-auc:0.603472
[27]	validation_0-auc:0.603049
[28]	validation_0-auc:0.603055
[29]	validation_0-auc:0.602551
[30]	validation_0-auc:0.601568
[31]	validation_0-auc:0.600115
[32]	validation_0-auc:0.599998
[33]	validation_0-auc:0.600231
[34]	validation_0-auc:0.599148
[35]	validation_0-auc:0.598643
[36]	validation_0-auc:0.597538
[37]	validation_0-auc:0.597609
[38]	validation_0-auc:0.597291
[39]	validation_

[60]	validation_0-auc:0.634648
[61]	validation_0-auc:0.63502
[62]	validation_0-auc:0.635081
[63]	validation_0-auc:0.635236
[64]	validation_0-auc:0.63549
[65]	validation_0-auc:0.635896
[66]	validation_0-auc:0.63579
[67]	validation_0-auc:0.636051
[68]	validation_0-auc:0.636172
[69]	validation_0-auc:0.636183
[70]	validation_0-auc:0.636303
[71]	validation_0-auc:0.636491
[72]	validation_0-auc:0.636456
[73]	validation_0-auc:0.636747
[74]	validation_0-auc:0.637294
[75]	validation_0-auc:0.637422
[76]	validation_0-auc:0.637627
[77]	validation_0-auc:0.637867
[78]	validation_0-auc:0.63775
[79]	validation_0-auc:0.637955
[80]	validation_0-auc:0.638074
[81]	validation_0-auc:0.638468
[82]	validation_0-auc:0.638536
[83]	validation_0-auc:0.638518
[84]	validation_0-auc:0.638483
[85]	validation_0-auc:0.638666
[86]	validation_0-auc:0.638989
[87]	validation_0-auc:0.63887
[88]	validation_0-auc:0.639005
[89]	validation_0-auc:0.638896
[90]	validation_0-auc:0.638921
[91]	validation_0-auc:0.639056
[92]	validati

[76]	validation_0-auc:0.627453
[77]	validation_0-auc:0.627769
[78]	validation_0-auc:0.627577
[79]	validation_0-auc:0.627041
[80]	validation_0-auc:0.626818
[81]	validation_0-auc:0.626786
[82]	validation_0-auc:0.626909
[83]	validation_0-auc:0.626562
[84]	validation_0-auc:0.626533
[85]	validation_0-auc:0.626466
[86]	validation_0-auc:0.626357
[87]	validation_0-auc:0.626077
[88]	validation_0-auc:0.62576
[89]	validation_0-auc:0.625639
[90]	validation_0-auc:0.625665
[91]	validation_0-auc:0.62558
[92]	validation_0-auc:0.625422
[93]	validation_0-auc:0.62561
[94]	validation_0-auc:0.625501
[95]	validation_0-auc:0.625271
[96]	validation_0-auc:0.625368
[97]	validation_0-auc:0.625308
[98]	validation_0-auc:0.625161
[99]	validation_0-auc:0.624899
[100]	validation_0-auc:0.624379
[101]	validation_0-auc:0.623902
[102]	validation_0-auc:0.623931
[103]	validation_0-auc:0.623854
[104]	validation_0-auc:0.623893
[105]	validation_0-auc:0.623595
[106]	validation_0-auc:0.623247
[107]	validation_0-auc:0.62306
[108

[0]	validation_0-auc:0.599287
Will train until validation_0-auc hasn't improved in 100 rounds.
[1]	validation_0-auc:0.609181
[2]	validation_0-auc:0.614006
[3]	validation_0-auc:0.613248
[4]	validation_0-auc:0.606619
[5]	validation_0-auc:0.604693
[6]	validation_0-auc:0.602391
[7]	validation_0-auc:0.600454
[8]	validation_0-auc:0.602338
[9]	validation_0-auc:0.60194
[10]	validation_0-auc:0.603506
[11]	validation_0-auc:0.602802
[12]	validation_0-auc:0.60074
[13]	validation_0-auc:0.602426
[14]	validation_0-auc:0.601736
[15]	validation_0-auc:0.601274
[16]	validation_0-auc:0.600957
[17]	validation_0-auc:0.599688
[18]	validation_0-auc:0.599206
[19]	validation_0-auc:0.598247
[20]	validation_0-auc:0.597648
[21]	validation_0-auc:0.596715
[22]	validation_0-auc:0.596476
[23]	validation_0-auc:0.595872
[24]	validation_0-auc:0.594751
[25]	validation_0-auc:0.594075
[26]	validation_0-auc:0.59321
[27]	validation_0-auc:0.591735
[28]	validation_0-auc:0.59093
[29]	validation_0-auc:0.5907
[30]	validation_0-auc

[36]	validation_0-auc:0.627246
[37]	validation_0-auc:0.627267
[38]	validation_0-auc:0.626536
[39]	validation_0-auc:0.625841
[40]	validation_0-auc:0.625429
[41]	validation_0-auc:0.62552
[42]	validation_0-auc:0.625549
[43]	validation_0-auc:0.625559
[44]	validation_0-auc:0.625528
[45]	validation_0-auc:0.625956
[46]	validation_0-auc:0.625998
[47]	validation_0-auc:0.625859
[48]	validation_0-auc:0.625911
[49]	validation_0-auc:0.625427
[50]	validation_0-auc:0.625019
[51]	validation_0-auc:0.62483
[52]	validation_0-auc:0.624551
[53]	validation_0-auc:0.624522
[54]	validation_0-auc:0.624211
[55]	validation_0-auc:0.623737
[56]	validation_0-auc:0.623735
[57]	validation_0-auc:0.623278
[58]	validation_0-auc:0.622974
[59]	validation_0-auc:0.623154
[60]	validation_0-auc:0.622822
[61]	validation_0-auc:0.622733
[62]	validation_0-auc:0.622298
[63]	validation_0-auc:0.622381
[64]	validation_0-auc:0.622546
[65]	validation_0-auc:0.622462
[66]	validation_0-auc:0.622102
[67]	validation_0-auc:0.621452
[68]	valid

[57]	validation_0-auc:0.613795
[58]	validation_0-auc:0.613235
[59]	validation_0-auc:0.613105
[60]	validation_0-auc:0.612779
[61]	validation_0-auc:0.612564
[62]	validation_0-auc:0.611496
[63]	validation_0-auc:0.611367
[64]	validation_0-auc:0.611312
[65]	validation_0-auc:0.610461
[66]	validation_0-auc:0.610607
[67]	validation_0-auc:0.609949
[68]	validation_0-auc:0.60949
[69]	validation_0-auc:0.609122
[70]	validation_0-auc:0.609512
[71]	validation_0-auc:0.609577
[72]	validation_0-auc:0.60943
[73]	validation_0-auc:0.60977
[74]	validation_0-auc:0.609744
[75]	validation_0-auc:0.609677
[76]	validation_0-auc:0.609456
[77]	validation_0-auc:0.608871
[78]	validation_0-auc:0.608213
[79]	validation_0-auc:0.607853
[80]	validation_0-auc:0.607773
[81]	validation_0-auc:0.606814
[82]	validation_0-auc:0.606859
[83]	validation_0-auc:0.606559
[84]	validation_0-auc:0.606669
[85]	validation_0-auc:0.606521
[86]	validation_0-auc:0.606713
[87]	validation_0-auc:0.606394
[88]	validation_0-auc:0.606281
[89]	valida

[84]	validation_0-auc:0.591523
[85]	validation_0-auc:0.590775
[86]	validation_0-auc:0.590572
[87]	validation_0-auc:0.590598
[88]	validation_0-auc:0.589959
[89]	validation_0-auc:0.589626
[90]	validation_0-auc:0.589156
[91]	validation_0-auc:0.589235
[92]	validation_0-auc:0.588719
[93]	validation_0-auc:0.588882
[94]	validation_0-auc:0.588739
[95]	validation_0-auc:0.58897
[96]	validation_0-auc:0.58879
[97]	validation_0-auc:0.588427
[98]	validation_0-auc:0.588201
[99]	validation_0-auc:0.587902
[100]	validation_0-auc:0.587577
[101]	validation_0-auc:0.586924
[102]	validation_0-auc:0.587198
[103]	validation_0-auc:0.587208
[104]	validation_0-auc:0.587232
[105]	validation_0-auc:0.587239
[106]	validation_0-auc:0.58618
[107]	validation_0-auc:0.58658
[108]	validation_0-auc:0.586591
[109]	validation_0-auc:0.586678
[110]	validation_0-auc:0.586038
[111]	validation_0-auc:0.585287
[112]	validation_0-auc:0.584392
[113]	validation_0-auc:0.583916
[114]	validation_0-auc:0.58333
[115]	validation_0-auc:0.5835

[43]	validation_0-auc:0.627387
[44]	validation_0-auc:0.627358
[45]	validation_0-auc:0.627497
[46]	validation_0-auc:0.62777
[47]	validation_0-auc:0.628231
[48]	validation_0-auc:0.628172
[49]	validation_0-auc:0.628361
[50]	validation_0-auc:0.628199
[51]	validation_0-auc:0.628535
[52]	validation_0-auc:0.628691
[53]	validation_0-auc:0.628874
[54]	validation_0-auc:0.628731
[55]	validation_0-auc:0.628721
[56]	validation_0-auc:0.628825
[57]	validation_0-auc:0.62904
[58]	validation_0-auc:0.629556
[59]	validation_0-auc:0.629867
[60]	validation_0-auc:0.629741
[61]	validation_0-auc:0.629739
[62]	validation_0-auc:0.629702
[63]	validation_0-auc:0.629789
[64]	validation_0-auc:0.62969
[65]	validation_0-auc:0.629882
[66]	validation_0-auc:0.6298
[67]	validation_0-auc:0.629801
[68]	validation_0-auc:0.629568
[69]	validation_0-auc:0.629265
[70]	validation_0-auc:0.629303
[71]	validation_0-auc:0.629098
[72]	validation_0-auc:0.629131
[73]	validation_0-auc:0.629241
[74]	validation_0-auc:0.629426
[75]	validati

[302]	validation_0-auc:0.635878
[303]	validation_0-auc:0.635798
[304]	validation_0-auc:0.635807
[305]	validation_0-auc:0.635841
[306]	validation_0-auc:0.63587
[307]	validation_0-auc:0.6359
[308]	validation_0-auc:0.635874
[309]	validation_0-auc:0.63594
[310]	validation_0-auc:0.635972
[311]	validation_0-auc:0.635911
[312]	validation_0-auc:0.635895
[313]	validation_0-auc:0.635874
[314]	validation_0-auc:0.635874
[315]	validation_0-auc:0.635875
[316]	validation_0-auc:0.635867
[317]	validation_0-auc:0.63584
[318]	validation_0-auc:0.635818
[319]	validation_0-auc:0.63573
[320]	validation_0-auc:0.635787
[321]	validation_0-auc:0.635764
[322]	validation_0-auc:0.635825
[323]	validation_0-auc:0.635836
[324]	validation_0-auc:0.635807
[325]	validation_0-auc:0.635792
[326]	validation_0-auc:0.635798
[327]	validation_0-auc:0.635738
[328]	validation_0-auc:0.635645
[329]	validation_0-auc:0.635702
[330]	validation_0-auc:0.63566
[331]	validation_0-auc:0.635671
[332]	validation_0-auc:0.635687
[333]	validatio

[187]	validation_0-auc:0.6425
[188]	validation_0-auc:0.64242
[189]	validation_0-auc:0.642362
[190]	validation_0-auc:0.642366
[191]	validation_0-auc:0.642463
[192]	validation_0-auc:0.64257
[193]	validation_0-auc:0.642595
[194]	validation_0-auc:0.642607
[195]	validation_0-auc:0.642471
[196]	validation_0-auc:0.64245
[197]	validation_0-auc:0.642373
[198]	validation_0-auc:0.642296
[199]	validation_0-auc:0.642292
[200]	validation_0-auc:0.642385
[201]	validation_0-auc:0.642321
[202]	validation_0-auc:0.642422
[203]	validation_0-auc:0.642364
[204]	validation_0-auc:0.642357
[205]	validation_0-auc:0.642382
[206]	validation_0-auc:0.642368
[207]	validation_0-auc:0.642316
[208]	validation_0-auc:0.642283
[209]	validation_0-auc:0.642254
[210]	validation_0-auc:0.642192
[211]	validation_0-auc:0.642136
[212]	validation_0-auc:0.642153
[213]	validation_0-auc:0.642177
[214]	validation_0-auc:0.642035
[215]	validation_0-auc:0.642041
[216]	validation_0-auc:0.642122
[217]	validation_0-auc:0.642116
[218]	validat

[202]	validation_0-auc:0.642104
[203]	validation_0-auc:0.642101
[204]	validation_0-auc:0.642109
[205]	validation_0-auc:0.642074
[206]	validation_0-auc:0.642022
[207]	validation_0-auc:0.641925
[208]	validation_0-auc:0.641988
[209]	validation_0-auc:0.64195
[210]	validation_0-auc:0.641951
[211]	validation_0-auc:0.641868
[212]	validation_0-auc:0.641905
[213]	validation_0-auc:0.641756
[214]	validation_0-auc:0.641671
[215]	validation_0-auc:0.641729
[216]	validation_0-auc:0.641766
[217]	validation_0-auc:0.64178
[218]	validation_0-auc:0.64181
[219]	validation_0-auc:0.641772
[220]	validation_0-auc:0.641792
[221]	validation_0-auc:0.641711
[222]	validation_0-auc:0.641681
[223]	validation_0-auc:0.641694
[224]	validation_0-auc:0.641675
[225]	validation_0-auc:0.641668
[226]	validation_0-auc:0.641678
[227]	validation_0-auc:0.641701
[228]	validation_0-auc:0.641697
[229]	validation_0-auc:0.641703
[230]	validation_0-auc:0.641538
[231]	validation_0-auc:0.641749
[232]	validation_0-auc:0.641697
[233]	valid

[50]	validation_0-auc:0.637034
[51]	validation_0-auc:0.637087
[52]	validation_0-auc:0.637019
[53]	validation_0-auc:0.63702
[54]	validation_0-auc:0.637323
[55]	validation_0-auc:0.637618
[56]	validation_0-auc:0.637823
[57]	validation_0-auc:0.637888
[58]	validation_0-auc:0.638099
[59]	validation_0-auc:0.637946
[60]	validation_0-auc:0.638093
[61]	validation_0-auc:0.638147
[62]	validation_0-auc:0.638169
[63]	validation_0-auc:0.638084
[64]	validation_0-auc:0.638134
[65]	validation_0-auc:0.638501
[66]	validation_0-auc:0.638601
[67]	validation_0-auc:0.638835
[68]	validation_0-auc:0.639007
[69]	validation_0-auc:0.63901
[70]	validation_0-auc:0.638899
[71]	validation_0-auc:0.638928
[72]	validation_0-auc:0.639069
[73]	validation_0-auc:0.639188
[74]	validation_0-auc:0.6393
[75]	validation_0-auc:0.639458
[76]	validation_0-auc:0.639375
[77]	validation_0-auc:0.639553
[78]	validation_0-auc:0.639548
[79]	validation_0-auc:0.639721
[80]	validation_0-auc:0.639536
[81]	validation_0-auc:0.639416
[82]	validat

[67]	validation_0-auc:0.622487
[68]	validation_0-auc:0.622715
[69]	validation_0-auc:0.622738
[70]	validation_0-auc:0.623038
[71]	validation_0-auc:0.622956
[72]	validation_0-auc:0.623221
[73]	validation_0-auc:0.623282
[74]	validation_0-auc:0.62344
[75]	validation_0-auc:0.623602
[76]	validation_0-auc:0.623533
[77]	validation_0-auc:0.623632
[78]	validation_0-auc:0.623628
[79]	validation_0-auc:0.623649
[80]	validation_0-auc:0.623511
[81]	validation_0-auc:0.623272
[82]	validation_0-auc:0.623359
[83]	validation_0-auc:0.623332
[84]	validation_0-auc:0.623358
[85]	validation_0-auc:0.623459
[86]	validation_0-auc:0.623784
[87]	validation_0-auc:0.623772
[88]	validation_0-auc:0.623813
[89]	validation_0-auc:0.623918
[90]	validation_0-auc:0.623943
[91]	validation_0-auc:0.623927
[92]	validation_0-auc:0.623865
[93]	validation_0-auc:0.62385
[94]	validation_0-auc:0.623872
[95]	validation_0-auc:0.624057
[96]	validation_0-auc:0.624295
[97]	validation_0-auc:0.624282
[98]	validation_0-auc:0.624332
[99]	valid

[325]	validation_0-auc:0.640905
[326]	validation_0-auc:0.640922
[327]	validation_0-auc:0.64093
[328]	validation_0-auc:0.640944
[329]	validation_0-auc:0.640975
[330]	validation_0-auc:0.64097
[331]	validation_0-auc:0.640991
[332]	validation_0-auc:0.640978
[333]	validation_0-auc:0.640971
[334]	validation_0-auc:0.640961
[335]	validation_0-auc:0.641008
[336]	validation_0-auc:0.640995
[337]	validation_0-auc:0.641043
[338]	validation_0-auc:0.641013
[339]	validation_0-auc:0.641026
[340]	validation_0-auc:0.641066
[341]	validation_0-auc:0.641078
[342]	validation_0-auc:0.641087
[343]	validation_0-auc:0.641117
[344]	validation_0-auc:0.641127
[345]	validation_0-auc:0.641146
[346]	validation_0-auc:0.641122
[347]	validation_0-auc:0.641126
[348]	validation_0-auc:0.641115
[349]	validation_0-auc:0.64111
[350]	validation_0-auc:0.641153
[351]	validation_0-auc:0.641169
[352]	validation_0-auc:0.641157
[353]	validation_0-auc:0.641149
[354]	validation_0-auc:0.641157
[355]	validation_0-auc:0.641186
[356]	valid

[582]	validation_0-auc:0.642146
[583]	validation_0-auc:0.642136
[584]	validation_0-auc:0.642139
[585]	validation_0-auc:0.642161
[586]	validation_0-auc:0.642148
[587]	validation_0-auc:0.642188
[588]	validation_0-auc:0.642165
[589]	validation_0-auc:0.642156
[590]	validation_0-auc:0.642132
[591]	validation_0-auc:0.642135
[592]	validation_0-auc:0.642146
[593]	validation_0-auc:0.642148
[594]	validation_0-auc:0.642179
[595]	validation_0-auc:0.64219
[596]	validation_0-auc:0.642181
[597]	validation_0-auc:0.642157
[598]	validation_0-auc:0.642175
[599]	validation_0-auc:0.642171
[600]	validation_0-auc:0.642253
[601]	validation_0-auc:0.642235
[602]	validation_0-auc:0.642255
[603]	validation_0-auc:0.642283
[604]	validation_0-auc:0.642269
[605]	validation_0-auc:0.642289
[606]	validation_0-auc:0.642289
[607]	validation_0-auc:0.642305
[608]	validation_0-auc:0.642301
[609]	validation_0-auc:0.642305
[610]	validation_0-auc:0.642332
[611]	validation_0-auc:0.642327
[612]	validation_0-auc:0.642305
[613]	val

[128]	validation_0-auc:0.64016
[129]	validation_0-auc:0.640217
[130]	validation_0-auc:0.640182
[131]	validation_0-auc:0.640135
[132]	validation_0-auc:0.640241
[133]	validation_0-auc:0.64029
[134]	validation_0-auc:0.640304
[135]	validation_0-auc:0.640324
[136]	validation_0-auc:0.640345
[137]	validation_0-auc:0.640467
[138]	validation_0-auc:0.640496
[139]	validation_0-auc:0.640564
[140]	validation_0-auc:0.640502
[141]	validation_0-auc:0.640557
[142]	validation_0-auc:0.640512
[143]	validation_0-auc:0.640509
[144]	validation_0-auc:0.640465
[145]	validation_0-auc:0.640457
[146]	validation_0-auc:0.640408
[147]	validation_0-auc:0.640411
[148]	validation_0-auc:0.640365
[149]	validation_0-auc:0.640432
[150]	validation_0-auc:0.640404
[151]	validation_0-auc:0.640414
[152]	validation_0-auc:0.640593
[153]	validation_0-auc:0.640619
[154]	validation_0-auc:0.640562
[155]	validation_0-auc:0.640553
[156]	validation_0-auc:0.640515
[157]	validation_0-auc:0.640558
[158]	validation_0-auc:0.640627
[159]	vali

[100]	validation_0-auc:0.62555
[101]	validation_0-auc:0.625571
[102]	validation_0-auc:0.62553
[103]	validation_0-auc:0.625618
[104]	validation_0-auc:0.625788
[105]	validation_0-auc:0.625873
[106]	validation_0-auc:0.625866
[107]	validation_0-auc:0.625936
[108]	validation_0-auc:0.626279
[109]	validation_0-auc:0.626463
[110]	validation_0-auc:0.626396
[111]	validation_0-auc:0.626499
[112]	validation_0-auc:0.626614
[113]	validation_0-auc:0.626962
[114]	validation_0-auc:0.627113
[115]	validation_0-auc:0.627158
[116]	validation_0-auc:0.627295
[117]	validation_0-auc:0.627338
[118]	validation_0-auc:0.627482
[119]	validation_0-auc:0.62758
[120]	validation_0-auc:0.627719
[121]	validation_0-auc:0.627769
[122]	validation_0-auc:0.62797
[123]	validation_0-auc:0.628199
[124]	validation_0-auc:0.628315
[125]	validation_0-auc:0.62862
[126]	validation_0-auc:0.628529
[127]	validation_0-auc:0.628583
[128]	validation_0-auc:0.62894
[129]	validation_0-auc:0.628903
[130]	validation_0-auc:0.629193
[131]	validati

[358]	validation_0-auc:0.642628
[359]	validation_0-auc:0.642621
[360]	validation_0-auc:0.642621
[361]	validation_0-auc:0.642694
[362]	validation_0-auc:0.64265
[363]	validation_0-auc:0.64271
[364]	validation_0-auc:0.642753
[365]	validation_0-auc:0.642804
[366]	validation_0-auc:0.642809
[367]	validation_0-auc:0.642767
[368]	validation_0-auc:0.642783
[369]	validation_0-auc:0.642782
[370]	validation_0-auc:0.642859
[371]	validation_0-auc:0.642846
[372]	validation_0-auc:0.642876
[373]	validation_0-auc:0.642896
[374]	validation_0-auc:0.642913
[375]	validation_0-auc:0.642915
[376]	validation_0-auc:0.642945
[377]	validation_0-auc:0.642916
[378]	validation_0-auc:0.642967
[379]	validation_0-auc:0.642951
[380]	validation_0-auc:0.642988
[381]	validation_0-auc:0.642969
[382]	validation_0-auc:0.642981
[383]	validation_0-auc:0.642954
[384]	validation_0-auc:0.642943
[385]	validation_0-auc:0.642943
[386]	validation_0-auc:0.64293
[387]	validation_0-auc:0.642946
[388]	validation_0-auc:0.642961
[389]	valid

[7]	validation_0-auc:0.624324
[8]	validation_0-auc:0.625035
[9]	validation_0-auc:0.626758
[10]	validation_0-auc:0.625578
[11]	validation_0-auc:0.62742
[12]	validation_0-auc:0.627036
[13]	validation_0-auc:0.626863
[14]	validation_0-auc:0.626969
[15]	validation_0-auc:0.628255
[16]	validation_0-auc:0.628086
[17]	validation_0-auc:0.62763
[18]	validation_0-auc:0.627787
[19]	validation_0-auc:0.628561
[20]	validation_0-auc:0.628164
[21]	validation_0-auc:0.627461
[22]	validation_0-auc:0.628405
[23]	validation_0-auc:0.628524
[24]	validation_0-auc:0.629226
[25]	validation_0-auc:0.629184
[26]	validation_0-auc:0.629101
[27]	validation_0-auc:0.629928
[28]	validation_0-auc:0.630067
[29]	validation_0-auc:0.631937
[30]	validation_0-auc:0.633236
[31]	validation_0-auc:0.63341
[32]	validation_0-auc:0.633735
[33]	validation_0-auc:0.634003
[34]	validation_0-auc:0.634284
[35]	validation_0-auc:0.63456
[36]	validation_0-auc:0.635847
[37]	validation_0-auc:0.635847
[38]	validation_0-auc:0.635833
[39]	validation

[68]	validation_0-auc:0.625138
[69]	validation_0-auc:0.625303
[70]	validation_0-auc:0.62559
[71]	validation_0-auc:0.625572
[72]	validation_0-auc:0.625777
[73]	validation_0-auc:0.625818
[74]	validation_0-auc:0.625776
[75]	validation_0-auc:0.625823
[76]	validation_0-auc:0.625663
[77]	validation_0-auc:0.625699
[78]	validation_0-auc:0.625753
[79]	validation_0-auc:0.625752
[80]	validation_0-auc:0.625608
[81]	validation_0-auc:0.625689
[82]	validation_0-auc:0.625733
[83]	validation_0-auc:0.625772
[84]	validation_0-auc:0.625898
[85]	validation_0-auc:0.625913
[86]	validation_0-auc:0.626114
[87]	validation_0-auc:0.626229
[88]	validation_0-auc:0.62613
[89]	validation_0-auc:0.626174
[90]	validation_0-auc:0.626062
[91]	validation_0-auc:0.626111
[92]	validation_0-auc:0.626256
[93]	validation_0-auc:0.626138
[94]	validation_0-auc:0.62606
[95]	validation_0-auc:0.625943
[96]	validation_0-auc:0.626231
[97]	validation_0-auc:0.626356
[98]	validation_0-auc:0.626561
[99]	validation_0-auc:0.626654
[100]	valid

[327]	validation_0-auc:0.641791
[328]	validation_0-auc:0.641809
[329]	validation_0-auc:0.641785
[330]	validation_0-auc:0.641787
[331]	validation_0-auc:0.641793
[332]	validation_0-auc:0.641776
[333]	validation_0-auc:0.641773
[334]	validation_0-auc:0.641752
[335]	validation_0-auc:0.641743
[336]	validation_0-auc:0.641744
[337]	validation_0-auc:0.641714
[338]	validation_0-auc:0.641726
[339]	validation_0-auc:0.64178
[340]	validation_0-auc:0.641791
[341]	validation_0-auc:0.641835
[342]	validation_0-auc:0.641804
[343]	validation_0-auc:0.64182
[344]	validation_0-auc:0.641799
[345]	validation_0-auc:0.641822
[346]	validation_0-auc:0.641843
[347]	validation_0-auc:0.641875
[348]	validation_0-auc:0.641922
[349]	validation_0-auc:0.64192
[350]	validation_0-auc:0.642004
[351]	validation_0-auc:0.642032
[352]	validation_0-auc:0.642043
[353]	validation_0-auc:0.642042
[354]	validation_0-auc:0.642054
[355]	validation_0-auc:0.642086
[356]	validation_0-auc:0.642075
[357]	validation_0-auc:0.642085
[358]	valid

[585]	validation_0-auc:0.643671
[586]	validation_0-auc:0.64368
[587]	validation_0-auc:0.643687
[588]	validation_0-auc:0.643654
[589]	validation_0-auc:0.643667
[590]	validation_0-auc:0.643699
[591]	validation_0-auc:0.643736
[592]	validation_0-auc:0.643732
[593]	validation_0-auc:0.643702
[594]	validation_0-auc:0.643728
[595]	validation_0-auc:0.643733
[596]	validation_0-auc:0.643746
[597]	validation_0-auc:0.643728
[598]	validation_0-auc:0.643721
[599]	validation_0-auc:0.643707
[600]	validation_0-auc:0.643717
[601]	validation_0-auc:0.64372
[602]	validation_0-auc:0.643677
[603]	validation_0-auc:0.643666
[604]	validation_0-auc:0.64366
[605]	validation_0-auc:0.643685
[606]	validation_0-auc:0.643715
[607]	validation_0-auc:0.643711
[608]	validation_0-auc:0.643725
[609]	validation_0-auc:0.643696
[610]	validation_0-auc:0.64368
[611]	validation_0-auc:0.643712
[612]	validation_0-auc:0.643719
[613]	validation_0-auc:0.643712
[614]	validation_0-auc:0.643727
[615]	validation_0-auc:0.643732
[616]	valida

[116]	validation_0-auc:0.61478
[117]	validation_0-auc:0.614277
[118]	validation_0-auc:0.614329
[119]	validation_0-auc:0.614253
[120]	validation_0-auc:0.614172
[121]	validation_0-auc:0.61419
[122]	validation_0-auc:0.614071
[123]	validation_0-auc:0.614269
[124]	validation_0-auc:0.614157
[125]	validation_0-auc:0.614086
[126]	validation_0-auc:0.614061
Stopping. Best iteration:
[26]	validation_0-auc:0.632645

{'colsample_bytree': 0.8, 'learning_rate': 0.025, 'max_delta_step': 1.9000000000000001, 'subsample': 0.7000000000000001, 'min_child': 18.0, 'max_depth': 0, 'gamma': 0.75}
Time: 626.000983334 mins


In [None]:
print 'Done.'

In [1]:
from __future__ import division
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from time import time
import gc
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
from hyperopt import hp, tpe, STATUS_OK, Trials, fmin
from sklearn.model_selection import cross_val_score
%matplotlib inline

In [3]:
train = pd.read_csv('D:/Driver/ohe_train_v2.csv',na_values=-1)
# test = pd.read_csv('D:/Driver/ohe_test_v2.csv',na_values=-1)

In [4]:
features = list(train.columns)

In [9]:
features.remove('id')
features.remove('target')

In [4]:
X = train.drop(['id','target'], axis=1)
features = X.columns
X = X.values
y = train['target'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify = y)

In [None]:
def objective(space):
    clf = xgb.XGBClassifier(n_estimators = space['n_estimators'],
                           max_depth = space['max_depth'],
                           min_child_weight = space['min_child_weight'],
                           subsample = space['subsample'],
                           learning_rate = space['learning_rate'],
                           gamma = space['gamma'],
                           colsample_bytree = space['colsample_bytree'],
                           objective='binary:logistic'
                           )

    skf = StratifiedKFold(n_splits=5, random_state=2017)

    scores = cross_val_score(clf, train[features], train['target'], 
                             cv=skf, scoring='roc_auc')

    return 1 - scores.mean()


space ={
        'max_depth': hp.choice('max_depth', np.arange(7, 15, dtype=int)),
        'min_child_weight': hp.quniform ('min_child', 1, 20, 1),
        'subsample': hp.uniform ('subsample', 0.8, 1),
        'n_estimators' : hp.choice('n_estimators', np.arange(1000, 2000, 100, dtype=int)),
        'learning_rate' : hp.quniform('learning_rate', 0.025, 0.5, 0.025),
        'gamma' : hp.quniform('gamma', 0.5, 1, 0.05),
        'colsample_bytree' : hp.quniform('colsample_bytree', 0.4, 1, 0.05)
    }


trials = Trials()
best = fmin(objective,
            space,
            algo=tpe.suggest,
            max_evals=3, # change
            trials=trials)

print(best)


In [4]:
gc.collect()

0

In [24]:
np.arange(0.1, 0.3, 0.1)

array([ 0.1,  0.2])

In [5]:
def gini(y, pred):
    g = np.asarray(np.c_[y, pred, np.arange(len(y)) ], dtype=np.float)
    g = g[np.lexsort((g[:,2], -1*g[:,1]))]
    gs = g[:,0].cumsum().sum() / g[:,0].sum()
    gs -= (len(y) + 1) / 2.
    return gs / len(y)

def gini_xgb(pred, y):
    y = y.get_label()
    return 'gini', gini(y, pred) / gini(y, y)

def gini_lgb(preds, dtrain):
    y = list(dtrain.get_label())
    score = gini(y, preds) / gini(y, y)
    return 'gini', score, True

In [36]:
np.arange(50, 101, 5) / 100

array([ 0.5 ,  0.55,  0.6 ,  0.65,  0.7 ,  0.75,  0.8 ,  0.85,  0.9 ,
        0.95,  1.  ])

In [37]:
start = time()
param_dist = {
    'min_child_weight': [6, 8, 10],
    'max_depth': [7,8,10],
    'max_delta_step': np.arange(1, 2.1, 0.2) ,
    'colsample_bytree': np.arange(0.4, 1.05, 0.05),
    'subsample': np.arange(0.7, 1, 0.05),
    'learning_rate': [0.01, 0.025, 0.05, 0.1],
    'gamma': np.arange(50, 101, 5) / 100,
    'n_estimators' : [1500]
    }

fit_dict = {"eval_set":[(X_test, y_test)],
            "early_stopping_rounds": 100,
            'eval_metric' : 'auc'}

xgb_tune = xgb.XGBClassifier(random_state=42, objective='binary:logistic')
n_iter_search = 1
# gini_scorer = make_scorer(gini_normalized, greater_is_better=True)
random_search = RandomizedSearchCV(estimator = xgb_tune,
                                   param_distributions = param_dist,
                                   scoring = 'roc_auc',
                                   n_iter=n_iter_search,
                                   random_state=42,
                                   cv=5,
                                   verbose=5,
                                   n_jobs=8,
                                   fit_params=fit_dict)

start = time()
random_search.fit(X_train, y_train)
print 'Training time: {} mins.'.format((time() - start) / 60)



Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=8)]: Done   2 out of   5 | elapsed: 20.0min remaining: 30.0min


JoblibMemoryError: JoblibMemoryError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\KrystalU\Anaconda2\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    169     pkg_name = mod_name.rpartition('.')[0]
    170     main_globals = sys.modules["__main__"].__dict__
    171     if alter_argv:
    172         sys.argv[0] = fname
    173     return _run_code(code, main_globals, None,
--> 174                      "__main__", fname, loader, pkg_name)
        fname = r'C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel_launcher.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = ''
    175 
    176 def run_module(mod_name, init_globals=None,
    177                run_name=None, alter_sys=False):
    178     """Execute a module's code without importing it

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\runpy.py in _run_code(code=<code object <module> at 000000000365AF30, file ...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from 'C:\Users\Kry...onda2\lib\site-packages\ipykernel\kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_fname=r'C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel_launcher.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 000000000365AF30, file ...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel_launcher.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': '', 'app': <module 'ipykernel.kernelapp' from 'C:\Users\Kry...onda2\lib\site-packages\ipykernel\kernelapp.pyc'>, 'sys': <module 'sys' (built-in)>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\zmq\eventloop\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\tornado\ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2017, 10, 21, 1, 15, 23, 822000, tzinfo=tzutc()), u'msg_id': u'33A71F0C5EF843F28C1BECF2682DD31C', u'msg_type': u'execute_request', u'session': u'8591A082E9B74DC892EC5B566FFFB685', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'33A71F0C5EF843F28C1BECF2682DD31C', 'msg_type': u'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['8591A082E9B74DC892EC5B566FFFB685']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2017, 10, 21, 1, 15, 23, 822000, tzinfo=tzutc()), u'msg_id': u'33A71F0C5EF843F28C1BECF2682DD31C', u'msg_type': u'execute_request', u'session': u'8591A082E9B74DC892EC5B566FFFB685', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'33A71F0C5EF843F28C1BECF2682DD31C', 'msg_type': u'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['8591A082E9B74DC892EC5B566FFFB685'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {u'date': datetime.datetime(2017, 10, 21, 1, 15, 23, 822000, tzinfo=tzutc()), u'msg_id': u'33A71F0C5EF843F28C1BECF2682DD31C', u'msg_type': u'execute_request', u'session': u'8591A082E9B74DC892EC5B566FFFB685', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'33A71F0C5EF843F28C1BECF2682DD31C', 'msg_type': u'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)'
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=(u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)',), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = (u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)',)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'start = time()\nparam_dist = {\n    \'min_chil...g time: {} mins.\'.format((time() - start) / 60)', store_history=True, silent=False, shell_futures=True)
   2713                 self.displayhook.exec_result = result
   2714 
   2715                 # Execute the user code
   2716                 interactivity = "none" if silent else self.ast_node_interactivity
   2717                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2718                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2719                 
   2720                 self.last_execution_succeeded = not has_raised
   2721 
   2722                 # Reset this so later displayed values do not modify the

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Print object>], cell_name='<ipython-input-37-0d4797240f73>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<ExecutionResult object at 52013ef0, execution_c..._before_exec=None error_in_exec=None result=None>)
   2817 
   2818         try:
   2819             for i, node in enumerate(to_run_exec):
   2820                 mod = ast.Module([node])
   2821                 code = compiler(mod, cell_name, "exec")
-> 2822                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0000000073060EB0, file "<ipython-input-37-0d4797240f73>", line 30>
        result = <ExecutionResult object at 52013ef0, execution_c..._before_exec=None error_in_exec=None result=None>
   2823                     return True
   2824 
   2825             for i, node in enumerate(to_run_interactive):
   2826                 mod = ast.Interactive([node])

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0000000073060EB0, file "<ipython-input-37-0d4797240f73>", line 30>, result=<ExecutionResult object at 52013ef0, execution_c..._before_exec=None error_in_exec=None result=None>)
   2877         outflag = 1  # happens in more places, so it's easier as default
   2878         try:
   2879             try:
   2880                 self.hooks.pre_run_code_hook()
   2881                 #rprint('Running code', repr(code_obj)) # dbg
-> 2882                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0000000073060EB0, file "<ipython-input-37-0d4797240f73>", line 30>
        self.user_global_ns = {'CatBoostClassifier': <class 'catboost.core.CatBoostClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', u"from __future__ import division\nimport pandas...ials \nget_ipython().magic(u'matplotlib inline')", u"train = pd.read_csv('D:/Driver/ohe_train_v2.cs...ad_csv('D:/Driver/ohe_test_v2.csv',na_values=-1)", u"X = train.drop(['id','target'], axis=1)\nfeatu...y, test_size=0.2, random_state=42, stratify = y)", u"get_ipython().magic(u'pinfo hp.uniform')", u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'hp.quniform(5,10)/10', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u"get_ipython().magic(u'pinfo xgb.XGBClassifier')", u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'np.arange(3,7) / 10', u"from __future__ import division\nimport pandas...ndint\nget_ipython().magic(u'matplotlib inline')", u'sp_randint(3,7)', u'print(sp_randint(3,7))', u'print(sp_randint(3,7).next())', ...], 'Out': {15: array([ 0.3,  0.4,  0.5,  0.6]), 17: <scipy.stats._distn_infrastructure.rv_frozen object>, 22: <scipy.stats._distn_infrastructure.rv_frozen object>, 24: array([ 0.1,  0.2]), 25: array([ 1. ,  1.2,  1.4,  1.6,  1.8,  2. ]), 26: array([ 1. ,  1.2,  1.4,  1.6,  1.8,  2. ,  2.2]), 27: array([ 1. ,  1.2,  1.4,  1.6,  1.8]), 28: array([ 0.4 ,  0.45,  0.5 ,  0.55,  0.6 ,  0.65,... 0.8 ,
        0.85,  0.9 ,  0.95,  1.  ,  1.05]), 29: array([ 0.4 ,  0.45,  0.5 ,  0.55,  0.6 ,  0.65,  0.7 ,  0.75,  0.8 ,
        0.85,  0.9 ,  0.95]), 30: array([ 0.4 ,  0.45,  0.5 ,  0.55,  0.6 ,  0.65,... 0.75,  0.8 ,
        0.85,  0.9 ,  0.95,  1.  ]), ...}, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'STATUS_OK': 'ok', 'StratifiedKFold': <class 'sklearn.model_selection._split.StratifiedKFold'>, 'Trials': <class 'hyperopt.base.Trials'>, 'X': array([[ 0.60000002,  0.5       ,  0.2       , ....  0.        ,
         0.        ,  3.        ]]), 'X_test': array([[ 0.60000002,  0.30000001,  0.30000001, ....  0.        ,
         0.        ,  0.        ]]), ...}
        self.user_ns = {'CatBoostClassifier': <class 'catboost.core.CatBoostClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', u"from __future__ import division\nimport pandas...ials \nget_ipython().magic(u'matplotlib inline')", u"train = pd.read_csv('D:/Driver/ohe_train_v2.cs...ad_csv('D:/Driver/ohe_test_v2.csv',na_values=-1)", u"X = train.drop(['id','target'], axis=1)\nfeatu...y, test_size=0.2, random_state=42, stratify = y)", u"get_ipython().magic(u'pinfo hp.uniform')", u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'hp.quniform(5,10)/10', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u"get_ipython().magic(u'pinfo xgb.XGBClassifier')", u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'start = time()\ndef objective(space):\n    clf...s=trials)\n\nprint best\nprint (time()-start)/60', u'np.arange(3,7) / 10', u"from __future__ import division\nimport pandas...ndint\nget_ipython().magic(u'matplotlib inline')", u'sp_randint(3,7)', u'print(sp_randint(3,7))', u'print(sp_randint(3,7).next())', ...], 'Out': {15: array([ 0.3,  0.4,  0.5,  0.6]), 17: <scipy.stats._distn_infrastructure.rv_frozen object>, 22: <scipy.stats._distn_infrastructure.rv_frozen object>, 24: array([ 0.1,  0.2]), 25: array([ 1. ,  1.2,  1.4,  1.6,  1.8,  2. ]), 26: array([ 1. ,  1.2,  1.4,  1.6,  1.8,  2. ,  2.2]), 27: array([ 1. ,  1.2,  1.4,  1.6,  1.8]), 28: array([ 0.4 ,  0.45,  0.5 ,  0.55,  0.6 ,  0.65,... 0.8 ,
        0.85,  0.9 ,  0.95,  1.  ,  1.05]), 29: array([ 0.4 ,  0.45,  0.5 ,  0.55,  0.6 ,  0.65,  0.7 ,  0.75,  0.8 ,
        0.85,  0.9 ,  0.95]), 30: array([ 0.4 ,  0.45,  0.5 ,  0.55,  0.6 ,  0.65,... 0.75,  0.8 ,
        0.85,  0.9 ,  0.95,  1.  ]), ...}, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'STATUS_OK': 'ok', 'StratifiedKFold': <class 'sklearn.model_selection._split.StratifiedKFold'>, 'Trials': <class 'hyperopt.base.Trials'>, 'X': array([[ 0.60000002,  0.5       ,  0.2       , ....  0.        ,
         0.        ,  3.        ]]), 'X_test': array([[ 0.60000002,  0.30000001,  0.30000001, ....  0.        ,
         0.        ,  0.        ]]), ...}
   2883             finally:
   2884                 # Reset our crash handler in place
   2885                 sys.excepthook = old_excepthook
   2886         except SystemExit as e:

...........................................................................
C:\Users\KrystalU\Documents\Data\Driver\<ipython-input-37-0d4797240f73> in <module>()
     25                                    cv=5,
     26                                    verbose=5,
     27                                    n_jobs=8,
     28                                    fit_params=fit_dict)
     29 start = time()
---> 30 random_search.fit(X_train, y_train)
     31 print 'Training time: {} mins.'.format((time() - start) / 60)

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\sklearn\model_selection\_search.py in fit(self=RandomizedSearchCV(cv=5, error_score='raise',
  ...n_train_score=True, scoring='roc_auc', verbose=5), X=array([[ 0.60000002,  0.1       ,  0.2       , ....  0.        ,
         0.        ,  2.        ]]), y=array([0, 0, 0, ..., 1, 0, 0], dtype=int64), groups=None, **fit_params={'early_stopping_rounds': 100, 'eval_metric': 'auc', 'eval_set': [(array([[ 0.60000002,  0.30000001,  0.30000001, ....  0.        ,
         0.        ,  0.        ]]), array([0, 0, 0, ..., 0, 0, 0], dtype=int64))]})
    633                                   return_train_score=self.return_train_score,
    634                                   return_n_test_samples=True,
    635                                   return_times=True, return_parameters=False,
    636                                   error_score=self.error_score)
    637           for parameters, (train, test) in product(candidate_params,
--> 638                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=5, random_state=None, shuffle=False)>
        X = array([[ 0.60000002,  0.1       ,  0.2       , ....  0.        ,
         0.        ,  2.        ]])
        y = array([0, 0, 0, ..., 1, 0, 0], dtype=int64)
        groups = None
    639 
    640         # if one choose to see train score, "out" will contain train score info
    641         if self.return_train_score:
    642             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=8), iterable=<generator object <genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=8)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
MemoryError                                        Fri Oct 20 21:15:33 2017
PID: 4316             Python 2.7.13: C:\Users\KrystalU\Anaconda2\python.exe
...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (XGBClassifier(base_score=0.5, booster='gbtree', ...      silent=True, subsample=0.90000000000000013), memmap([[ 0.60000002,  0.1       ,  0.2       , ... 0.        ,
          0.        ,  2.        ]]), memmap([0, 0, 0, ..., 1, 0, 0], dtype=int64), {'score': make_scorer(roc_auc_score, needs_threshold=True)}, memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64), array([187539, 187599, 187614, ..., 285788, 285789, 285790], dtype=int64), 5, {'colsample_bytree': 0.75, 'gamma': 0.65000000000000002, 'learning_rate': 0.05, 'max_delta_step': 1.5999999999999999, 'max_depth': 10, 'min_child_weight': 8, 'n_estimators': 1500, 'subsample': 0.90000000000000013})
        kwargs = {'error_score': 'raise', 'fit_params': {'early_stopping_rounds': 100, 'eval_metric': 'auc', 'eval_set': [(memmap([[ 0.60000002,  0.30000001,  0.30000001, ... 0.        ,
          0.        ,  0.        ]]), array([0, 0, 0, ..., 0, 0, 0], dtype=int64))]}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}
        self.items = [(<function _fit_and_score>, (XGBClassifier(base_score=0.5, booster='gbtree', ...      silent=True, subsample=0.90000000000000013), memmap([[ 0.60000002,  0.1       ,  0.2       , ... 0.        ,
          0.        ,  2.        ]]), memmap([0, 0, 0, ..., 1, 0, 0], dtype=int64), {'score': make_scorer(roc_auc_score, needs_threshold=True)}, memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64), array([187539, 187599, 187614, ..., 285788, 285789, 285790], dtype=int64), 5, {'colsample_bytree': 0.75, 'gamma': 0.65000000000000002, 'learning_rate': 0.05, 'max_delta_step': 1.5999999999999999, 'max_depth': 10, 'min_child_weight': 8, 'n_estimators': 1500, 'subsample': 0.90000000000000013}), {'error_score': 'raise', 'fit_params': {'early_stopping_rounds': 100, 'eval_metric': 'auc', 'eval_set': [(memmap([[ 0.60000002,  0.30000001,  0.30000001, ... 0.        ,
          0.        ,  0.        ]]), array([0, 0, 0, ..., 0, 0, 0], dtype=int64))]}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=XGBClassifier(base_score=0.5, booster='gbtree', ...      silent=True, subsample=0.90000000000000013), X=memmap([[ 0.60000002,  0.1       ,  0.2       , ... 0.        ,
          0.        ,  2.        ]]), y=memmap([0, 0, 0, ..., 1, 0, 0], dtype=int64), scorer={'score': make_scorer(roc_auc_score, needs_threshold=True)}, train=memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64), test=array([187539, 187599, 187614, ..., 285788, 285789, 285790], dtype=int64), verbose=5, parameters={'colsample_bytree': 0.75, 'gamma': 0.65000000000000002, 'learning_rate': 0.05, 'max_delta_step': 1.5999999999999999, 'max_depth': 10, 'min_child_weight': 8, 'n_estimators': 1500, 'subsample': 0.90000000000000013}, fit_params={'early_stopping_rounds': 100, 'eval_metric': 'auc', 'eval_set': [(memmap([[ 0.60000002,  0.30000001,  0.30000001, ... 0.        ,
          0.        ,  0.        ]]), array([0, 0, 0, ..., 0, 0, 0], dtype=int64))]}, return_train_score=True, return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    422     if parameters is not None:
    423         estimator.set_params(**parameters)
    424 
    425     start_time = time.time()
    426 
--> 427     X_train, y_train = _safe_split(estimator, X, y, train)
        X_train = undefined
        y_train = undefined
        estimator = XGBClassifier(base_score=0.5, booster='gbtree', ...      silent=True, subsample=0.90000000000000013)
        X = memmap([[ 0.60000002,  0.1       ,  0.2       , ... 0.        ,
          0.        ,  2.        ]])
        y = memmap([0, 0, 0, ..., 1, 0, 0], dtype=int64)
        train = memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64)
    428     X_test, y_test = _safe_split(estimator, X, y, test, train)
    429 
    430     is_multimetric = not callable(scorer)
    431     n_scorers = len(scorer.keys()) if is_multimetric else 1

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\sklearn\utils\metaestimators.py in _safe_split(estimator=XGBClassifier(base_score=0.5, booster='gbtree', ...      silent=True, subsample=0.90000000000000013), X=memmap([[ 0.60000002,  0.1       ,  0.2       , ... 0.        ,
          0.        ,  2.        ]]), y=memmap([0, 0, 0, ..., 1, 0, 0], dtype=int64), indices=memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64), train_indices=None)
    195         if train_indices is None:
    196             X_subset = X[np.ix_(indices, indices)]
    197         else:
    198             X_subset = X[np.ix_(indices, train_indices)]
    199     else:
--> 200         X_subset = safe_indexing(X, indices)
        X_subset = undefined
        X = memmap([[ 0.60000002,  0.1       ,  0.2       , ... 0.        ,
          0.        ,  2.        ]])
        indices = memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64)
    201 
    202     if y is not None:
    203         y_subset = safe_indexing(y, indices)
    204     else:

...........................................................................
C:\Users\KrystalU\Anaconda2\lib\site-packages\sklearn\utils\__init__.py in safe_indexing(X=memmap([[ 0.60000002,  0.1       ,  0.2       , ... 0.        ,
          0.        ,  2.        ]]), indices=memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64))
    155             return X.copy().iloc[indices]
    156     elif hasattr(X, "shape"):
    157         if hasattr(X, 'take') and (hasattr(indices, 'dtype') and
    158                                    indices.dtype.kind == 'i'):
    159             # This is often substantially faster than X[indices]
--> 160             return X.take(indices, axis=0)
        X.take = <built-in method take of memmap object>
        indices = memmap([     0,      1,      2, ..., 476166, 476167, 476168], dtype=int64)
    161         else:
    162             return X[indices]
    163     else:
    164         return [X[idx] for idx in indices]

MemoryError: 
___________________________________________________________________________

In [None]:
nrounds = 2000  # need to change to 2000
kfold = 5  # need to change to 5

# xgb
print('xgb start...')
params = {'eta': 0.025, 'max_depth': 7, 'subsample': 0.8, 'colsample_bytree': 0.4,
          'objective': 'binary:logistic', 'eval_metric': 'auc', 'silent': True, 'max_delta_step':1.8,
          'min_child_weight':8, 'gamma':0.65}

skf = StratifiedKFold(n_splits=kfold, random_state=2016)
for i, (train_index, test_index) in enumerate(skf.split(X, y)):
    print('xgb kfold: {}  of  {} : '.format(i + 1, kfold))
    X_train, X_valid = X[train_index], X[test_index]
    y_train, y_valid = y[train_index], y[test_index]
    d_train = xgb.DMatrix(X_train, y_train)
    d_valid = xgb.DMatrix(X_valid, y_valid)
    watchlist = [(d_train, 'train'), (d_valid, 'valid')]
    xgb_model = xgb.train(params, d_train, nrounds, watchlist, early_stopping_rounds=200,
                          feval=gini_xgb, maximize=True, verbose_eval=100)
    

xgb start...
xgb kfold: 1  of  5 : 
[0]	train-auc:0.517322	valid-auc:0.515124	train-gini:0.034869	valid-gini:0.038543
Multiple eval metrics have been passed: 'valid-gini' will be used for early stopping.

Will train until valid-gini hasn't improved in 200 rounds.
[100]	train-auc:0.663629	valid-auc:0.632771	train-gini:0.327258	valid-gini:0.265542
[200]	train-auc:0.686143	valid-auc:0.637335	train-gini:0.372285	valid-gini:0.274669
[300]	train-auc:0.706637	valid-auc:0.639461	train-gini:0.413274	valid-gini:0.278922
[400]	train-auc:0.720794	valid-auc:0.640172	train-gini:0.441589	valid-gini:0.280343


In [10]:
hp.quniform(5,10)/10

TypeError: require string label

TypeError: ap_quniform_sampler() takes at least 5 arguments (6 given)

In [None]:
param_test1 = {'max_depth': [6, 7, 8],
               'min_child_weight': [8, 9, 10]}

fit_dict = {"eval_set": [(X_test, y_test)],
            "early_stopping_rounds": 100,
            'eval_metric': 'auc'}

xgb_test1 = xgb.XGBClassifier(random_state=42, 
                              objective='binary:logistic', 
                              n_estimators=1000, 
                              learning_rate=0.05,
                              max_delta_step = 1.8,
                              colsample_bytree = 0.4,
                              subsample = 0.8,
                              gamma = 0.65)

gsearch1 = GridSearchCV(estimator=xgb_test1,
                         param_grid=param_test1,
                         scoring='roc_auc',
                         cv=5,
                         verbose=5,
                         n_jobs=8,
                         fit_params=fit_dict)
start = time()
gsearch1.fit(X_train, y_train)
print 'Training time: {} mins.'.format((time() - start) / 60)
print gsearch1.best_params_, gsearch1.best_score_



Fitting 5 folds for each of 9 candidates, totalling 45 fits


In [4]:
param_test1 = {'max_depth': [6, 7, 8],
               'min_child_weight': [7, 8, 9]}

fit_dict = {"eval_set": [(X_test, y_test)],
            "early_stopping_rounds": 100,
            'eval_metric': 'auc'}

xgb_test1 = xgb.XGBClassifier(random_state=42, objective='binary:logistic', n_estimators=1000, learning_rate=0.05)
gsearch1 = GridSearchCV(estimator=xgb_test1,
                         param_grid=param_test1,
                         scoring='roc_auc',
                         cv=5,
                         verbose=5,
                         n_jobs=4,
                         fit_params=fit_dict)
start = time()
gsearch1.fit(X_train, y_train)
print 'Training time: {} mins.'.format((time() - start) / 60)
print gsearch1.best_params_, gsearch1.best_score_



Fitting 5 folds for each of 9 candidates, totalling 45 fits


[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed: 29.3min
[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed: 129.8min finished


[0]	validation_0-auc:0.61043
Will train until validation_0-auc hasn't improved in 100 rounds.
[1]	validation_0-auc:0.615832
[2]	validation_0-auc:0.619473
[3]	validation_0-auc:0.620328
[4]	validation_0-auc:0.621087
[5]	validation_0-auc:0.620728
[6]	validation_0-auc:0.621331
[7]	validation_0-auc:0.622922
[8]	validation_0-auc:0.622666
[9]	validation_0-auc:0.622906
[10]	validation_0-auc:0.623276
[11]	validation_0-auc:0.623459
[12]	validation_0-auc:0.622909
[13]	validation_0-auc:0.623347
[14]	validation_0-auc:0.623822
[15]	validation_0-auc:0.623287
[16]	validation_0-auc:0.623799
[17]	validation_0-auc:0.624209
[18]	validation_0-auc:0.624459
[19]	validation_0-auc:0.624536
[20]	validation_0-auc:0.624798
[21]	validation_0-auc:0.62503
[22]	validation_0-auc:0.62487
[23]	validation_0-auc:0.624982
[24]	validation_0-auc:0.625003
[25]	validation_0-auc:0.625165
[26]	validation_0-auc:0.624933
[27]	validation_0-auc:0.6253
[28]	validation_0-auc:0.625482
[29]	validation_0-auc:0.62537
[30]	validation_0-auc

[259]	validation_0-auc:0.64625
[260]	validation_0-auc:0.646275
[261]	validation_0-auc:0.646297
[262]	validation_0-auc:0.646186
[263]	validation_0-auc:0.646228
[264]	validation_0-auc:0.646203
[265]	validation_0-auc:0.646188
[266]	validation_0-auc:0.646225
[267]	validation_0-auc:0.646182
[268]	validation_0-auc:0.646186
[269]	validation_0-auc:0.646225
[270]	validation_0-auc:0.646129
[271]	validation_0-auc:0.646226
[272]	validation_0-auc:0.646312
[273]	validation_0-auc:0.64631
[274]	validation_0-auc:0.646408
[275]	validation_0-auc:0.646387
[276]	validation_0-auc:0.646411
[277]	validation_0-auc:0.646398
[278]	validation_0-auc:0.646397
[279]	validation_0-auc:0.646324
[280]	validation_0-auc:0.646279
[281]	validation_0-auc:0.646261
[282]	validation_0-auc:0.646253
[283]	validation_0-auc:0.646253
[284]	validation_0-auc:0.646209
[285]	validation_0-auc:0.646241
[286]	validation_0-auc:0.646262
[287]	validation_0-auc:0.646251
[288]	validation_0-auc:0.646233
[289]	validation_0-auc:0.646278
[290]	vali

In [5]:
spw = len(y[y==0]) / len(y[y==1])
param_test3 = {'gamma':[i/10.0 for i in range(5,9)],
               'scale_pos_weight': [spw-2,spw,spw+2]}

xgb_test3 = xgb.XGBClassifier(learning_rate =0.05, n_estimators=1000, max_depth=6,
                             min_child_weight=8,objective= 'binary:logistic',random_state=42)
                         
fit_dict = {"eval_set": [(X_test, y_test)],
            "early_stopping_rounds": 100,
            'eval_metric': 'auc'}

gsearch3 = GridSearchCV(xgb_test3,
                        param_grid = param_test3, 
                        scoring='roc_auc',
                        n_jobs=8, 
                        cv=5,
                        verbose = 5,
                        fit_params=fit_dict)

start = time()
gsearch3.fit(X_train, y_train)
print 'Training time: {} mins.'.format((time() - start) / 60)
print gsearch3.best_params_, gsearch3.best_score_



Fitting 5 folds for each of 12 candidates, totalling 60 fits


[Parallel(n_jobs=8)]: Done   2 tasks      | elapsed: 11.9min
[Parallel(n_jobs=8)]: Done  58 out of  60 | elapsed: 89.5min remaining:  3.1min
[Parallel(n_jobs=8)]: Done  60 out of  60 | elapsed: 90.4min finished


[0]	validation_0-auc:0.616494
Will train until validation_0-auc hasn't improved in 100 rounds.
[1]	validation_0-auc:0.619581
[2]	validation_0-auc:0.619352
[3]	validation_0-auc:0.621276
[4]	validation_0-auc:0.62215
[5]	validation_0-auc:0.623399
[6]	validation_0-auc:0.624644
[7]	validation_0-auc:0.625205
[8]	validation_0-auc:0.625797
[9]	validation_0-auc:0.626463
[10]	validation_0-auc:0.626602
[11]	validation_0-auc:0.627424
[12]	validation_0-auc:0.627948
[13]	validation_0-auc:0.627913
[14]	validation_0-auc:0.628819
[15]	validation_0-auc:0.629453
[16]	validation_0-auc:0.629686
[17]	validation_0-auc:0.630794
[18]	validation_0-auc:0.631529
[19]	validation_0-auc:0.63261
[20]	validation_0-auc:0.632726
[21]	validation_0-auc:0.633321
[22]	validation_0-auc:0.633937
[23]	validation_0-auc:0.634454
[24]	validation_0-auc:0.634806
[25]	validation_0-auc:0.634866
[26]	validation_0-auc:0.635375
[27]	validation_0-auc:0.635699
[28]	validation_0-auc:0.635709
[29]	validation_0-auc:0.635993
[30]	validation_0

In [8]:
len(y_train[y_train==0]) / len(y_train[y_train==1])

26.436107554417415