In [12]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn import metrics
from sklearn.metrics import make_scorer
from xgboost.sklearn import XGBClassifier
import lightgbm as lgb

In [2]:
train_features = pd.read_csv('../final_features/final_train_features.csv')
test_features = pd.read_csv('../final_features/final_test_features.csv')

In [3]:
train_y = train_features['label']
train_X = train_features.drop(['uid','label'], axis=1)

test_uids = test_features['uid']
test_X = test_features.drop(['uid', 'label'], axis=1)

In [4]:
train_X.shape
test_X.shape

(4999, 637)

(3000, 637)

# xgb

In [5]:
# load data
dtrain_xgb = xgb.DMatrix(train_X, label = train_y)
dtest_xgb = xgb.DMatrix(test_X)

In [8]:
xgb_paras = {
    'eta': 0.05,
    'max_depth': 5,
    'min_child_weight': 5,
    'gamma': 0,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'binary:logistic',
    'nthread': 4,
    'scale_pos_weight': 1,
    'seed': 27
} 

In [10]:
def metric_xgb(preds, dtrain):
    
    label = dtrain.get_label()
    
    pre = pd.DataFrame({'preds':preds, 'label':label})
    pre = pre.sort_values(by='preds', ascending=False)
    
    auc = metrics.roc_auc_score(pre.label, pre.preds)

    pre.preds = pre.preds.map(lambda x: 1 if x>=0.5 else 0)

    f1 = metrics.f1_score(pre.label, pre.preds) 
    
    res = 0.6*auc +0.4*f1
    
    return 'metric_xgb', res

In [11]:
xgb.cv(xgb_paras, dtrain_xgb, num_boost_round=1000, nfold=5,feval=metric_xgb, maximize=True, 
       early_stopping_rounds=100, verbose_eval=True)

[0]	train-error:0.130526+0.00471027	train-metric_xgb:0.791622+0.021171	test-error:0.144831+0.00531654	test-metric_xgb:0.758284+0.0161615
[1]	train-error:0.102521+0.00529637	train-metric_xgb:0.834592+0.0130146	test-error:0.122625+0.00628987	test-metric_xgb:0.79294+0.0135468
[2]	train-error:0.09947+0.00176114	train-metric_xgb:0.840977+0.00481494	test-error:0.119026+0.00820821	test-metric_xgb:0.800615+0.022044
[3]	train-error:0.0977694+0.00270438	train-metric_xgb:0.848624+0.00437773	test-error:0.111025+0.0099779	test-metric_xgb:0.817706+0.0251189
[4]	train-error:0.096019+0.00368409	train-metric_xgb:0.851566+0.00798334	test-error:0.111824+0.00604194	test-metric_xgb:0.817386+0.0195706
[5]	train-error:0.095169+0.00362264	train-metric_xgb:0.854193+0.0074201	test-error:0.113025+0.0100555	test-metric_xgb:0.817119+0.0249664
[6]	train-error:0.0941688+0.00222228	train-metric_xgb:0.856265+0.00647673	test-error:0.113025+0.00895254	test-metric_xgb:0.818526+0.0215868
[7]	train-error:0.0927684+0.002491

[59]	train-error:0.0703142+0.00188913	train-metric_xgb:0.902853+0.00334203	test-error:0.104824+0.0129938	test-metric_xgb:0.840252+0.0246196
[60]	train-error:0.0698642+0.00228648	train-metric_xgb:0.903529+0.0038207	test-error:0.104224+0.0130584	test-metric_xgb:0.840936+0.0247504
[61]	train-error:0.0691638+0.00227544	train-metric_xgb:0.904798+0.0036672	test-error:0.103624+0.0125905	test-metric_xgb:0.84183+0.0240008
[62]	train-error:0.0685638+0.00190968	train-metric_xgb:0.905587+0.00338439	test-error:0.102423+0.0126383	test-metric_xgb:0.843476+0.0242485
[63]	train-error:0.068164+0.00235802	train-metric_xgb:0.906197+0.00392399	test-error:0.102423+0.0126383	test-metric_xgb:0.843454+0.0241959
[64]	train-error:0.0669636+0.0018422	train-metric_xgb:0.907873+0.00321386	test-error:0.101823+0.0124448	test-metric_xgb:0.844482+0.024277
[65]	train-error:0.0670134+0.00140746	train-metric_xgb:0.908017+0.00261508	test-error:0.103224+0.0120856	test-metric_xgb:0.843156+0.0238828
[66]	train-error:0.0666634

[118]	train-error:0.044759+0.00146473	train-metric_xgb:0.941725+0.00186645	test-error:0.0996224+0.0104114	test-metric_xgb:0.852328+0.0209248
[119]	train-error:0.044759+0.00156378	train-metric_xgb:0.941817+0.00198059	test-error:0.0998224+0.0106915	test-metric_xgb:0.852189+0.0211948
[120]	train-error:0.0442588+0.0017386	train-metric_xgb:0.942477+0.00228766	test-error:0.100223+0.0112401	test-metric_xgb:0.851921+0.0216798
[121]	train-error:0.0440088+0.00192949	train-metric_xgb:0.942876+0.00245029	test-error:0.100223+0.0112401	test-metric_xgb:0.851914+0.0216712
[122]	train-error:0.0438088+0.00165343	train-metric_xgb:0.943246+0.00207237	test-error:0.0994228+0.0115459	test-metric_xgb:0.853139+0.0219714
[123]	train-error:0.0432086+0.00152682	train-metric_xgb:0.944085+0.00179985	test-error:0.0992228+0.011173	test-metric_xgb:0.853248+0.0220535
[124]	train-error:0.0431086+0.0015372	train-metric_xgb:0.944267+0.00187277	test-error:0.099023+0.0113815	test-metric_xgb:0.8536+0.022148
[125]	train-error

[177]	train-error:0.0273558+0.00149875	train-metric_xgb:0.966008+0.00205945	test-error:0.0956218+0.0103357	test-metric_xgb:0.85881+0.0207325
[178]	train-error:0.0272556+0.00152663	train-metric_xgb:0.966183+0.00212169	test-error:0.0956216+0.00983756	test-metric_xgb:0.85881+0.0203255
[179]	train-error:0.0271056+0.00140338	train-metric_xgb:0.966374+0.00197543	test-error:0.0952216+0.00997571	test-metric_xgb:0.859467+0.0202169
[180]	train-error:0.0268054+0.00125063	train-metric_xgb:0.966767+0.00176197	test-error:0.0954222+0.010997	test-metric_xgb:0.859244+0.0218432
[181]	train-error:0.0265054+0.00135263	train-metric_xgb:0.967195+0.00187296	test-error:0.095222+0.0107151	test-metric_xgb:0.859528+0.0215794
[182]	train-error:0.0262552+0.00154292	train-metric_xgb:0.967496+0.00207928	test-error:0.095022+0.01025	test-metric_xgb:0.859716+0.0208743
[183]	train-error:0.0260552+0.00134673	train-metric_xgb:0.967753+0.00175453	test-error:0.0952218+0.0101569	test-metric_xgb:0.859506+0.0208503
[184]	train

[235]	train-error:0.0163034+0.00140967	train-metric_xgb:0.980417+0.00174872	test-error:0.094021+0.00846545	test-metric_xgb:0.862344+0.0182222
[236]	train-error:0.0161534+0.00142912	train-metric_xgb:0.9806+0.00177129	test-error:0.093621+0.00796998	test-metric_xgb:0.862811+0.0179248
[237]	train-error:0.0158534+0.00125173	train-metric_xgb:0.980978+0.00155849	test-error:0.0938208+0.00757536	test-metric_xgb:0.862586+0.0171408
[238]	train-error:0.0156534+0.00131049	train-metric_xgb:0.981225+0.00162134	test-error:0.094421+0.00776447	test-metric_xgb:0.861649+0.0173912
[239]	train-error:0.0155532+0.00116691	train-metric_xgb:0.981352+0.00147138	test-error:0.0944212+0.00864486	test-metric_xgb:0.861671+0.0184502
[240]	train-error:0.0154532+0.00125957	train-metric_xgb:0.981474+0.00157445	test-error:0.094221+0.00818629	test-metric_xgb:0.862057+0.0177473
[241]	train-error:0.015353+0.00142025	train-metric_xgb:0.981615+0.00175006	test-error:0.094221+0.00818629	test-metric_xgb:0.862058+0.0177495
[242]	t

[293]	train-error:0.0090518+0.000797509	train-metric_xgb:0.989439+0.000991444	test-error:0.0930198+0.00685851	test-metric_xgb:0.863371+0.0157659
[294]	train-error:0.0090518+0.000600684	train-metric_xgb:0.989456+0.000760687	test-error:0.0934198+0.0067278	test-metric_xgb:0.862934+0.0153699
[295]	train-error:0.0087516+0.000908956	train-metric_xgb:0.989795+0.00111442	test-error:0.0932198+0.0061637	test-metric_xgb:0.863241+0.0147428
[296]	train-error:0.0086516+0.00108006	train-metric_xgb:0.989912+0.0013149	test-error:0.0932198+0.00626029	test-metric_xgb:0.863185+0.0150674
[297]	train-error:0.0084518+0.00106629	train-metric_xgb:0.990147+0.00129637	test-error:0.0928198+0.00669381	test-metric_xgb:0.863629+0.0155751
[298]	train-error:0.0083016+0.000828396	train-metric_xgb:0.990327+0.00101764	test-error:0.0928198+0.00635666	test-metric_xgb:0.863632+0.0150538
[299]	train-error:0.0082516+0.000949316	train-metric_xgb:0.99039+0.00115231	test-error:0.0924198+0.00699306	test-metric_xgb:0.864205+0.0158

[351]	train-error:0.0045008+0.00074189	train-metric_xgb:0.994848+0.000854799	test-error:0.0926202+0.0071401	test-metric_xgb:0.864616+0.0157528
[352]	train-error:0.0045508+0.000678469	train-metric_xgb:0.994795+0.000779337	test-error:0.0922202+0.00749114	test-metric_xgb:0.865168+0.0162608
[353]	train-error:0.0045008+0.000591862	train-metric_xgb:0.994853+0.00068682	test-error:0.0916202+0.00722644	test-metric_xgb:0.865915+0.0163132
[354]	train-error:0.0044008+0.000604368	train-metric_xgb:0.994965+0.000696146	test-error:0.0914202+0.00755179	test-metric_xgb:0.866066+0.016553
[355]	train-error:0.0041508+0.000830939	train-metric_xgb:0.995249+0.00095769	test-error:0.0924204+0.00752551	test-metric_xgb:0.864952+0.0163688
[356]	train-error:0.0041008+0.000860488	train-metric_xgb:0.995306+0.000987296	test-error:0.0918202+0.00735753	test-metric_xgb:0.865855+0.0159678
[357]	train-error:0.0040008+0.000652227	train-metric_xgb:0.995418+0.000762649	test-error:0.0922202+0.00727442	test-metric_xgb:0.865256+

[409]	train-error:0.0020004+0.000387686	train-metric_xgb:0.997727+0.000438242	test-error:0.0920198+0.00739451	test-metric_xgb:0.865165+0.0160455
[410]	train-error:0.0019502+0.0004004	train-metric_xgb:0.997785+0.000451852	test-error:0.0922198+0.00704492	test-metric_xgb:0.864986+0.0157536
[411]	train-error:0.0019002+0.00033947	train-metric_xgb:0.997844+0.000374711	test-error:0.0920198+0.0068614	test-metric_xgb:0.865126+0.0156962
[412]	train-error:0.0019502+0.0004004	train-metric_xgb:0.997789+0.000449606	test-error:0.0922198+0.00704492	test-metric_xgb:0.864946+0.0157579
[413]	train-error:0.0018002+0.00040035	train-metric_xgb:0.997956+0.000450663	test-error:0.09242+0.00724883	test-metric_xgb:0.864747+0.0160126
[414]	train-error:0.0019004+0.000339529	train-metric_xgb:0.997845+0.00038578	test-error:0.09242+0.00724883	test-metric_xgb:0.864745+0.0160003
[415]	train-error:0.0018004+0.000430535	train-metric_xgb:0.997959+0.000485191	test-error:0.0918198+0.00710258	test-metric_xgb:0.865291+0.01582

[467]	train-error:0.0009+0.000489898	train-metric_xgb:0.998989+0.000547125	test-error:0.0908202+0.00757454	test-metric_xgb:0.86642+0.0168139
[468]	train-error:0.00085+0.000514782	train-metric_xgb:0.999045+0.000574523	test-error:0.0904202+0.0072574	test-metric_xgb:0.866783+0.0165975
[469]	train-error:0.00085+0.000514782	train-metric_xgb:0.999045+0.000575183	test-error:0.0904202+0.00744781	test-metric_xgb:0.866791+0.016757
[470]	train-error:0.0008+0.000430116	train-metric_xgb:0.999101+0.000480855	test-error:0.0900198+0.00715248	test-metric_xgb:0.867318+0.0159679
[471]	train-error:0.00085+0.000406202	train-metric_xgb:0.999045+0.000454288	test-error:0.0900198+0.00763924	test-metric_xgb:0.867193+0.0163395
[472]	train-error:0.00085+0.000406202	train-metric_xgb:0.999046+0.000453899	test-error:0.09062+0.00757744	test-metric_xgb:0.866654+0.0165535
[473]	train-error:0.00085+0.000406202	train-metric_xgb:0.999046+0.000454262	test-error:0.08982+0.00803552	test-metric_xgb:0.867342+0.0169383
[474]	tr

[526]	train-error:0.00045+0.0001	train-metric_xgb:0.999498+0.000113029	test-error:0.0904206+0.0083163	test-metric_xgb:0.867199+0.0179983
[527]	train-error:0.00055+0.000187083	train-metric_xgb:0.999387+0.000208171	test-error:0.0902206+0.00847874	test-metric_xgb:0.867373+0.0181287
[528]	train-error:0.00045+0.0001	train-metric_xgb:0.999498+0.000113226	test-error:0.0898206+0.00836095	test-metric_xgb:0.867693+0.0180382
[529]	train-error:0.0004+0.0002	train-metric_xgb:0.999553+0.000223415	test-error:0.0906204+0.00816742	test-metric_xgb:0.866994+0.0176627
[530]	train-error:0.0004+0.0002	train-metric_xgb:0.999553+0.000223415	test-error:0.0902204+0.00816352	test-metric_xgb:0.867578+0.0177038
[531]	train-error:0.0004+0.0002	train-metric_xgb:0.999553+0.000223415	test-error:0.0904204+0.00864378	test-metric_xgb:0.867435+0.0179873
[532]	train-error:0.00045+0.000244949	train-metric_xgb:0.999497+0.000273457	test-error:0.0904204+0.00864378	test-metric_xgb:0.867414+0.0179719
[533]	train-error:0.0004+0.0

[586]	train-error:0.00035+0.0002	train-metric_xgb:0.99961+0.000222362	test-error:0.0912208+0.0085026	test-metric_xgb:0.865918+0.0185103
[587]	train-error:0.0003+0.000187083	train-metric_xgb:0.999666+0.000207187	test-error:0.0912208+0.00861941	test-metric_xgb:0.865964+0.0185835
[588]	train-error:0.0003+0.000187083	train-metric_xgb:0.999666+0.000207187	test-error:0.091021+0.00934825	test-metric_xgb:0.866076+0.0196189
[589]	train-error:0.00035+0.0002	train-metric_xgb:0.99961+0.000222362	test-error:0.090821+0.00977539	test-metric_xgb:0.866217+0.0201835
[590]	train-error:0.0003+0.000187083	train-metric_xgb:0.999666+0.0002089	test-error:0.091421+0.00948752	test-metric_xgb:0.865696+0.019907
[591]	train-error:0.00025+0.000158114	train-metric_xgb:0.999722+0.000175829	test-error:0.0910208+0.00904116	test-metric_xgb:0.866105+0.0191498
[592]	train-error:0.0003+0.000187083	train-metric_xgb:0.999666+0.0002089	test-error:0.0908208+0.00931187	test-metric_xgb:0.866143+0.0193771
[593]	train-error:0.0003

Unnamed: 0,test-error-mean,test-error-std,test-metric_xgb-mean,test-metric_xgb-std,train-error-mean,train-error-std,train-metric_xgb-mean,train-metric_xgb-std
0,0.144831,0.005317,0.758284,0.016162,0.130526,0.004710,0.791622,0.021171
1,0.122625,0.006290,0.792940,0.013547,0.102521,0.005296,0.834592,0.013015
2,0.119026,0.008208,0.800615,0.022044,0.099470,0.001761,0.840977,0.004815
3,0.111025,0.009978,0.817706,0.025119,0.097769,0.002704,0.848624,0.004378
4,0.111824,0.006042,0.817386,0.019571,0.096019,0.003684,0.851566,0.007983
5,0.113025,0.010056,0.817119,0.024966,0.095169,0.003623,0.854193,0.007420
6,0.113025,0.008953,0.818526,0.021587,0.094169,0.002222,0.856265,0.006477
7,0.112625,0.010046,0.820101,0.022273,0.092768,0.002491,0.858998,0.006966
8,0.111226,0.012437,0.822239,0.026745,0.093869,0.002453,0.859104,0.006413
9,0.113427,0.011717,0.821049,0.026624,0.093219,0.001211,0.860517,0.004979


# lgb

In [13]:
# load data
dtrain_lgb = lgb.Dataset(train_X, label = train_y)
dtest_lgb = lgb.Dataset(test_X)

In [16]:
lgb_paras =  {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'is_training_metric': False,
    'min_data_in_leaf': 12,
    'num_leaves': 64,
    'learning_rate': 0.08,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'verbosity':-1,
    'seed': 27
#    'gpu_device_id':2,
#    'device':'gpu'
#    'lambda_l1': 0.001,
#    'skip_drop': 0.95,
#    'max_drop' : 10
#     'lambda_l2': 0.005
#     'num_threads': 18
}    

In [18]:
def metric_lgb(preds,dtrain):
    
    label = dtrain.get_label()
    
    
    pre = pd.DataFrame({'preds':preds,'label':label})
    pre= pre.sort_values(by='preds',ascending=False)
    
    auc = metrics.roc_auc_score(pre.label,pre.preds)

    pre.preds = pre.preds.map(lambda x: 1 if x>=0.5 else 0)

    f1 = metrics.f1_score(pre.label,pre.preds)
    
    
    res = 0.6*auc +0.4*f1
    
    return 'metric_lgb',res,True

In [19]:
lgb.cv(lgb_paras, dtrain_lgb, num_boost_round=1000, nfold=5, feval=metric_lgb, early_stopping_rounds=100,
       verbose_eval=True, metrics=['metric_lgb'])

[1]	cv_agg's metric_lgb: 0.784351 + 0.0147898
[2]	cv_agg's metric_lgb: 0.796028 + 0.0139363
[3]	cv_agg's metric_lgb: 0.802483 + 0.0125497
[4]	cv_agg's metric_lgb: 0.805685 + 0.0185651
[5]	cv_agg's metric_lgb: 0.813794 + 0.0169204
[6]	cv_agg's metric_lgb: 0.819237 + 0.0156119
[7]	cv_agg's metric_lgb: 0.824206 + 0.0138286
[8]	cv_agg's metric_lgb: 0.826342 + 0.0149176
[9]	cv_agg's metric_lgb: 0.829631 + 0.0142108
[10]	cv_agg's metric_lgb: 0.830604 + 0.0106844
[11]	cv_agg's metric_lgb: 0.832859 + 0.0141452
[12]	cv_agg's metric_lgb: 0.833533 + 0.0148996
[13]	cv_agg's metric_lgb: 0.835856 + 0.0137476
[14]	cv_agg's metric_lgb: 0.834772 + 0.0148067
[15]	cv_agg's metric_lgb: 0.833421 + 0.0130485
[16]	cv_agg's metric_lgb: 0.833755 + 0.015103
[17]	cv_agg's metric_lgb: 0.831912 + 0.0159906
[18]	cv_agg's metric_lgb: 0.834022 + 0.0146248
[19]	cv_agg's metric_lgb: 0.837031 + 0.0148708
[20]	cv_agg's metric_lgb: 0.835691 + 0.0123187
[21]	cv_agg's metric_lgb: 0.839148 + 0.0153821
[22]	cv_agg's metric_lg

[173]	cv_agg's metric_lgb: 0.866686 + 0.00848217
[174]	cv_agg's metric_lgb: 0.866067 + 0.00889126
[175]	cv_agg's metric_lgb: 0.866452 + 0.00891195
[176]	cv_agg's metric_lgb: 0.866464 + 0.00922939
[177]	cv_agg's metric_lgb: 0.867376 + 0.00929191
[178]	cv_agg's metric_lgb: 0.8669 + 0.00955215
[179]	cv_agg's metric_lgb: 0.867328 + 0.0109304
[180]	cv_agg's metric_lgb: 0.867476 + 0.0111288
[181]	cv_agg's metric_lgb: 0.86732 + 0.0106612
[182]	cv_agg's metric_lgb: 0.868404 + 0.0104076
[183]	cv_agg's metric_lgb: 0.86787 + 0.0116144
[184]	cv_agg's metric_lgb: 0.867389 + 0.0107516
[185]	cv_agg's metric_lgb: 0.867412 + 0.0112388
[186]	cv_agg's metric_lgb: 0.867825 + 0.0106033
[187]	cv_agg's metric_lgb: 0.867938 + 0.0109775
[188]	cv_agg's metric_lgb: 0.868093 + 0.0107281
[189]	cv_agg's metric_lgb: 0.868313 + 0.0103033
[190]	cv_agg's metric_lgb: 0.8674 + 0.0102888
[191]	cv_agg's metric_lgb: 0.868524 + 0.0100796
[192]	cv_agg's metric_lgb: 0.868727 + 0.0107982
[193]	cv_agg's metric_lgb: 0.869501 + 0.

{'metric_lgb-mean': [0.7843507733310273,
  0.7960281150081514,
  0.8024830665130989,
  0.8056854295292327,
  0.8137936821998402,
  0.8192365415809965,
  0.8242059399163132,
  0.8263415972092794,
  0.8296311885804833,
  0.8306035547186585,
  0.8328591230655193,
  0.8335325031176438,
  0.8358560002718542,
  0.8347717505574389,
  0.8334213272385764,
  0.8337548169221101,
  0.8319115498252616,
  0.8340224407893068,
  0.8370312041775352,
  0.8356906687642564,
  0.8391478358827709,
  0.841601960595279,
  0.841490088912123,
  0.8414775789937977,
  0.8435569591683663,
  0.8442583547208568,
  0.8463585842045325,
  0.8463096520838975,
  0.8479756114862752,
  0.8486480588638894,
  0.8511917602866435,
  0.8506730629458203,
  0.8517746387030394,
  0.8530536759193144,
  0.8566218623348828,
  0.8549957582317067,
  0.8548207853349161,
  0.8551798402309908,
  0.855077612758502,
  0.8551602025741118,
  0.8555977945473086,
  0.8543595105949804,
  0.8546852206348212,
  0.8557423634269826,
  0.855190325401

# xgb&lgb stacking

In [128]:
xgb_classifier_paras = {
    
} 
model_xgb = xgb.XGBClassifier(learning_rate=0.05,
    max_depth=5,
    min_child_weight=5,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    nthread=4,
    scale_pos_weight=1,
    random_state=27,
    n_estimators=1000,
    silent=True)

In [129]:
lgb_classifier_paras =  {
    
}    
model_lgb = lgb.LGBMClassifier(boosting_type='gbdt',
    objective='binary',
    is_training_metric=False,
    min_data_in_leaf=12,
    num_leaves=64,
    learning_rate=0.08,
    feature_fraction=0.8,
    bagging_fraction=0.8,
    verbosity=-1,
    seed=27,
    n_estimators=203)

In [130]:
def metric_for_cv(y, y_pred):

    pre = pd.DataFrame({'preds':y_pred[:,1], 'label':y})
    pre = pre.sort_values(by='preds', ascending=False)
    
    auc = metrics.roc_auc_score(pre.label, pre.preds)

    pre.preds = pre.preds.map(lambda x: 1 if x>=0.5 else 0)

    f1 = metrics.f1_score(pre.label, pre.preds) 
    
    res = 0.6*auc +0.4*f1
    
    return res

cv_scorer = make_scorer(metric_for_cv, greater_is_better=True, needs_proba=True)

In [131]:
from sklearn.linear_model import LogisticRegression
from mlxtend.classifier import StackingClassifier
from sklearn import model_selection

lr = LogisticRegression()
sclf = StackingClassifier(classifiers=[model_xgb, model_lgb], meta_classifier=lr)

for clf, label in zip([model_xgb, model_lgb, sclf], 
                      ['XGB', 'LGB', 'StackingClassifier']):

    scores = model_selection.cross_val_score(clf, train_X, train_y, cv=5, scoring=cv_scorer)
    print(label, ": ", scores)


XGB :  [0.79034319 0.82449945 0.88912693 0.87340251 0.76335079]
LGB :  [0.7984222  0.81209427 0.87806098 0.8837128  0.72782245]


  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:
  if diff:


KeyboardInterrupt: 

In [41]:
a = [0.78187359, 0.79206299, 0.89496167, 0.87388139, 0.74013814]
b = [0.79312131, 0.82715688, 0.89066815, 0.87295905, 0.75127001]
np.mean(a)
np.mean(b)

0.816583556

0.8270350800000001

# stacking by wlc

In [105]:
from sklearn.model_selection import KFold

class BasicModel(object):
    """Parent class of basic models"""
    def train(self, x_train, y_train, x_val, y_val):
        """return a trained model and eval metric of validation data"""
        pass
    
    def predict(self, model, x_test):
        """return the predicted result of test data"""
        pass
    
    def get_oof(self, x_train, y_train, x_test, n_folds = 5):
        """K-fold stacking"""
        num_train, num_test = x_train.shape[0], x_test.shape[0]
        oof_train = np.zeros((num_train,)) 
        oof_test = np.zeros((num_test,))
        oof_test_all_fold = np.zeros((num_test, n_folds))
#         scores = []
        KF = KFold(n_splits = n_folds, shuffle=True, random_state=27)
        for i, (train_index, val_index) in enumerate(KF.split(x_train)):
            print('{0} fold, train {1}, val {2}'.format(i, 
                                                        len(train_index),
                                                        len(val_index)))
            print((train_index))
            x_tra, y_tra = x_train.iloc[train_index], y_train.iloc[train_index]
            x_val, y_val = x_train.iloc[val_index], y_train.iloc[val_index]
            model = self.train(x_tra, y_tra, x_val, y_val)
#             scores.append(score)
            oof_train[val_index] = self.predict(model, x_val)
            print('oof_train', oof_train)
            oof_test_all_fold[:, i] = self.predict(model, x_test)
        oof_test = np.mean(oof_test_all_fold, axis=1)
#         print('all scores {0}, average {1}'.format(scores, np.mean(scores)))
        return oof_train, oof_test

In [106]:
class myXGBClassifier(BasicModel):
    def __init__(self):
        """set parameters"""
        self.num_rounds=1000
        self.early_stopping_rounds = 100
        self.params = {
            'eta': 0.05,
            'max_depth': 5,
            'min_child_weight': 5,
            'gamma': 0,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'objective': 'binary:logistic',
            'nthread': 4,
            'scale_pos_weight': 1,
            'seed': 27
         }
        
    def train(self, x_train, y_train, x_val, y_val):
        print('train with xgb model')
        xgbtrain = xgb.DMatrix(x_train, y_train)
        xgbval = xgb.DMatrix(x_val, y_val)
        watchlist = [(xgbtrain,'train'), (xgbval, 'val')]
        model = xgb.train(self.params, 
                          xgbtrain, 
                          self.num_rounds,
                          watchlist,
                          feval = metric_xgb,
                          maximize = True,
                          early_stopping_rounds = self.early_stopping_rounds)
        return model

    def predict(self, model, x_test):
        print('test with xgb model')
        xgbtest = xgb.DMatrix(x_test)
        return model.predict(xgbtest)

In [95]:
xgb_classifier = myXGBClassifier()
xgb_oof_train, xgb_oof_test = xgb_classifier.get_oof(train_X, train_y, test_X)

0 fold, train 3999, val 1000
[   3    7    8 ... 4992 4995 4997]
train with xgb model
[0]	train-error:0.117529	val-error:0.164	train-metric_xgb:0.764604	val-metric_xgb:0.704985
Multiple eval metrics have been passed: 'val-metric_xgb' will be used for early stopping.

Will train until val-metric_xgb hasn't improved in 100 rounds.
[1]	train-error:0.098775	val-error:0.127	train-metric_xgb:0.830923	val-metric_xgb:0.798781
[2]	train-error:0.101025	val-error:0.121	train-metric_xgb:0.827602	val-metric_xgb:0.808613
[3]	train-error:0.097024	val-error:0.121	train-metric_xgb:0.835797	val-metric_xgb:0.813135
[4]	train-error:0.098525	val-error:0.116	train-metric_xgb:0.838678	val-metric_xgb:0.820767
[5]	train-error:0.095774	val-error:0.111	train-metric_xgb:0.845889	val-metric_xgb:0.831004
[6]	train-error:0.094274	val-error:0.114	train-metric_xgb:0.84802	val-metric_xgb:0.827503
[7]	train-error:0.093523	val-error:0.111	train-metric_xgb:0.851721	val-metric_xgb:0.832718
[8]	train-error:0.092023	val-erro

[87]	train-error:0.055764	val-error:0.108	train-metric_xgb:0.923676	val-metric_xgb:0.849699
[88]	train-error:0.055014	val-error:0.108	train-metric_xgb:0.924742	val-metric_xgb:0.849031
[89]	train-error:0.054264	val-error:0.107	train-metric_xgb:0.925947	val-metric_xgb:0.850083
[90]	train-error:0.054514	val-error:0.107	train-metric_xgb:0.92567	val-metric_xgb:0.850695
[91]	train-error:0.054264	val-error:0.105	train-metric_xgb:0.926211	val-metric_xgb:0.852936
[92]	train-error:0.053513	val-error:0.106	train-metric_xgb:0.927292	val-metric_xgb:0.851685
[93]	train-error:0.054014	val-error:0.107	train-metric_xgb:0.92667	val-metric_xgb:0.85037
[94]	train-error:0.054014	val-error:0.107	train-metric_xgb:0.926915	val-metric_xgb:0.850396
[95]	train-error:0.053013	val-error:0.107	train-metric_xgb:0.928341	val-metric_xgb:0.851077
[96]	train-error:0.053013	val-error:0.106	train-metric_xgb:0.928471	val-metric_xgb:0.851052
[97]	train-error:0.052263	val-error:0.106	train-metric_xgb:0.929341	val-metric_xgb:

[176]	train-error:0.027757	val-error:0.1	train-metric_xgb:0.964768	val-metric_xgb:0.862284
[177]	train-error:0.027507	val-error:0.101	train-metric_xgb:0.965155	val-metric_xgb:0.861426
[178]	train-error:0.027507	val-error:0.101	train-metric_xgb:0.96518	val-metric_xgb:0.861475
[179]	train-error:0.027007	val-error:0.1	train-metric_xgb:0.965805	val-metric_xgb:0.862918
[180]	train-error:0.026757	val-error:0.1	train-metric_xgb:0.966191	val-metric_xgb:0.862956
[181]	train-error:0.026507	val-error:0.098	train-metric_xgb:0.966564	val-metric_xgb:0.865727
[182]	train-error:0.026507	val-error:0.099	train-metric_xgb:0.966636	val-metric_xgb:0.864354
[183]	train-error:0.026257	val-error:0.099	train-metric_xgb:0.966912	val-metric_xgb:0.864411
[184]	train-error:0.026507	val-error:0.099	train-metric_xgb:0.966635	val-metric_xgb:0.863809
[185]	train-error:0.026257	val-error:0.1	train-metric_xgb:0.966964	val-metric_xgb:0.862998
[186]	train-error:0.026007	val-error:0.099	train-metric_xgb:0.967262	val-metric

[265]	train-error:0.011253	val-error:0.102	train-metric_xgb:0.98639	val-metric_xgb:0.860821
[266]	train-error:0.011003	val-error:0.1	train-metric_xgb:0.986695	val-metric_xgb:0.862477
[267]	train-error:0.010753	val-error:0.1	train-metric_xgb:0.986985	val-metric_xgb:0.862518
[268]	train-error:0.010503	val-error:0.099	train-metric_xgb:0.98728	val-metric_xgb:0.863347
[269]	train-error:0.010503	val-error:0.098	train-metric_xgb:0.987289	val-metric_xgb:0.864173
[270]	train-error:0.010503	val-error:0.098	train-metric_xgb:0.987299	val-metric_xgb:0.864214
[271]	train-error:0.011003	val-error:0.099	train-metric_xgb:0.986742	val-metric_xgb:0.862717
[272]	train-error:0.011003	val-error:0.099	train-metric_xgb:0.986742	val-metric_xgb:0.863298
[273]	train-error:0.010503	val-error:0.1	train-metric_xgb:0.987359	val-metric_xgb:0.861897
[274]	train-error:0.010503	val-error:0.101	train-metric_xgb:0.987361	val-metric_xgb:0.861017
[275]	train-error:0.010003	val-error:0.101	train-metric_xgb:0.987959	val-metri

[43]	train-error:0.077519	val-error:0.1	train-metric_xgb:0.888533	val-metric_xgb:0.849638
[44]	train-error:0.077769	val-error:0.1	train-metric_xgb:0.888158	val-metric_xgb:0.849058
[45]	train-error:0.077519	val-error:0.102	train-metric_xgb:0.888649	val-metric_xgb:0.847294
[46]	train-error:0.076519	val-error:0.102	train-metric_xgb:0.890219	val-metric_xgb:0.848187
[47]	train-error:0.076519	val-error:0.102	train-metric_xgb:0.890381	val-metric_xgb:0.847412
[48]	train-error:0.077269	val-error:0.102	train-metric_xgb:0.889322	val-metric_xgb:0.847563
[49]	train-error:0.076769	val-error:0.102	train-metric_xgb:0.890265	val-metric_xgb:0.847529
[50]	train-error:0.076269	val-error:0.104	train-metric_xgb:0.891181	val-metric_xgb:0.845883
[51]	train-error:0.076769	val-error:0.104	train-metric_xgb:0.890745	val-metric_xgb:0.846144
[52]	train-error:0.076769	val-error:0.103	train-metric_xgb:0.890938	val-metric_xgb:0.847872
[53]	train-error:0.077019	val-error:0.102	train-metric_xgb:0.890645	val-metric_xgb:0

[133]	train-error:0.03926	val-error:0.088	train-metric_xgb:0.949581	val-metric_xgb:0.872682
[134]	train-error:0.03876	val-error:0.088	train-metric_xgb:0.950124	val-metric_xgb:0.872736
[135]	train-error:0.03876	val-error:0.087	train-metric_xgb:0.9503	val-metric_xgb:0.874268
[136]	train-error:0.037759	val-error:0.088	train-metric_xgb:0.95146	val-metric_xgb:0.872762
[137]	train-error:0.036759	val-error:0.088	train-metric_xgb:0.952725	val-metric_xgb:0.872774
[138]	train-error:0.037509	val-error:0.09	train-metric_xgb:0.951845	val-metric_xgb:0.870284
[139]	train-error:0.036509	val-error:0.089	train-metric_xgb:0.953271	val-metric_xgb:0.871839
[140]	train-error:0.035759	val-error:0.089	train-metric_xgb:0.954393	val-metric_xgb:0.871822
[141]	train-error:0.036259	val-error:0.089	train-metric_xgb:0.953743	val-metric_xgb:0.871683
[142]	train-error:0.036509	val-error:0.089	train-metric_xgb:0.95346	val-metric_xgb:0.871608
[143]	train-error:0.036759	val-error:0.089	train-metric_xgb:0.953275	val-metri

[222]	train-error:0.020755	val-error:0.087	train-metric_xgb:0.97533	val-metric_xgb:0.875151
[223]	train-error:0.020255	val-error:0.088	train-metric_xgb:0.97591	val-metric_xgb:0.873689
[224]	train-error:0.020255	val-error:0.087	train-metric_xgb:0.975917	val-metric_xgb:0.875164
[225]	train-error:0.020255	val-error:0.087	train-metric_xgb:0.975929	val-metric_xgb:0.875282
[226]	train-error:0.020255	val-error:0.085	train-metric_xgb:0.975941	val-metric_xgb:0.878297
[227]	train-error:0.020255	val-error:0.085	train-metric_xgb:0.976037	val-metric_xgb:0.877772
[228]	train-error:0.019755	val-error:0.087	train-metric_xgb:0.976642	val-metric_xgb:0.874799
[229]	train-error:0.019505	val-error:0.089	train-metric_xgb:0.976957	val-metric_xgb:0.87237
[230]	train-error:0.019505	val-error:0.089	train-metric_xgb:0.976978	val-metric_xgb:0.872395
[231]	train-error:0.018505	val-error:0.088	train-metric_xgb:0.97807	val-metric_xgb:0.873318
[232]	train-error:0.019005	val-error:0.086	train-metric_xgb:0.977584	val-m

[311]	train-error:0.007752	val-error:0.084	train-metric_xgb:0.991159	val-metric_xgb:0.87868
[312]	train-error:0.007752	val-error:0.085	train-metric_xgb:0.991166	val-metric_xgb:0.877709
[313]	train-error:0.007752	val-error:0.085	train-metric_xgb:0.991165	val-metric_xgb:0.877139
[314]	train-error:0.007502	val-error:0.084	train-metric_xgb:0.99145	val-metric_xgb:0.878634
[315]	train-error:0.007502	val-error:0.083	train-metric_xgb:0.991452	val-metric_xgb:0.880744
[316]	train-error:0.007752	val-error:0.083	train-metric_xgb:0.991164	val-metric_xgb:0.880237
[317]	train-error:0.007252	val-error:0.085	train-metric_xgb:0.991731	val-metric_xgb:0.878407
[318]	train-error:0.007252	val-error:0.085	train-metric_xgb:0.991733	val-metric_xgb:0.878428
[319]	train-error:0.007252	val-error:0.086	train-metric_xgb:0.991742	val-metric_xgb:0.876844
[320]	train-error:0.006752	val-error:0.087	train-metric_xgb:0.992315	val-metric_xgb:0.875341
[321]	train-error:0.007002	val-error:0.087	train-metric_xgb:0.992039	val

[400]	train-error:0.0015	val-error:0.087	train-metric_xgb:0.998313	val-metric_xgb:0.875589
[401]	train-error:0.0015	val-error:0.087	train-metric_xgb:0.998314	val-metric_xgb:0.87558
[402]	train-error:0.0015	val-error:0.087	train-metric_xgb:0.998315	val-metric_xgb:0.875563
[403]	train-error:0.00175	val-error:0.087	train-metric_xgb:0.998041	val-metric_xgb:0.876137
[404]	train-error:0.00175	val-error:0.087	train-metric_xgb:0.998041	val-metric_xgb:0.876159
[405]	train-error:0.002001	val-error:0.088	train-metric_xgb:0.997767	val-metric_xgb:0.874667
[406]	train-error:0.00175	val-error:0.087	train-metric_xgb:0.998042	val-metric_xgb:0.876108
[407]	train-error:0.00175	val-error:0.087	train-metric_xgb:0.998043	val-metric_xgb:0.876154
[408]	train-error:0.00175	val-error:0.087	train-metric_xgb:0.998047	val-metric_xgb:0.875509
[409]	train-error:0.00175	val-error:0.087	train-metric_xgb:0.998047	val-metric_xgb:0.87553
[410]	train-error:0.00175	val-error:0.087	train-metric_xgb:0.998046	val-metric_xgb:0

[70]	train-error:0.066267	val-error:0.09	train-metric_xgb:0.909689	val-metric_xgb:0.856058
[71]	train-error:0.065766	val-error:0.09	train-metric_xgb:0.910254	val-metric_xgb:0.856302
[72]	train-error:0.064266	val-error:0.087	train-metric_xgb:0.91233	val-metric_xgb:0.859839
[73]	train-error:0.064266	val-error:0.086	train-metric_xgb:0.912569	val-metric_xgb:0.860938
[74]	train-error:0.063516	val-error:0.085	train-metric_xgb:0.913617	val-metric_xgb:0.863008
[75]	train-error:0.063266	val-error:0.088	train-metric_xgb:0.914279	val-metric_xgb:0.860053
[76]	train-error:0.062016	val-error:0.088	train-metric_xgb:0.915773	val-metric_xgb:0.859647
[77]	train-error:0.061515	val-error:0.089	train-metric_xgb:0.91654	val-metric_xgb:0.858724
[78]	train-error:0.061515	val-error:0.088	train-metric_xgb:0.916823	val-metric_xgb:0.859717
[79]	train-error:0.060515	val-error:0.088	train-metric_xgb:0.918097	val-metric_xgb:0.859005
[80]	train-error:0.059515	val-error:0.085	train-metric_xgb:0.91921	val-metric_xgb:0.

[159]	train-error:0.031508	val-error:0.08	train-metric_xgb:0.960418	val-metric_xgb:0.876462
[160]	train-error:0.031758	val-error:0.08	train-metric_xgb:0.960173	val-metric_xgb:0.876466
[161]	train-error:0.030508	val-error:0.079	train-metric_xgb:0.961771	val-metric_xgb:0.877408
[162]	train-error:0.030258	val-error:0.08	train-metric_xgb:0.962121	val-metric_xgb:0.876524
[163]	train-error:0.029507	val-error:0.08	train-metric_xgb:0.96298	val-metric_xgb:0.876018
[164]	train-error:0.028507	val-error:0.079	train-metric_xgb:0.964201	val-metric_xgb:0.877615
[165]	train-error:0.028757	val-error:0.079	train-metric_xgb:0.963991	val-metric_xgb:0.877627
[166]	train-error:0.028507	val-error:0.079	train-metric_xgb:0.964387	val-metric_xgb:0.877664
[167]	train-error:0.028257	val-error:0.079	train-metric_xgb:0.964773	val-metric_xgb:0.877652
[168]	train-error:0.027757	val-error:0.079	train-metric_xgb:0.965365	val-metric_xgb:0.877163
[169]	train-error:0.027257	val-error:0.079	train-metric_xgb:0.965977	val-me

[248]	train-error:0.012003	val-error:0.08	train-metric_xgb:0.985721	val-metric_xgb:0.877102
[249]	train-error:0.012503	val-error:0.081	train-metric_xgb:0.985189	val-metric_xgb:0.876199
[250]	train-error:0.012503	val-error:0.083	train-metric_xgb:0.985208	val-metric_xgb:0.873807
[251]	train-error:0.012003	val-error:0.081	train-metric_xgb:0.985788	val-metric_xgb:0.875542
[252]	train-error:0.012253	val-error:0.081	train-metric_xgb:0.985502	val-metric_xgb:0.875633
[253]	train-error:0.012253	val-error:0.082	train-metric_xgb:0.985526	val-metric_xgb:0.874109
[254]	train-error:0.012003	val-error:0.081	train-metric_xgb:0.985825	val-metric_xgb:0.87555
[255]	train-error:0.012003	val-error:0.083	train-metric_xgb:0.98584	val-metric_xgb:0.873181
[256]	train-error:0.011753	val-error:0.081	train-metric_xgb:0.98612	val-metric_xgb:0.876154
[257]	train-error:0.012003	val-error:0.081	train-metric_xgb:0.985843	val-metric_xgb:0.87624
[258]	train-error:0.011753	val-error:0.081	train-metric_xgb:0.986125	val-me

[337]	train-error:0.004751	val-error:0.082	train-metric_xgb:0.994509	val-metric_xgb:0.87581
[338]	train-error:0.004751	val-error:0.082	train-metric_xgb:0.99451	val-metric_xgb:0.875881
[339]	train-error:0.004501	val-error:0.082	train-metric_xgb:0.994798	val-metric_xgb:0.875877
[340]	train-error:0.004501	val-error:0.082	train-metric_xgb:0.9948	val-metric_xgb:0.875798
[341]	train-error:0.004501	val-error:0.082	train-metric_xgb:0.994803	val-metric_xgb:0.875711
[342]	train-error:0.004251	val-error:0.082	train-metric_xgb:0.995088	val-metric_xgb:0.875637
[343]	train-error:0.004251	val-error:0.083	train-metric_xgb:0.995091	val-metric_xgb:0.873646
[344]	train-error:0.004251	val-error:0.084	train-metric_xgb:0.995097	val-metric_xgb:0.872623
[345]	train-error:0.004251	val-error:0.084	train-metric_xgb:0.9951	val-metric_xgb:0.872594
[346]	train-error:0.004251	val-error:0.084	train-metric_xgb:0.995102	val-metric_xgb:0.872565
[347]	train-error:0.004251	val-error:0.084	train-metric_xgb:0.995103	val-met

[49]	train-error:0.072018	val-error:0.112	train-metric_xgb:0.898076	val-metric_xgb:0.831568
[50]	train-error:0.071768	val-error:0.111	train-metric_xgb:0.898813	val-metric_xgb:0.83321
[51]	train-error:0.070518	val-error:0.111	train-metric_xgb:0.900454	val-metric_xgb:0.833511
[52]	train-error:0.070518	val-error:0.112	train-metric_xgb:0.900664	val-metric_xgb:0.832335
[53]	train-error:0.069517	val-error:0.111	train-metric_xgb:0.901735	val-metric_xgb:0.833942
[54]	train-error:0.069267	val-error:0.111	train-metric_xgb:0.902223	val-metric_xgb:0.834235
[55]	train-error:0.067767	val-error:0.111	train-metric_xgb:0.904424	val-metric_xgb:0.83427
[56]	train-error:0.067767	val-error:0.109	train-metric_xgb:0.904664	val-metric_xgb:0.836681
[57]	train-error:0.068267	val-error:0.11	train-metric_xgb:0.904022	val-metric_xgb:0.835242
[58]	train-error:0.068017	val-error:0.112	train-metric_xgb:0.904743	val-metric_xgb:0.833675
[59]	train-error:0.067517	val-error:0.112	train-metric_xgb:0.905509	val-metric_xgb:

[139]	train-error:0.03801	val-error:0.101	train-metric_xgb:0.951009	val-metric_xgb:0.857771
[140]	train-error:0.03801	val-error:0.101	train-metric_xgb:0.951148	val-metric_xgb:0.857877
[141]	train-error:0.03801	val-error:0.101	train-metric_xgb:0.951193	val-metric_xgb:0.857963
[142]	train-error:0.036759	val-error:0.1	train-metric_xgb:0.952787	val-metric_xgb:0.858756
[143]	train-error:0.036509	val-error:0.099	train-metric_xgb:0.953173	val-metric_xgb:0.860207
[144]	train-error:0.035259	val-error:0.1	train-metric_xgb:0.954658	val-metric_xgb:0.859287
[145]	train-error:0.034009	val-error:0.1	train-metric_xgb:0.95625	val-metric_xgb:0.85926
[146]	train-error:0.034009	val-error:0.1	train-metric_xgb:0.956385	val-metric_xgb:0.859197
[147]	train-error:0.033508	val-error:0.1	train-metric_xgb:0.957048	val-metric_xgb:0.859201
[148]	train-error:0.032758	val-error:0.1	train-metric_xgb:0.957948	val-metric_xgb:0.859264
[149]	train-error:0.032758	val-error:0.1	train-metric_xgb:0.958023	val-metric_xgb:0.859

[228]	train-error:0.013753	val-error:0.093	train-metric_xgb:0.983109	val-metric_xgb:0.868951
[229]	train-error:0.013503	val-error:0.092	train-metric_xgb:0.983395	val-metric_xgb:0.870342
[230]	train-error:0.013253	val-error:0.092	train-metric_xgb:0.983756	val-metric_xgb:0.870412
[231]	train-error:0.012753	val-error:0.091	train-metric_xgb:0.984354	val-metric_xgb:0.871295
[232]	train-error:0.012503	val-error:0.092	train-metric_xgb:0.984664	val-metric_xgb:0.870526
[233]	train-error:0.012503	val-error:0.092	train-metric_xgb:0.984683	val-metric_xgb:0.870475
[234]	train-error:0.012503	val-error:0.093	train-metric_xgb:0.984683	val-metric_xgb:0.869057
[235]	train-error:0.012753	val-error:0.094	train-metric_xgb:0.98443	val-metric_xgb:0.867819
[236]	train-error:0.012503	val-error:0.093	train-metric_xgb:0.984754	val-metric_xgb:0.869248
[237]	train-error:0.012253	val-error:0.095	train-metric_xgb:0.985034	val-metric_xgb:0.86644
[238]	train-error:0.012003	val-error:0.096	train-metric_xgb:0.985361	val

[317]	train-error:0.005251	val-error:0.094	train-metric_xgb:0.993782	val-metric_xgb:0.869368
[318]	train-error:0.006002	val-error:0.093	train-metric_xgb:0.992923	val-metric_xgb:0.870285
[319]	train-error:0.005751	val-error:0.093	train-metric_xgb:0.993211	val-metric_xgb:0.870262
[320]	train-error:0.005751	val-error:0.093	train-metric_xgb:0.993225	val-metric_xgb:0.870234
[321]	train-error:0.005751	val-error:0.092	train-metric_xgb:0.993226	val-metric_xgb:0.871078
[322]	train-error:0.005751	val-error:0.092	train-metric_xgb:0.993235	val-metric_xgb:0.871137
[323]	train-error:0.005501	val-error:0.091	train-metric_xgb:0.993526	val-metric_xgb:0.871902
[324]	train-error:0.005251	val-error:0.091	train-metric_xgb:0.993819	val-metric_xgb:0.871902
[325]	train-error:0.005001	val-error:0.091	train-metric_xgb:0.994118	val-metric_xgb:0.871887
[326]	train-error:0.004751	val-error:0.092	train-metric_xgb:0.994412	val-metric_xgb:0.871047
[327]	train-error:0.004501	val-error:0.092	train-metric_xgb:0.994709	v

[406]	train-error:0.002751	val-error:0.092	train-metric_xgb:0.996856	val-metric_xgb:0.870925
[407]	train-error:0.002751	val-error:0.091	train-metric_xgb:0.996858	val-metric_xgb:0.871691
[408]	train-error:0.002751	val-error:0.091	train-metric_xgb:0.996856	val-metric_xgb:0.871699
[409]	train-error:0.002501	val-error:0.091	train-metric_xgb:0.997138	val-metric_xgb:0.871699
[410]	train-error:0.002251	val-error:0.091	train-metric_xgb:0.997423	val-metric_xgb:0.871808
[411]	train-error:0.002251	val-error:0.092	train-metric_xgb:0.997425	val-metric_xgb:0.870361
[412]	train-error:0.002251	val-error:0.093	train-metric_xgb:0.997426	val-metric_xgb:0.869577
[413]	train-error:0.002251	val-error:0.092	train-metric_xgb:0.997428	val-metric_xgb:0.870451
[414]	train-error:0.002251	val-error:0.092	train-metric_xgb:0.99743	val-metric_xgb:0.870428
[415]	train-error:0.002251	val-error:0.092	train-metric_xgb:0.997431	val-metric_xgb:0.870361
[416]	train-error:0.002251	val-error:0.092	train-metric_xgb:0.997433	va

[66]	train-error:0.06625	val-error:0.1001	train-metric_xgb:0.911857	val-metric_xgb:0.833811
[67]	train-error:0.06625	val-error:0.1001	train-metric_xgb:0.91198	val-metric_xgb:0.833854
[68]	train-error:0.06575	val-error:0.101101	train-metric_xgb:0.912767	val-metric_xgb:0.833107
[69]	train-error:0.06525	val-error:0.101101	train-metric_xgb:0.913495	val-metric_xgb:0.833128
[70]	train-error:0.065	val-error:0.101101	train-metric_xgb:0.913766	val-metric_xgb:0.833281
[71]	train-error:0.06575	val-error:0.099099	train-metric_xgb:0.913029	val-metric_xgb:0.836826
[72]	train-error:0.0655	val-error:0.099099	train-metric_xgb:0.913392	val-metric_xgb:0.836634
[73]	train-error:0.064	val-error:0.099099	train-metric_xgb:0.915402	val-metric_xgb:0.836477
[74]	train-error:0.06425	val-error:0.099099	train-metric_xgb:0.915214	val-metric_xgb:0.837371
[75]	train-error:0.06275	val-error:0.099099	train-metric_xgb:0.917319	val-metric_xgb:0.837384
[76]	train-error:0.0625	val-error:0.099099	train-metric_xgb:0.917797	v

[154]	train-error:0.03425	val-error:0.095095	train-metric_xgb:0.958165	val-metric_xgb:0.847275
[155]	train-error:0.03375	val-error:0.093093	train-metric_xgb:0.958668	val-metric_xgb:0.85
[156]	train-error:0.03375	val-error:0.093093	train-metric_xgb:0.958703	val-metric_xgb:0.849943
[157]	train-error:0.0335	val-error:0.093093	train-metric_xgb:0.959069	val-metric_xgb:0.850764
[158]	train-error:0.033	val-error:0.091091	train-metric_xgb:0.959695	val-metric_xgb:0.852433
[159]	train-error:0.03275	val-error:0.092092	train-metric_xgb:0.960039	val-metric_xgb:0.851644
[160]	train-error:0.033	val-error:0.093093	train-metric_xgb:0.959785	val-metric_xgb:0.85076
[161]	train-error:0.03225	val-error:0.094094	train-metric_xgb:0.960648	val-metric_xgb:0.849101
[162]	train-error:0.0325	val-error:0.093093	train-metric_xgb:0.960364	val-metric_xgb:0.85076
[163]	train-error:0.03275	val-error:0.092092	train-metric_xgb:0.960173	val-metric_xgb:0.851631
[164]	train-error:0.0325	val-error:0.093093	train-metric_xgb:0

[242]	train-error:0.01525	val-error:0.097097	train-metric_xgb:0.982302	val-metric_xgb:0.846167
[243]	train-error:0.01525	val-error:0.096096	train-metric_xgb:0.982307	val-metric_xgb:0.846947
[244]	train-error:0.01525	val-error:0.096096	train-metric_xgb:0.982334	val-metric_xgb:0.847012
[245]	train-error:0.01525	val-error:0.095095	train-metric_xgb:0.982397	val-metric_xgb:0.847755
[246]	train-error:0.01475	val-error:0.096096	train-metric_xgb:0.98295	val-metric_xgb:0.846864
[247]	train-error:0.01475	val-error:0.096096	train-metric_xgb:0.983001	val-metric_xgb:0.846951
[248]	train-error:0.0145	val-error:0.096096	train-metric_xgb:0.983313	val-metric_xgb:0.846999
[249]	train-error:0.0145	val-error:0.096096	train-metric_xgb:0.983325	val-metric_xgb:0.847043
[250]	train-error:0.0145	val-error:0.096096	train-metric_xgb:0.983342	val-metric_xgb:0.847069
[251]	train-error:0.01425	val-error:0.096096	train-metric_xgb:0.983626	val-metric_xgb:0.847117
[252]	train-error:0.01425	val-error:0.096096	train-met

In [97]:
a = [0.870255, 0.880744, 0.878976, 0.871902, 0.852433]
np.mean(a)

0.870862

In [102]:
class myLGBClassifier(BasicModel):
    def __init__(self):
        self.num_boost_round = 1000
        self.early_stopping_rounds = 100
        self.params = {
            'boosting_type': 'gbdt',
            'objective': 'binary',
            'is_training_metric': False,
            'min_data_in_leaf': 12,
            'num_leaves': 64,
            'learning_rate': 0.08,
            'feature_fraction': 0.8,
            'bagging_fraction': 0.8,
            'verbosity':-1,
            'seed': 27,
            'metric': 'metric_lgb'
            }
       
    def train(self, x_train, y_train, x_val, y_val):
        print('train with lgb model')
        lgbtrain = lgb.Dataset(x_train, y_train)
        lgbval = lgb.Dataset(x_val, y_val)
        model = lgb.train(self.params, 
                          lgbtrain,
                          valid_sets = lgbval,
                          valid_names = 'lgbval',
                          verbose_eval = True,
                          num_boost_round = self.num_boost_round,
                          feval = metric_lgb,
                          early_stopping_rounds = self.early_stopping_rounds)
        return model
    
    def predict(self, model, x_test):
        print('test with lgb model')
        return model.predict(x_test, num_iteration=model.best_iteration)

In [107]:
lgb_classifier = myLGBClassifier()
lgb_oof_train, lgb_oof_test = lgb_classifier.get_oof(train_X, train_y, test_X)

0 fold, train 3999, val 1000
[   3    7    8 ... 4992 4995 4997]
train with lgb model
[1]	lgbval's metric_lgb: 0.794336
Training until validation scores don't improve for 100 rounds.
[2]	lgbval's metric_lgb: 0.817503
[3]	lgbval's metric_lgb: 0.809638
[4]	lgbval's metric_lgb: 0.808449
[5]	lgbval's metric_lgb: 0.818078
[6]	lgbval's metric_lgb: 0.822517
[7]	lgbval's metric_lgb: 0.82
[8]	lgbval's metric_lgb: 0.829995
[9]	lgbval's metric_lgb: 0.83024
[10]	lgbval's metric_lgb: 0.828149
[11]	lgbval's metric_lgb: 0.827969
[12]	lgbval's metric_lgb: 0.823295
[13]	lgbval's metric_lgb: 0.824086
[14]	lgbval's metric_lgb: 0.824957
[15]	lgbval's metric_lgb: 0.822755
[16]	lgbval's metric_lgb: 0.822666
[17]	lgbval's metric_lgb: 0.823028
[18]	lgbval's metric_lgb: 0.824662
[19]	lgbval's metric_lgb: 0.826289
[20]	lgbval's metric_lgb: 0.828212
[21]	lgbval's metric_lgb: 0.828843
[22]	lgbval's metric_lgb: 0.826013
[23]	lgbval's metric_lgb: 0.827352
[24]	lgbval's metric_lgb: 0.832185
[25]	lgbval's metric_lgb:

[229]	lgbval's metric_lgb: 0.855232
[230]	lgbval's metric_lgb: 0.856034
[231]	lgbval's metric_lgb: 0.856125
[232]	lgbval's metric_lgb: 0.856147
[233]	lgbval's metric_lgb: 0.857014
[234]	lgbval's metric_lgb: 0.857829
[235]	lgbval's metric_lgb: 0.85698
[236]	lgbval's metric_lgb: 0.857799
[237]	lgbval's metric_lgb: 0.856969
[238]	lgbval's metric_lgb: 0.857769
[239]	lgbval's metric_lgb: 0.857022
[240]	lgbval's metric_lgb: 0.857806
[241]	lgbval's metric_lgb: 0.85844
[242]	lgbval's metric_lgb: 0.859271
[243]	lgbval's metric_lgb: 0.857886
[244]	lgbval's metric_lgb: 0.857131
[245]	lgbval's metric_lgb: 0.856374
[246]	lgbval's metric_lgb: 0.856412
[247]	lgbval's metric_lgb: 0.856472
[248]	lgbval's metric_lgb: 0.856502
[249]	lgbval's metric_lgb: 0.856465
[250]	lgbval's metric_lgb: 0.856552
[251]	lgbval's metric_lgb: 0.85648
[252]	lgbval's metric_lgb: 0.856472
[253]	lgbval's metric_lgb: 0.856434
[254]	lgbval's metric_lgb: 0.856555
[255]	lgbval's metric_lgb: 0.857369
[256]	lgbval's metric_lgb: 0.85

[115]	lgbval's metric_lgb: 0.888802
[116]	lgbval's metric_lgb: 0.887368
[117]	lgbval's metric_lgb: 0.887448
[118]	lgbval's metric_lgb: 0.886002
[119]	lgbval's metric_lgb: 0.884627
[120]	lgbval's metric_lgb: 0.887608
[121]	lgbval's metric_lgb: 0.886994
[122]	lgbval's metric_lgb: 0.886023
[123]	lgbval's metric_lgb: 0.885381
[124]	lgbval's metric_lgb: 0.888261
[125]	lgbval's metric_lgb: 0.884944
[126]	lgbval's metric_lgb: 0.885183
[127]	lgbval's metric_lgb: 0.885137
[128]	lgbval's metric_lgb: 0.885116
[129]	lgbval's metric_lgb: 0.883667
[130]	lgbval's metric_lgb: 0.886657
[131]	lgbval's metric_lgb: 0.883722
[132]	lgbval's metric_lgb: 0.885141
[133]	lgbval's metric_lgb: 0.882227
[134]	lgbval's metric_lgb: 0.883853
[135]	lgbval's metric_lgb: 0.888269
[136]	lgbval's metric_lgb: 0.886888
[137]	lgbval's metric_lgb: 0.885423
[138]	lgbval's metric_lgb: 0.886733
[139]	lgbval's metric_lgb: 0.885343
[140]	lgbval's metric_lgb: 0.887705
[141]	lgbval's metric_lgb: 0.885289
[142]	lgbval's metric_lgb: 0

[355]	lgbval's metric_lgb: 0.89625
[356]	lgbval's metric_lgb: 0.893439
[357]	lgbval's metric_lgb: 0.893934
[358]	lgbval's metric_lgb: 0.893964
[359]	lgbval's metric_lgb: 0.892541
[360]	lgbval's metric_lgb: 0.893989
[361]	lgbval's metric_lgb: 0.896313
[362]	lgbval's metric_lgb: 0.895383
[363]	lgbval's metric_lgb: 0.89537
[364]	lgbval's metric_lgb: 0.893968
[365]	lgbval's metric_lgb: 0.893972
[366]	lgbval's metric_lgb: 0.895387
[367]	lgbval's metric_lgb: 0.895416
[368]	lgbval's metric_lgb: 0.893993
[369]	lgbval's metric_lgb: 0.895366
[370]	lgbval's metric_lgb: 0.893892
[371]	lgbval's metric_lgb: 0.893863
[372]	lgbval's metric_lgb: 0.892499
[373]	lgbval's metric_lgb: 0.89247
[374]	lgbval's metric_lgb: 0.892491
[375]	lgbval's metric_lgb: 0.892478
[376]	lgbval's metric_lgb: 0.893888
[377]	lgbval's metric_lgb: 0.893842
[378]	lgbval's metric_lgb: 0.895252
[379]	lgbval's metric_lgb: 0.895269
[380]	lgbval's metric_lgb: 0.895265
[381]	lgbval's metric_lgb: 0.893854
[382]	lgbval's metric_lgb: 0.89

[119]	lgbval's metric_lgb: 0.864969
[120]	lgbval's metric_lgb: 0.863546
[121]	lgbval's metric_lgb: 0.862687
[122]	lgbval's metric_lgb: 0.864261
[123]	lgbval's metric_lgb: 0.864381
[124]	lgbval's metric_lgb: 0.864422
[125]	lgbval's metric_lgb: 0.864331
[126]	lgbval's metric_lgb: 0.867352
[127]	lgbval's metric_lgb: 0.865859
[128]	lgbval's metric_lgb: 0.865974
[129]	lgbval's metric_lgb: 0.865859
[130]	lgbval's metric_lgb: 0.86758
[131]	lgbval's metric_lgb: 0.867563
[132]	lgbval's metric_lgb: 0.868268
[133]	lgbval's metric_lgb: 0.868364
[134]	lgbval's metric_lgb: 0.868884
[135]	lgbval's metric_lgb: 0.867416
[136]	lgbval's metric_lgb: 0.867267
[137]	lgbval's metric_lgb: 0.86826
[138]	lgbval's metric_lgb: 0.867482
[139]	lgbval's metric_lgb: 0.865861
[140]	lgbval's metric_lgb: 0.867424
[141]	lgbval's metric_lgb: 0.866022
[142]	lgbval's metric_lgb: 0.866068
[143]	lgbval's metric_lgb: 0.866055
[144]	lgbval's metric_lgb: 0.865865
[145]	lgbval's metric_lgb: 0.865877
[146]	lgbval's metric_lgb: 0.8

[137]	lgbval's metric_lgb: 0.877728
[138]	lgbval's metric_lgb: 0.877744
[139]	lgbval's metric_lgb: 0.877814
[140]	lgbval's metric_lgb: 0.879664
[141]	lgbval's metric_lgb: 0.880972
[142]	lgbval's metric_lgb: 0.878727
[143]	lgbval's metric_lgb: 0.878766
[144]	lgbval's metric_lgb: 0.879577
[145]	lgbval's metric_lgb: 0.87879
[146]	lgbval's metric_lgb: 0.877397
[147]	lgbval's metric_lgb: 0.879628
[148]	lgbval's metric_lgb: 0.87991
[149]	lgbval's metric_lgb: 0.878514
[150]	lgbval's metric_lgb: 0.879965
[151]	lgbval's metric_lgb: 0.878596
[152]	lgbval's metric_lgb: 0.880883
[153]	lgbval's metric_lgb: 0.87649
[154]	lgbval's metric_lgb: 0.877834
[155]	lgbval's metric_lgb: 0.879166
[156]	lgbval's metric_lgb: 0.877732
[157]	lgbval's metric_lgb: 0.878009
[158]	lgbval's metric_lgb: 0.877251
[159]	lgbval's metric_lgb: 0.877235
[160]	lgbval's metric_lgb: 0.875549
[161]	lgbval's metric_lgb: 0.876068
[162]	lgbval's metric_lgb: 0.877037
[163]	lgbval's metric_lgb: 0.876166
[164]	lgbval's metric_lgb: 0.87

4 fold, train 4000, val 999
[   0    1    2 ... 4996 4997 4998]
train with lgb model
[1]	lgbval's metric_lgb: 0.754137
Training until validation scores don't improve for 100 rounds.
[2]	lgbval's metric_lgb: 0.765559
[3]	lgbval's metric_lgb: 0.789902
[4]	lgbval's metric_lgb: 0.795443
[5]	lgbval's metric_lgb: 0.788442
[6]	lgbval's metric_lgb: 0.795443
[7]	lgbval's metric_lgb: 0.812771
[8]	lgbval's metric_lgb: 0.812604
[9]	lgbval's metric_lgb: 0.820217
[10]	lgbval's metric_lgb: 0.82135
[11]	lgbval's metric_lgb: 0.821958
[12]	lgbval's metric_lgb: 0.825267
[13]	lgbval's metric_lgb: 0.826755
[14]	lgbval's metric_lgb: 0.822435
[15]	lgbval's metric_lgb: 0.827037
[16]	lgbval's metric_lgb: 0.823561
[17]	lgbval's metric_lgb: 0.826931
[18]	lgbval's metric_lgb: 0.830885
[19]	lgbval's metric_lgb: 0.829066
[20]	lgbval's metric_lgb: 0.833427
[21]	lgbval's metric_lgb: 0.833079
[22]	lgbval's metric_lgb: 0.836406
[23]	lgbval's metric_lgb: 0.832844
[24]	lgbval's metric_lgb: 0.837437
[25]	lgbval's metric_l

[233]	lgbval's metric_lgb: 0.862876
[234]	lgbval's metric_lgb: 0.864648
[235]	lgbval's metric_lgb: 0.86533
[236]	lgbval's metric_lgb: 0.864356
[237]	lgbval's metric_lgb: 0.866297
[238]	lgbval's metric_lgb: 0.867809
[239]	lgbval's metric_lgb: 0.866945
[240]	lgbval's metric_lgb: 0.866857
[241]	lgbval's metric_lgb: 0.867571
[242]	lgbval's metric_lgb: 0.865972
[243]	lgbval's metric_lgb: 0.867579
[244]	lgbval's metric_lgb: 0.865789
[245]	lgbval's metric_lgb: 0.868902
[246]	lgbval's metric_lgb: 0.869072
[247]	lgbval's metric_lgb: 0.870777
[248]	lgbval's metric_lgb: 0.867583
[249]	lgbval's metric_lgb: 0.869037
[250]	lgbval's metric_lgb: 0.865948
[251]	lgbval's metric_lgb: 0.869211
[252]	lgbval's metric_lgb: 0.867561
[253]	lgbval's metric_lgb: 0.869115
[254]	lgbval's metric_lgb: 0.867487
[255]	lgbval's metric_lgb: 0.866591
[256]	lgbval's metric_lgb: 0.866609
[257]	lgbval's metric_lgb: 0.868061
[258]	lgbval's metric_lgb: 0.864829
[259]	lgbval's metric_lgb: 0.868161
[260]	lgbval's metric_lgb: 0.

In [104]:
b = [0.859271, 0.896313, 0.869245, 0.88362, 0.870777]
np.mean(b)

0.8758452

In [126]:
stacked_train = pd.DataFrame({'xgb_oof_train': xgb_oof_train, 'lgb_oof_train': lgb_oof_train})
stacked_test = pd.DataFrame({'xgb_oof_test': xgb_oof_test, 'lgb_oof_test': lgb_oof_test})
stacked_train_xgb = xgb.DMatrix(stacked_train, label = train_y)
stacked_test_xgb = xgb.DMatrix(stacked_test)

In [127]:
xgb.cv(xgb_paras, stacked_train_xgb, num_boost_round=1000, nfold=5,feval=metric_xgb, maximize=True, 
       early_stopping_rounds=100, verbose_eval=True)

[0]	train-error:0.0853674+0.00186051	train-metric_xgb:0.875052+0.0041461	test-error:0.0908202+0.00899854	test-metric_xgb:0.864215+0.0210812
[1]	train-error:0.0817168+0.00463026	train-metric_xgb:0.880946+0.00679439	test-error:0.0850192+0.00687196	test-metric_xgb:0.87224+0.0192909
[2]	train-error:0.079966+0.00259913	train-metric_xgb:0.883211+0.00442403	test-error:0.0862196+0.00886702	test-metric_xgb:0.870919+0.0208857
[3]	train-error:0.0802162+0.00176005	train-metric_xgb:0.883171+0.0034826	test-error:0.0866194+0.0081391	test-metric_xgb:0.87052+0.0192407
[4]	train-error:0.0800662+0.00161318	train-metric_xgb:0.883472+0.00384888	test-error:0.0862198+0.0099332	test-metric_xgb:0.871736+0.0209098
[5]	train-error:0.0809664+0.00165777	train-metric_xgb:0.882847+0.00364145	test-error:0.0858198+0.0103482	test-metric_xgb:0.872667+0.0217945
[6]	train-error:0.0799164+0.00202614	train-metric_xgb:0.884993+0.00271388	test-error:0.0862198+0.00930959	test-metric_xgb:0.872386+0.021417
[7]	train-error:0.0803

[59]	train-error:0.0778658+0.00257325	train-metric_xgb:0.890313+0.00479947	test-error:0.0848184+0.00851037	test-metric_xgb:0.875731+0.0168788
[60]	train-error:0.0778158+0.00273773	train-metric_xgb:0.890368+0.00491419	test-error:0.0850186+0.00868902	test-metric_xgb:0.87545+0.0173338
[61]	train-error:0.0775156+0.0027808	train-metric_xgb:0.890673+0.00499199	test-error:0.0846186+0.00825554	test-metric_xgb:0.875666+0.0172185
[62]	train-error:0.0774656+0.0027189	train-metric_xgb:0.890672+0.00502869	test-error:0.0846186+0.00825554	test-metric_xgb:0.87567+0.0171831
[63]	train-error:0.0774656+0.00267719	train-metric_xgb:0.890723+0.00494617	test-error:0.0850186+0.00838445	test-metric_xgb:0.875305+0.0171462
[64]	train-error:0.0777156+0.00287098	train-metric_xgb:0.890391+0.00518155	test-error:0.0848184+0.00819917	test-metric_xgb:0.875649+0.0166652
[65]	train-error:0.0776156+0.00267366	train-metric_xgb:0.8905+0.00497046	test-error:0.0848186+0.00817719	test-metric_xgb:0.875601+0.016928
[66]	train-er

Unnamed: 0,test-error-mean,test-error-std,test-metric_xgb-mean,test-metric_xgb-std,train-error-mean,train-error-std,train-metric_xgb-mean,train-metric_xgb-std
0,0.09082,0.008999,0.864215,0.021081,0.085367,0.001861,0.875052,0.004146
1,0.085019,0.006872,0.87224,0.019291,0.081717,0.00463,0.880946,0.006794
2,0.08622,0.008867,0.870919,0.020886,0.079966,0.002599,0.883211,0.004424
3,0.086619,0.008139,0.87052,0.019241,0.080216,0.00176,0.883171,0.003483
4,0.08622,0.009933,0.871736,0.02091,0.080066,0.001613,0.883472,0.003849
5,0.08582,0.010348,0.872667,0.021794,0.080966,0.001658,0.882847,0.003641
6,0.08622,0.00931,0.872386,0.021417,0.079916,0.002026,0.884993,0.002714
7,0.08482,0.009313,0.874213,0.021274,0.080316,0.001285,0.884678,0.002508
8,0.08602,0.009237,0.872801,0.02104,0.079866,0.002089,0.884972,0.0035
9,0.085419,0.009181,0.874406,0.019712,0.080016,0.002389,0.88529,0.004098
