In [1]:
import time
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [2]:
more_features = False
use_staking = False
use_kernal_params = True
do_augment = True
fast = False

train_df = pd.read_csv('input/train' + ('_more_features' if more_features else '') + '.csv')
test_df = pd.read_csv('input/test'  + ('_more_features' if more_features else '') + '.csv')

do_lda = False

fix_data_skew = False

if fix_data_skew:
    trues = train_df.loc[train_df['target'] == 1]
    falses = train_df.loc[train_df['target'] != 1].sample(frac=1)[:len(trues)]
    train_df = pd.concat([trues, falses], ignore_index=True).sample(frac=1)
else:
    train_df = train_df
    
X_test = test_df.drop('ID_code',axis=1)
X = train_df.drop(['ID_code','target'],axis=1)
y = train_df['target']

In [3]:
if do_lda:    
    lda = LDA(solver='svd', n_components=5, store_covariance=True)
    X_lda = pd.DataFrame(lda.fit_transform(X, y))
    X_test_lda = pd.DataFrame(lda.transform(X_test))
    X["lda"] = X_lda
    X_test["lda"] = X_test_lda

In [4]:
_, X_bottomhalf, _, y_bottomhalf = train_test_split(X, y, test_size=0.8, random_state=10)

In [5]:
if use_staking:
    X = X_bottomhalf
    y = y_bottomhalf

In [6]:
n_splits = 5
folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [7]:
params = {
    'metric': 'auc',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'min_data_in_leaf': 2881,
    'max_depth': 0,
    'num_leaves': 3,
    'learning_rate': 0.01,
    'bagging_freq': 3,
    #'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.3, 0.9),
    'feature_fraction': 0.8990901412442585,
    'bagging_seed': 11,
    'reg_alpha':  1.1173044727720816,
    'reg_lambda': 6.9285776442737514,
    'random_state': 42,
    'verbosity': -1,
    'subsample': 0.8054415526396443,
    'min_child_weight': 38.138072621096654,
    'num_threads': 4,
    'max_bin': 483
}

if use_kernal_params:
    # https://www.kaggle.com/jiweiliu/lgb-2-leaves-augment (lb: 0.901)
    params = {
        "objective" : "binary",
        "metric" : "auc",
        "boosting": 'gbdt',
        "max_depth" : -1,
        "num_leaves" : 13,
        "learning_rate" : 0.01,
        "bagging_freq": 5,
        "bagging_fraction" : 0.4,
        "feature_fraction" : 0.05,
        "min_data_in_leaf": 80,
        "min_sum_heassian_in_leaf": 10,
        "tree_learner": "serial",
        "boost_from_average": "false",
        #"lambda_l1" : 5,
        #"lambda_l2" : 5,
        "bagging_seed" : 13,
        "verbosity" : 1,
        "seed": 42
    }

In [8]:
def augment(x,y,t=2):
    xs,xn = [],[]
    for i in range(t):
        mask = y>0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xs.append(x1)

    for i in range(t//2):
        mask = y==0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xn.append(x1)

    xs = np.vstack(xs)
    xn = np.vstack(xn)
    ys = np.ones(xs.shape[0])
    yn = np.zeros(xn.shape[0])
    x = np.vstack([x,xs,xn])
    y = np.concatenate([y,ys,yn])
    return x,y

In [9]:
oof = train_df[['ID_code', 'target']]
oof['predict'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [10]:
predictions = test_df[['ID_code']]
val_aucs = []

In [11]:
score = 0.0
prediction = np.zeros(len(X_test))
for fold_n, (train_index, valid_index) in enumerate(folds.split(X,y)):
    print('Fold', fold_n, 'started at', time.ctime())
    X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
    
    N = 10
    
    if not do_augment:
        N = 1
    
    p_valid,yp = 0,0
    for i in range(N):
        if do_augment:
            X_t, y_t = augment(X_train.values, y_train.values)
            X_t = pd.DataFrame(X_t)
            X_t = X_t.add_prefix('var_')

            trn_data = lgb.Dataset(X_t, label=y_t)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        else:
            trn_data = lgb.Dataset(X_train, label=y_train)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        
        evals_result = {}
        
        early_stopping_rounds = 800 if fast else 3000
        
        lgb_clf = lgb.train(params,
                        trn_data,
                        num_boost_round=2000000,
                        valid_sets = [trn_data, val_data],
                        early_stopping_rounds=early_stopping_rounds,
                        verbose_eval = 300,
                        evals_result=evals_result
                       )
        p_valid += lgb_clf.predict(X_valid)
        yp += lgb_clf.predict(X_test)
    
    oof['predict'][valid_index] = p_valid/N
    val_score = roc_auc_score(y_valid, p_valid)
    val_aucs.append(val_score)
    
    predictions['fold{}'.format(fold_n+1)] = yp/N

Fold 0 started at Mon Mar 25 14:05:06 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.876072	valid_1's auc: 0.866644
[600]	training's auc: 0.887225	valid_1's auc: 0.876606
[900]	training's auc: 0.892283	valid_1's auc: 0.881255
[1200]	training's auc: 0.895171	valid_1's auc: 0.883434
[1500]	training's auc: 0.897984	valid_1's auc: 0.885709
[1800]	training's auc: 0.900425	valid_1's auc: 0.887589
[2100]	training's auc: 0.902616	valid_1's auc: 0.889123
[2400]	training's auc: 0.904606	valid_1's auc: 0.89065
[2700]	training's auc: 0.906604	valid_1's auc: 0.892006
[3000]	training's auc: 0.908225	valid_1's auc: 0.893087
[3300]	training's auc: 0.9098	valid_1's auc: 0.894103
[3600]	training's auc: 0.911222	valid_1's auc: 0.894984
[3900]	training's auc: 0.91263	valid_1's auc: 0.895768
[4200]	training's auc: 0.913892	valid_1's auc: 0.896522
[4500]	training's auc: 0.915051	valid_1's auc: 0.89707
[4800]	training's auc: 0.916123	valid_1's auc: 0.897536
[5100

[1800]	training's auc: 0.900137	valid_1's auc: 0.88738
[2100]	training's auc: 0.902329	valid_1's auc: 0.889159
[2400]	training's auc: 0.904196	valid_1's auc: 0.890594
[2700]	training's auc: 0.90623	valid_1's auc: 0.89189
[3000]	training's auc: 0.907795	valid_1's auc: 0.892988
[3300]	training's auc: 0.909311	valid_1's auc: 0.894031
[3600]	training's auc: 0.910711	valid_1's auc: 0.894913
[3900]	training's auc: 0.912048	valid_1's auc: 0.895762
[4200]	training's auc: 0.913302	valid_1's auc: 0.896404
[4500]	training's auc: 0.914429	valid_1's auc: 0.897002
[4800]	training's auc: 0.915521	valid_1's auc: 0.897534
[5100]	training's auc: 0.91653	valid_1's auc: 0.898074
[5400]	training's auc: 0.917472	valid_1's auc: 0.898416
[5700]	training's auc: 0.918367	valid_1's auc: 0.898731
[6000]	training's auc: 0.919232	valid_1's auc: 0.898939
[6300]	training's auc: 0.920035	valid_1's auc: 0.899207
[6600]	training's auc: 0.920811	valid_1's auc: 0.899354
[6900]	training's auc: 0.921567	valid_1's auc: 0.899

[1500]	training's auc: 0.898011	valid_1's auc: 0.885829
[1800]	training's auc: 0.900485	valid_1's auc: 0.887759
[2100]	training's auc: 0.902765	valid_1's auc: 0.889569
[2400]	training's auc: 0.904664	valid_1's auc: 0.891041
[2700]	training's auc: 0.906683	valid_1's auc: 0.892378
[3000]	training's auc: 0.908323	valid_1's auc: 0.893384
[3300]	training's auc: 0.909889	valid_1's auc: 0.894319
[3600]	training's auc: 0.911255	valid_1's auc: 0.895064
[3900]	training's auc: 0.912666	valid_1's auc: 0.895873
[4200]	training's auc: 0.913929	valid_1's auc: 0.896603
[4500]	training's auc: 0.915109	valid_1's auc: 0.897152
[4800]	training's auc: 0.916186	valid_1's auc: 0.89765
[5100]	training's auc: 0.917202	valid_1's auc: 0.898164
[5400]	training's auc: 0.918147	valid_1's auc: 0.898548
[5700]	training's auc: 0.919023	valid_1's auc: 0.89882
[6000]	training's auc: 0.91986	valid_1's auc: 0.899014
[6300]	training's auc: 0.920667	valid_1's auc: 0.899314
[6600]	training's auc: 0.921439	valid_1's auc: 0.89

Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.877017	valid_1's auc: 0.866419
[600]	training's auc: 0.887995	valid_1's auc: 0.876139
[900]	training's auc: 0.89298	valid_1's auc: 0.88082
[1200]	training's auc: 0.895892	valid_1's auc: 0.883324
[1500]	training's auc: 0.898571	valid_1's auc: 0.885666
[1800]	training's auc: 0.901026	valid_1's auc: 0.88767
[2100]	training's auc: 0.903262	valid_1's auc: 0.889528
[2400]	training's auc: 0.905187	valid_1's auc: 0.89101
[2700]	training's auc: 0.907154	valid_1's auc: 0.892319
[3000]	training's auc: 0.908804	valid_1's auc: 0.893554
[3300]	training's auc: 0.910323	valid_1's auc: 0.894423
[3600]	training's auc: 0.911749	valid_1's auc: 0.895272
[3900]	training's auc: 0.913107	valid_1's auc: 0.896015
[4200]	training's auc: 0.914349	valid_1's auc: 0.896766
[4500]	training's auc: 0.915475	valid_1's auc: 0.897304
[4800]	training's auc: 0.916566	valid_1's auc: 0.897803
[5100]	training's auc: 0.917581	valid_1's auc: 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 1 started at Mon Mar 25 15:08:10 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.875625	valid_1's auc: 0.866726
[600]	training's auc: 0.886908	valid_1's auc: 0.877952
[900]	training's auc: 0.891907	valid_1's auc: 0.883005
[1200]	training's auc: 0.894941	valid_1's auc: 0.885441
[1500]	training's auc: 0.897801	valid_1's auc: 0.887627
[1800]	training's auc: 0.900363	valid_1's auc: 0.889656
[2100]	training's auc: 0.902603	valid_1's auc: 0.891236
[2400]	training's auc: 0.904488	valid_1's auc: 0.892387
[2700]	training's auc: 0.906525	valid_1's auc: 0.89373
[3000]	training's auc: 0.90814	valid_1's auc: 0.894704
[3300]	training's auc: 0.909691	valid_1's auc: 0.895741
[3600]	training's auc: 0.911065	valid_1's auc: 0.896432
[3900]	training's auc: 0.912471	valid_1's auc: 0.897202
[4200]	training's auc: 0.913726	valid_1's auc: 0.897802
[4500]	training's auc: 0.91487	valid_1's auc: 0.898399
[4800]	training's auc: 0.915945	valid_1's auc: 0.898855
[51

[2100]	training's auc: 0.902731	valid_1's auc: 0.891021
[2400]	training's auc: 0.904626	valid_1's auc: 0.89227
[2700]	training's auc: 0.906602	valid_1's auc: 0.893615
[3000]	training's auc: 0.908216	valid_1's auc: 0.894564
[3300]	training's auc: 0.909799	valid_1's auc: 0.895558
[3600]	training's auc: 0.911162	valid_1's auc: 0.896298
[3900]	training's auc: 0.912564	valid_1's auc: 0.897201
[4200]	training's auc: 0.913825	valid_1's auc: 0.897727
[4500]	training's auc: 0.914945	valid_1's auc: 0.898293
[4800]	training's auc: 0.916024	valid_1's auc: 0.898851
[5100]	training's auc: 0.917038	valid_1's auc: 0.899252
[5400]	training's auc: 0.917943	valid_1's auc: 0.899573
[5700]	training's auc: 0.918846	valid_1's auc: 0.899747
[6000]	training's auc: 0.919687	valid_1's auc: 0.899939
[6300]	training's auc: 0.920484	valid_1's auc: 0.900139
[6600]	training's auc: 0.921254	valid_1's auc: 0.900303
[6900]	training's auc: 0.922028	valid_1's auc: 0.900518
[7200]	training's auc: 0.922761	valid_1's auc: 0.

[2700]	training's auc: 0.907102	valid_1's auc: 0.893231
[3000]	training's auc: 0.908693	valid_1's auc: 0.894069
[3300]	training's auc: 0.910247	valid_1's auc: 0.895093
[3600]	training's auc: 0.911665	valid_1's auc: 0.8958
[3900]	training's auc: 0.913046	valid_1's auc: 0.89675
[4200]	training's auc: 0.914307	valid_1's auc: 0.897304
[4500]	training's auc: 0.915477	valid_1's auc: 0.897824
[4800]	training's auc: 0.916546	valid_1's auc: 0.898277
[5100]	training's auc: 0.917575	valid_1's auc: 0.898648
[5400]	training's auc: 0.918504	valid_1's auc: 0.899008
[5700]	training's auc: 0.919369	valid_1's auc: 0.899316
[6000]	training's auc: 0.92019	valid_1's auc: 0.899482
[6300]	training's auc: 0.921011	valid_1's auc: 0.899703
[6600]	training's auc: 0.921766	valid_1's auc: 0.899884
[6900]	training's auc: 0.92254	valid_1's auc: 0.900002
[7200]	training's auc: 0.923283	valid_1's auc: 0.900073
[7500]	training's auc: 0.923987	valid_1's auc: 0.900185
[7800]	training's auc: 0.92469	valid_1's auc: 0.90028

[900]	training's auc: 0.891447	valid_1's auc: 0.883417
[1200]	training's auc: 0.894479	valid_1's auc: 0.885595
[1500]	training's auc: 0.897259	valid_1's auc: 0.887845
[1800]	training's auc: 0.899748	valid_1's auc: 0.889738
[2100]	training's auc: 0.902044	valid_1's auc: 0.891322
[2400]	training's auc: 0.904005	valid_1's auc: 0.892609
[2700]	training's auc: 0.906042	valid_1's auc: 0.893963
[3000]	training's auc: 0.907668	valid_1's auc: 0.89482
[3300]	training's auc: 0.909188	valid_1's auc: 0.895663
[3600]	training's auc: 0.910587	valid_1's auc: 0.896433
[3900]	training's auc: 0.911986	valid_1's auc: 0.897258
[4200]	training's auc: 0.913249	valid_1's auc: 0.897783
[4500]	training's auc: 0.914414	valid_1's auc: 0.898329
[4800]	training's auc: 0.915493	valid_1's auc: 0.898843
[5100]	training's auc: 0.91653	valid_1's auc: 0.899243
[5400]	training's auc: 0.917444	valid_1's auc: 0.899552
[5700]	training's auc: 0.918341	valid_1's auc: 0.899812
[6000]	training's auc: 0.919194	valid_1's auc: 0.90

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 2 started at Mon Mar 25 16:15:53 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.874881	valid_1's auc: 0.870685
[600]	training's auc: 0.885671	valid_1's auc: 0.88179
[900]	training's auc: 0.890644	valid_1's auc: 0.885898
[1200]	training's auc: 0.893481	valid_1's auc: 0.887994
[1500]	training's auc: 0.896198	valid_1's auc: 0.890748
[1800]	training's auc: 0.898655	valid_1's auc: 0.892666
[2100]	training's auc: 0.900878	valid_1's auc: 0.894535
[2400]	training's auc: 0.902877	valid_1's auc: 0.89617
[2700]	training's auc: 0.904824	valid_1's auc: 0.897656
[3000]	training's auc: 0.906498	valid_1's auc: 0.898771
[3300]	training's auc: 0.908016	valid_1's auc: 0.899891
[3600]	training's auc: 0.909462	valid_1's auc: 0.900974
[3900]	training's auc: 0.910879	valid_1's auc: 0.901855
[4200]	training's auc: 0.912138	valid_1's auc: 0.902646
[4500]	training's auc: 0.91329	valid_1's auc: 0.903268
[4800]	training's auc: 0.914375	valid_1's auc: 0.903733
[51

[14400]	training's auc: 0.936799	valid_1's auc: 0.906773
[14700]	training's auc: 0.937386	valid_1's auc: 0.906767
[15000]	training's auc: 0.93796	valid_1's auc: 0.906804
[15300]	training's auc: 0.938541	valid_1's auc: 0.906783
[15600]	training's auc: 0.939117	valid_1's auc: 0.906729
[15900]	training's auc: 0.939695	valid_1's auc: 0.906673
[16200]	training's auc: 0.940245	valid_1's auc: 0.906726
[16500]	training's auc: 0.940791	valid_1's auc: 0.90674
Early stopping, best iteration is:
[13548]	training's auc: 0.935127	valid_1's auc: 0.906859
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.875193	valid_1's auc: 0.870375
[600]	training's auc: 0.886411	valid_1's auc: 0.881825
[900]	training's auc: 0.891169	valid_1's auc: 0.8859
[1200]	training's auc: 0.894217	valid_1's auc: 0.888212
[1500]	training's auc: 0.896971	valid_1's auc: 0.890958
[1800]	training's auc: 0.899447	valid_1's auc: 0.893006
[2100]	training's auc: 0.901693	valid_1's auc: 0.894765
[24

[8100]	training's auc: 0.923853	valid_1's auc: 0.906352
[8400]	training's auc: 0.924548	valid_1's auc: 0.906478
[8700]	training's auc: 0.925231	valid_1's auc: 0.906532
[9000]	training's auc: 0.925896	valid_1's auc: 0.906574
[9300]	training's auc: 0.92658	valid_1's auc: 0.906582
[9600]	training's auc: 0.92726	valid_1's auc: 0.906607
[9900]	training's auc: 0.927919	valid_1's auc: 0.906636
[10200]	training's auc: 0.92855	valid_1's auc: 0.906579
[10500]	training's auc: 0.929194	valid_1's auc: 0.906571
[10800]	training's auc: 0.92983	valid_1's auc: 0.906575
[11100]	training's auc: 0.930475	valid_1's auc: 0.906542
[11400]	training's auc: 0.931093	valid_1's auc: 0.906574
[11700]	training's auc: 0.9317	valid_1's auc: 0.90654
[12000]	training's auc: 0.932306	valid_1's auc: 0.906555
[12300]	training's auc: 0.932922	valid_1's auc: 0.906554
[12600]	training's auc: 0.933519	valid_1's auc: 0.906548
[12900]	training's auc: 0.934127	valid_1's auc: 0.906544
Early stopping, best iteration is:
[9940]	tra

[10200]	training's auc: 0.928126	valid_1's auc: 0.907249
[10500]	training's auc: 0.928781	valid_1's auc: 0.907271
[10800]	training's auc: 0.929427	valid_1's auc: 0.907266
[11100]	training's auc: 0.930059	valid_1's auc: 0.907246
[11400]	training's auc: 0.930694	valid_1's auc: 0.907246
[11700]	training's auc: 0.931302	valid_1's auc: 0.907274
[12000]	training's auc: 0.931932	valid_1's auc: 0.907242
[12300]	training's auc: 0.932533	valid_1's auc: 0.907241
[12600]	training's auc: 0.933141	valid_1's auc: 0.90727
[12900]	training's auc: 0.933746	valid_1's auc: 0.907305
[13200]	training's auc: 0.934345	valid_1's auc: 0.90728
[13500]	training's auc: 0.934948	valid_1's auc: 0.907262
[13800]	training's auc: 0.935547	valid_1's auc: 0.907252
[14100]	training's auc: 0.936137	valid_1's auc: 0.907187
[14400]	training's auc: 0.936722	valid_1's auc: 0.9072
[14700]	training's auc: 0.93731	valid_1's auc: 0.9072
[15000]	training's auc: 0.937902	valid_1's auc: 0.907173
[15300]	training's auc: 0.938481	valid

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 3 started at Mon Mar 25 17:21:00 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.876249	valid_1's auc: 0.862151
[600]	training's auc: 0.887412	valid_1's auc: 0.874504
[900]	training's auc: 0.892133	valid_1's auc: 0.878783
[1200]	training's auc: 0.894993	valid_1's auc: 0.881201
[1500]	training's auc: 0.897769	valid_1's auc: 0.883276
[1800]	training's auc: 0.900208	valid_1's auc: 0.885588
[2100]	training's auc: 0.902451	valid_1's auc: 0.887355
[2400]	training's auc: 0.904414	valid_1's auc: 0.888735
[2700]	training's auc: 0.906372	valid_1's auc: 0.89016
[3000]	training's auc: 0.90799	valid_1's auc: 0.891429
[3300]	training's auc: 0.909514	valid_1's auc: 0.892533
[3600]	training's auc: 0.910928	valid_1's auc: 0.893458
[3900]	training's auc: 0.91231	valid_1's auc: 0.894373
[4200]	training's auc: 0.913575	valid_1's auc: 0.895164
[4500]	training's auc: 0.914689	valid_1's auc: 0.895805
[4800]	training's auc: 0.915792	valid_1's auc: 0.896404
[51

[12000]	training's auc: 0.93258	valid_1's auc: 0.899786
[12300]	training's auc: 0.933197	valid_1's auc: 0.899846
[12600]	training's auc: 0.933832	valid_1's auc: 0.899864
[12900]	training's auc: 0.934427	valid_1's auc: 0.899906
[13200]	training's auc: 0.935025	valid_1's auc: 0.899908
[13500]	training's auc: 0.935616	valid_1's auc: 0.899912
[13800]	training's auc: 0.936216	valid_1's auc: 0.899865
[14100]	training's auc: 0.936804	valid_1's auc: 0.899828
[14400]	training's auc: 0.937406	valid_1's auc: 0.899793
[14700]	training's auc: 0.937997	valid_1's auc: 0.899746
[15000]	training's auc: 0.938582	valid_1's auc: 0.899772
[15300]	training's auc: 0.939151	valid_1's auc: 0.89977
[15600]	training's auc: 0.939718	valid_1's auc: 0.899765
[15900]	training's auc: 0.940303	valid_1's auc: 0.899702
[16200]	training's auc: 0.940861	valid_1's auc: 0.899637
Early stopping, best iteration is:
[13454]	training's auc: 0.935531	valid_1's auc: 0.899928
Training until validation scores don't improve for 3000

[8400]	training's auc: 0.925812	valid_1's auc: 0.899084
[8700]	training's auc: 0.926466	valid_1's auc: 0.899181
[9000]	training's auc: 0.927129	valid_1's auc: 0.899266
[9300]	training's auc: 0.92778	valid_1's auc: 0.899325
[9600]	training's auc: 0.928418	valid_1's auc: 0.899329
[9900]	training's auc: 0.929042	valid_1's auc: 0.899331
[10200]	training's auc: 0.929673	valid_1's auc: 0.899358
[10500]	training's auc: 0.930302	valid_1's auc: 0.899389
[10800]	training's auc: 0.930935	valid_1's auc: 0.899435
[11100]	training's auc: 0.931564	valid_1's auc: 0.899423
[11400]	training's auc: 0.932183	valid_1's auc: 0.899449
[11700]	training's auc: 0.93279	valid_1's auc: 0.899441
[12000]	training's auc: 0.933385	valid_1's auc: 0.89941
[12300]	training's auc: 0.933984	valid_1's auc: 0.899422
[12600]	training's auc: 0.934592	valid_1's auc: 0.899479
[12900]	training's auc: 0.935187	valid_1's auc: 0.899495
[13200]	training's auc: 0.935785	valid_1's auc: 0.899491
[13500]	training's auc: 0.936376	valid_1

[5700]	training's auc: 0.918759	valid_1's auc: 0.897669
[6000]	training's auc: 0.919591	valid_1's auc: 0.897912
[6300]	training's auc: 0.920382	valid_1's auc: 0.898218
[6600]	training's auc: 0.921165	valid_1's auc: 0.898509
[6900]	training's auc: 0.921937	valid_1's auc: 0.898763
[7200]	training's auc: 0.92269	valid_1's auc: 0.898966
[7500]	training's auc: 0.923398	valid_1's auc: 0.899048
[7800]	training's auc: 0.924086	valid_1's auc: 0.899136
[8100]	training's auc: 0.924787	valid_1's auc: 0.899266
[8400]	training's auc: 0.925486	valid_1's auc: 0.899408
[8700]	training's auc: 0.926142	valid_1's auc: 0.899517
[9000]	training's auc: 0.926793	valid_1's auc: 0.899543
[9300]	training's auc: 0.927458	valid_1's auc: 0.899584
[9600]	training's auc: 0.928119	valid_1's auc: 0.899577
[9900]	training's auc: 0.928766	valid_1's auc: 0.899628
[10200]	training's auc: 0.92941	valid_1's auc: 0.899632
[10500]	training's auc: 0.930023	valid_1's auc: 0.899611
[10800]	training's auc: 0.930647	valid_1's auc: 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 4 started at Mon Mar 25 18:28:23 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.877084	valid_1's auc: 0.864334
[600]	training's auc: 0.887713	valid_1's auc: 0.87265
[900]	training's auc: 0.892696	valid_1's auc: 0.876459
[1200]	training's auc: 0.895619	valid_1's auc: 0.879203
[1500]	training's auc: 0.898295	valid_1's auc: 0.881336
[1800]	training's auc: 0.900736	valid_1's auc: 0.883213
[2100]	training's auc: 0.902946	valid_1's auc: 0.884951
[2400]	training's auc: 0.904765	valid_1's auc: 0.886343
[2700]	training's auc: 0.906713	valid_1's auc: 0.887877
[3000]	training's auc: 0.908389	valid_1's auc: 0.889199
[3300]	training's auc: 0.909934	valid_1's auc: 0.89034
[3600]	training's auc: 0.911328	valid_1's auc: 0.891304
[3900]	training's auc: 0.912689	valid_1's auc: 0.892219
[4200]	training's auc: 0.913954	valid_1's auc: 0.893086
[4500]	training's auc: 0.915049	valid_1's auc: 0.893713
[4800]	training's auc: 0.916141	valid_1's auc: 0.894306
[5

Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.875935	valid_1's auc: 0.86313
[600]	training's auc: 0.887355	valid_1's auc: 0.871738
[900]	training's auc: 0.892228	valid_1's auc: 0.876011
[1200]	training's auc: 0.895167	valid_1's auc: 0.879012
[1500]	training's auc: 0.897897	valid_1's auc: 0.881061
[1800]	training's auc: 0.900363	valid_1's auc: 0.883213
[2100]	training's auc: 0.902629	valid_1's auc: 0.88511
[2400]	training's auc: 0.904594	valid_1's auc: 0.886578
[2700]	training's auc: 0.906517	valid_1's auc: 0.888178
[3000]	training's auc: 0.908208	valid_1's auc: 0.889514
[3300]	training's auc: 0.90978	valid_1's auc: 0.890659
[3600]	training's auc: 0.911214	valid_1's auc: 0.891632
[3900]	training's auc: 0.912611	valid_1's auc: 0.892495
[4200]	training's auc: 0.913891	valid_1's auc: 0.893305
[4500]	training's auc: 0.915064	valid_1's auc: 0.893874
[4800]	training's auc: 0.916161	valid_1's auc: 0.894405
[5100]	training's auc: 0.917182	valid_1's auc:

[1500]	training's auc: 0.898396	valid_1's auc: 0.881525
[1800]	training's auc: 0.900814	valid_1's auc: 0.883412
[2100]	training's auc: 0.903024	valid_1's auc: 0.885098
[2400]	training's auc: 0.904922	valid_1's auc: 0.886488
[2700]	training's auc: 0.906816	valid_1's auc: 0.88796
[3000]	training's auc: 0.908502	valid_1's auc: 0.889326
[3300]	training's auc: 0.910066	valid_1's auc: 0.89047
[3600]	training's auc: 0.911502	valid_1's auc: 0.891352
[3900]	training's auc: 0.912866	valid_1's auc: 0.892256
[4200]	training's auc: 0.914108	valid_1's auc: 0.893034
[4500]	training's auc: 0.915213	valid_1's auc: 0.893595
[4800]	training's auc: 0.916313	valid_1's auc: 0.89415
[5100]	training's auc: 0.917354	valid_1's auc: 0.89476
[5400]	training's auc: 0.918302	valid_1's auc: 0.895245
[5700]	training's auc: 0.919174	valid_1's auc: 0.895569
[6000]	training's auc: 0.920038	valid_1's auc: 0.895821
[6300]	training's auc: 0.920863	valid_1's auc: 0.89608
[6600]	training's auc: 0.921642	valid_1's auc: 0.8962

[2100]	training's auc: 0.903015	valid_1's auc: 0.885015
[2400]	training's auc: 0.904868	valid_1's auc: 0.886469
[2700]	training's auc: 0.906756	valid_1's auc: 0.887959
[3000]	training's auc: 0.908387	valid_1's auc: 0.889346
[3300]	training's auc: 0.909939	valid_1's auc: 0.890396
[3600]	training's auc: 0.911343	valid_1's auc: 0.891443
[3900]	training's auc: 0.912699	valid_1's auc: 0.892399
[4200]	training's auc: 0.913977	valid_1's auc: 0.893175
[4500]	training's auc: 0.915088	valid_1's auc: 0.893749
[4800]	training's auc: 0.916187	valid_1's auc: 0.894378
[5100]	training's auc: 0.917209	valid_1's auc: 0.894868
[5400]	training's auc: 0.91815	valid_1's auc: 0.895241
[5700]	training's auc: 0.919024	valid_1's auc: 0.895631
[6000]	training's auc: 0.919887	valid_1's auc: 0.895904
[6300]	training's auc: 0.920693	valid_1's auc: 0.896084
[6600]	training's auc: 0.921489	valid_1's auc: 0.896355
[6900]	training's auc: 0.922281	valid_1's auc: 0.896615
[7200]	training's auc: 0.923023	valid_1's auc: 0.

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [12]:
mean_auc = np.mean(val_aucs)
std_auc = np.std(val_aucs)
all_auc = roc_auc_score(oof['target'], oof['predict'])
print("Mean auc: %.9f, std: %.9f. All auc: %.9f." % (mean_auc, std_auc, all_auc))

Mean auc: 0.901727458, std: 0.003096157. All auc: 0.901709296.


In [13]:
# submission
predictions['target'] = np.mean(predictions[[col for col in predictions.columns if col not in ['ID_code', 'target']]].values, axis=1)
predictions.to_csv('lgb_all_predictions.csv', index=None)
sub_df = pd.DataFrame({"ID_code":test_df["ID_code"].values})
sub_df["target"] = predictions['target']
sub_df.to_csv("lgb_submission.csv", index=False)
oof.to_csv('lgb_oof.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
