In [1]:
import time
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [2]:
more_features = False
use_staking = False
use_kernal_params = False
do_augment = True
fast = False

train_df = pd.read_csv('input/train' + ('_more_features' if more_features else '') + '.csv')
test_df = pd.read_csv('input/test'  + ('_more_features' if more_features else '') + '.csv')

do_lda = False

fix_data_skew = False

if fix_data_skew:
    trues = train_df.loc[train_df['target'] == 1]
    falses = train_df.loc[train_df['target'] != 1].sample(frac=1)[:len(trues)]
    train_df = pd.concat([trues, falses], ignore_index=True).sample(frac=1)
else:
    train_df = train_df
    
X_test = test_df.drop('ID_code',axis=1)
X = train_df.drop(['ID_code','target'],axis=1)
y = train_df['target']

In [3]:
if do_lda:    
    lda = LDA(solver='svd', n_components=5, store_covariance=True)
    X_lda = pd.DataFrame(lda.fit_transform(X, y))
    X_test_lda = pd.DataFrame(lda.transform(X_test))
    X["lda"] = X_lda
    X_test["lda"] = X_test_lda

In [4]:
_, X_bottomhalf, _, y_bottomhalf = train_test_split(X, y, test_size=0.8, random_state=10)

In [5]:
if use_staking:
    X = X_bottomhalf
    y = y_bottomhalf

In [6]:
n_splits = 5
folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [7]:
params = {
    'metric': 'auc',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'min_data_in_leaf': 2881,
    'max_depth': 0,
    'num_leaves': 3,
    'learning_rate': 0.01,
    'bagging_freq': 3,
    #'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.3, 0.9),
    'feature_fraction': 0.8990901412442585,
    'bagging_seed': 11,
    'reg_alpha':  1.1173044727720816,
    'reg_lambda': 6.9285776442737514,
    'random_state': 42,
    'verbosity': -1,
    'subsample': 0.8054415526396443,
    'min_child_weight': 38.138072621096654,
    'num_threads': 4,
    'max_bin': 483
}

if use_kernal_params:
    # https://www.kaggle.com/jiweiliu/lgb-2-leaves-augment (lb: 0.901)
    params = {
        "objective" : "binary",
        "metric" : "auc",
        "boosting": 'gbdt',
        "max_depth" : -1,
        "num_leaves" : 13,
        "learning_rate" : 0.01,
        "bagging_freq": 5,
        "bagging_fraction" : 0.4,
        "feature_fraction" : 0.05,
        "min_data_in_leaf": 80,
        "min_sum_heassian_in_leaf": 10,
        "tree_learner": "serial",
        "boost_from_average": "false",
        #"lambda_l1" : 5,
        #"lambda_l2" : 5,
        "bagging_seed" : 13,
        "verbosity" : 1,
        "seed": 42
    }

In [8]:
def augment(x,y,t=2):
    xs,xn = [],[]
    for i in range(t):
        mask = y>0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xs.append(x1)

    for i in range(t//2):
        mask = y==0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xn.append(x1)

    xs = np.vstack(xs)
    xn = np.vstack(xn)
    ys = np.ones(xs.shape[0])
    yn = np.zeros(xn.shape[0])
    x = np.vstack([x,xs,xn])
    y = np.concatenate([y,ys,yn])
    return x,y

In [9]:
oof = train_df[['ID_code', 'target']]
oof['predict'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [10]:
predictions = test_df[['ID_code']]
val_aucs = []

In [11]:
score = 0.0
prediction = np.zeros(len(X_test))
for fold_n, (train_index, valid_index) in enumerate(folds.split(X,y)):
    print('Fold', fold_n, 'started at', time.ctime())
    X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
    
    N = 5
    
    if not do_augment:
        N = 1
    
    p_valid,yp = 0,0
    for i in range(N):
        if do_augment:
            X_t, y_t = augment(X_train.values, y_train.values)
            X_t = pd.DataFrame(X_t)
            X_t = X_t.add_prefix('var_')

            trn_data = lgb.Dataset(X_t, label=y_t)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        else:
            trn_data = lgb.Dataset(X_train, label=y_train)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        
        evals_result = {}
        
        early_stopping_rounds = 800 if fast else 3000
        
        lgb_clf = lgb.train(params,
                        trn_data,
                        num_boost_round=2000000,
                        valid_sets = [trn_data, val_data],
                        early_stopping_rounds=early_stopping_rounds,
                        verbose_eval = 300,
                        evals_result=evals_result
                       )
        p_valid += lgb_clf.predict(X_valid)
        yp += lgb_clf.predict(X_test)
    
    oof['predict'][valid_index] = p_valid/N
    val_score = roc_auc_score(y_valid, p_valid)
    val_aucs.append(val_score)
    
    predictions['fold{}'.format(fold_n+1)] = yp/N

Fold 0 started at Thu Mar 21 08:22:28 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.762861	valid_1's auc: 0.754327
[600]	training's auc: 0.800331	valid_1's auc: 0.793664
[900]	training's auc: 0.821566	valid_1's auc: 0.814712
[1200]	training's auc: 0.835953	valid_1's auc: 0.828902
[1500]	training's auc: 0.84586	valid_1's auc: 0.838316
[1800]	training's auc: 0.8539	valid_1's auc: 0.845865
[2100]	training's auc: 0.860567	valid_1's auc: 0.852207
[2400]	training's auc: 0.865914	valid_1's auc: 0.857406
[2700]	training's auc: 0.870586	valid_1's auc: 0.86169
[3000]	training's auc: 0.874406	valid_1's auc: 0.86521
[3300]	training's auc: 0.877704	valid_1's auc: 0.868278
[3600]	training's auc: 0.880643	valid_1's auc: 0.870893
[3900]	training's auc: 0.883327	valid_1's auc: 0.873284
[4200]	training's auc: 0.8856	valid_1's auc: 0.875346
[4500]	training's auc: 0.887653	valid_1's auc: 0.877173
[4800]	training's auc: 0.889475	valid_1's auc: 0.878789
[5100]	

[8700]	training's auc: 0.902674	valid_1's auc: 0.890477
[9000]	training's auc: 0.903334	valid_1's auc: 0.891037
[9300]	training's auc: 0.903934	valid_1's auc: 0.891504
[9600]	training's auc: 0.904502	valid_1's auc: 0.891995
[9900]	training's auc: 0.90501	valid_1's auc: 0.892419
[10200]	training's auc: 0.905554	valid_1's auc: 0.892875
[10500]	training's auc: 0.905985	valid_1's auc: 0.893232
[10800]	training's auc: 0.906442	valid_1's auc: 0.893683
[11100]	training's auc: 0.906897	valid_1's auc: 0.894025
[11400]	training's auc: 0.907306	valid_1's auc: 0.894356
[11700]	training's auc: 0.907688	valid_1's auc: 0.894688
[12000]	training's auc: 0.908057	valid_1's auc: 0.894989
[12300]	training's auc: 0.908424	valid_1's auc: 0.895325
[12600]	training's auc: 0.908765	valid_1's auc: 0.895601
[12900]	training's auc: 0.909072	valid_1's auc: 0.895861
[13200]	training's auc: 0.909345	valid_1's auc: 0.89609
[13500]	training's auc: 0.909651	valid_1's auc: 0.896316
[13800]	training's auc: 0.909932	valid

[19800]	training's auc: 0.913694	valid_1's auc: 0.899329
[20100]	training's auc: 0.913827	valid_1's auc: 0.899409
[20400]	training's auc: 0.913958	valid_1's auc: 0.899508
[20700]	training's auc: 0.91408	valid_1's auc: 0.899559
[21000]	training's auc: 0.914201	valid_1's auc: 0.899614
[21300]	training's auc: 0.914326	valid_1's auc: 0.899671
[21600]	training's auc: 0.914441	valid_1's auc: 0.899714
[21900]	training's auc: 0.914565	valid_1's auc: 0.899791
[22200]	training's auc: 0.914687	valid_1's auc: 0.89987
[22500]	training's auc: 0.914804	valid_1's auc: 0.899915
[22800]	training's auc: 0.914916	valid_1's auc: 0.899939
[23100]	training's auc: 0.915017	valid_1's auc: 0.89996
[23400]	training's auc: 0.915122	valid_1's auc: 0.90001
[23700]	training's auc: 0.915231	valid_1's auc: 0.900052
[24000]	training's auc: 0.91533	valid_1's auc: 0.900048
[24300]	training's auc: 0.915428	valid_1's auc: 0.900084
[24600]	training's auc: 0.91554	valid_1's auc: 0.900134
[24900]	training's auc: 0.915637	vali

[30000]	training's auc: 0.917709	valid_1's auc: 0.899727
[30300]	training's auc: 0.917821	valid_1's auc: 0.899694
[30600]	training's auc: 0.917921	valid_1's auc: 0.899697
[30900]	training's auc: 0.918014	valid_1's auc: 0.899692
[31200]	training's auc: 0.918114	valid_1's auc: 0.899717
[31500]	training's auc: 0.918211	valid_1's auc: 0.899689
[31800]	training's auc: 0.918307	valid_1's auc: 0.899702
[32100]	training's auc: 0.918417	valid_1's auc: 0.899705
[32400]	training's auc: 0.918517	valid_1's auc: 0.899691
[32700]	training's auc: 0.91862	valid_1's auc: 0.899696
Early stopping, best iteration is:
[29964]	training's auc: 0.917696	valid_1's auc: 0.89973
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.761398	valid_1's auc: 0.753519
[600]	training's auc: 0.799785	valid_1's auc: 0.793776
[900]	training's auc: 0.82093	valid_1's auc: 0.814884
[1200]	training's auc: 0.834265	valid_1's auc: 0.828198
[1500]	training's auc: 0.844633	valid_1's auc: 0.838334


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 1 started at Thu Mar 21 10:37:44 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.76179	valid_1's auc: 0.757995
[600]	training's auc: 0.800996	valid_1's auc: 0.796414
[900]	training's auc: 0.820842	valid_1's auc: 0.815685
[1200]	training's auc: 0.834943	valid_1's auc: 0.829593
[1500]	training's auc: 0.845351	valid_1's auc: 0.83991
[1800]	training's auc: 0.853702	valid_1's auc: 0.848043
[2100]	training's auc: 0.859775	valid_1's auc: 0.853786
[2400]	training's auc: 0.865207	valid_1's auc: 0.858857
[2700]	training's auc: 0.86956	valid_1's auc: 0.863189
[3000]	training's auc: 0.873411	valid_1's auc: 0.866753
[3300]	training's auc: 0.876658	valid_1's auc: 0.869879
[3600]	training's auc: 0.8797	valid_1's auc: 0.87264
[3900]	training's auc: 0.882289	valid_1's auc: 0.874866
[4200]	training's auc: 0.884519	valid_1's auc: 0.876781
[4500]	training's auc: 0.886555	valid_1's auc: 0.87863
[4800]	training's auc: 0.888404	valid_1's auc: 0.88034
[5100]	t

[9000]	training's auc: 0.902941	valid_1's auc: 0.892367
[9300]	training's auc: 0.903549	valid_1's auc: 0.893013
[9600]	training's auc: 0.904116	valid_1's auc: 0.893374
[9900]	training's auc: 0.904621	valid_1's auc: 0.89376
[10200]	training's auc: 0.905173	valid_1's auc: 0.894228
[10500]	training's auc: 0.905656	valid_1's auc: 0.894584
[10800]	training's auc: 0.906147	valid_1's auc: 0.894997
[11100]	training's auc: 0.906566	valid_1's auc: 0.895321
[11400]	training's auc: 0.907024	valid_1's auc: 0.895675
[11700]	training's auc: 0.907381	valid_1's auc: 0.895954
[12000]	training's auc: 0.907769	valid_1's auc: 0.89628
[12300]	training's auc: 0.908084	valid_1's auc: 0.896458
[12600]	training's auc: 0.908424	valid_1's auc: 0.896741
[12900]	training's auc: 0.908728	valid_1's auc: 0.896925
[13200]	training's auc: 0.909043	valid_1's auc: 0.897148
[13500]	training's auc: 0.909317	valid_1's auc: 0.897325
[13800]	training's auc: 0.909605	valid_1's auc: 0.897543
[14100]	training's auc: 0.909873	vali

[18000]	training's auc: 0.91265	valid_1's auc: 0.899125
[18300]	training's auc: 0.912827	valid_1's auc: 0.899181
[18600]	training's auc: 0.912989	valid_1's auc: 0.899282
[18900]	training's auc: 0.913136	valid_1's auc: 0.899349
[19200]	training's auc: 0.913276	valid_1's auc: 0.899431
[19500]	training's auc: 0.913426	valid_1's auc: 0.899543
[19800]	training's auc: 0.913552	valid_1's auc: 0.89961
[20100]	training's auc: 0.913686	valid_1's auc: 0.899672
[20400]	training's auc: 0.913826	valid_1's auc: 0.899734
[20700]	training's auc: 0.913942	valid_1's auc: 0.899808
[21000]	training's auc: 0.91407	valid_1's auc: 0.899891
[21300]	training's auc: 0.914184	valid_1's auc: 0.899906
[21600]	training's auc: 0.914317	valid_1's auc: 0.899952
[21900]	training's auc: 0.914439	valid_1's auc: 0.899982
[22200]	training's auc: 0.914565	valid_1's auc: 0.900014
[22500]	training's auc: 0.914673	valid_1's auc: 0.900074
[22800]	training's auc: 0.914788	valid_1's auc: 0.900097
[23100]	training's auc: 0.914896	v

[27000]	training's auc: 0.916188	valid_1's auc: 0.90042
[27300]	training's auc: 0.916283	valid_1's auc: 0.900432
[27600]	training's auc: 0.916373	valid_1's auc: 0.900463
[27900]	training's auc: 0.916464	valid_1's auc: 0.900458
[28200]	training's auc: 0.916558	valid_1's auc: 0.900438
[28500]	training's auc: 0.916655	valid_1's auc: 0.90045
[28800]	training's auc: 0.916745	valid_1's auc: 0.900444
[29100]	training's auc: 0.916845	valid_1's auc: 0.900441
[29400]	training's auc: 0.916935	valid_1's auc: 0.900444
[29700]	training's auc: 0.917027	valid_1's auc: 0.900457
[30000]	training's auc: 0.917119	valid_1's auc: 0.900468
[30300]	training's auc: 0.917215	valid_1's auc: 0.900466
[30600]	training's auc: 0.917307	valid_1's auc: 0.90046
[30900]	training's auc: 0.917398	valid_1's auc: 0.900477
[31200]	training's auc: 0.917487	valid_1's auc: 0.900504
[31500]	training's auc: 0.917572	valid_1's auc: 0.900497
[31800]	training's auc: 0.917658	valid_1's auc: 0.900481
[32100]	training's auc: 0.917756	v

[33900]	training's auc: 0.918403	valid_1's auc: 0.900613
[34200]	training's auc: 0.918491	valid_1's auc: 0.900584
[34500]	training's auc: 0.918585	valid_1's auc: 0.900571
[34800]	training's auc: 0.918672	valid_1's auc: 0.900551
[35100]	training's auc: 0.918762	valid_1's auc: 0.900533
[35400]	training's auc: 0.918853	valid_1's auc: 0.900534
[35700]	training's auc: 0.918939	valid_1's auc: 0.900535
[36000]	training's auc: 0.919028	valid_1's auc: 0.900531
[36300]	training's auc: 0.919121	valid_1's auc: 0.90053
Early stopping, best iteration is:
[33409]	training's auc: 0.918253	valid_1's auc: 0.900645


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 2 started at Thu Mar 21 12:55:18 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.762525	valid_1's auc: 0.758354
[600]	training's auc: 0.800718	valid_1's auc: 0.795696
[900]	training's auc: 0.820797	valid_1's auc: 0.81594
[1200]	training's auc: 0.835271	valid_1's auc: 0.830607
[1500]	training's auc: 0.845039	valid_1's auc: 0.840077
[1800]	training's auc: 0.853319	valid_1's auc: 0.84817
[2100]	training's auc: 0.859661	valid_1's auc: 0.854408
[2400]	training's auc: 0.865027	valid_1's auc: 0.859643
[2700]	training's auc: 0.869622	valid_1's auc: 0.864079
[3000]	training's auc: 0.873214	valid_1's auc: 0.867599
[3300]	training's auc: 0.876672	valid_1's auc: 0.870991
[3600]	training's auc: 0.87952	valid_1's auc: 0.873712
[3900]	training's auc: 0.882045	valid_1's auc: 0.876257
[4200]	training's auc: 0.884336	valid_1's auc: 0.878464
[4500]	training's auc: 0.886397	valid_1's auc: 0.880621
[4800]	training's auc: 0.888389	valid_1's auc: 0.882454
[51

[7200]	training's auc: 0.897794	valid_1's auc: 0.891892
[7500]	training's auc: 0.898619	valid_1's auc: 0.892685
[7800]	training's auc: 0.899452	valid_1's auc: 0.893384
[8100]	training's auc: 0.900178	valid_1's auc: 0.89411
[8400]	training's auc: 0.900944	valid_1's auc: 0.894747
[8700]	training's auc: 0.901667	valid_1's auc: 0.895447
[9000]	training's auc: 0.902306	valid_1's auc: 0.896042
[9300]	training's auc: 0.90287	valid_1's auc: 0.896625
[9600]	training's auc: 0.903406	valid_1's auc: 0.897152
[9900]	training's auc: 0.90394	valid_1's auc: 0.897675
[10200]	training's auc: 0.90448	valid_1's auc: 0.89822
[10500]	training's auc: 0.90495	valid_1's auc: 0.898598
[10800]	training's auc: 0.905397	valid_1's auc: 0.899053
[11100]	training's auc: 0.905804	valid_1's auc: 0.899428
[11400]	training's auc: 0.9062	valid_1's auc: 0.899843
[11700]	training's auc: 0.906585	valid_1's auc: 0.900159
[12000]	training's auc: 0.906917	valid_1's auc: 0.900503
[12300]	training's auc: 0.907285	valid_1's auc: 0

[9900]	training's auc: 0.90384	valid_1's auc: 0.898317
[10200]	training's auc: 0.904321	valid_1's auc: 0.89873
[10500]	training's auc: 0.904771	valid_1's auc: 0.899146
[10800]	training's auc: 0.905243	valid_1's auc: 0.89952
[11100]	training's auc: 0.905667	valid_1's auc: 0.899949
[11400]	training's auc: 0.906078	valid_1's auc: 0.900332
[11700]	training's auc: 0.906448	valid_1's auc: 0.900645
[12000]	training's auc: 0.90684	valid_1's auc: 0.900931
[12300]	training's auc: 0.907238	valid_1's auc: 0.901276
[12600]	training's auc: 0.90757	valid_1's auc: 0.901556
[12900]	training's auc: 0.907872	valid_1's auc: 0.901837
[13200]	training's auc: 0.908173	valid_1's auc: 0.902043
[13500]	training's auc: 0.90849	valid_1's auc: 0.902265
[13800]	training's auc: 0.90876	valid_1's auc: 0.902457
[14100]	training's auc: 0.909064	valid_1's auc: 0.902699
[14400]	training's auc: 0.909314	valid_1's auc: 0.902887
[14700]	training's auc: 0.909573	valid_1's auc: 0.903063
[15000]	training's auc: 0.909825	valid_

[16200]	training's auc: 0.910791	valid_1's auc: 0.903969
[16500]	training's auc: 0.910994	valid_1's auc: 0.904095
[16800]	training's auc: 0.911178	valid_1's auc: 0.904195
[17100]	training's auc: 0.911342	valid_1's auc: 0.904268
[17400]	training's auc: 0.911527	valid_1's auc: 0.904421
[17700]	training's auc: 0.911687	valid_1's auc: 0.904538
[18000]	training's auc: 0.911839	valid_1's auc: 0.904634
[18300]	training's auc: 0.911989	valid_1's auc: 0.904715
[18600]	training's auc: 0.91216	valid_1's auc: 0.904815
[18900]	training's auc: 0.91232	valid_1's auc: 0.904872
[19200]	training's auc: 0.912455	valid_1's auc: 0.904949
[19500]	training's auc: 0.912579	valid_1's auc: 0.905025
[19800]	training's auc: 0.91272	valid_1's auc: 0.905106
[20100]	training's auc: 0.912853	valid_1's auc: 0.905156
[20400]	training's auc: 0.912995	valid_1's auc: 0.905224
[20700]	training's auc: 0.913111	valid_1's auc: 0.905281
[21000]	training's auc: 0.913246	valid_1's auc: 0.90536
[21300]	training's auc: 0.913375	va

[18600]	training's auc: 0.911849	valid_1's auc: 0.904953
[18900]	training's auc: 0.911998	valid_1's auc: 0.905004
[19200]	training's auc: 0.91215	valid_1's auc: 0.905078
[19500]	training's auc: 0.912293	valid_1's auc: 0.905165
[19800]	training's auc: 0.912439	valid_1's auc: 0.905267
[20100]	training's auc: 0.912581	valid_1's auc: 0.905347
[20400]	training's auc: 0.91272	valid_1's auc: 0.905382
[20700]	training's auc: 0.91284	valid_1's auc: 0.905468
[21000]	training's auc: 0.91296	valid_1's auc: 0.905514
[21300]	training's auc: 0.913068	valid_1's auc: 0.905562
[21600]	training's auc: 0.913193	valid_1's auc: 0.905647
[21900]	training's auc: 0.913327	valid_1's auc: 0.905709
[22200]	training's auc: 0.91344	valid_1's auc: 0.905759
[22500]	training's auc: 0.91355	valid_1's auc: 0.905817
[22800]	training's auc: 0.913648	valid_1's auc: 0.905853
[23100]	training's auc: 0.913751	valid_1's auc: 0.905902
[23400]	training's auc: 0.913863	valid_1's auc: 0.905959
[23700]	training's auc: 0.913973	vali

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 3 started at Thu Mar 21 15:28:17 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.764706	valid_1's auc: 0.749726
[600]	training's auc: 0.800334	valid_1's auc: 0.78592
[900]	training's auc: 0.820686	valid_1's auc: 0.806762
[1200]	training's auc: 0.834948	valid_1's auc: 0.821601
[1500]	training's auc: 0.845065	valid_1's auc: 0.832423
[1800]	training's auc: 0.853166	valid_1's auc: 0.840757
[2100]	training's auc: 0.859924	valid_1's auc: 0.847362
[2400]	training's auc: 0.865507	valid_1's auc: 0.852673
[2700]	training's auc: 0.869951	valid_1's auc: 0.857147
[3000]	training's auc: 0.873781	valid_1's auc: 0.861108
[3300]	training's auc: 0.877057	valid_1's auc: 0.864279
[3600]	training's auc: 0.880086	valid_1's auc: 0.867242
[3900]	training's auc: 0.882662	valid_1's auc: 0.869682
[4200]	training's auc: 0.885048	valid_1's auc: 0.871984
[4500]	training's auc: 0.887189	valid_1's auc: 0.873969
[4800]	training's auc: 0.888978	valid_1's auc: 0.875789
[

[3900]	training's auc: 0.882713	valid_1's auc: 0.869686
[4200]	training's auc: 0.885003	valid_1's auc: 0.871921
[4500]	training's auc: 0.887176	valid_1's auc: 0.874085
[4800]	training's auc: 0.888866	valid_1's auc: 0.875821
[5100]	training's auc: 0.890644	valid_1's auc: 0.8775
[5400]	training's auc: 0.892193	valid_1's auc: 0.878875
[5700]	training's auc: 0.893551	valid_1's auc: 0.880198
[6000]	training's auc: 0.894868	valid_1's auc: 0.881312
[6300]	training's auc: 0.896074	valid_1's auc: 0.882381
[6600]	training's auc: 0.89713	valid_1's auc: 0.883301
[6900]	training's auc: 0.898153	valid_1's auc: 0.884178
[7200]	training's auc: 0.899087	valid_1's auc: 0.885017
[7500]	training's auc: 0.899939	valid_1's auc: 0.885772
[7800]	training's auc: 0.900795	valid_1's auc: 0.886551
[8100]	training's auc: 0.901585	valid_1's auc: 0.887354
[8400]	training's auc: 0.90227	valid_1's auc: 0.888012
[8700]	training's auc: 0.902946	valid_1's auc: 0.888663
[9000]	training's auc: 0.903624	valid_1's auc: 0.889

[9600]	training's auc: 0.90466	valid_1's auc: 0.890007
[9900]	training's auc: 0.905185	valid_1's auc: 0.890471
[10200]	training's auc: 0.905741	valid_1's auc: 0.890961
[10500]	training's auc: 0.90624	valid_1's auc: 0.891382
[10800]	training's auc: 0.906672	valid_1's auc: 0.891786
[11100]	training's auc: 0.907113	valid_1's auc: 0.892138
[11400]	training's auc: 0.907512	valid_1's auc: 0.892452
[11700]	training's auc: 0.907848	valid_1's auc: 0.892793
[12000]	training's auc: 0.908192	valid_1's auc: 0.893105
[12300]	training's auc: 0.90853	valid_1's auc: 0.893378
[12600]	training's auc: 0.9089	valid_1's auc: 0.893683
[12900]	training's auc: 0.909189	valid_1's auc: 0.893997
[13200]	training's auc: 0.909482	valid_1's auc: 0.894217
[13500]	training's auc: 0.909765	valid_1's auc: 0.894442
[13800]	training's auc: 0.910043	valid_1's auc: 0.894704
[14100]	training's auc: 0.910284	valid_1's auc: 0.894903
[14400]	training's auc: 0.910528	valid_1's auc: 0.895155
[14700]	training's auc: 0.910763	valid

[14700]	training's auc: 0.911405	valid_1's auc: 0.895532
[15000]	training's auc: 0.911651	valid_1's auc: 0.895748
[15300]	training's auc: 0.91187	valid_1's auc: 0.895897
[15600]	training's auc: 0.912049	valid_1's auc: 0.896011
[15900]	training's auc: 0.912225	valid_1's auc: 0.89613
[16200]	training's auc: 0.912403	valid_1's auc: 0.896267
[16500]	training's auc: 0.912609	valid_1's auc: 0.896444
[16800]	training's auc: 0.912798	valid_1's auc: 0.896619
[17100]	training's auc: 0.912979	valid_1's auc: 0.896785
[17400]	training's auc: 0.91315	valid_1's auc: 0.896951
[17700]	training's auc: 0.913307	valid_1's auc: 0.897062
[18000]	training's auc: 0.913472	valid_1's auc: 0.897194
[18300]	training's auc: 0.913613	valid_1's auc: 0.897329
[18600]	training's auc: 0.913764	valid_1's auc: 0.897476
[18900]	training's auc: 0.913907	valid_1's auc: 0.897567
[19200]	training's auc: 0.914055	valid_1's auc: 0.897651
[19500]	training's auc: 0.914177	valid_1's auc: 0.897715
[19800]	training's auc: 0.914303	v

[17700]	training's auc: 0.911988	valid_1's auc: 0.897197
[18000]	training's auc: 0.91216	valid_1's auc: 0.897328
[18300]	training's auc: 0.912312	valid_1's auc: 0.89747
[18600]	training's auc: 0.912464	valid_1's auc: 0.897565
[18900]	training's auc: 0.912606	valid_1's auc: 0.897681
[19200]	training's auc: 0.912758	valid_1's auc: 0.897747
[19500]	training's auc: 0.912889	valid_1's auc: 0.897887
[19800]	training's auc: 0.913006	valid_1's auc: 0.897936
[20100]	training's auc: 0.91314	valid_1's auc: 0.898048
[20400]	training's auc: 0.913279	valid_1's auc: 0.898086
[20700]	training's auc: 0.913418	valid_1's auc: 0.898189
[21000]	training's auc: 0.913536	valid_1's auc: 0.898284
[21300]	training's auc: 0.91367	valid_1's auc: 0.898396
[21600]	training's auc: 0.913775	valid_1's auc: 0.898434
[21900]	training's auc: 0.91389	valid_1's auc: 0.898518
[22200]	training's auc: 0.914005	valid_1's auc: 0.898579
[22500]	training's auc: 0.914112	valid_1's auc: 0.898639
[22800]	training's auc: 0.914235	val

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 4 started at Thu Mar 21 17:58:45 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.764662	valid_1's auc: 0.761483
[600]	training's auc: 0.802106	valid_1's auc: 0.794791
[900]	training's auc: 0.823472	valid_1's auc: 0.813948
[1200]	training's auc: 0.837128	valid_1's auc: 0.825838
[1500]	training's auc: 0.847503	valid_1's auc: 0.835012
[1800]	training's auc: 0.85535	valid_1's auc: 0.842104
[2100]	training's auc: 0.861664	valid_1's auc: 0.847904
[2400]	training's auc: 0.866942	valid_1's auc: 0.852577
[2700]	training's auc: 0.871226	valid_1's auc: 0.856617
[3000]	training's auc: 0.874868	valid_1's auc: 0.859928
[3300]	training's auc: 0.878384	valid_1's auc: 0.863014
[3600]	training's auc: 0.881182	valid_1's auc: 0.865535
[3900]	training's auc: 0.883677	valid_1's auc: 0.867929
[4200]	training's auc: 0.885941	valid_1's auc: 0.870052
[4500]	training's auc: 0.887949	valid_1's auc: 0.871935
[4800]	training's auc: 0.889741	valid_1's auc: 0.873658
[

[10200]	training's auc: 0.906117	valid_1's auc: 0.889361
[10500]	training's auc: 0.90659	valid_1's auc: 0.889775
[10800]	training's auc: 0.907063	valid_1's auc: 0.890191
[11100]	training's auc: 0.907479	valid_1's auc: 0.890531
[11400]	training's auc: 0.907884	valid_1's auc: 0.890886
[11700]	training's auc: 0.908288	valid_1's auc: 0.891223
[12000]	training's auc: 0.908633	valid_1's auc: 0.891539
[12300]	training's auc: 0.908972	valid_1's auc: 0.891813
[12600]	training's auc: 0.909338	valid_1's auc: 0.892101
[12900]	training's auc: 0.909683	valid_1's auc: 0.892373
[13200]	training's auc: 0.909966	valid_1's auc: 0.892629
[13500]	training's auc: 0.910281	valid_1's auc: 0.892874
[13800]	training's auc: 0.910564	valid_1's auc: 0.89311
[14100]	training's auc: 0.91083	valid_1's auc: 0.893301
[14400]	training's auc: 0.911074	valid_1's auc: 0.893461
[14700]	training's auc: 0.911316	valid_1's auc: 0.893693
[15000]	training's auc: 0.911544	valid_1's auc: 0.893923
[15300]	training's auc: 0.911796	v

[18900]	training's auc: 0.912975	valid_1's auc: 0.895569
[19200]	training's auc: 0.913108	valid_1's auc: 0.895647
[19500]	training's auc: 0.913256	valid_1's auc: 0.895749
[19800]	training's auc: 0.913392	valid_1's auc: 0.895819
[20100]	training's auc: 0.913526	valid_1's auc: 0.895904
[20400]	training's auc: 0.913658	valid_1's auc: 0.895953
[20700]	training's auc: 0.913777	valid_1's auc: 0.896018
[21000]	training's auc: 0.913916	valid_1's auc: 0.896078
[21300]	training's auc: 0.91405	valid_1's auc: 0.896149
[21600]	training's auc: 0.914177	valid_1's auc: 0.896207
[21900]	training's auc: 0.914288	valid_1's auc: 0.896255
[22200]	training's auc: 0.914407	valid_1's auc: 0.896344
[22500]	training's auc: 0.914531	valid_1's auc: 0.896397
[22800]	training's auc: 0.914645	valid_1's auc: 0.896451
[23100]	training's auc: 0.914758	valid_1's auc: 0.89652
[23400]	training's auc: 0.91487	valid_1's auc: 0.89656
[23700]	training's auc: 0.914977	valid_1's auc: 0.896617
[24000]	training's auc: 0.915074	va

[27600]	training's auc: 0.917138	valid_1's auc: 0.896966
[27900]	training's auc: 0.917243	valid_1's auc: 0.896988
[28200]	training's auc: 0.917345	valid_1's auc: 0.897037
[28500]	training's auc: 0.917446	valid_1's auc: 0.897029
[28800]	training's auc: 0.917538	valid_1's auc: 0.897036
[29100]	training's auc: 0.91763	valid_1's auc: 0.897033
[29400]	training's auc: 0.917737	valid_1's auc: 0.897065
[29700]	training's auc: 0.917822	valid_1's auc: 0.89709
[30000]	training's auc: 0.917924	valid_1's auc: 0.897105
[30300]	training's auc: 0.918024	valid_1's auc: 0.897099
[30600]	training's auc: 0.918112	valid_1's auc: 0.897084
[30900]	training's auc: 0.918209	valid_1's auc: 0.897092
[31200]	training's auc: 0.9183	valid_1's auc: 0.89709
[31500]	training's auc: 0.918395	valid_1's auc: 0.897116
[31800]	training's auc: 0.918484	valid_1's auc: 0.897123
[32100]	training's auc: 0.918585	valid_1's auc: 0.897114
[32400]	training's auc: 0.918675	valid_1's auc: 0.897128
[32700]	training's auc: 0.918781	val

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [12]:
mean_auc = np.mean(val_aucs)
std_auc = np.std(val_aucs)
all_auc = roc_auc_score(oof['target'], oof['predict'])
print("Mean auc: %.9f, std: %.9f. All auc: %.9f." % (mean_auc, std_auc, all_auc))

Mean auc: 0.901320857, std: 0.003065016. All auc: 0.901319637.


In [13]:
# submission
predictions['target'] = np.mean(predictions[[col for col in predictions.columns if col not in ['ID_code', 'target']]].values, axis=1)
predictions.to_csv('lgb_all_predictions.csv', index=None)
sub_df = pd.DataFrame({"ID_code":test_df["ID_code"].values})
sub_df["target"] = predictions['target']
sub_df.to_csv("lgb_submission.csv", index=False)
oof.to_csv('lgb_oof.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
