In [1]:
import time
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [2]:
more_features = True
use_staking = False
use_kernal_params = True
do_augment = True
fast = False

train_df = pd.read_csv('input/train' + ('_more_features' if more_features else '') + '.csv')
test_df = pd.read_csv('input/test'  + ('_more_features' if more_features else '') + '.csv')

do_lda = False

fix_data_skew = False

if fix_data_skew:
    trues = train_df.loc[train_df['target'] == 1]
    falses = train_df.loc[train_df['target'] != 1].sample(frac=1)[:len(trues)]
    train_df = pd.concat([trues, falses], ignore_index=True).sample(frac=1)
else:
    train_df = train_df
    
X_test = test_df.drop('ID_code',axis=1)
X = train_df.drop(['ID_code','target'],axis=1)
y = train_df['target']

In [3]:
if do_lda:    
    lda = LDA(solver='svd', n_components=5, store_covariance=True)
    X_lda = pd.DataFrame(lda.fit_transform(X, y))
    X_test_lda = pd.DataFrame(lda.transform(X_test))
    X["lda"] = X_lda
    X_test["lda"] = X_test_lda

In [4]:
_, X_bottomhalf, _, y_bottomhalf = train_test_split(X, y, test_size=0.8, random_state=10)

In [5]:
if use_staking:
    X = X_bottomhalf
    y = y_bottomhalf

In [6]:
n_splits = 5
folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [7]:
params = {
    'metric': 'auc',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'min_data_in_leaf': 2881,
    'max_depth': 0,
    'num_leaves': 3,
    'learning_rate': 0.01,
    'bagging_freq': 3,
    #'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.3, 0.9),
    'feature_fraction': 0.8990901412442585,
    'bagging_seed': 11,
    'reg_alpha':  1.1173044727720816,
    'reg_lambda': 6.9285776442737514,
    'random_state': 42,
    'verbosity': -1,
    'subsample': 0.8054415526396443,
    'min_child_weight': 38.138072621096654,
    'num_threads': 4,
    'max_bin': 483
}

if use_kernal_params:
    # https://www.kaggle.com/jiweiliu/lgb-2-leaves-augment (lb: 0.901)
    params = {
        "objective" : "binary",
        "metric" : "auc",
        "boosting": 'gbdt',
        "max_depth" : -1,
        "num_leaves" : 13,
        "learning_rate" : 0.01,
        "bagging_freq": 5,
        "bagging_fraction" : 0.4,
        "feature_fraction" : 0.05,
        "min_data_in_leaf": 80,
        "min_sum_heassian_in_leaf": 10,
        "tree_learner": "serial",
        "boost_from_average": "false",
        #"lambda_l1" : 5,
        #"lambda_l2" : 5,
        "bagging_seed" : 13,
        "verbosity" : 1,
        "seed": 42
    }

In [8]:
def augment(x,y,t=2):
    xs,xn = [],[]
    for i in range(t):
        mask = y>0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xs.append(x1)

    for i in range(t//2):
        mask = y==0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xn.append(x1)

    xs = np.vstack(xs)
    xn = np.vstack(xn)
    ys = np.ones(xs.shape[0])
    yn = np.zeros(xn.shape[0])
    x = np.vstack([x,xs,xn])
    y = np.concatenate([y,ys,yn])
    return x,y

In [9]:
oof = train_df[['ID_code', 'target']]
oof['predict'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [10]:
predictions = test_df[['ID_code']]
val_aucs = []

In [11]:
score = 0.0
prediction = np.zeros(len(X_test))
for fold_n, (train_index, valid_index) in enumerate(folds.split(X,y)):
    print('Fold', fold_n, 'started at', time.ctime())
    X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
    
    N = 5
    
    if not do_augment:
        N = 1
    
    p_valid,yp = 0,0
    for i in range(N):
        if do_augment:
            X_t, y_t = augment(X_train.values, y_train.values)
            X_t = pd.DataFrame(X_t)
            X_t = X_t.add_prefix('var_')

            trn_data = lgb.Dataset(X_t, label=y_t)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        else:
            trn_data = lgb.Dataset(X_train, label=y_train)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        
        evals_result = {}
        
        early_stopping_rounds = 800 if fast else 3000
        
        lgb_clf = lgb.train(params,
                        trn_data,
                        num_boost_round=2000000,
                        valid_sets = [trn_data, val_data],
                        early_stopping_rounds=early_stopping_rounds,
                        verbose_eval = 300,
                        evals_result=evals_result
                       )
        p_valid += lgb_clf.predict(X_valid)
        yp += lgb_clf.predict(X_test)
    
    oof['predict'][valid_index] = p_valid/N
    val_score = roc_auc_score(y_valid, p_valid)
    val_aucs.append(val_score)
    
    predictions['fold{}'.format(fold_n+1)] = yp/N

Fold 0 started at Fri Mar 22 22:30:09 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.887	valid_1's auc: 0.827718
[600]	training's auc: 0.896238	valid_1's auc: 0.838558
[900]	training's auc: 0.903961	valid_1's auc: 0.848416
[1200]	training's auc: 0.910296	valid_1's auc: 0.856002
[1500]	training's auc: 0.915522	valid_1's auc: 0.862158
[1800]	training's auc: 0.919365	valid_1's auc: 0.866252
[2100]	training's auc: 0.922728	valid_1's auc: 0.869765
[2400]	training's auc: 0.925644	valid_1's auc: 0.872874
[2700]	training's auc: 0.928273	valid_1's auc: 0.875298
[3000]	training's auc: 0.930445	valid_1's auc: 0.877177
[3300]	training's auc: 0.932311	valid_1's auc: 0.878673
[3600]	training's auc: 0.93416	valid_1's auc: 0.880005
[3900]	training's auc: 0.935698	valid_1's auc: 0.881071
[4200]	training's auc: 0.937067	valid_1's auc: 0.88212
[4500]	training's auc: 0.938329	valid_1's auc: 0.883015
[4800]	training's auc: 0.939501	valid_1's auc: 0.88361
[5100]

[23700]	training's auc: 0.973338	valid_1's auc: 0.888482
[24000]	training's auc: 0.973711	valid_1's auc: 0.888503
[24300]	training's auc: 0.974077	valid_1's auc: 0.888501
[24600]	training's auc: 0.974431	valid_1's auc: 0.888445
[24900]	training's auc: 0.974789	valid_1's auc: 0.888418
[25200]	training's auc: 0.97515	valid_1's auc: 0.888429
[25500]	training's auc: 0.975502	valid_1's auc: 0.888439
[25800]	training's auc: 0.975843	valid_1's auc: 0.888451
[26100]	training's auc: 0.976184	valid_1's auc: 0.888402
[26400]	training's auc: 0.976529	valid_1's auc: 0.888418
Early stopping, best iteration is:
[23555]	training's auc: 0.97316	valid_1's auc: 0.888522
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.886661	valid_1's auc: 0.8268
[600]	training's auc: 0.896324	valid_1's auc: 0.838066
[900]	training's auc: 0.904207	valid_1's auc: 0.848379
[1200]	training's auc: 0.910324	valid_1's auc: 0.85585
[1500]	training's auc: 0.91548	valid_1's auc: 0.862093
[18

[15300]	training's auc: 0.961403	valid_1's auc: 0.888201
[15600]	training's auc: 0.961881	valid_1's auc: 0.888223
[15900]	training's auc: 0.962366	valid_1's auc: 0.888242
[16200]	training's auc: 0.962827	valid_1's auc: 0.888303
[16500]	training's auc: 0.96331	valid_1's auc: 0.888296
[16800]	training's auc: 0.963764	valid_1's auc: 0.888281
[17100]	training's auc: 0.964221	valid_1's auc: 0.888281
[17400]	training's auc: 0.964669	valid_1's auc: 0.888282
[17700]	training's auc: 0.965121	valid_1's auc: 0.888322
[18000]	training's auc: 0.965561	valid_1's auc: 0.888303
[18300]	training's auc: 0.966013	valid_1's auc: 0.888311
[18600]	training's auc: 0.966452	valid_1's auc: 0.888316
[18900]	training's auc: 0.966875	valid_1's auc: 0.888279
[19200]	training's auc: 0.967316	valid_1's auc: 0.888307
[19500]	training's auc: 0.96774	valid_1's auc: 0.888332
[19800]	training's auc: 0.968166	valid_1's auc: 0.888391
[20100]	training's auc: 0.968598	valid_1's auc: 0.888423
[20400]	training's auc: 0.969015	

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 1 started at Fri Mar 22 23:32:17 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.88624	valid_1's auc: 0.830887
[600]	training's auc: 0.89614	valid_1's auc: 0.842138
[900]	training's auc: 0.904187	valid_1's auc: 0.850742
[1200]	training's auc: 0.910385	valid_1's auc: 0.857773
[1500]	training's auc: 0.915403	valid_1's auc: 0.863858
[1800]	training's auc: 0.919184	valid_1's auc: 0.868027
[2100]	training's auc: 0.922436	valid_1's auc: 0.871381
[2400]	training's auc: 0.925241	valid_1's auc: 0.873945
[2700]	training's auc: 0.927915	valid_1's auc: 0.876217
[3000]	training's auc: 0.930127	valid_1's auc: 0.878206
[3300]	training's auc: 0.932069	valid_1's auc: 0.879719
[3600]	training's auc: 0.933905	valid_1's auc: 0.880986
[3900]	training's auc: 0.935509	valid_1's auc: 0.882024
[4200]	training's auc: 0.936885	valid_1's auc: 0.882916
[4500]	training's auc: 0.938173	valid_1's auc: 0.883685
[4800]	training's auc: 0.93932	valid_1's auc: 0.884283
[51

[900]	training's auc: 0.903265	valid_1's auc: 0.850459
[1200]	training's auc: 0.909565	valid_1's auc: 0.857522
[1500]	training's auc: 0.914716	valid_1's auc: 0.863475
[1800]	training's auc: 0.918641	valid_1's auc: 0.867497
[2100]	training's auc: 0.921868	valid_1's auc: 0.870819
[2400]	training's auc: 0.924857	valid_1's auc: 0.87366
[2700]	training's auc: 0.927455	valid_1's auc: 0.875867
[3000]	training's auc: 0.929688	valid_1's auc: 0.877693
[3300]	training's auc: 0.931625	valid_1's auc: 0.879307
[3600]	training's auc: 0.933402	valid_1's auc: 0.880536
[3900]	training's auc: 0.935008	valid_1's auc: 0.881608
[4200]	training's auc: 0.936357	valid_1's auc: 0.882491
[4500]	training's auc: 0.937631	valid_1's auc: 0.88328
[4800]	training's auc: 0.938806	valid_1's auc: 0.883865
[5100]	training's auc: 0.9399	valid_1's auc: 0.884326
[5400]	training's auc: 0.940907	valid_1's auc: 0.884814
[5700]	training's auc: 0.941838	valid_1's auc: 0.885198
[6000]	training's auc: 0.942752	valid_1's auc: 0.8855

[17700]	training's auc: 0.964801	valid_1's auc: 0.888582
[18000]	training's auc: 0.965256	valid_1's auc: 0.888575
[18300]	training's auc: 0.965698	valid_1's auc: 0.888559
[18600]	training's auc: 0.966132	valid_1's auc: 0.888602
[18900]	training's auc: 0.96657	valid_1's auc: 0.888683
[19200]	training's auc: 0.966991	valid_1's auc: 0.888672
[19500]	training's auc: 0.967435	valid_1's auc: 0.8887
[19800]	training's auc: 0.967851	valid_1's auc: 0.888717
[20100]	training's auc: 0.968269	valid_1's auc: 0.888668
[20400]	training's auc: 0.968698	valid_1's auc: 0.888687
[20700]	training's auc: 0.969115	valid_1's auc: 0.888668
[21000]	training's auc: 0.969529	valid_1's auc: 0.888683
[21300]	training's auc: 0.969938	valid_1's auc: 0.888669
[21600]	training's auc: 0.970354	valid_1's auc: 0.888679
[21900]	training's auc: 0.97075	valid_1's auc: 0.888704
[22200]	training's auc: 0.971161	valid_1's auc: 0.888667
[22500]	training's auc: 0.971553	valid_1's auc: 0.888667
Early stopping, best iteration is:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 2 started at Sat Mar 23 00:35:43 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.885455	valid_1's auc: 0.836784
[600]	training's auc: 0.894669	valid_1's auc: 0.847347
[900]	training's auc: 0.902627	valid_1's auc: 0.85696
[1200]	training's auc: 0.90894	valid_1's auc: 0.864839
[1500]	training's auc: 0.913943	valid_1's auc: 0.870708
[1800]	training's auc: 0.917797	valid_1's auc: 0.874845
[2100]	training's auc: 0.92101	valid_1's auc: 0.878441
[2400]	training's auc: 0.923981	valid_1's auc: 0.881602
[2700]	training's auc: 0.926561	valid_1's auc: 0.884062
[3000]	training's auc: 0.928741	valid_1's auc: 0.885774
[3300]	training's auc: 0.930655	valid_1's auc: 0.887395
[3600]	training's auc: 0.932487	valid_1's auc: 0.888796
[3900]	training's auc: 0.934049	valid_1's auc: 0.890001
[4200]	training's auc: 0.935434	valid_1's auc: 0.890958
[4500]	training's auc: 0.936708	valid_1's auc: 0.891788
[4800]	training's auc: 0.937924	valid_1's auc: 0.892466
[51

[1800]	training's auc: 0.917825	valid_1's auc: 0.875004
[2100]	training's auc: 0.921073	valid_1's auc: 0.878822
[2400]	training's auc: 0.924017	valid_1's auc: 0.881941
[2700]	training's auc: 0.926573	valid_1's auc: 0.884304
[3000]	training's auc: 0.928686	valid_1's auc: 0.886318
[3300]	training's auc: 0.930635	valid_1's auc: 0.888014
[3600]	training's auc: 0.932404	valid_1's auc: 0.889311
[3900]	training's auc: 0.933987	valid_1's auc: 0.890374
[4200]	training's auc: 0.935373	valid_1's auc: 0.891346
[4500]	training's auc: 0.936656	valid_1's auc: 0.892242
[4800]	training's auc: 0.937851	valid_1's auc: 0.892825
[5100]	training's auc: 0.938925	valid_1's auc: 0.893442
[5400]	training's auc: 0.939949	valid_1's auc: 0.89396
[5700]	training's auc: 0.940917	valid_1's auc: 0.894533
[6000]	training's auc: 0.94182	valid_1's auc: 0.894949
[6300]	training's auc: 0.942676	valid_1's auc: 0.895149
[6600]	training's auc: 0.943485	valid_1's auc: 0.895426
[6900]	training's auc: 0.94426	valid_1's auc: 0.89

[5100]	training's auc: 0.938825	valid_1's auc: 0.89333
[5400]	training's auc: 0.93982	valid_1's auc: 0.893883
[5700]	training's auc: 0.940772	valid_1's auc: 0.894383
[6000]	training's auc: 0.941643	valid_1's auc: 0.894781
[6300]	training's auc: 0.942483	valid_1's auc: 0.895055
[6600]	training's auc: 0.943296	valid_1's auc: 0.895424
[6900]	training's auc: 0.944053	valid_1's auc: 0.895644
[7200]	training's auc: 0.944803	valid_1's auc: 0.895831
[7500]	training's auc: 0.94552	valid_1's auc: 0.896051
[7800]	training's auc: 0.946224	valid_1's auc: 0.896214
[8100]	training's auc: 0.946899	valid_1's auc: 0.896368
[8400]	training's auc: 0.947541	valid_1's auc: 0.896476
[8700]	training's auc: 0.948191	valid_1's auc: 0.896599
[9000]	training's auc: 0.948801	valid_1's auc: 0.896729
[9300]	training's auc: 0.94941	valid_1's auc: 0.896864
[9600]	training's auc: 0.950005	valid_1's auc: 0.896942
[9900]	training's auc: 0.950606	valid_1's auc: 0.897005
[10200]	training's auc: 0.951189	valid_1's auc: 0.89

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 3 started at Sat Mar 23 01:33:29 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.885864	valid_1's auc: 0.831374
[600]	training's auc: 0.895486	valid_1's auc: 0.839501
[900]	training's auc: 0.903247	valid_1's auc: 0.847763
[1200]	training's auc: 0.90959	valid_1's auc: 0.8553
[1500]	training's auc: 0.91459	valid_1's auc: 0.861367
[1800]	training's auc: 0.918497	valid_1's auc: 0.865852
[2100]	training's auc: 0.921836	valid_1's auc: 0.869636
[2400]	training's auc: 0.924731	valid_1's auc: 0.872774
[2700]	training's auc: 0.927235	valid_1's auc: 0.875362
[3000]	training's auc: 0.929431	valid_1's auc: 0.877511
[3300]	training's auc: 0.931429	valid_1's auc: 0.879269
[3600]	training's auc: 0.93327	valid_1's auc: 0.880729
[3900]	training's auc: 0.93483	valid_1's auc: 0.881918
[4200]	training's auc: 0.936227	valid_1's auc: 0.883128
[4500]	training's auc: 0.937518	valid_1's auc: 0.884066
[4800]	training's auc: 0.938674	valid_1's auc: 0.884839
[5100]

[20100]	training's auc: 0.96809	valid_1's auc: 0.891289
[20400]	training's auc: 0.968511	valid_1's auc: 0.891362
[20700]	training's auc: 0.968926	valid_1's auc: 0.891359
[21000]	training's auc: 0.969334	valid_1's auc: 0.891363
[21300]	training's auc: 0.96974	valid_1's auc: 0.891349
[21600]	training's auc: 0.970149	valid_1's auc: 0.89136
[21900]	training's auc: 0.970551	valid_1's auc: 0.8914
[22200]	training's auc: 0.970957	valid_1's auc: 0.891369
[22500]	training's auc: 0.971338	valid_1's auc: 0.891353
[22800]	training's auc: 0.971725	valid_1's auc: 0.891357
[23100]	training's auc: 0.972105	valid_1's auc: 0.891329
[23400]	training's auc: 0.972491	valid_1's auc: 0.891358
[23700]	training's auc: 0.972862	valid_1's auc: 0.891331
[24000]	training's auc: 0.973219	valid_1's auc: 0.891343
[24300]	training's auc: 0.973589	valid_1's auc: 0.891334
[24600]	training's auc: 0.973957	valid_1's auc: 0.891361
Early stopping, best iteration is:
[21884]	training's auc: 0.970532	valid_1's auc: 0.891411
T

[15000]	training's auc: 0.960428	valid_1's auc: 0.891098
[15300]	training's auc: 0.960913	valid_1's auc: 0.891115
[15600]	training's auc: 0.961398	valid_1's auc: 0.891159
[15900]	training's auc: 0.961878	valid_1's auc: 0.89122
[16200]	training's auc: 0.962346	valid_1's auc: 0.891241
[16500]	training's auc: 0.9628	valid_1's auc: 0.891315
[16800]	training's auc: 0.963282	valid_1's auc: 0.891285
[17100]	training's auc: 0.963734	valid_1's auc: 0.89129
[17400]	training's auc: 0.964202	valid_1's auc: 0.891289
[17700]	training's auc: 0.964654	valid_1's auc: 0.89128
[18000]	training's auc: 0.965105	valid_1's auc: 0.891334
[18300]	training's auc: 0.965545	valid_1's auc: 0.891354
[18600]	training's auc: 0.965988	valid_1's auc: 0.891337
[18900]	training's auc: 0.966428	valid_1's auc: 0.891383
[19200]	training's auc: 0.96685	valid_1's auc: 0.891345
[19500]	training's auc: 0.967278	valid_1's auc: 0.891378
[19800]	training's auc: 0.967711	valid_1's auc: 0.891392
[20100]	training's auc: 0.968135	vali

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 4 started at Sat Mar 23 02:41:01 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.887332	valid_1's auc: 0.827511
[600]	training's auc: 0.896126	valid_1's auc: 0.835538
[900]	training's auc: 0.903842	valid_1's auc: 0.844912
[1200]	training's auc: 0.91006	valid_1's auc: 0.852505
[1500]	training's auc: 0.915259	valid_1's auc: 0.858619
[1800]	training's auc: 0.919163	valid_1's auc: 0.86323
[2100]	training's auc: 0.922394	valid_1's auc: 0.866863
[2400]	training's auc: 0.925256	valid_1's auc: 0.870069
[2700]	training's auc: 0.927791	valid_1's auc: 0.872653
[3000]	training's auc: 0.930033	valid_1's auc: 0.874823
[3300]	training's auc: 0.932006	valid_1's auc: 0.876492
[3600]	training's auc: 0.933852	valid_1's auc: 0.877963
[3900]	training's auc: 0.935422	valid_1's auc: 0.879259
[4200]	training's auc: 0.936806	valid_1's auc: 0.880253
[4500]	training's auc: 0.938073	valid_1's auc: 0.881149
[4800]	training's auc: 0.939233	valid_1's auc: 0.881796
[5

[1500]	training's auc: 0.915765	valid_1's auc: 0.858072
[1800]	training's auc: 0.919528	valid_1's auc: 0.862504
[2100]	training's auc: 0.922751	valid_1's auc: 0.866153
[2400]	training's auc: 0.925622	valid_1's auc: 0.869391
[2700]	training's auc: 0.928091	valid_1's auc: 0.871895
[3000]	training's auc: 0.930321	valid_1's auc: 0.874057
[3300]	training's auc: 0.932181	valid_1's auc: 0.875595
[3600]	training's auc: 0.933999	valid_1's auc: 0.877047
[3900]	training's auc: 0.935533	valid_1's auc: 0.878389
[4200]	training's auc: 0.936926	valid_1's auc: 0.879507
[4500]	training's auc: 0.938148	valid_1's auc: 0.880296
[4800]	training's auc: 0.93929	valid_1's auc: 0.88109
[5100]	training's auc: 0.940381	valid_1's auc: 0.881806
[5400]	training's auc: 0.941374	valid_1's auc: 0.882392
[5700]	training's auc: 0.942318	valid_1's auc: 0.88282
[6000]	training's auc: 0.943198	valid_1's auc: 0.883302
[6300]	training's auc: 0.944039	valid_1's auc: 0.88368
[6600]	training's auc: 0.944843	valid_1's auc: 0.884

[4800]	training's auc: 0.939353	valid_1's auc: 0.881596
[5100]	training's auc: 0.940429	valid_1's auc: 0.882203
[5400]	training's auc: 0.941436	valid_1's auc: 0.882706
[5700]	training's auc: 0.942392	valid_1's auc: 0.883154
[6000]	training's auc: 0.943278	valid_1's auc: 0.883631
[6300]	training's auc: 0.94411	valid_1's auc: 0.884007
[6600]	training's auc: 0.944903	valid_1's auc: 0.884338
[6900]	training's auc: 0.945655	valid_1's auc: 0.884561
[7200]	training's auc: 0.946361	valid_1's auc: 0.884795
[7500]	training's auc: 0.947066	valid_1's auc: 0.885037
[7800]	training's auc: 0.947745	valid_1's auc: 0.885243
[8100]	training's auc: 0.948423	valid_1's auc: 0.885366
[8400]	training's auc: 0.949042	valid_1's auc: 0.885545
[8700]	training's auc: 0.94968	valid_1's auc: 0.885697
[9000]	training's auc: 0.950291	valid_1's auc: 0.885785
[9300]	training's auc: 0.950881	valid_1's auc: 0.885884
[9600]	training's auc: 0.951471	valid_1's auc: 0.885996
[9900]	training's auc: 0.952042	valid_1's auc: 0.8

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [12]:
mean_auc = np.mean(val_aucs)
std_auc = np.std(val_aucs)
all_auc = roc_auc_score(oof['target'], oof['predict'])
print("Mean auc: %.9f, std: %.9f. All auc: %.9f." % (mean_auc, std_auc, all_auc))

Mean auc: 0.891135677, std: 0.003832281. All auc: 0.891087850.


In [13]:
# submission
predictions['target'] = np.mean(predictions[[col for col in predictions.columns if col not in ['ID_code', 'target']]].values, axis=1)
predictions.to_csv('lgb_all_predictions.csv', index=None)
sub_df = pd.DataFrame({"ID_code":test_df["ID_code"].values})
sub_df["target"] = predictions['target']
sub_df.to_csv("lgb_submission.csv", index=False)
oof.to_csv('lgb_oof.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
