In [8]:
import time
import numpy as np
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import KFold,StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

In [9]:
more_features = False
use_staking = False
use_kernal_params = True
do_augment = True
fast = False
use_kernel_params_tuned = True

train_df = pd.read_csv('input/train' + ('_more_features' if more_features else '') + '.csv')
test_df = pd.read_csv('input/test'  + ('_more_features' if more_features else '') + '.csv')

do_lda = False

fix_data_skew = False

if fix_data_skew:
    trues = train_df.loc[train_df['target'] == 1]
    falses = train_df.loc[train_df['target'] != 1].sample(frac=1)[:len(trues)]
    train_df = pd.concat([trues, falses], ignore_index=True).sample(frac=1)
else:
    train_df = train_df
    
X_test = test_df.drop('ID_code',axis=1)
X = train_df.drop(['ID_code','target'],axis=1)
y = train_df['target']

In [10]:
if do_lda:    
    lda = LDA(solver='svd', n_components=5, store_covariance=True)
    X_lda = pd.DataFrame(lda.fit_transform(X, y))
    X_test_lda = pd.DataFrame(lda.transform(X_test))
    X["lda"] = X_lda
    X_test["lda"] = X_test_lda

In [11]:
_, X_bottomhalf, _, y_bottomhalf = train_test_split(X, y, test_size=0.8, random_state=10)

In [12]:
if use_staking:
    X = X_bottomhalf
    y = y_bottomhalf

In [13]:
n_splits = 5
folds = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

In [14]:
params = {
    'metric': 'auc',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'min_data_in_leaf': 2881,
    'max_depth': 0,
    'num_leaves': 3,
    'learning_rate': 0.01,
    'bagging_freq': 3,
    #'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.3, 0.9),
    'feature_fraction': 0.8990901412442585,
    'bagging_seed': 11,
    'reg_alpha':  1.1173044727720816,
    'reg_lambda': 6.9285776442737514,
    'random_state': 42,
    'verbosity': -1,
    'subsample': 0.8054415526396443,
    'min_child_weight': 38.138072621096654,
    'num_threads': 4,
    'max_bin': 483
}

if use_kernal_params:
    # https://www.kaggle.com/jiweiliu/lgb-2-leaves-augment (lb: 0.901)
    params = {
        "objective" : "binary",
        "metric" : "auc",
        "boosting": 'gbdt',
        "max_depth" : -1,
        "num_leaves" : 13,
        "learning_rate" : 0.01,
        "bagging_freq": 5,
        "bagging_fraction" : 0.4,
        "feature_fraction" : 0.05,
        "min_data_in_leaf": 80,
        "min_sum_heassian_in_leaf": 10,
        "tree_learner": "serial",
        "boost_from_average": "false",
        #"lambda_l1" : 5,
        #"lambda_l2" : 5,
        "bagging_seed" : 13,
        "verbosity" : 1,
        "seed": 42
    }
   
if use_kernel_params_tuned:
    # apply tuning over kernel params
    # optune score: 0.09976555762908212 
    params = {
        "objective" : "binary",
        "metric" : "auc",
        "boosting": 'gbdt',
        "max_depth" : -1,
        "num_leaves" : 14,
        "learning_rate" : 0.01,
        "bagging_freq": 4,
        "bagging_fraction" : 0.48231252804226993,
        "feature_fraction" : 0.04283121565948458,
        "min_data_in_leaf": 76,
        "min_sum_heassian_in_leaf": 12,
        "tree_learner": "serial",
        "num_threads": 6,
        "boost_from_average": "false",
        #"lambda_l1" : 5,
        #"lambda_l2" : 5,
        "bagging_seed" : 13,
        "verbosity" : 1,
        "seed": 42
    }

In [15]:
def augment(x,y,t=2):
    xs,xn = [],[]
    for i in range(t):
        mask = y>0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xs.append(x1)

    for i in range(t//2):
        mask = y==0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xn.append(x1)

    xs = np.vstack(xs)
    xn = np.vstack(xn)
    ys = np.ones(xs.shape[0])
    yn = np.zeros(xn.shape[0])
    x = np.vstack([x,xs,xn])
    y = np.concatenate([y,ys,yn])
    return x,y

In [16]:
oof = train_df[['ID_code', 'target']]
oof['predict'] = 0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
predictions = test_df[['ID_code']]
val_aucs = []

In [18]:
score = 0.0
prediction = np.zeros(len(X_test))
for fold_n, (train_index, valid_index) in enumerate(folds.split(X,y)):
    print('Fold', fold_n, 'started at', time.ctime())
    X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
    y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
    
    N = 10
    
    if not do_augment:
        N = 1
    
    p_valid,yp = 0,0
    for i in range(N):
        if do_augment:
            X_t, y_t = augment(X_train.values, y_train.values)
            X_t = pd.DataFrame(X_t)
            X_t = X_t.add_prefix('var_')

            trn_data = lgb.Dataset(X_t, label=y_t)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        else:
            trn_data = lgb.Dataset(X_train, label=y_train)
            val_data = lgb.Dataset(X_valid, label=y_valid)
        
        evals_result = {}
        
        early_stopping_rounds = 800 if fast else 3000
        
        lgb_clf = lgb.train(params,
                        trn_data,
                        num_boost_round=2000000,
                        valid_sets = [trn_data, val_data],
                        early_stopping_rounds=early_stopping_rounds,
                        verbose_eval = 300,
                        evals_result=evals_result
                       )
        p_valid += lgb_clf.predict(X_valid)
        yp += lgb_clf.predict(X_test)
    
    oof['predict'][valid_index] = p_valid/N
    val_score = roc_auc_score(y_valid, p_valid)
    val_aucs.append(val_score)
    
    predictions['fold{}'.format(fold_n+1)] = yp/N

Fold 0 started at Fri Apr  5 09:55:28 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.881722	valid_1's auc: 0.8698
[600]	training's auc: 0.891979	valid_1's auc: 0.88048
[900]	training's auc: 0.896987	valid_1's auc: 0.885143
[1200]	training's auc: 0.899844	valid_1's auc: 0.88737
[1500]	training's auc: 0.901886	valid_1's auc: 0.888909
[1800]	training's auc: 0.903758	valid_1's auc: 0.890295
[2100]	training's auc: 0.905489	valid_1's auc: 0.891671
[2400]	training's auc: 0.907253	valid_1's auc: 0.892763
[2700]	training's auc: 0.908709	valid_1's auc: 0.893823
[3000]	training's auc: 0.910016	valid_1's auc: 0.894512
[3300]	training's auc: 0.911337	valid_1's auc: 0.895334
[3600]	training's auc: 0.912516	valid_1's auc: 0.895943
[3900]	training's auc: 0.913585	valid_1's auc: 0.896433
[4200]	training's auc: 0.91467	valid_1's auc: 0.896962
[4500]	training's auc: 0.915671	valid_1's auc: 0.897424
[4800]	training's auc: 0.916671	valid_1's auc: 0.89787
[5100]

[12300]	training's auc: 0.934936	valid_1's auc: 0.900558
[12600]	training's auc: 0.935568	valid_1's auc: 0.900515
[12900]	training's auc: 0.936183	valid_1's auc: 0.900456
[13200]	training's auc: 0.936788	valid_1's auc: 0.900478
Early stopping, best iteration is:
[10306]	training's auc: 0.93072	valid_1's auc: 0.90059
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.882091	valid_1's auc: 0.87027
[600]	training's auc: 0.892831	valid_1's auc: 0.880479
[900]	training's auc: 0.897703	valid_1's auc: 0.885193
[1200]	training's auc: 0.900474	valid_1's auc: 0.887475
[1500]	training's auc: 0.902504	valid_1's auc: 0.889104
[1800]	training's auc: 0.904293	valid_1's auc: 0.890472
[2100]	training's auc: 0.905952	valid_1's auc: 0.891827
[2400]	training's auc: 0.907646	valid_1's auc: 0.892919
[2700]	training's auc: 0.909017	valid_1's auc: 0.893909
[3000]	training's auc: 0.910299	valid_1's auc: 0.894815
[3300]	training's auc: 0.911594	valid_1's auc: 0.895562
[3600]

[10200]	training's auc: 0.93019	valid_1's auc: 0.900105
[10500]	training's auc: 0.930856	valid_1's auc: 0.900152
[10800]	training's auc: 0.931514	valid_1's auc: 0.900201
[11100]	training's auc: 0.932152	valid_1's auc: 0.900271
[11400]	training's auc: 0.932802	valid_1's auc: 0.900248
[11700]	training's auc: 0.933443	valid_1's auc: 0.900261
[12000]	training's auc: 0.934073	valid_1's auc: 0.900247
[12300]	training's auc: 0.934719	valid_1's auc: 0.900233
[12600]	training's auc: 0.935352	valid_1's auc: 0.900203
[12900]	training's auc: 0.935988	valid_1's auc: 0.900217
[13200]	training's auc: 0.936592	valid_1's auc: 0.900171
[13500]	training's auc: 0.937212	valid_1's auc: 0.900154
[13800]	training's auc: 0.937815	valid_1's auc: 0.900163
[14100]	training's auc: 0.938406	valid_1's auc: 0.900094
Early stopping, best iteration is:
[11224]	training's auc: 0.932415	valid_1's auc: 0.900281
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.882972	valid_1's auc: 0

[9300]	training's auc: 0.929254	valid_1's auc: 0.900088
[9600]	training's auc: 0.929925	valid_1's auc: 0.900123
[9900]	training's auc: 0.930566	valid_1's auc: 0.90015
[10200]	training's auc: 0.931217	valid_1's auc: 0.900131
[10500]	training's auc: 0.931852	valid_1's auc: 0.900177
[10800]	training's auc: 0.932501	valid_1's auc: 0.90022
[11100]	training's auc: 0.93312	valid_1's auc: 0.900307
[11400]	training's auc: 0.933741	valid_1's auc: 0.900315
[11700]	training's auc: 0.934369	valid_1's auc: 0.900358
[12000]	training's auc: 0.93499	valid_1's auc: 0.900357
[12300]	training's auc: 0.935597	valid_1's auc: 0.900335
[12600]	training's auc: 0.936227	valid_1's auc: 0.900333
[12900]	training's auc: 0.936828	valid_1's auc: 0.900262
[13200]	training's auc: 0.93743	valid_1's auc: 0.900228
[13500]	training's auc: 0.938039	valid_1's auc: 0.900221
[13800]	training's auc: 0.938643	valid_1's auc: 0.900148
[14100]	training's auc: 0.939232	valid_1's auc: 0.900087
[14400]	training's auc: 0.939826	valid_

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 1 started at Fri Apr  5 10:57:40 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.881329	valid_1's auc: 0.870531
[600]	training's auc: 0.89187	valid_1's auc: 0.88063
[900]	training's auc: 0.897016	valid_1's auc: 0.886042
[1200]	training's auc: 0.899987	valid_1's auc: 0.888775
[1500]	training's auc: 0.902016	valid_1's auc: 0.890175
[1800]	training's auc: 0.903897	valid_1's auc: 0.891542
[2100]	training's auc: 0.905643	valid_1's auc: 0.892881
[2400]	training's auc: 0.907341	valid_1's auc: 0.893919
[2700]	training's auc: 0.908785	valid_1's auc: 0.895015
[3000]	training's auc: 0.910097	valid_1's auc: 0.895721
[3300]	training's auc: 0.91141	valid_1's auc: 0.89652
[3600]	training's auc: 0.912544	valid_1's auc: 0.897062
[3900]	training's auc: 0.913583	valid_1's auc: 0.897474
[4200]	training's auc: 0.914672	valid_1's auc: 0.89817
[4500]	training's auc: 0.915665	valid_1's auc: 0.898539
[4800]	training's auc: 0.916677	valid_1's auc: 0.898986
[5100

[2100]	training's auc: 0.906128	valid_1's auc: 0.892624
[2400]	training's auc: 0.907823	valid_1's auc: 0.893705
[2700]	training's auc: 0.909244	valid_1's auc: 0.894739
[3000]	training's auc: 0.910541	valid_1's auc: 0.895419
[3300]	training's auc: 0.911926	valid_1's auc: 0.896207
[3600]	training's auc: 0.913114	valid_1's auc: 0.896777
[3900]	training's auc: 0.914159	valid_1's auc: 0.897174
[4200]	training's auc: 0.91523	valid_1's auc: 0.897719
[4500]	training's auc: 0.916209	valid_1's auc: 0.898145
[4800]	training's auc: 0.917235	valid_1's auc: 0.898565
[5100]	training's auc: 0.918168	valid_1's auc: 0.898872
[5400]	training's auc: 0.919076	valid_1's auc: 0.899189
[5700]	training's auc: 0.919921	valid_1's auc: 0.899406
[6000]	training's auc: 0.920769	valid_1's auc: 0.89971
[6300]	training's auc: 0.921587	valid_1's auc: 0.899868
[6600]	training's auc: 0.922373	valid_1's auc: 0.900066
[6900]	training's auc: 0.923146	valid_1's auc: 0.900179
[7200]	training's auc: 0.923894	valid_1's auc: 0.9

[900]	training's auc: 0.897186	valid_1's auc: 0.886179
[1200]	training's auc: 0.900297	valid_1's auc: 0.888886
[1500]	training's auc: 0.902428	valid_1's auc: 0.89035
[1800]	training's auc: 0.90427	valid_1's auc: 0.891745
[2100]	training's auc: 0.905954	valid_1's auc: 0.892951
[2400]	training's auc: 0.907699	valid_1's auc: 0.894028
[2700]	training's auc: 0.90916	valid_1's auc: 0.89503
[3000]	training's auc: 0.910466	valid_1's auc: 0.895672
[3300]	training's auc: 0.911833	valid_1's auc: 0.896547
[3600]	training's auc: 0.913002	valid_1's auc: 0.897168
[3900]	training's auc: 0.914043	valid_1's auc: 0.897562
[4200]	training's auc: 0.91513	valid_1's auc: 0.898206
[4500]	training's auc: 0.916152	valid_1's auc: 0.898593
[4800]	training's auc: 0.917163	valid_1's auc: 0.898975
[5100]	training's auc: 0.918132	valid_1's auc: 0.899191
[5400]	training's auc: 0.919051	valid_1's auc: 0.899502
[5700]	training's auc: 0.91991	valid_1's auc: 0.899741
[6000]	training's auc: 0.920784	valid_1's auc: 0.899948

[4800]	training's auc: 0.917236	valid_1's auc: 0.898871
[5100]	training's auc: 0.918187	valid_1's auc: 0.899122
[5400]	training's auc: 0.919108	valid_1's auc: 0.899441
[5700]	training's auc: 0.919977	valid_1's auc: 0.899742
[6000]	training's auc: 0.920817	valid_1's auc: 0.900057
[6300]	training's auc: 0.921667	valid_1's auc: 0.900204
[6600]	training's auc: 0.922454	valid_1's auc: 0.9004
[6900]	training's auc: 0.92323	valid_1's auc: 0.900544
[7200]	training's auc: 0.923957	valid_1's auc: 0.900625
[7500]	training's auc: 0.924705	valid_1's auc: 0.900717
[7800]	training's auc: 0.925424	valid_1's auc: 0.900833
[8100]	training's auc: 0.926117	valid_1's auc: 0.900882
[8400]	training's auc: 0.926816	valid_1's auc: 0.900911
[8700]	training's auc: 0.927496	valid_1's auc: 0.900972
[9000]	training's auc: 0.928178	valid_1's auc: 0.900987
[9300]	training's auc: 0.928876	valid_1's auc: 0.901044
[9600]	training's auc: 0.92955	valid_1's auc: 0.901008
[9900]	training's auc: 0.930207	valid_1's auc: 0.900

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 2 started at Fri Apr  5 11:52:54 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.880953	valid_1's auc: 0.874931
[600]	training's auc: 0.891525	valid_1's auc: 0.885997
[900]	training's auc: 0.896143	valid_1's auc: 0.890138
[1200]	training's auc: 0.899126	valid_1's auc: 0.892599
[1500]	training's auc: 0.901129	valid_1's auc: 0.894346
[1800]	training's auc: 0.902964	valid_1's auc: 0.896009
[2100]	training's auc: 0.90464	valid_1's auc: 0.897348
[2400]	training's auc: 0.906373	valid_1's auc: 0.898764
[2700]	training's auc: 0.907819	valid_1's auc: 0.899574
[3000]	training's auc: 0.909099	valid_1's auc: 0.900416
[3300]	training's auc: 0.910408	valid_1's auc: 0.901172
[3600]	training's auc: 0.91158	valid_1's auc: 0.901976
[3900]	training's auc: 0.91263	valid_1's auc: 0.902555
[4200]	training's auc: 0.913699	valid_1's auc: 0.903155
[4500]	training's auc: 0.914745	valid_1's auc: 0.903661
[4800]	training's auc: 0.915739	valid_1's auc: 0.904106
[51

[600]	training's auc: 0.89114	valid_1's auc: 0.886574
[900]	training's auc: 0.895982	valid_1's auc: 0.890726
[1200]	training's auc: 0.898737	valid_1's auc: 0.893091
[1500]	training's auc: 0.900812	valid_1's auc: 0.894559
[1800]	training's auc: 0.902648	valid_1's auc: 0.896124
[2100]	training's auc: 0.904347	valid_1's auc: 0.897387
[2400]	training's auc: 0.906058	valid_1's auc: 0.898789
[2700]	training's auc: 0.907435	valid_1's auc: 0.899554
[3000]	training's auc: 0.908732	valid_1's auc: 0.900419
[3300]	training's auc: 0.910037	valid_1's auc: 0.901136
[3600]	training's auc: 0.911236	valid_1's auc: 0.901908
[3900]	training's auc: 0.912294	valid_1's auc: 0.902536
[4200]	training's auc: 0.913362	valid_1's auc: 0.903111
[4500]	training's auc: 0.91439	valid_1's auc: 0.903671
[4800]	training's auc: 0.915383	valid_1's auc: 0.90405
[5100]	training's auc: 0.916327	valid_1's auc: 0.904465
[5400]	training's auc: 0.917243	valid_1's auc: 0.904809
[5700]	training's auc: 0.918098	valid_1's auc: 0.9050

Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.880846	valid_1's auc: 0.875542
[600]	training's auc: 0.891184	valid_1's auc: 0.885966
[900]	training's auc: 0.896011	valid_1's auc: 0.890254
[1200]	training's auc: 0.898828	valid_1's auc: 0.89272
[1500]	training's auc: 0.900893	valid_1's auc: 0.894203
[1800]	training's auc: 0.902769	valid_1's auc: 0.895779
[2100]	training's auc: 0.904462	valid_1's auc: 0.89709
[2400]	training's auc: 0.906148	valid_1's auc: 0.89835
[2700]	training's auc: 0.907567	valid_1's auc: 0.899257
[3000]	training's auc: 0.908901	valid_1's auc: 0.900203
[3300]	training's auc: 0.910222	valid_1's auc: 0.901062
[3600]	training's auc: 0.911411	valid_1's auc: 0.901771
[3900]	training's auc: 0.912436	valid_1's auc: 0.902284
[4200]	training's auc: 0.913547	valid_1's auc: 0.902837
[4500]	training's auc: 0.914563	valid_1's auc: 0.903379
[4800]	training's auc: 0.915562	valid_1's auc: 0.903889
[5100]	training's auc: 0.916529	valid_1's auc:

Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.880545	valid_1's auc: 0.875817
[600]	training's auc: 0.891059	valid_1's auc: 0.886214
[900]	training's auc: 0.895873	valid_1's auc: 0.89053
[1200]	training's auc: 0.898679	valid_1's auc: 0.892986
[1500]	training's auc: 0.900709	valid_1's auc: 0.894587
[1800]	training's auc: 0.902512	valid_1's auc: 0.89628
[2100]	training's auc: 0.904218	valid_1's auc: 0.89754
[2400]	training's auc: 0.905921	valid_1's auc: 0.898696
[2700]	training's auc: 0.907411	valid_1's auc: 0.89965
[3000]	training's auc: 0.908732	valid_1's auc: 0.90057
[3300]	training's auc: 0.910064	valid_1's auc: 0.901364
[3600]	training's auc: 0.911259	valid_1's auc: 0.902032
[3900]	training's auc: 0.912304	valid_1's auc: 0.902636
[4200]	training's auc: 0.91341	valid_1's auc: 0.903181
[4500]	training's auc: 0.914404	valid_1's auc: 0.903738
[4800]	training's auc: 0.915405	valid_1's auc: 0.904177
[5100]	training's auc: 0.916352	valid_1's auc: 0.

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 3 started at Fri Apr  5 12:45:57 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.881296	valid_1's auc: 0.867869
[600]	training's auc: 0.892074	valid_1's auc: 0.878544
[900]	training's auc: 0.897007	valid_1's auc: 0.88367
[1200]	training's auc: 0.899817	valid_1's auc: 0.885955
[1500]	training's auc: 0.901885	valid_1's auc: 0.887469
[1800]	training's auc: 0.903762	valid_1's auc: 0.888633
[2100]	training's auc: 0.905439	valid_1's auc: 0.889886
[2400]	training's auc: 0.907126	valid_1's auc: 0.891076
[2700]	training's auc: 0.908611	valid_1's auc: 0.892033
[3000]	training's auc: 0.909889	valid_1's auc: 0.892764
[3300]	training's auc: 0.91121	valid_1's auc: 0.893534
[3600]	training's auc: 0.912404	valid_1's auc: 0.894402
[3900]	training's auc: 0.913454	valid_1's auc: 0.894892
[4200]	training's auc: 0.914541	valid_1's auc: 0.895503
[4500]	training's auc: 0.915535	valid_1's auc: 0.896009
[4800]	training's auc: 0.916504	valid_1's auc: 0.896467
[5

[12600]	training's auc: 0.935579	valid_1's auc: 0.899736
[12900]	training's auc: 0.936198	valid_1's auc: 0.899767
[13200]	training's auc: 0.936819	valid_1's auc: 0.899769
[13500]	training's auc: 0.937432	valid_1's auc: 0.899708
[13800]	training's auc: 0.938044	valid_1's auc: 0.899699
[14100]	training's auc: 0.938637	valid_1's auc: 0.899696
[14400]	training's auc: 0.939224	valid_1's auc: 0.899694
[14700]	training's auc: 0.939814	valid_1's auc: 0.899674
[15000]	training's auc: 0.940397	valid_1's auc: 0.899663
Early stopping, best iteration is:
[12146]	training's auc: 0.934631	valid_1's auc: 0.899805
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.880997	valid_1's auc: 0.867553
[600]	training's auc: 0.891661	valid_1's auc: 0.878715
[900]	training's auc: 0.896797	valid_1's auc: 0.883831
[1200]	training's auc: 0.899694	valid_1's auc: 0.886188
[1500]	training's auc: 0.901758	valid_1's auc: 0.887601
[1800]	training's auc: 0.903552	valid_1's auc: 0.88854

[8700]	training's auc: 0.927492	valid_1's auc: 0.899497
[9000]	training's auc: 0.928176	valid_1's auc: 0.899522
[9300]	training's auc: 0.928875	valid_1's auc: 0.899598
[9600]	training's auc: 0.929534	valid_1's auc: 0.899623
[9900]	training's auc: 0.930201	valid_1's auc: 0.899687
[10200]	training's auc: 0.93086	valid_1's auc: 0.899712
[10500]	training's auc: 0.931512	valid_1's auc: 0.899719
[10800]	training's auc: 0.932149	valid_1's auc: 0.899762
[11100]	training's auc: 0.932769	valid_1's auc: 0.899748
[11400]	training's auc: 0.933391	valid_1's auc: 0.899748
[11700]	training's auc: 0.93402	valid_1's auc: 0.899731
[12000]	training's auc: 0.934638	valid_1's auc: 0.899753
[12300]	training's auc: 0.935255	valid_1's auc: 0.899731
[12600]	training's auc: 0.935851	valid_1's auc: 0.899756
[12900]	training's auc: 0.936454	valid_1's auc: 0.89975
[13200]	training's auc: 0.937066	valid_1's auc: 0.899692
[13500]	training's auc: 0.93768	valid_1's auc: 0.899651
[13800]	training's auc: 0.938287	valid_1

[6000]	training's auc: 0.920567	valid_1's auc: 0.898142
[6300]	training's auc: 0.921386	valid_1's auc: 0.89842
[6600]	training's auc: 0.922175	valid_1's auc: 0.898594
[6900]	training's auc: 0.922948	valid_1's auc: 0.898758
[7200]	training's auc: 0.923668	valid_1's auc: 0.898905
[7500]	training's auc: 0.924411	valid_1's auc: 0.899057
[7800]	training's auc: 0.925146	valid_1's auc: 0.899205
[8100]	training's auc: 0.925853	valid_1's auc: 0.899359
[8400]	training's auc: 0.926567	valid_1's auc: 0.8995
[8700]	training's auc: 0.927267	valid_1's auc: 0.899578
[9000]	training's auc: 0.927962	valid_1's auc: 0.899626
[9300]	training's auc: 0.928633	valid_1's auc: 0.899696
[9600]	training's auc: 0.929315	valid_1's auc: 0.899776
[9900]	training's auc: 0.929979	valid_1's auc: 0.8998
[10200]	training's auc: 0.930652	valid_1's auc: 0.899848
[10500]	training's auc: 0.93128	valid_1's auc: 0.899905
[10800]	training's auc: 0.931925	valid_1's auc: 0.899928
[11100]	training's auc: 0.932567	valid_1's auc: 0.8

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Fold 4 started at Fri Apr  5 13:45:24 2019
Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.882003	valid_1's auc: 0.869444
[600]	training's auc: 0.892457	valid_1's auc: 0.878101
[900]	training's auc: 0.897421	valid_1's auc: 0.881563
[1200]	training's auc: 0.900356	valid_1's auc: 0.883721
[1500]	training's auc: 0.902388	valid_1's auc: 0.885566
[1800]	training's auc: 0.904299	valid_1's auc: 0.886856
[2100]	training's auc: 0.905987	valid_1's auc: 0.88804
[2400]	training's auc: 0.907676	valid_1's auc: 0.889219
[2700]	training's auc: 0.909131	valid_1's auc: 0.890285
[3000]	training's auc: 0.91042	valid_1's auc: 0.891124
[3300]	training's auc: 0.911732	valid_1's auc: 0.891892
[3600]	training's auc: 0.912892	valid_1's auc: 0.892634
[3900]	training's auc: 0.91396	valid_1's auc: 0.893257
[4200]	training's auc: 0.915056	valid_1's auc: 0.893739
[4500]	training's auc: 0.916028	valid_1's auc: 0.894238
[4800]	training's auc: 0.917057	valid_1's auc: 0.89473
[510

Training until validation scores don't improve for 3000 rounds.
[300]	training's auc: 0.88273	valid_1's auc: 0.867497
[600]	training's auc: 0.892793	valid_1's auc: 0.876698
[900]	training's auc: 0.897778	valid_1's auc: 0.88049
[1200]	training's auc: 0.900656	valid_1's auc: 0.883087
[1500]	training's auc: 0.902704	valid_1's auc: 0.884908
[1800]	training's auc: 0.90452	valid_1's auc: 0.886497
[2100]	training's auc: 0.906153	valid_1's auc: 0.887743
[2400]	training's auc: 0.907847	valid_1's auc: 0.889102
[2700]	training's auc: 0.909239	valid_1's auc: 0.889991
[3000]	training's auc: 0.91052	valid_1's auc: 0.890879
[3300]	training's auc: 0.911787	valid_1's auc: 0.891791
[3600]	training's auc: 0.912946	valid_1's auc: 0.892506
[3900]	training's auc: 0.913939	valid_1's auc: 0.893086
[4200]	training's auc: 0.915017	valid_1's auc: 0.893614
[4500]	training's auc: 0.916003	valid_1's auc: 0.894064
[4800]	training's auc: 0.917003	valid_1's auc: 0.894469
[5100]	training's auc: 0.91795	valid_1's auc: 0

[1500]	training's auc: 0.902614	valid_1's auc: 0.88479
[1800]	training's auc: 0.904448	valid_1's auc: 0.88626
[2100]	training's auc: 0.906143	valid_1's auc: 0.887486
[2400]	training's auc: 0.907788	valid_1's auc: 0.888726
[2700]	training's auc: 0.909206	valid_1's auc: 0.889778
[3000]	training's auc: 0.91052	valid_1's auc: 0.890788
[3300]	training's auc: 0.911794	valid_1's auc: 0.891637
[3600]	training's auc: 0.912976	valid_1's auc: 0.892305
[3900]	training's auc: 0.91401	valid_1's auc: 0.892879
[4200]	training's auc: 0.915092	valid_1's auc: 0.893393
[4500]	training's auc: 0.916096	valid_1's auc: 0.893959
[4800]	training's auc: 0.917094	valid_1's auc: 0.894366
[5100]	training's auc: 0.918039	valid_1's auc: 0.894734
[5400]	training's auc: 0.91897	valid_1's auc: 0.895106
[5700]	training's auc: 0.919824	valid_1's auc: 0.895376
[6000]	training's auc: 0.920695	valid_1's auc: 0.895691
[6300]	training's auc: 0.921519	valid_1's auc: 0.89586
[6600]	training's auc: 0.922306	valid_1's auc: 0.89608

[900]	training's auc: 0.898168	valid_1's auc: 0.881646
[1200]	training's auc: 0.901034	valid_1's auc: 0.88399
[1500]	training's auc: 0.903011	valid_1's auc: 0.885857
[1800]	training's auc: 0.904838	valid_1's auc: 0.887247
[2100]	training's auc: 0.906558	valid_1's auc: 0.8884
[2400]	training's auc: 0.908249	valid_1's auc: 0.889548
[2700]	training's auc: 0.9097	valid_1's auc: 0.890617
[3000]	training's auc: 0.911005	valid_1's auc: 0.891554
[3300]	training's auc: 0.912287	valid_1's auc: 0.892362
[3600]	training's auc: 0.913433	valid_1's auc: 0.893076
[3900]	training's auc: 0.914472	valid_1's auc: 0.893701
[4200]	training's auc: 0.915587	valid_1's auc: 0.894172
[4500]	training's auc: 0.916619	valid_1's auc: 0.894647
[4800]	training's auc: 0.917636	valid_1's auc: 0.895049
[5100]	training's auc: 0.918573	valid_1's auc: 0.895433
[5400]	training's auc: 0.919524	valid_1's auc: 0.895778
[5700]	training's auc: 0.920347	valid_1's auc: 0.896001
[6000]	training's auc: 0.921221	valid_1's auc: 0.89629

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._set_labels(key, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [19]:
mean_auc = np.mean(val_aucs)
std_auc = np.std(val_aucs)
all_auc = roc_auc_score(oof['target'], oof['predict'])
print("Mean auc: %.9f, std: %.9f. All auc: %.9f." % (mean_auc, std_auc, all_auc))

Mean auc: 0.901629791, std: 0.003150175. All auc: 0.901596101.


In [20]:
# submission
predictions['target'] = np.mean(predictions[[col for col in predictions.columns if col not in ['ID_code', 'target']]].values, axis=1)
predictions.to_csv('lgb_all_predictions.csv', index=None)
sub_df = pd.DataFrame({"ID_code":test_df["ID_code"].values})
sub_df["target"] = predictions['target']
sub_df.to_csv("lgb_submission.csv", index=False)
oof.to_csv('lgb_oof.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
