In [50]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

In [17]:
# Read the pickled train / test frames from the input directory.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
df_train, df_test = (
    pd.read_pickle(pkl_path)
    for pkl_path in ("../input/train.pkl", "../input/test.pkl")
)

In [18]:
# Remove the `isFraud` column from the training frame; the notebook builds its
# own `target` column (train-vs-test origin) further down.
# errors="ignore" keeps this cell re-runnable: executing it a second time,
# when the column is already gone, is a no-op instead of a KeyError.
df_train = df_train.drop(columns="isFraud", errors="ignore")

In [19]:
# Peek at the first two training rows.
df_train.iloc[:2]

Unnamed: 0_level_0,TransactionDT,TransactionAmt,ProductCD,card1,card2,card3,card4,card5,card6,addr1,...,id_31,id_32,id_33,id_34,id_35,id_36,id_37,id_38,DeviceType,DeviceInfo
TransactionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2987000,86400,68.5,W,13926,,150.0,discover,142.0,credit,315.0,...,,,,,,,,,,
2987001,86401,29.0,W,2755,404.0,150.0,mastercard,102.0,credit,325.0,...,,,,,,,,,,


In [20]:
# Drop the `TransactionDT` column from both frames before they are combined.
# NOTE(review): presumably removed because a timestamp would trivially
# separate train from test rows — confirm.
# errors="ignore" keeps the cell idempotent (a re-run does not raise KeyError).
df_train = df_train.drop(columns="TransactionDT", errors="ignore")
df_test = df_test.drop(columns="TransactionDT", errors="ignore")

In [21]:
# Label every row with its origin so a classifier can try to tell the two
# sets apart: 1 = came from train, 0 = came from test.
for origin_frame, origin_flag in ((df_train, 1), (df_test, 0)):
    origin_frame['target'] = origin_flag

In [22]:
# Stack train rows on top of test rows into one frame.
data = pd.concat([df_train, df_test], axis=0)

In [23]:
# Integer-encode every categorical / object column of the combined frame.
# Category-typed columns are processed first, then object-typed ones.
columns_to_encode = [
    *data.select_dtypes(include='category').columns,
    *data.select_dtypes(include='object').columns,
]
for col in tqdm(columns_to_encode):
    raw_values = list(data[col].values)
    # Fit on the combined train+test values so both sets share one mapping.
    data[col] = LabelEncoder().fit(raw_values).transform(raw_values)

100%|██████████| 31/31 [01:01<00:00,  2.19s/it]


In [24]:
# Missing-value imputation is currently disabled, so NaNs stay in `data`.
# NOTE(review): the RandomForestClassifier cell further down is fitted on a
# frame derived from `data`; depending on the scikit-learn version it may
# reject NaNs — confirm before re-enabling or removing this line.
# data = data.fillna(-999)

In [25]:
# Origin label (1 = train row, 0 = test row) used as the classification target.
y = data.loc[:, 'target']

In [26]:
# Feature matrix: everything except the origin label.
x = data.drop(columns=['target'])

In [27]:
# Hold out 30% of the combined rows for evaluating the train-vs-test classifier.
# A fixed random_state makes the split — and therefore every AUC and every
# dropped-feature list computed below — reproducible across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, random_state=0
)

In [29]:
# LightGBM settings for the train-vs-test classifier.
params = {
    'objective': 'binary',      # two classes: train row vs test row
    'boosting_type': 'gbdt',
    'metric': 'auc',            # reported on every validation set
}

In [30]:
# Adversarial feature pruning.
#
# Repeatedly train a LightGBM classifier that tries to tell train rows
# (target=1) from test rows (target=0). While the hold-out AUC is at or above
# AUC_THRESHOLD, the N_DROP_PER_ROUND most important features are removed from
# both splits and the model is retrained. The loop ends when the AUC falls
# below the threshold, or when dropping more features would leave none.
#
# Side effects relied on by later cells: `x_train` / `x_test` are left with the
# surviving columns, and `lgb_model`, `pred`, `auc` hold the last round's values.
# NOTE: the cell mutates `x_train` / `x_test`; re-run the split cell first to
# restart the pruning from the full feature set.
# NOTE(review): `verbose_eval` / `early_stopping_rounds` are accepted by the
# LightGBM version that produced the saved output; newer releases expect
# callbacks instead — confirm against the installed version.
AUC_THRESHOLD = 0.75     # stop once train and test are this hard to separate
N_DROP_PER_ROUND = 5     # number of top-importance features removed per round

while True:
    trn_data = lgb.Dataset(x_train, label=y_train)
    val_data = lgb.Dataset(x_test, label=y_test)

    lgb_model = lgb.train(params,
                          trn_data,
                          500,
                          valid_sets=[trn_data, val_data],
                          verbose_eval=200,
                          early_stopping_rounds=200)

    pred = lgb_model.predict(x_test)
    auc = roc_auc_score(y_test, pred)
    print(auc)
    if auc < AUC_THRESHOLD:
        break

    # Rank the current features by LightGBM importance, highest first.
    importance = pd.DataFrame(
        lgb_model.feature_importance(),
        index=x_train.columns,
        columns=['importance'],
    ).sort_values(by='importance', ascending=False)
    list_drop = list(importance.index[:N_DROP_PER_ROUND])

    # Guard: without this, the loop could drop every remaining column and the
    # next round would try to train on an empty feature matrix.
    if len(list_drop) >= x_train.shape[1]:
        print("Stopping: no features would remain after the next drop.")
        break

    x_train = x_train.drop(list_drop, axis=1)
    x_test = x_test.drop(list_drop, axis=1)

Training until validation scores don't improve for 200 rounds.
[200]	training's auc: 0.917157	valid_1's auc: 0.915705
[400]	training's auc: 0.9277	valid_1's auc: 0.924028
Did not meet early stopping. Best iteration is:
[500]	training's auc: 0.931379	valid_1's auc: 0.926687
0.9266870666292648
Training until validation scores don't improve for 200 rounds.
[200]	training's auc: 0.893405	valid_1's auc: 0.891475
[400]	training's auc: 0.90523	valid_1's auc: 0.900799
Did not meet early stopping. Best iteration is:
[500]	training's auc: 0.909222	valid_1's auc: 0.90366
0.9036595953754163
Training until validation scores don't improve for 200 rounds.
[200]	training's auc: 0.850551	valid_1's auc: 0.849007
[400]	training's auc: 0.863524	valid_1's auc: 0.858964
Did not meet early stopping. Best iteration is:
[500]	training's auc: 0.867899	valid_1's auc: 0.861932
0.8619323890566246
Training until validation scores don't improve for 200 rounds.
[200]	training's auc: 0.841429	valid_1's auc: 0.839625
[

In [32]:
# Allow up to 500 columns in rendered frames, then preview the pruned features.
pd.options.display.max_columns = 500
x_train.head(5)

Unnamed: 0_level_0,ProductCD,addr2,C3,M1,M2,M3,M7,M8,M9,V1,V6,V8,V9,V10,V11,V14,V15,V16,V17,V18,V21,V22,V27,V28,V29,V30,V31,V32,V33,V34,V39,V40,V41,V42,V43,V46,V48,V49,V50,V51,V57,V58,V59,V60,V63,V64,V65,V68,V69,V70,V71,V72,V79,V80,V81,V84,V85,V88,V89,V90,V91,V92,V93,V94,V98,V104,V106,V107,V108,V109,V110,V111,V112,V113,V114,V115,V116,V117,V118,V119,V120,V121,V122,V123,V125,V138,V139,V140,V141,V142,V143,V144,V145,V146,V147,V148,V149,V150,V151,V152,V153,V154,V155,V156,V157,V158,V159,V161,V162,V163,V164,V165,V166,V167,V168,V169,V170,V171,V172,V173,V174,V175,V176,V177,V178,V179,V180,V181,V182,V183,V184,V185,V186,V187,V188,V189,V190,V191,V192,V193,V194,V195,V196,V197,V198,V199,V200,V201,V202,V204,V211,V212,V213,V214,V215,V216,V217,V218,V219,V220,V223,V224,V225,V226,V227,V228,V229,V230,V231,V232,V233,V235,V236,V237,V238,V239,V240,V241,V242,V243,V244,V245,V246,V247,V248,V249,V250,V251,V252,V253,V254,V255,V256,V257,V258,V259,V260,V261,V262,V269,V273,V274,V275,V276,V278,V284,V286,V297,V299,V302,V303,V304,V305,V322,V323,V324,V325,V326,V327,V328,V329,V330,V331,V332,V333,V334,V335,V336,V337,V338,V339,id_03,id_04,id_07,id_08,id_09,id_10,id_11,id_12,id_15,id_16,id_21,id_22,id_23,id_24,id_25,id_26,id_27,id_28,id_29,id_32,id_35,id_36,id_37,id_38,DeviceType
TransactionID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1
3040695,0,,0.0,2,2,2,2,2,2,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,2.0,2.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,11.077,11.077,0.0,0.0,0.0,0.0,0.0,0.0,,,,0.0,,,,,0.0,,,,,,,,,,0.0,0.0,,,,,,0.0,,,,,0.0,0.0,,,,0.0,0.0,,,0.0,,,,,,,,,,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,0.0,0.0,,,0.0,0.0,100.0,1,0,0,,,3,,,,2,0,0,,0,0,1,1,0
3355231,0,,0.0,2,2,2,2,2,2,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,0.0,-5.0,,,0.0,-5.0,100.0,1,0,0,,,3,,,,2,0,0,,0,0,1,0,0
3697333,4,87.0,0.0,1,1,1,0,1,1,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,2,3,2,,,3,,,,2,2,2,,2,2,2,2,2
3313651,0,,0.0,2,2,2,2,2,2,,,,,,,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,1.0,,,,,,,,,,,,,,,,,,,0.0,0.0,,,0.0,0.0,100.0,1,0,0,,,3,,,,2,0,0,,0,0,1,0,0
4149328,2,87.0,0.0,2,2,2,2,2,2,,,,,,,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,100.0,1,1,1,,,3,,,,2,1,1,32.0,1,0,0,1,1


In [33]:
# Persist the pruned training frame (surviving columns) to the working directory.
x_train.to_pickle(path='adversarial_val_col.pkl')

In [36]:
# Ascending copy of the hold-out predictions; `pred` itself is left untouched.
y_sorted = np.array(pred, copy=True)
y_sorted.sort(axis=0)

In [38]:
# Number of hold-out predictions.
y_sorted.shape[0]

329170

In [35]:
# Fit a random forest on the pruned features as a second train-vs-test classifier.
# `x_train` still contains NaNs (imputation is disabled earlier in the
# notebook), and scikit-learn forests in older releases refuse NaN input, so the
# gaps are filled with the -999 sentinel for this fit only; `x_train` itself is
# not modified.
clf = RandomForestClassifier(random_state=0)
clf = clf.fit(x_train.fillna(-999), y_train)

array([3.85443614e-06, 4.59567527e-06, 4.70315625e-06, ...,
       9.99984355e-01, 9.99984422e-01, 9.99991540e-01])

In [49]:
# Scratch checks, currently disabled.
# NOTE(review): `pred` comes from `lgb_model.predict`, which yields scores
# rather than 0/1 labels, so the accuracy_score call would presumably need a
# threshold first (e.g. `pred > 0.5`) — confirm before re-enabling.
# y_test
# len(y_test)
# accuracy_score(y_test,pred)


3.854436135102454e-06

In [None]:
# Train the train-vs-test classifier once on the current (pruned) feature set
# and keep its hold-out predictions.
#
# This used to sit inside `while True:` with the feature-dropping step
# commented out. Nothing changed between iterations, so whenever the AUC was
# >= 0.75 the loop retrained the same model forever. A single pass yields the
# same model, `pred` and `auc` without the risk of hanging the kernel.
trn_data = lgb.Dataset(x_train, label=y_train)
val_data = lgb.Dataset(x_test, label=y_test)

lgb_model = lgb.train(params,
                      trn_data,
                      500,
                      valid_sets=[trn_data, val_data],
                      verbose_eval=200,
                      early_stopping_rounds=200)

pred = lgb_model.predict(x_test)
auc = roc_auc_score(y_test, pred)
print(auc)

# Ascending copy of the hold-out predictions for inspection.
y_sorted = np.sort(pred, axis=0)

# Time-based train/test split

In [69]:
# Show how TimeSeriesSplit partitions the training rows: each fold trains on
# all rows before the test window and tests on the next contiguous window.
# NOTE(review): this is only a time-based split if `df_train` rows are in
# chronological order — confirm.
#
# The toy `X` / `y` arrays that used to be defined here were never used, and
# `y` overwrote the notebook's label Series of the same name; they are removed,
# together with a printed repr that had been pasted in as a statement.
# `X_train` / `X_test` keep the last fold's slices after the loop.
tscv = TimeSeriesSplit(n_splits=5)
print(tscv)
for train_index, test_index in tscv.split(df_train.index):
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = df_train.iloc[train_index], df_train.iloc[test_index]
    print(X_train.shape)
    print(X_test.shape)

TimeSeriesSplit(max_train_size=None, n_splits=5)
TRAIN: [    0     1     2 ... 98422 98423 98424] TEST: [ 98425  98426  98427 ... 196845 196846 196847]
(98425, 432)
(98423, 432)
TRAIN: [     0      1      2 ... 196845 196846 196847] TEST: [196848 196849 196850 ... 295268 295269 295270]
(196848, 432)
(98423, 432)
TRAIN: [     0      1      2 ... 295268 295269 295270] TEST: [295271 295272 295273 ... 393691 393692 393693]
(295271, 432)
(98423, 432)
TRAIN: [     0      1      2 ... 393691 393692 393693] TEST: [393694 393695 393696 ... 492114 492115 492116]
(393694, 432)
(98423, 432)
TRAIN: [     0      1      2 ... 492114 492115 492116] TEST: [492117 492118 492119 ... 590537 590538 590539]
(492117, 432)
(98423, 432)


In [59]:
# Show the row indices produced by a plain (unshuffled) 5-fold split of the
# training frame, and allocate the out-of-fold / test prediction buffers.
# NOTE(review): `df_train['target']` is the constant origin flag (1 for every
# train row) set earlier in the notebook, and `isFraud` was dropped; confirm
# which label this cross-validation is meant to use.
target = df_train['target']
splits = 5
folds = KFold(n_splits=splits)

# One slot per training row / per test row. These were previously sized from
# `X_train` / `X_test`, which are leftovers of the last TimeSeriesSplit fold and
# are shorter than the index ranges KFold yields over `df_train`.
oof = np.zeros(len(df_train))
predictions = np.zeros(len(df_test))

# KFold only needs the number of samples, so the frame is passed directly
# instead of materialising `df_train.values`.
for fold_, (trn_idx, val_idx) in enumerate(folds.split(df_train, target)):
    print(trn_idx)
    print(val_idx)
    print(fold_)

[118108 118109 118110 ... 590537 590538 590539]
[     0      1      2 ... 118105 118106 118107]
0
[     0      1      2 ... 590537 590538 590539]
[118108 118109 118110 ... 236213 236214 236215]
1
[     0      1      2 ... 590537 590538 590539]
[236216 236217 236218 ... 354321 354322 354323]
2
[     0      1      2 ... 590537 590538 590539]
[354324 354325 354326 ... 472429 472430 472431]
3
[     0      1      2 ... 472429 472430 472431]
[472432 472433 472434 ... 590537 590538 590539]
4
