In [1]:
import os,sys
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from sklearn.externals import joblib
import matplotlib.pyplot as plt
sys.path.append('../LIB/')
from env import ENV
from sklearn.preprocessing import normalize
from tqdm import tqdm

import os
from keras.layers import Dense,Input,LSTM,Bidirectional,Activation,Conv1D,GRU,CuDNNGRU,Flatten,BatchNormalization
from keras.callbacks import Callback
from keras.layers import Dropout,Embedding,GlobalMaxPooling1D, MaxPooling1D, Add, Flatten
from keras.preprocessing import text, sequence
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras import initializers, regularizers, constraints, optimizers, layers, callbacks
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.models import Model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
import pickle
from sklearn.preprocessing.data import QuantileTransformer
from sklearn.utils import shuffle
import gc

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def scan_nan_portion(df):
    portions = []
    columns = []
    for col in df.columns:
        columns.append(col)
        portions.append(np.sum(df[col].isnull())/len(df))
    return pd.Series(data=portions, index=columns)

In [3]:
X_Train = pd.read_pickle(ENV.application_train_cleaned.value)
print('Train shape: {}'.format(X_Train.shape))

X_Test = pd.read_pickle(ENV.application_test_cleaned.value)
print('Test shape: {}'.format(X_Test.shape))

X_bu = pd.read_pickle(ENV.bureau_cleaned.value)
print('Installment shape: {}'.format(X_bu.shape))

X_bu_fe = pd.read_pickle(ENV.bureau_cleaned_rnnALL.value)
print('Installment shape: {}'.format(X_bu_fe.shape))

Train shape: (307511, 122)
Test shape: (48744, 121)
Installment shape: (1716428, 17)
Installment shape: (1716428, 124)


In [4]:
def get_embeddings_index(sorted_df,nor_ebd):
    embeddings_index={}
    words_values = sorted_df['words'].values
    for index in range(len(words_values)):
        embeddings_index  [words_values[index]] = nor_ebd[index,:]
    return embeddings_index

def create_document(sorted_df):
    #Create document
    ids = sorted_df.SK_ID_CURR.values
    words = sorted_df.words.values
    document_dicts = {}

    id_list = []
    document_list = []

    for index in range(len(ids)) :
        if document_dicts.get(ids[index]) is None:
            document_dicts[ids[index]] = []
        document_dicts[ids[index]].append(words[index])

    for key in document_dicts :
        document_dicts[key] = ' '.join(document_dicts[key])
        id_list.append(key)
        document_list.append(document_dicts[key])


    df_doc = pd.DataFrame({'SK_ID_CURR':id_list, 'text':document_list})  
    df_doc_mapping  = df_doc.set_index('SK_ID_CURR').text

    train = X_Train[['SK_ID_CURR','TARGET']].copy()
    test = X_Test[['SK_ID_CURR']].copy()
    train['text'] = train.SK_ID_CURR.map(df_doc_mapping).fillna('notfound')
    test['text'] = test.SK_ID_CURR.map(df_doc_mapping).fillna('notfound')
    return train,test


def get_train_ebdMat(train,test,embeddings_index):
    X_train = train["text"].str.lower()
    X_test = test["text"].str.lower()
    y_train = train["TARGET"].values
    tok=text.Tokenizer(num_words=max_features,lower=True,filters='')
    tok.fit_on_texts(list(X_train)+list(X_test))
    X_train=tok.texts_to_sequences(X_train)
    X_test=tok.texts_to_sequences(X_test)
    x_train=sequence.pad_sequences(X_train,maxlen=maxlen)
    x_test=sequence.pad_sequences(X_test,maxlen=maxlen)
    print('...get ebd mat')
    word_index = tok.word_index
    #prepare embedding matrix
    num_words = min(max_features, len(word_index) + 1)
    print('num of words: {}'.format(num_words))
    embedding_matrix = np.zeros((num_words, embed_size))
    print(embedding_matrix.shape)
    for word, i in word_index.items():
        if i >= max_features:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector
    return x_train,x_test,y_train,embedding_matrix,num_words

class_ratio =  sum(X_Train.TARGET ==0)/sum(X_Train.TARGET ==1)
class_ratio =  1
def get_rnn_model(num_words,embed_size,embedding_matrix):
    sequence_input = Input(shape=(maxlen, ))
    
    x = Embedding(num_words, embed_size, weights=[embedding_matrix],trainable = False)(sequence_input)
    x = BatchNormalization()(x)
    x = SpatialDropout1D(0.2)(x)
#     x = Bidirectional(GRU(16, return_sequences=True,dropout=0.1,recurrent_dropout=0.1))(x)
    x = Bidirectional(CuDNNGRU(8, return_sequences=True))(x)
    x = Conv1D(32, kernel_size = 3, padding = "valid", kernel_initializer = "glorot_uniform")(x)
    x = BatchNormalization()(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    x = concatenate([avg_pool, max_pool]) 
#     x = BatchNormalization()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.2)(x)
    preds = Dense(1, activation="sigmoid")(x)
    model = Model(sequence_input, preds)
#     Adam,RMSprop,Adagrad,Adadelta,Adamax,Nadam
#     model.compile(loss='binary_crossentropy',optimizer=Adam(lr=1e-4),metrics=['accuracy'])
    model.compile(loss='binary_crossentropy',optimizer=Adam(),metrics=['accuracy'])
    return model

def train_each_epoch(x,y,batch_size,model):
    x,y = shuffle(x,y)
    model.fit(x, y, 
              batch_size=batch_size, 
              epochs=1,
              verbose=1,
              class_weight={0:1,1:class_ratio})
    return model

def load_model(model,filepath):
    model.load_weights(filepath)
    return model

def save_model(model, filepath):
    model.save_weights(filepath)

def train_each_fold(x,y,x_val,y_val,model,filepath,reportpath,predspath,
                    batch_size=512,total_epoch=40,patience=5,saving=True):
    ROC_AUC_SCORE = []
    for epoch in range(total_epoch):  
        model = train_each_epoch(x,y,batch_size,model)
        y_pred = model.predict(x_val,batch_size=3000,verbose=1)
        score = roc_auc_score(y_val,y_pred)
        if len(ROC_AUC_SCORE) == 0:
            if saving:
                save_model(model,filepath)
            best_score = 0 
            if saving:
                print('saving preds...')
                pickle.dump(y_pred,open(predspath,'wb'))
        else:
            best_score = max(ROC_AUC_SCORE)
            if score >= best_score:
                if saving:
                    print('saving model to... {}'.format(filepath))
                    save_model(model,filepath)
                    print('saving preds...')
                    pickle.dump(y_pred,open(predspath,'wb'))
        ROC_AUC_SCORE.append(score)
        if saving:
            print('saving report to... {}'.format(reportpath))
            pickle.dump(ROC_AUC_SCORE,open(reportpath,'wb'))
        print('======= current {} / {}'.format(epoch,total_epoch))
        print('previous best roc is {}'.format(best_score))
        print('current roc is {}'.format(score))
        try:
            best_round = ROC_AUC_SCORE.index(best_score)
        except ValueError:
            best_round = -1
        if len(ROC_AUC_SCORE) > patience + best_round:
            print('reach patience! end loop')
            break
            
def train_5_folds(model_file,report_file,pred_file,pred_test_file,batch_size=512,total_epoch=400,patience=30):
    train_fold_index = pickle.load(open(ENV.train_fold_index.value,'rb'))
    val_fold_index = pickle.load(open(ENV.val_fold_index.value,'rb'))

    for fold in range(0,len(train_fold_index)):
        print('!!!!!!!! Begin fold: {}'.format(fold))
        train_index = train_fold_index[fold]
        val_index = val_fold_index[fold]
        X_tra = x_train[train_index]
        y_tra = y_train[train_index]
        X_val = x_train[val_index]
        y_val = y_train[val_index]
        print('preparing train/val done!')
        print('before evaluating: {}'.format(model_file))
        model_file_evl = model_file.format(fold)
        report_file_evl = report_file.format(fold)
        pred_file_evl = pred_file.format(fold)
        pred_test_file_evl = pred_test_file.format(fold)
        model = get_rnn_model(num_words,embed_size,embedding_matrix)
        train_each_fold(X_tra, y_tra, X_val, y_val,
                        model,
                        filepath=model_file_evl,reportpath=report_file_evl,predspath=pred_file_evl,
                        batch_size=batch_size,total_epoch=total_epoch,patience=patience)
        gc.collect()
        #### predict test
        model = load_model(model,model_file_evl)
        test_preds = model.predict(x_test,batch_size=1500,verbose=1)
        pickle.dump(test_preds,open(pred_test_file_evl,'wb'))
        print('\n')


# bureau V1

In [5]:
qt = QuantileTransformer(n_quantiles=10000,output_distribution='normal')
trans_col = trans_col = [
             'AMT_ANNUITY',
             'AMT_CREDIT_MAX_OVERDUE',
             'DAYS_ENDDATE_FACT',
             'AMT_CREDIT_SUM_LIMIT',
             'AMT_CREDIT_SUM_DEBT',
             'DAYS_CREDIT_ENDDATE',
             'AMT_CREDIT_SUM',
             'CREDIT_DAY_OVERDUE',
             'CNT_CREDIT_PROLONG',
             'AMT_CREDIT_SUM_OVERDUE',
             'DAYS_CREDIT_UPDATE']
for col in trans_col:
    print(col)
    X_bu[col] = qt.fit_transform(X_bu[col].values.reshape(-1,1))

AMT_ANNUITY
AMT_CREDIT_MAX_OVERDUE
DAYS_ENDDATE_FACT
AMT_CREDIT_SUM_LIMIT
AMT_CREDIT_SUM_DEBT
DAYS_CREDIT_ENDDATE
AMT_CREDIT_SUM
CREDIT_DAY_OVERDUE
CNT_CREDIT_PROLONG
AMT_CREDIT_SUM_OVERDUE
DAYS_CREDIT_UPDATE


In [6]:

print('start')
label_mapping = X_Train.set_index('SK_ID_CURR').TARGET
test_mapping = pd.Series(index=X_Test.SK_ID_CURR, data=1)

#previous application
#10001358
max_features = 1730000
#295
maxlen = 13

sorted_df = X_bu.sort_values(['SK_ID_CURR','DAYS_CREDIT'])
col = 'DAYS_CREDIT'
sorted_df[col] = qt.fit_transform(sorted_df[col].values.reshape(-1,1))
sorted_df['words'] = sorted_df.index.astype(str)
feature = list(sorted_df.columns)
feature.remove('SK_ID_BUREAU')
feature.remove('SK_ID_CURR')
feature.remove('words')
ebd = sorted_df[feature].values
#normalize
print('start normalize')
# nor_ebd = normalize(ebd, norm='max',axis=0)
nor_ebd = ebd
print('get embedding')
embed_size = len(feature)
print('ebd size is {}'.format(embed_size))
embeddings_index = get_embeddings_index(sorted_df,nor_ebd)
print('create document')
train,test = create_document(sorted_df)
print('get embedding Mat')
x_train,x_test,y_train,embedding_matrix,num_words = get_train_ebdMat(train,test,embeddings_index)
model_file = ENV.bureau_rnn.value
report_file = ENV.bureau_report.value
pred_file = ENV.bureau_preds.value
pred_test_file = ENV.bureau_preds_test.value
train_5_folds(model_file,report_file,pred_file,pred_test_file,batch_size=2000,total_epoch=500,patience=35)

start
start normalize
get embedding
ebd size is 15
create document
get embedding Mat
...get ebd mat
num of words: 1716430
(1716430, 15)
!!!!!!!! Begin fold: 0
preparing train/val done!
before evaluating: ../LIB/../../data/rnn/bureau/fold_{}.hdf5
Epoch 1/1
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0
current roc is 0.5959567376944752
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.5959567376944752
current roc is 0.6178756721625701
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6178756721625701
current roc is 0.6210965629322069
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.p

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6471071848224688
current roc is 0.6464716181686189
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6471071848224688
current roc is 0.6462973813636651
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6471071848224688
current roc is 0.648987699496703
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.648987699496703
current roc is 0.6487656437409254
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.648987699496703
current roc is 0.6468158898657683
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.648987699496703
current roc is 0.6488933779433117
Epoch 1/1
saving model

Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6567109137824174
current roc is 0.6579516419561438
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6579516419561438
current roc is 0.655801950204016
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6579516419561438
current roc is 0.656337061910692
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6579516419561438
current roc is 0.6573032561506102
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6579516419561438
current roc is 0.656581827363975
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6579516419561438
current roc is 0.6572120497156402
Epoch 1/1
saving repo

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6591512064493174
current roc is 0.658979982539555
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6591512064493174
current roc is 0.6582576767852121
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6591512064493174
current roc is 0.659218752690085
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.659218752690085
current roc is 0.6585689788006955
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.659218752690085
current roc is 0.6582671656116751
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.659218752690085
current roc is 0.6575427759381581
Epoch 1/1
saving model 

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6609344368898887
current roc is 0.6594941671783806
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6609344368898887
current roc is 0.6586800344761292
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6609344368898887
current roc is 0.6584937514705798
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6609344368898887
current roc is 0.6592928699117939
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6609344368898887
current roc is 0.6592370327143029
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6609344368898887
current roc is 0.6607360072539039
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6626406430235694
current roc is 0.6603456023925257
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6626406430235694
current roc is 0.6617363996476349
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6626406430235694
current roc is 0.6618728246179362
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6626406430235694
current roc is 0.6612402959667376
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6626406430235694
current roc is 0.6602264316273652
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6626406430235694
current roc is 0.6604535393627583
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6629937130920338
current roc is 0.6619121070323843
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6629937130920338
current roc is 0.6623495543067185
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6629937130920338
current roc is 0.6615094066892012
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6629937130920338
current roc is 0.661414638581701
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6629937130920338
current roc is 0.6624387216572646
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6629937130920338
current roc is 0.6627456962226903
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc 

Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6418957604361557
current roc is 0.6453261015040871
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6453261015040871
current roc is 0.6461714182081124
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6461714182081124
current roc is 0.6470663300074049
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6470663300074049
current roc is 0.6472064752147925
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best r

saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.654944998027275
current roc is 0.6530481280378237
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.654944998027275
current roc is 0.6529422348187133
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.654944998027275
current roc is 0.6537463038580682
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.654944998027275
current roc is 0.6558279670085017
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6558279670085017
current roc is 0.6546984195958763
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6558279670085017
current roc is 0.6547674478997698
Epoch 1/1
saving report to... ..

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6584971014905974
current roc is 0.6581235748748081
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6584971014905974
current roc is 0.6567540475422824
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6584971014905974
current roc is 0.6578593748641277
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6584971014905974
current roc is 0.6577102150587982
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6584971014905974
current roc is 0.6569060049213886
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6584971014905974
current roc is 0.6591235542082979
Epoch 1/1
saving r

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660324317815641
current roc is 0.6570963113582136
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660324317815641
current roc is 0.6570741833509368
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660324317815641
current roc is 0.6583963470278019
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660324317815641
current roc is 0.6594396904967834
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660324317815641
current roc is 0.6570471447829646
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660324317815641
current roc is 0.6585723659801206
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603334944150532
current roc is 0.6587147548207735
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603334944150532
current roc is 0.6598688863919991
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603334944150532
current roc is 0.657966609385734
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603334944150532
current roc is 0.6569723610719984
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603334944150532
current roc is 0.6571591278637957
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603334944150532
current roc is 0.6603819973068598
Epoch 1/1
saving re

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603819973068598
current roc is 0.6586440228885154
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603819973068598
current roc is 0.6589561474631727
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603819973068598
current roc is 0.6560624650076824
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603819973068598
current roc is 0.659520574521178
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603819973068598
current roc is 0.6588799876106849
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6603819973068598
current roc is 0.6585378823123212
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc 

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660534459851841
current roc is 0.658669221955922
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660534459851841
current roc is 0.6592467606813108
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660534459851841
current roc is 0.6593772223814598
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660534459851841
current roc is 0.6579104192628709
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660534459851841
current roc is 0.6600040025860032
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.660534459851841
current roc is 0.6589516166478593
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6463262280689283
current roc is 0.6455012289962344
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6463262280689283
current roc is 0.6480064328362661
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6480064328362661
current roc is 0.6497815552172509
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6497815552172509
current roc is 0.6463773168142029
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6497815552172509
current roc is 0.6469118605039964
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best ro

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.658999685028121
current roc is 0.6565922509595808
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.658999685028121
current roc is 0.6587348866497132
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.658999685028121
current roc is 0.6587099657431721
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.658999685028121
current roc is 0.6595755406634131
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6595755406634131
current roc is 0.6602846388965253
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6607496452752841
current roc is 0.6600476508308336
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6607496452752841
current roc is 0.6584938136838246
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6607496452752841
current roc is 0.6595305712866486
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6607496452752841
current roc is 0.6611463896548802
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6611463896548802
current roc is 0.660255908017938
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6611463896548802
current roc is 0.6607055432524913
Epoch 1/1
saving re

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6614791467246204
current roc is 0.6600368612312256
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6614791467246204
current roc is 0.6599575967848547
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6614791467246204
current roc is 0.6600293831855335
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6614791467246204
current roc is 0.659892479608987
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6614791467246204
current roc is 0.6615674057558362
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6615674057558362
current roc is 0.6613826132429407
Epoch 1/1
saving re

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6624986211022956
current roc is 0.661236466759349
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6624986211022956
current roc is 0.6601711113076877
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6624986211022956
current roc is 0.6586817261877953
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6624986211022956
current roc is 0.6613283211412808
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6624986211022956
current roc is 0.6616768785978872
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6624986211022956
current roc is 0.6614304933106396
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc 

saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6326253801523343
current roc is 0.6330897264518167
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6330897264518167
current roc is 0.6350153541625273
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6350153541625273
current roc is 0.6297435318711513
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6350153541625273
current roc is 0.6361969728684851
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6361969728684851
current roc is 0.6360676782386022
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
s

Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6487502739592677
current roc is 0.6489788286354858
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6489788286354858
current roc is 0.6493114596340299
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6493114596340299
current roc is 0.6498859160291054
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6498859160291054
current roc is 0.6522016262863104
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving

Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6576198687391119
current roc is 0.657661109668914
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.657661109668914
current roc is 0.6578456983699554
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6578456983699554
current roc is 0.6582577155622347
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6582577155622347
current roc is 0.6587246687382382
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving r

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6616971937099578
current roc is 0.6608929150091472
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6616971937099578
current roc is 0.6614851003130919
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6616971937099578
current roc is 0.6613892277738844
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6616971937099578
current roc is 0.6626641282564628
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6626641282564628
current roc is 0.66139985759937
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6626641282564628
current roc is 0.6605928090922553
Epoch 1/1
saving rep

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6646980129602943
current roc is 0.6637156949776142
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6646980129602943
current roc is 0.6637578042694443
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6646980129602943
current roc is 0.6638878043315725
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6646980129602943
current roc is 0.6648808094337275
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6648808094337275
current roc is 0.6644111938776502
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6648808094337275
current roc is 0.6634865512475668
Epoch 1/1
saving r

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6653064751285628
current roc is 0.6650224455470315
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6653064751285628
current roc is 0.6646328231428902
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6653064751285628
current roc is 0.6651947733343245
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6653064751285628
current roc is 0.6648763691495428
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6653064751285628
current roc is 0.6643818987446547
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6653064751285628
current roc is 0.6651352735262505
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.665449035107447
current roc is 0.6648886658720349
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.665449035107447
current roc is 0.6644662623537254
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.665449035107447
current roc is 0.6649235901394968
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.665449035107447
current roc is 0.665221526046537
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.665449035107447
current roc is 0.6648367610984411
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.665449035107447
current roc is 0.6636933395790944
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6658122861624935
current roc is 0.6647201839437856
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6658122861624935
current roc is 0.6641574513313074
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6658122861624935
current roc is 0.6645192449704964
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6658122861624935
current roc is 0.6648105401460686
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6658122861624935
current roc is 0.6649315772797175
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6658122861624935
current roc is 0.664176490840186
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc 

saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6132176306988204
current roc is 0.6204509204306089
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6204509204306089
current roc is 0.6277977434963729
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6277977434963729
current roc is 0.629658728949149
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.629658728949149
current roc is 0.6319378922305553
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6319378922305553
current roc is 0.6254490505634135
Epoch 1/1
sav

saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6543168512926364
current roc is 0.6545786041308668
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6545786041308668
current roc is 0.6550632395262196
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6550632395262196
current roc is 0.6549246847032004
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6550632395262196
current roc is 0.655177986143845
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.655

saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6618133331678493
current roc is 0.6626886549393526
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6626886549393526
current roc is 0.662163860036003
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6626886549393526
current roc is 0.6613895138414726
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6626886549393526
current roc is 0.6622029093261739
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6626886549393526
current roc is 0.661075804978804
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6626886549393526
current roc is 0.6620811994368108
Epoch 1/1
saving report to... 

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6642166744938831
current roc is 0.663129154655701
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6642166744938831
current roc is 0.6626569432125968
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6642166744938831
current roc is 0.6629270244926927
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6642166744938831
current roc is 0.6630518299731272
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6642166744938831
current roc is 0.6632252318937648
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6642166744938831
current roc is 0.662530947954753
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc i

In [7]:
a = train.text.apply(lambda x: len(x.split(' ')))
a.describe()

count    307511.000000
mean          4.908263
std           4.355940
min           1.000000
25%           1.000000
50%           4.000000
75%           7.000000
max         116.000000
Name: text, dtype: float64

In [8]:
aaaa

NameError: name 'aaaa' is not defined

# Bureau V2

In [5]:
qt = QuantileTransformer(n_quantiles=10000,output_distribution='normal')
trans_col = [
             'AMT_ANNUITY',
             'AMT_CREDIT_MAX_OVERDUE',
             'DAYS_ENDDATE_FACT',
             'AMT_CREDIT_SUM_LIMIT',
             'AMT_CREDIT_SUM_DEBT',
             'DAYS_CREDIT_ENDDATE',
             'AMT_CREDIT_SUM',
             'CREDIT_DAY_OVERDUE',
             'CNT_CREDIT_PROLONG',
             'BUREAU_LENGTH',
             'AMT_CREDIT_SUM_OVERDUE',
             'DAYS_CREDIT_UPDATE',
    'AMT_ANNUITY_squre',
 'AMT_CREDIT_MAX_OVERDUE_squre',
 'DAYS_ENDDATE_FACT_squre',
 'AMT_CREDIT_SUM_LIMIT_squre',
 'AMT_CREDIT_SUM_DEBT_squre',
 'DAYS_CREDIT_ENDDATE_squre',
 'AMT_CREDIT_SUM_squre',
 'CREDIT_DAY_OVERDUE_squre',
 'CNT_CREDIT_PROLONG_squre',
 'BUREAU_LENGTH_squre',
 'AMT_CREDIT_SUM_OVERDUE_squre',
 'DAYS_CREDIT_UPDATE_squre',
    'AMT_ANNUITY_AMT_CREDIT_MAX_OVERDUE',
 'AMT_ANNUITY_DAYS_ENDDATE_FACT',
 'AMT_ANNUITY_AMT_CREDIT_SUM_LIMIT',
 'AMT_ANNUITY_AMT_CREDIT_SUM_DEBT',
 'AMT_ANNUITY_DAYS_CREDIT_ENDDATE',
 'AMT_ANNUITY_AMT_CREDIT_SUM',
 'AMT_ANNUITY_CREDIT_DAY_OVERDUE',
 'AMT_ANNUITY_CNT_CREDIT_PROLONG',
 'AMT_ANNUITY_BUREAU_LENGTH',
 'AMT_ANNUITY_AMT_CREDIT_SUM_OVERDUE',
 'AMT_ANNUITY_DAYS_CREDIT_UPDATE',
 'AMT_CREDIT_MAX_OVERDUE_DAYS_ENDDATE_FACT',
 'AMT_CREDIT_MAX_OVERDUE_AMT_CREDIT_SUM_LIMIT',
 'AMT_CREDIT_MAX_OVERDUE_AMT_CREDIT_SUM_DEBT',
 'AMT_CREDIT_MAX_OVERDUE_DAYS_CREDIT_ENDDATE',
 'AMT_CREDIT_MAX_OVERDUE_AMT_CREDIT_SUM',
 'AMT_CREDIT_MAX_OVERDUE_CREDIT_DAY_OVERDUE',
 'AMT_CREDIT_MAX_OVERDUE_CNT_CREDIT_PROLONG',
 'AMT_CREDIT_MAX_OVERDUE_BUREAU_LENGTH',
 'AMT_CREDIT_MAX_OVERDUE_AMT_CREDIT_SUM_OVERDUE',
 'AMT_CREDIT_MAX_OVERDUE_DAYS_CREDIT_UPDATE',
 'DAYS_ENDDATE_FACT_AMT_CREDIT_SUM_LIMIT',
 'DAYS_ENDDATE_FACT_AMT_CREDIT_SUM_DEBT',
 'DAYS_ENDDATE_FACT_DAYS_CREDIT_ENDDATE',
 'DAYS_ENDDATE_FACT_AMT_CREDIT_SUM',
 'DAYS_ENDDATE_FACT_CREDIT_DAY_OVERDUE',
 'DAYS_ENDDATE_FACT_CNT_CREDIT_PROLONG',
 'DAYS_ENDDATE_FACT_BUREAU_LENGTH',
 'DAYS_ENDDATE_FACT_AMT_CREDIT_SUM_OVERDUE',
 'DAYS_ENDDATE_FACT_DAYS_CREDIT_UPDATE',
 'AMT_CREDIT_SUM_LIMIT_AMT_CREDIT_SUM_DEBT',
 'AMT_CREDIT_SUM_LIMIT_DAYS_CREDIT_ENDDATE',
 'AMT_CREDIT_SUM_LIMIT_AMT_CREDIT_SUM',
 'AMT_CREDIT_SUM_LIMIT_CREDIT_DAY_OVERDUE',
 'AMT_CREDIT_SUM_LIMIT_CNT_CREDIT_PROLONG',
 'AMT_CREDIT_SUM_LIMIT_BUREAU_LENGTH',
 'AMT_CREDIT_SUM_LIMIT_AMT_CREDIT_SUM_OVERDUE',
 'AMT_CREDIT_SUM_LIMIT_DAYS_CREDIT_UPDATE',
 'AMT_CREDIT_SUM_DEBT_DAYS_CREDIT_ENDDATE',
 'AMT_CREDIT_SUM_DEBT_AMT_CREDIT_SUM',
 'AMT_CREDIT_SUM_DEBT_CREDIT_DAY_OVERDUE',
 'AMT_CREDIT_SUM_DEBT_CNT_CREDIT_PROLONG',
 'AMT_CREDIT_SUM_DEBT_BUREAU_LENGTH',
 'AMT_CREDIT_SUM_DEBT_AMT_CREDIT_SUM_OVERDUE',
 'AMT_CREDIT_SUM_DEBT_DAYS_CREDIT_UPDATE',
 'DAYS_CREDIT_ENDDATE_AMT_CREDIT_SUM',
 'DAYS_CREDIT_ENDDATE_CREDIT_DAY_OVERDUE',
 'DAYS_CREDIT_ENDDATE_CNT_CREDIT_PROLONG',
 'DAYS_CREDIT_ENDDATE_BUREAU_LENGTH',
 'DAYS_CREDIT_ENDDATE_AMT_CREDIT_SUM_OVERDUE',
 'DAYS_CREDIT_ENDDATE_DAYS_CREDIT_UPDATE',
 'AMT_CREDIT_SUM_CREDIT_DAY_OVERDUE',
 'AMT_CREDIT_SUM_CNT_CREDIT_PROLONG',
 'AMT_CREDIT_SUM_BUREAU_LENGTH',
 'AMT_CREDIT_SUM_AMT_CREDIT_SUM_OVERDUE',
 'AMT_CREDIT_SUM_DAYS_CREDIT_UPDATE',
 'CREDIT_DAY_OVERDUE_CNT_CREDIT_PROLONG',
 'CREDIT_DAY_OVERDUE_BUREAU_LENGTH',
 'CREDIT_DAY_OVERDUE_AMT_CREDIT_SUM_OVERDUE',
 'CREDIT_DAY_OVERDUE_DAYS_CREDIT_UPDATE',
 'CNT_CREDIT_PROLONG_BUREAU_LENGTH',
 'CNT_CREDIT_PROLONG_AMT_CREDIT_SUM_OVERDUE',
 'CNT_CREDIT_PROLONG_DAYS_CREDIT_UPDATE',
 'BUREAU_LENGTH_AMT_CREDIT_SUM_OVERDUE',
 'BUREAU_LENGTH_DAYS_CREDIT_UPDATE',
 'AMT_CREDIT_SUM_OVERDUE_DAYS_CREDIT_UPDATE']
for col in trans_col:
    print(col)
    X_bu_fe[col] = qt.fit_transform(X_bu_fe[col].values.reshape(-1,1))

AMT_ANNUITY
AMT_CREDIT_MAX_OVERDUE
DAYS_ENDDATE_FACT
AMT_CREDIT_SUM_LIMIT
AMT_CREDIT_SUM_DEBT
DAYS_CREDIT_ENDDATE
AMT_CREDIT_SUM
CREDIT_DAY_OVERDUE
CNT_CREDIT_PROLONG
BUREAU_LENGTH
AMT_CREDIT_SUM_OVERDUE
DAYS_CREDIT_UPDATE
AMT_ANNUITY_squre
AMT_CREDIT_MAX_OVERDUE_squre
DAYS_ENDDATE_FACT_squre
AMT_CREDIT_SUM_LIMIT_squre
AMT_CREDIT_SUM_DEBT_squre
DAYS_CREDIT_ENDDATE_squre
AMT_CREDIT_SUM_squre
CREDIT_DAY_OVERDUE_squre
CNT_CREDIT_PROLONG_squre
BUREAU_LENGTH_squre
AMT_CREDIT_SUM_OVERDUE_squre
DAYS_CREDIT_UPDATE_squre
AMT_ANNUITY_AMT_CREDIT_MAX_OVERDUE
AMT_ANNUITY_DAYS_ENDDATE_FACT
AMT_ANNUITY_AMT_CREDIT_SUM_LIMIT
AMT_ANNUITY_AMT_CREDIT_SUM_DEBT
AMT_ANNUITY_DAYS_CREDIT_ENDDATE
AMT_ANNUITY_AMT_CREDIT_SUM
AMT_ANNUITY_CREDIT_DAY_OVERDUE
AMT_ANNUITY_CNT_CREDIT_PROLONG
AMT_ANNUITY_BUREAU_LENGTH
AMT_ANNUITY_AMT_CREDIT_SUM_OVERDUE
AMT_ANNUITY_DAYS_CREDIT_UPDATE
AMT_CREDIT_MAX_OVERDUE_DAYS_ENDDATE_FACT
AMT_CREDIT_MAX_OVERDUE_AMT_CREDIT_SUM_LIMIT
AMT_CREDIT_MAX_OVERDUE_AMT_CREDIT_SUM_DEBT
AMT_CREDIT_

In [7]:

print('start')
label_mapping = X_Train.set_index('SK_ID_CURR').TARGET
test_mapping = pd.Series(index=X_Test.SK_ID_CURR, data=1)

#previous application
#10001358
max_features = 1730000
#295
maxlen = 13

sorted_df = X_bu_fe.sort_values(['SK_ID_CURR','DAYS_CREDIT'])
col = 'DAYS_CREDIT'
sorted_df[col] = qt.fit_transform(sorted_df[col].values.reshape(-1,1))
sorted_df['words'] = sorted_df.index.astype(str)
feature = list(sorted_df.columns)
feature.remove('SK_ID_BUREAU')
feature.remove('SK_ID_CURR')
feature.remove('words')
ebd = sorted_df[feature].values
#normalize
print('start normalize')
# nor_ebd = normalize(ebd, norm='max',axis=0)
nor_ebd = ebd
print('get embedding')
embed_size = len(feature)
print('ebd size is {}'.format(embed_size))
embeddings_index = get_embeddings_index(sorted_df,nor_ebd)
print('create document')
train,test = create_document(sorted_df)
print('get embedding Mat')
x_train,x_test,y_train,embedding_matrix,num_words = get_train_ebdMat(train,test,embeddings_index)
model_file = ENV.bureau_rnn.value
report_file = ENV.bureau_report.value
pred_file = ENV.bureau_preds.value
pred_test_file = ENV.bureau_preds_test.value
train_5_folds(model_file,report_file,pred_file,pred_test_file,batch_size=2000,total_epoch=500,patience=35)

start
start normalize
get embedding
ebd size is 122
create document
get embedding Mat
...get ebd mat
num of words: 1716430
(1716430, 122)
!!!!!!!! Begin fold: 0
preparing train/val done!
before evaluating: ../LIB/../../data/rnn/bureau/fold_{}.hdf5
Epoch 1/1
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0
current roc is 0.6101660302523371
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6101660302523371
current roc is 0.6262922971030296
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6262922971030296
current roc is 0.635171354827232
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.

saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6627320825992391
current roc is 0.6611197962995908
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6627320825992391
current roc is 0.6621830272853527
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6627320825992391
current roc is 0.6621426598699577
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6627320825992391
current roc is 0.6627625612622022
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_0.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6627625612622022
current roc is 0.6630493404628954
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.663

saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6635246569603617
current roc is 0.6613608189479545
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6635246569603617
current roc is 0.6608979467836626
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6635246569603617
current roc is 0.6598394090995173
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6635246569603617
current roc is 0.6614960064791594
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6635246569603617
current roc is 0.6621351043179092
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6635246569603617
current roc is 0.6612511065216269
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_0.pkl
previous best roc is 0.6635

saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6057125892129902
current roc is 0.6160188301032818
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6160188301032818
current roc is 0.6267563485205845
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6267563485205845
current roc is 0.6360500802217345
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6360500802217345
current roc is 0.6384424256111546
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to

saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6580788345977949
current roc is 0.6573352617848998
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6580788345977949
current roc is 0.6580929809842465
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6580929809842465
current roc is 0.6561761168778525
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6580929809842465
current roc is 0.657446776280833
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6580929809842465
current roc is 0.6541123296315893
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6580

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6631983649961548
current roc is 0.6615297568202549
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6631983649961548
current roc is 0.6625446768699861
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6631983649961548
current roc is 0.66092881149534
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6631983649961548
current roc is 0.6627378610103474
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6631983649961548
current roc is 0.662951180365706
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6631983649961548
current roc is 0.6629491544763522
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6644221902966678
current roc is 0.6639269274147569
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6644221902966678
current roc is 0.6625298006059426
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6644221902966678
current roc is 0.6610347412954276
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6644221902966678
current roc is 0.6614271123403063
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6644221902966678
current roc is 0.6622287757449252
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6644221902966678
current roc is 0.6626727155158862
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_1.hdf5
saving preds...
saving r

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6660311261890699
current roc is 0.6640318712704125
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6660311261890699
current roc is 0.664077123681106
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6660311261890699
current roc is 0.6637063928971421
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6660311261890699
current roc is 0.6630134899634619
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6660311261890699
current roc is 0.6631406245366409
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc is 0.6660311261890699
current roc is 0.6629180484521104
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_1.pkl
previous best roc 

saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6570108476928129
current roc is 0.6582514955594739
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6582514955594739
current roc is 0.6576566929402906
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6582514955594739
current roc is 0.6588376015804531
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6588376015804531
current roc is 0.6584423950247696
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6588376015804531
current roc is 0.6592303320090523
Epoch 1/1
saving report to.

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6645576112329497
current roc is 0.6620779615210702
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6645576112329497
current roc is 0.664737990705598
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.664737990705598
current roc is 0.6622910698597326
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.664737990705598
current roc is 0.6649122791894161
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6649122791894161
current roc is 0.6651642730109327
Epoch 1/1
saving rep

saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6656163577666202
current roc is 0.6638457903242829
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_2.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6656163577666202
current roc is 0.6657843991733681
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.66441857385921
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6651098567481728
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6633646820245576
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6637619638443445
Epoch 1/1
saving report to... 

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6643480343907232
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6636653540922747
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6635003244790718
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6638722561496566
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6654895786409306
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc is 0.6657843991733681
current roc is 0.6649419962618852
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_2.pkl
previous best roc

Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6533333114899998
current roc is 0.6544572261963661
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6544572261963661
current roc is 0.6530263356656821
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6544572261963661
current roc is 0.6560711137294464
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6560711137294464
current roc is 0.6529063441473412
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6560711137294464
current roc is 0.658522265187343
Epoch 1/1
saving r

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6648381272020026
current roc is 0.6629534718555182
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6648381272020026
current roc is 0.6649240180539807
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6649240180539807
current roc is 0.6618388262423874
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6649240180539807
current roc is 0.6663645071183395
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_3.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6663645071183395
current roc is 0.6663705158254861
Epoch 1/1
saving 

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6708454700355031
current roc is 0.6691961228942467
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6708454700355031
current roc is 0.6700571172733303
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6708454700355031
current roc is 0.6693852593057033
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6708454700355031
current roc is 0.6694615641570665
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6708454700355031
current roc is 0.6686701927178229
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6708454700355031
current roc is 0.6694543128865229
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6714571585715542
current roc is 0.6703266514755235
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6714571585715542
current roc is 0.6703093182532688
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6714571585715542
current roc is 0.6679554125998901
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6714571585715542
current roc is 0.6699002463301365
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6714571585715542
current roc is 0.6703374979439067
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6714571585715542
current roc is 0.6701448397425345
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc

Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6716918311715865
current roc is 0.6701410995192837
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6716918311715865
current roc is 0.6700848421929267
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6716918311715865
current roc is 0.6706735558391966
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6716918311715865
current roc is 0.6703113235429006
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6716918311715865
current roc is 0.67123381048663
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is 0.6716918311715865
current roc is 0.670890202317789
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_3.pkl
previous best roc is

saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6594872243075994
current roc is 0.6594611305723497
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6594872243075994
current roc is 0.6603650503978139
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6603650503978139
current roc is 0.6625743061196451
Epoch 1/1
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6625743061196451
current roc is 0.6617000421289694
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6625743061196451
current roc is 0.663073580478381
Epoch 1/1
saving model to...

saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6687650958885372
current roc is 0.6693724139699802
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6693724139699802
current roc is 0.6696555394054422
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6696555394054422
current roc is 0.6697528914050163
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to... ../LIB/../../data/rnn/bureau/report_fold_4.pkl
previous best roc is 0.6697528914050163
current roc is 0.6698080786334522
Epoch 1/1
saving model to... ../LIB/../../data/rnn/bureau/fold_4.hdf5
saving preds...
saving report to

In [10]:
a = train.text.apply(lambda x: len(x.split(' ')))
a.describe()

count    307511.000000
mean          4.908263
std           4.355940
min           1.000000
25%           1.000000
50%           4.000000
75%           7.000000
max         116.000000
Name: text, dtype: float64

In [None]:
a = train.text.apply(lambda x: len(x.split(' ')))
a.describe()